ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,660 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+ use Vcf;
10
+
11
# Entry point: read the command line, run the intersection, then leave.
my $opts = parse_params();
vcf_isec($opts);

exit(0);
15
+
16
+ #--------------------------------
17
+
18
# Abort the program.
#   error(@msg) .. croaks with the given message
#   error()     .. dies with the usage text
sub error {
    my @msg = @_;
    croak @msg if scalar @msg;

    my @usage = (
        "About: Create intersections, unions, complements on bgzipped and tabix indexed VCF or tab-delimited files.\n",
        " Note that lines from all files can be intermixed together on the output, which can yield\n",
        " unexpected results.\n",
        "Usage: vcf-isec [OPTIONS] file1.vcf file2.vcf ...\n",
        "Options:\n",
        " -a, --apply-filters Ignore lines where FILTER columns is anything else than PASS\n",
        " -C, --chromosomes <list|file> Process the given chromosomes (comma-separated list or one chromosome per line in a file).\n",
        " -c, --complement Output positions present in the first file but missing from the other files.\n",
        " -d, --debug Debugging information\n",
        " -f, --force Continue even if the script complains about differing columns, VCF versions, etc.\n",
        " -o, --one-file-only Print only entries from the left-most file. Without -o, all unique positions will be printed.\n",
        " -n, --nfiles [+-=]<int> Output positions present in this many (=), this many or more (+), or this many or fewer (-) files.\n",
        " -p, --prefix <path> If present, multiple files will be created with all possible isec combinations. (Suitable for Venn Diagram analysis.)\n",
        " -t, --tab <chr:pos:file> Tab-delimited file with indexes of chromosome and position columns. (1-based indexes)\n",
        " -w, --win <int> In repetitive sequences, the same indel can be called at different positions. Consider\n",
        " records this far apart as matching (be it a SNP or an indel).\n",
        " -h, -?, --help This help message.\n",
        "Examples:\n",
        " bgzip file.vcf; tabix -p vcf file.vcf.gz\n",
        " bgzip file.tab; tabix -s 1 -b 2 -e 2 file.tab.gz\n",
        "\n",
    );
    die @usage;
}
48
+
49
+
50
# Parse @ARGV into the options hash used by the rest of the script.
#
# Returns a hash reference with (at least) these keys:
#   args            .. the original command line ($0 and arguments), for the VCF header
#   files           .. list of inputs: plain file names (VCF) or Reader objects (-t)
#   force, apply_filters, report_from_all, split .. boolean switches
#   prefix, chromosomes, complement, debug, win, isec_op, isec_nfiles .. set only when given
#
# Calls error() on unknown parameters, non-existent files, or options whose
# value is missing or malformed. Unless --force was given, every warning is
# turned into a fatal error.
sub parse_params
{
    my $opts = { positions=>0, args=>[$0, @ARGV], force=>0, split=>0, report_from_all=>1, apply_filters=>0 };

    # Take the value that must follow the option $flag; refuse to continue when
    # the command line ends right after the option.
    my $value_for = sub
    {
        my ($flag) = @_;
        if ( !@ARGV ) { error("Missing value for the option $flag. Run -h for help.\n"); }
        return shift(@ARGV);
    };

    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-p' || $arg eq '--prefix' )
        {
            my $prefix = $value_for->($arg);
            $$opts{prefix} = init_outdir($opts,$prefix);
            $$opts{split} = 1;
            next;
        }
        if ( $arg eq '-f' || $arg eq '--force' ) { $$opts{force}=1; next; }
        if ( $arg eq '-a' || $arg eq '--apply-filters' ) { $$opts{apply_filters}=1; next; }
        if ( $arg eq '-C' || $arg eq '--chromosomes' ) { $$opts{chromosomes}=$value_for->($arg); next; }
        if ( $arg eq '-o' || $arg eq '--one-file-only' ) { $$opts{report_from_all}=0; next; }
        if ( $arg eq '-c' || $arg eq '--complement' ) { $$opts{complement}=1; next; }
        if ( $arg eq '-n' || $arg eq '--nfiles' )
        {
            my $nfiles = $value_for->($arg);
            if ( !($nfiles=~/^([\-+=])(\d+)$/) ) { error("Could not parse: [$nfiles]\n"); }
            $$opts{isec_op} = $1;
            $$opts{isec_nfiles} = $2;
            next;
        }
        if ( $arg eq '-d' || $arg eq '--debug' ) { $$opts{debug}=1; next; }
        if ( $arg eq '-w' || $arg eq '--win' )
        {
            my $win = $value_for->($arg);
            # The window is added to positions later on, it has to be a plain number.
            if ( !($win=~/^\d+$/) ) { error("Expected a non-negative integer with $arg, got [$win]\n"); }
            $$opts{win} = $win;
            next;
        }
        if ( $arg eq '-t' || $arg eq '--tab' )
        {
            my $tab = $value_for->($arg);
            # Limit the split to three fields so that a file name containing ':' survives.
            my ($chr,$pos,$file) = split(/:/,$tab,3);
            if ( !defined $file or $file eq '' or !($chr=~/^[1-9]\d*$/) or !($pos=~/^[1-9]\d*$/) )
            {
                error("Could not parse [$tab], expected <chr:pos:file> with 1-based column indexes.\n");
            }
            # Reader works with 0-based column indexes.
            push @{$$opts{files}}, Reader->new(file=>$file,chr=>$chr-1,pos=>$pos-1);
            next;
        }
        if ( -e $arg ) { push @{$$opts{files}}, $arg; next }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter or non-existent file \"$arg\". Run -h for help.\n");
    }
    if ( !exists($$opts{files}) ) { error("What files should be intersected?\n") }

    # Without --force any warning (differing columns, mixed versions, ...) stops the run.
    if ( !$$opts{force} ) { $SIG{__WARN__} = sub { error(@_); } }
    return $opts;
}
92
+
93
# Prepare the output location for the --prefix option and return the prefix
# which should be prepended to the names of the output files.
#
# When the prefix contains a slash, the directory part is created:
#       given prefix    directory   returned prefix
#       ---------------------------------------------
#       out             (none)      out
#       out/            out         out/out
#       a/out/          a/out       a/out/out
#       out/xxx         out         out/xxx
#
# Calls error() when the prefix is empty or the directory cannot be created.
sub init_outdir
{
    my ($opts,$prefix) = @_;
    if ( !defined $prefix or $prefix eq '' ) { error("Expected a non-empty output prefix.\n"); }
    if ( !($prefix=~m{/}) ) { return $prefix; }

    my $dir = '';
    if ( $prefix=~m{^(.*)/[^/]+$}s ) { $dir = $1; }
    elsif ( $prefix=~m{^(.*)/([^/]+)/$}s ) { $dir = $1.'/'.$2; $prefix = $dir.'/'.$2; }
    elsif ( $prefix=~m{([^/]+)/?$} ) { $dir = $1; $prefix = $dir.'/'.$1; }

    if ( $dir ne '' && !-d $dir )
    {
        # Create the directory without going through the shell, so that names with
        # spaces or shell metacharacters work. Problems are collected instead of
        # being printed; the -d test below decides.
        require File::Path;
        File::Path::make_path($dir, { error => \my $problems });
        if ( !-d $dir ) { error("Could not create the directory \"$dir\"\n"); }
    }
    return $prefix;
}
113
+
114
# Turn the value of the --chromosomes option into a list of chromosome names.
#
# $fname is either the name of an existing file with one chromosome per line,
# or a comma-separated list of names. Empty names (blank lines, doubled commas)
# are skipped and DOS line endings are removed, so that an empty or garbled
# region is never handed on.
#
# Returns the list of names; calls error() when the file cannot be opened.
sub read_chrom_list
{
    my ($fname) = @_;
    my @chroms;
    if ( -e $fname )
    {
        open(my $chrms,'<',$fname) or error("$fname: $!");
        while (my $line=<$chrms>)
        {
            $line =~ s/\r?\n\z//;
            if ( $line eq '' ) { next; }
            push @chroms, $line;
        }
        close($chrms);
    }
    else
    {
        @chroms = grep { $_ ne '' } split(/,/,$fname);
    }
    return (@chroms);
}
134
+
135
# Check that all VCF inputs have the same columns (samples) in the same order.
#
# $vcfs is the list of opened inputs. Only entries with the key "has_column"
# (VCF files) are compared, both as the left and as the right side of a
# comparison; tab-delimited readers are skipped. On the first difference a
# warning is issued and the check stops. (The caller decides whether warnings
# are fatal.)
sub check_columns
{
    my ($opts,$vcfs) = @_;

    for (my $ivcf=0; $ivcf<@$vcfs; $ivcf++)
    {
        my $ifile = $$vcfs[$ivcf];
        if ( !exists($$ifile{has_column}) ) { next; }

        for (my $jvcf=0; $jvcf<$ivcf; $jvcf++)
        {
            # The other file must be a VCF as well, a tab-delimited file has no columns to compare.
            my $jfile = $$vcfs[$jvcf];
            if ( !exists($$jfile{has_column}) ) { next; }

            my @icols = @{$$ifile{columns}};
            my @jcols = @{$$jfile{columns}};

            if ( scalar @icols != scalar @jcols )
            {
                # The first nine columns are the fixed VCF columns, the rest are the samples.
                warn("Warning: The number of columns is different:\n",
                    scalar @icols - 9, ": ",
                    join(',',@icols[9..$#icols]),"\n",
                    scalar @jcols - 9, ": ",
                    join(',',@jcols[9..$#jcols]),"\n",
                    );
                return;
            }

            # Look for a column which is missing in one of the files or sits at a
            # different index. The keys are sorted to make the reported example stable.
            my $mismatch;
            for my $cname (sort keys %{$$ifile{has_column}})
            {
                if ( !exists($$jfile{has_column}{$cname}) or $$ifile{has_column}{$cname}!=$$jfile{has_column}{$cname} )
                {
                    $mismatch = $cname;
                    last;
                }
            }
            if ( !defined $mismatch )
            {
                for my $cname (sort keys %{$$jfile{has_column}})
                {
                    if ( !exists($$ifile{has_column}{$cname}) ) { $mismatch = $cname; last; }
                }
            }
            if ( defined $mismatch )
            {
                warn("Warning: The column names do not match (e.g. $mismatch):\n",
                    join(',',@icols[9..$#icols]),"\n",
                    join(',',@jcols[9..$#jcols]),"\n",
                    );
                return;
            }
        }
    }
}
190
+
191
# The main routine: open all inputs, print the output header (unless the
# output is split into files by --prefix) and process the inputs chromosome by
# chromosome.
#
# $opts is the hash returned by parse_params(). The entries of $$opts{files}
# are either file names, opened as Vcf objects, or ready-made Reader objects
# for tab-delimited files.
sub vcf_isec
{
    my ($opts) = @_;

    $$opts{match} = {};

    # Open the VCF files and initialize the list of chromosomes
    my @vcfs;
    my (@chroms,%has_chrom);
    if ( exists($$opts{chromosomes}) ) { @chroms = read_chrom_list($$opts{chromosomes}); }

    my $source = '';
    my $vcf_version;
    my $vcf_version_warned;
    for (my $ifile=0; $ifile<@{$$opts{files}}; $ifile++)
    {
        my $file = $$opts{files}[$ifile];
        my $is_fname = ref($file) eq '' ? 1 : 0;
        my $vcf = $is_fname ? Vcf->new(file=>$file) : $file;
        $vcf->parse_header();
        $vcf->close();
        $$vcf{nread} = 0;
        push @vcfs, $vcf;

        # Check if the VCF versions are identical
        if ( $is_fname )
        {
            if ( !defined $vcf_version ) { $vcf_version = $$vcf{version} }
            if ( $vcf_version ne $$vcf{version} && !$vcf_version_warned )
            {
                warn("Warning: Mixed VCF format versions, use vcf-convert to unify.\n");
                $vcf_version_warned = 1;
            }
        }

        # Update the list of known chromosomes
        if ( !exists($$opts{chromosomes}) )
        {
            my $chrms = $vcf->get_chromosomes();
            for my $chr (@$chrms)
            {
                if ( exists($has_chrom{$chr}) ) { next; }
                $has_chrom{$chr} = 1;
                push @chroms, $chr;
            }
        }

        if ( $ifile )
        {
            # To get the missing fields filled by the default values. The first input
            # collects the header lines only when it is able to, a tab-delimited
            # Reader has no add_header_line method.
            if ( $vcfs[0]->can('add_header_line') )
            {
                for my $hline (@{ $$vcf{header_lines} || [] })
                {
                    $vcfs[0]->add_header_line($hline,silent=>1);
                }
            }
            $source .= ',';
        }
        # Record the file name, also for Reader objects (which would otherwise
        # be stringified as Reader=HASH(..)).
        $source .= $ifile . ':' . ($is_fname ? $file : $$file{file});
        $$vcf{vcf_isec_ID} = $ifile;
    }
    check_columns($opts,\@vcfs);

    if ( !$vcfs[0]{delim} && !$$opts{split} )
    {
        $vcfs[0]->add_header_line({key=>'source',value=>join(' ',@{$$opts{args}})},append=>'timestamp');
        $vcfs[0]->add_header_line({key=>'sourceFiles',value=>$source},append=>'timestamp');
        $vcfs[0]->add_header_line({key=>'INFO',ID=>'SF',Number=>-1,Type=>'String',Description=>'Source File (index to sourceFiles, f when filtered)'},silent=>1);
        print $vcfs[0]->format_header();
    }

    # Go through all the files simultaneously and get the stats.
    for my $chr (@chroms)
    {
        # Open files
        for my $vcf (@vcfs)
        {
            delete($$vcf{last_line});
            $vcf->open(region=>$chr,parse_header=>1);
            delete($$vcf{eof});
        }
        do_chrm_isec($opts,\@vcfs);
    }

    for my $vcf (@vcfs)
    {
        if ( !$$vcf{nread} ) { warn("Warning: Read 0 lines from $$vcf{file}, the tabix index may be broken.\n"); }
    }
}
277
+
278
+
279
# Intersect the records of the region which is currently opened in all inputs.
#
# Records are taken group by group (see read_next_group). With --prefix each
# group is passed on to write_line(); otherwise the group is printed to STDOUT
# when the number of files it occurs in satisfies --complement / --nfiles.
# Printed VCF records get the INFO tag SF listing the source files.
sub do_chrm_isec {
    my ($opts, $vcfs) = @_;

    my $debug           = $opts->{debug}           ? 1 : 0;
    my $win             = $opts->{win} || 0;
    my $complement      = $opts->{complement}      ? 1 : 0;
    my $report_from_all = $opts->{report_from_all} ? 1 : 0;
    my $split           = $opts->{split};

    # By default a position must be present in all of the files.
    my ($isec_op, $isec_nfiles) = ('=', scalar @{ $opts->{files} });
    if (exists $opts->{isec_nfiles}) {
        $isec_nfiles = $opts->{isec_nfiles};
        $isec_op     = $opts->{isec_op};
    }

    GROUP:
    while (my $grp = read_next_group($opts, $vcfs, $win)) {
        last GROUP unless scalar @$grp;

        if ($debug) {
            print "Group:\n";
            print "$rec->{chr}\t$rec->{pos}\t$rec->{vcf}{file}\n" for my $rec (@$grp);
            print "\n";
        }

        # Index the group by file name and by position / source ID.
        my (%by_file, %by_pos);
        for my $rec (@$grp) {
            my $vcf = $rec->{vcf};
            push @{ $by_file{ $vcf->{file} } }, $rec;

            my $tag = $vcf->{vcf_isec_ID};
            unless ($vcf->{delim}) {
                # This is a VCF, records which did not pass the filters are marked by 'f'.
                my $fltr = $rec->{line}[6];
                if (!$split && $fltr ne $vcf->{filter_passed} && $fltr ne $vcf->{defaults}{default}) {
                    $tag .= 'f';
                }
            }
            $by_pos{ $rec->{pos} }{$tag} = $rec;
        }

        if ($split) {
            write_line($opts, $grp, \%by_pos);
            next GROUP;
        }

        # Decide whether the group qualifies for the output.
        my $nmatches = scalar keys %by_file;
        my $skip     = 0;
        if ($complement) {
            $skip = 1 if !exists $by_file{ $vcfs->[0]{file} } || $nmatches != 1;
        }
        elsif ($isec_op eq '=') { $skip = 1 if $isec_nfiles != $nmatches; }
        elsif ($isec_op eq '+') { $skip = 1 if $isec_nfiles > $nmatches; }
        elsif ($isec_op eq '-') { $skip = 1 if $isec_nfiles < $nmatches; }
        next GROUP if $skip;

        # The hits are sorted by position in @$grp
        my ($prev_chr, $prev_pos, $prev_id);
        for my $rec (@$grp) {
            my $id = $rec->{vcf}{vcf_isec_ID};
            next if !$report_from_all && $id != 0;
            next if defined $prev_chr
                && $prev_chr eq $rec->{chr}
                && $prev_pos eq $rec->{pos}
                && $prev_id ne $id;

            if ($rec->{vcf}{delim}) {
                # Tab-delimited file, the line is printed as it was read.
                print $rec->{line};
            }
            else {
                # VCF file: put the SF annotation in place of an empty INFO ('.') or
                # of an existing SF tag, otherwise append it.
                my @tags = split(/;/, $rec->{line}[7]);
                my $slot = scalar @tags;
                for my $idx (0 .. $#tags) {
                    next unless $tags[$idx] eq '.' or $tags[$idx] =~ /^SF=/;
                    $slot = $idx;
                    last;
                }
                $tags[$slot] = 'SF=' . join(',', sort keys %{ $by_pos{ $rec->{pos} } });
                $rec->{line}[7] = join(';', @tags);
                print join("\t", @{ $rec->{line} }) . "\n";
            }

            ($prev_chr, $prev_pos, $prev_id) = ($rec->{chr}, $rec->{pos}, $id);
        }
    }
}
375
+
376
# Write one group of records into the per-combination output files (--prefix).
#
#   $grp  .. the records of the group, as returned by read_next_group()
#   $srcs .. hash: position -> source ID -> record, built by do_chrm_isec()
#
# For every combination of source IDs seen for the first time a bgzip-ed output
# file "<prefix><id>_<id>...vcf.gz" is opened, the combination is announced on
# STDOUT and, for VCF input, the header is written. Then one record is written
# for each position of the group. Calls error() when an output cannot be opened.
sub write_line
{
    my ($opts,$grp,$srcs) = @_;

    # The header of the output files is taken from the file of the first record.
    my $vcf = $$grp[0]{vcf};

    for my $hash (values %$srcs)
    {
        my @ids = sort keys %$hash;
        my $src = join('_',@ids);
        if ( exists($$opts{out_files}{$src}) ) { next; }

        # NOTE(review): the command still goes through the shell because of the
        # redirection, a prefix with shell metacharacters will not work.
        my $cmd = "bgzip -c > $$opts{prefix}$src.vcf.gz";
        open($$opts{out_files}{$src},'|-',$cmd) or error("| $cmd: $!");

        # The names of the contributing files, in the order of their source IDs.
        my @fnames = map { $$hash{$_}{vcf}{file} } @ids;

        print "Using file '$$opts{prefix}$src.vcf.gz' for records present in:\n";
        for my $fname (@fnames)
        {
            print "\t$fname\n";
        }
        if ( !$$vcf{delim} )
        {
            $vcf->add_header_line({key=>'source',value=>join(' ',@{$$opts{args}})},append=>'timestamp');
            $vcf->add_header_line({key=>'sourceFiles',value=>join(',',@fnames)},append=>'timestamp');
            print {$$opts{out_files}{$src}} $vcf->format_header();
        }
    }

    # Positions are written in increasing order; the record of the lowest source ID represents the position.
    for my $pos (sort { $a <=> $b } keys %$srcs)
    {
        my @ids = sort keys %{$$srcs{$pos}};
        my $rec = $$srcs{$pos}{$ids[0]};
        my $src = join('_',@ids);
        my $fh = $$opts{out_files}{$src};

        # The format of the line depends on the file the record itself comes from:
        # VCF lines are stored split into columns, tab-delimited lines as read.
        if ( !$$rec{vcf}{delim} )
        {
            print $fh join("\t",@{$$rec{line}}) . "\n";
        }
        else
        {
            print $fh $$rec{line};
        }
        if ( $$opts{one_per_group} ) { last; }
    }
}
423
+
424
# Collect the next group of records: starting from the lowest buffered
# position across all inputs, keep taking records for as long as each one lies
# within $win of the previously taken one.
#
# Returns a reference to the list of records, empty when nothing is left.
sub read_next_group {
    my ($opts, $vcfs, $win) = @_;

    my @group;
    my ($last_seen, $anchor);

    for (;;) {
        # No more lines in the buffers?
        my $source = get_min_position($opts, $vcfs) or last;
        my $head = $source->{buf}[0];

        # The same record was offered twice in a row, i.e. nothing new has been added.
        last if $last_seen && $last_seen eq $head;
        $last_seen = $head;

        # Outside of the window: leave the record in the buffer.
        next if $anchor && $anchor + $win < $head->{pos};

        # Inside of the window, which then moves on to encompass complete clusters.
        push @group, shift @{ $source->{buf} };
        $anchor = $group[-1]{pos};
    }
    return \@group;
}
455
+
456
# Return the input (VCF or Reader object) whose first buffered record has the
# lowest position across all opened files, or undef when all the buffers stay
# empty. If there is no line in the file's buffer, an attempt is made to read
# the next line. With equal positions the file listed first wins.
sub get_min_position
{
    my ($opts,$vcfs) = @_;

    my ($min_pos,$min_vcf);
    for my $vcf (@$vcfs)
    {
        # Check if there is a line in the buffer, if not, read. If still empty, the file reached eof
        if ( !$$vcf{buf} or !scalar @{$$vcf{buf}} ) { read_line($opts,$vcf); }
        if ( !$$vcf{buf} or !scalar @{$$vcf{buf}} ) { next; }

        my $pos = $$vcf{buf}[0]{pos};

        # Take this file if it is the first one with data or if it has a lower
        # position. Whether a minimum is known already is decided by $min_vcf, not
        # by the truth of $min_pos, because the position 0 is a valid minimum.
        if ( !defined $min_vcf or $min_pos>$pos )
        {
            $min_pos = $pos;
            $min_vcf = $vcf;
        }
    }
    return $min_vcf;
}
490
+
491
# Fetch one more line from the input (a VCF or a Reader object) and append it,
# parsed, to the buffer $$vcf{buf} as a hash with the keys chr, pos, ref, alt,
# line and vcf. VCF lines are stored split into columns, lines of
# tab-delimited files as they were read. With --apply-filters, VCF records
# whose FILTER column is not PASS are skipped. At the end of the input the
# flag $$vcf{eof} is set and nothing is added.
sub read_line {
    my ($opts, $vcf) = @_;

    return if $vcf->{eof};

    my $line = $vcf->next_line();
    unless ($line) {
        $vcf->{eof} = 1;
        return;
    }

    $vcf->{nread}++;

    my %rec = (vcf => $vcf);
    if ($vcf->{delim}) {
        # Reader object: only the chromosome and the position are known.
        my @fields = split($vcf->{delim}, $line);
        @rec{qw(chr pos ref alt)} = ($fields[ $vcf->{chr} ], $fields[ $vcf->{pos} ], '', '');
        $rec{line} = $line;
    }
    else {
        # We are reading VCF, not a tab-delimited file. Apply filters when requested.
        my @fields = split(/\t/, $line);
        while ($opts->{apply_filters} && $fields[6] ne 'PASS') {
            $line = $vcf->next_line();
            unless ($line) {
                $vcf->{eof} = 1;
                return;
            }
            @fields = split(/\t/, $line);
        }
        chomp($fields[-1]);
        @rec{qw(chr pos ref alt)} = @fields[0, 1, 3, 4];
        $rec{line} = \@fields;
    }

    # Complain when the record repeats the position of the last buffered one.
    my $buffered = $vcf->{buf};
    if ($buffered && @$buffered) {
        my $prev = $buffered->[-1];
        warn("Position $rec{chr}:$rec{pos} appeared twice in $vcf->{file}\n") if $prev->{pos} == $rec{pos};
    }

    push @{ $vcf->{buf} }, \%rec;
    return;
}
548
+
549
+
550
+ #---------------------------------
551
+
552
# A minimal reader of tab-delimited (optionally bgzipped and tabix indexed)
# files. It offers the subset of the Vcf interface used by this script:
# new, open, close, next_line, parse_header, format_header and get_chromosomes.
package Reader;

use strict;
use warnings;
use Carp;

# Create a new reader. Arguments (key=>value pairs):
#   file  .. the file to read
#   cmd   .. alternatively, a command for the two-argument open (e.g. "zcat file |")
#   fh    .. alternatively, an already opened file handle
#   delim .. the column delimiter, tab by default
#   chr   .. the index of the chromosome column (indexed from 0), 0 by default
#   pos   .. the index of the position column (indexed from 0), 1 by default
# Throws when none of file, cmd or fh is given.
sub new
{
    my ($class,@args) = @_;
    my $self = @args ? {@args} : {};
    bless $self, ref($class) || $class;
    if ( $$self{cmd} )
    {
        $$self{file} = '';
        # The two-argument form is intended here, the command carries the pipe sign.
        CORE::open($$self{fh},$$self{cmd}) or $self->throw("$$self{cmd}: $!");
    }
    if ( !$$self{file} && !$$self{fh} ) { $self->throw("Expected the file or fh option.\n"); }
    if ( !$$self{delim} ) { $$self{delim} = qr/\t/; }
    if ( !$$self{chr} ) { $$self{chr} = 0; }
    # The index 0 (the first column) is a valid choice, only a missing value is replaced by the default.
    if ( !defined $$self{pos} ) { $$self{pos} = 1; }
    return $self;
}

# Die with the message and a stack trace.
sub throw
{
    my ($self,@msg) = @_;
    confess @msg;
}

# (Re)open the file. Arguments (key=>value pairs):
#   region .. for files ending with .gz, read only this region using tabix
# Files ending with .gz are read through tabix or gunzip, other files are
# opened directly (the region is not used for them). The external programs are
# run without the shell, so that unusual file or sequence names are passed
# intact.
sub open
{
    my ($self,%args) = @_;
    if ( !$$self{file} ) { $self->throw(qq[The parameter "file" not set.\n]); }
    $self->close();
    if ( $$self{file}=~/\.gz$/i )
    {
        if ( exists($args{region}) && defined($args{region}) )
        {
            CORE::open($$self{fh},'-|','tabix',$$self{file},$args{region}) or $self->throw("tabix $$self{file}: $!");
        }
        else
        {
            CORE::open($$self{fh},'-|','gunzip','-c',$$self{file}) or $self->throw("gunzip -c $$self{file} |: $!");
        }
    }
    else
    {
        CORE::open($$self{fh},'<',$$self{file}) or $self->throw("$$self{file}: $!");
    }
}

# Close the file handle, if opened, and forget the lines which were read ahead.
sub close
{
    my ($self) = @_;
    if ( !$$self{fh} ) { return; }
    CORE::close($$self{fh});
    delete($$self{fh});
    delete($$self{buffer});
}

# Return a line to the reader, it will be served by the next call of next_line.
sub _unread_line
{
    my ($self,$line) = @_;
    unshift @{$$self{buffer}}, $line;
    return;
}

# Return the next line, including the line ending, or undef at the end of the file.
sub next_line
{
    my ($self) = @_;
    if ( $$self{buffer} && @{$$self{buffer}} ) { return shift(@{$$self{buffer}}); }
    return readline($$self{fh});
}

# Open the file and store the leading lines starting with '#' as the header.
# The first data line is left for next_line.
sub parse_header
{
    my ($self) = @_;
    $self->open();
    while (1)
    {
        my $line = $self->next_line();
        # Only undef means the end of the file, a last line such as "0" is data.
        if ( !defined $line ) { last; }
        if ( $line=~/^#/ ) { push @{$$self{header}},$line; next; }
        $self->_unread_line($line);
        last;
    }
}

# Return the header lines collected by parse_header as one string, empty if there are none.
sub format_header
{
    my ($self) = @_;
    if ( $$self{header} ) { return join('',@{$$self{header}}); }
    return '';
}

# Return the list (array reference) of the sequence names known to the tabix
# index of the file. Throws when tabix cannot be run or fails.
sub get_chromosomes
{
    my ($self) = @_;
    if ( !$$self{file} ) { $self->throw(qq[The parameter "file" not set.\n]); }
    my $failed = qq[The command "tabix -l $$self{file}" exited with an error. Is the file tabix indexed?\n];
    CORE::open(my $fh,'-|','tabix','-l',$$self{file}) or $self->throw($failed);
    my @out = <$fh>;
    # Closing the pipe waits for tabix and sets $? to its exit status.
    CORE::close($fh);
    if ( $? )
    {
        $self->throw($failed);
    }
    chomp(@out);
    return \@out;
}
660
+