ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,577 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+ use Vcf;
10
+
11
# Entry point: parse the command line, merge the listed VCF files and stop here,
# before execution would reach the subroutine definitions below.
merge_vcf_files(parse_params());

exit;
15
+
16
+ #--------------------------------
17
+
18
# Terminates the program.
#   With arguments:    croaks with the arguments concatenated into one message.
#   Without arguments: dies with the usage text.
sub error
{
    my @message = @_;
    croak join('',@message) if @message;

    my @usage = (
        "About: Merge the bgzipped and tabix indexed VCF files. (E.g. bgzip file.vcf; tabix -p vcf file.vcf.gz)\n",
        "Usage: merge-vcf [OPTIONS] file1.vcf file2.vcf.gz ... > out.vcf\n",
        "Options:\n",
        " -c, --chromosomes <list|file> Same as -r, left for backward compatibility. Please do not use as it will be dropped in the future.\n",
        " -d, --remove-duplicates If there should be two consecutive rows with the same chr:pos, print only the first one.\n",
        " -H, --vcf-header <file> Use the VCF header\n",
        " -h, -?, --help This help message.\n",
        " -r, --regions <list|file> Do only the given regions (comma-separated list or one region per line in a file).\n",
        " -s, --silent Try to be a bit more silent, no warnings about duplicate lines.\n",
        "\n",
    );
    die @usage;
}
34
+
35
+
36
# Parses the command line; @ARGV is consumed in the process.
# Returns a hash reference with the keys
#   args         .. [$0, the original @ARGV]
#   files        .. array ref with the existing files given on the command line, in order
#   rm_dups      .. 1 when -d was given
#   silent_dups  .. 1 when -s was given
#   vcf_header   .. the value of -H
#   regions_list .. the value of -r (or of the legacy -c)
# Calls error() on an unknown argument or non-existent file, on an option with
# a missing value, on -h, and when no input file was given.
sub parse_params
{
    my $opts = { args=>[$0, @ARGV] };

    # Options which take a value, mapped to the key the value is stored under.
    my %takes_value =
    (
        '-H' => 'vcf_header',   '--vcf-header'  => 'vcf_header',
        '-c' => 'regions_list', '--chromosomes' => 'regions_list',
        '-r' => 'regions_list', '--regions'     => 'regions_list',
    );

    # Test for definedness rather than truth, otherwise an argument such as "0"
    # would silently end the parsing and the remaining arguments would be ignored.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-d' || $arg eq '--remove-duplicates' ) { $$opts{rm_dups}=1; next; }
        if ( $arg eq '-s' || $arg eq '--silent' ) { $$opts{silent_dups}=1; next; }
        if ( exists($takes_value{$arg}) )
        {
            # Without this check the option would be stored as undef and only fail much later.
            if ( !@ARGV ) { error("Missing value for the option \"$arg\". Run -? for help.\n"); }
            $$opts{$takes_value{$arg}} = shift(@ARGV);
            next;
        }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        if ( -e $arg ) { push @{$$opts{files}},$arg; next; }
        error("Unknown parameter or non-existent file \"$arg\". Run -? for help.\n");
    }
    if ( !exists($$opts{files}) ) { error() }
    return $opts;
}
53
+
54
+
55
# Returns the common prefix of the files.
#   $files .. array ref of file names
# The paths are split on runs of '/' and compared component by component. The
# shared leading components are returned joined by '/+', i.e. as a fragment of
# a regular expression (init_cols interpolates it into s{^/*$prefix/*}{}).
# Each component is therefore escaped, so that characters such as '.', '+' or
# '(' in a directory name match literally. Returns an empty string when
# nothing is shared or no files were given.
sub common_prefix
{
    my ($files) = @_;
    my @paths;
    my $len = -1;
    for my $file (@$files)
    {
        my @path = split(m{/+},$file);
        # Only as many components as the shortest path has can be shared
        if ( $len<0 || $len>scalar @path ) { $len=scalar @path; }
        push @paths, \@path;
    }
    my @common;
    for (my $i=0; $i<$len; $i++)
    {
        my $identical=1;
        for (my $ifile=1; $ifile<scalar @paths; $ifile++)
        {
            if ( $paths[$ifile]->[$i] ne $paths[0]->[$i] ) { $identical=0; last; }
        }
        if ( !$identical ) { last; }
        # Compare the raw components above, escape only what is returned
        push @common, quotemeta($paths[0]->[$i]);
    }
    return join('/+',@common);
}
80
+
81
+
82
# Returns the list of regions requested by the user.
#   $opts .. hash ref; only the key regions_list is read
# When $$opts{regions_list} names an existing file, the file is read with one
# region per line. Otherwise the value is taken as a comma-separated list.
# Leading and trailing whitespace (including the CR of a CRLF line ending) is
# removed from lines read from the file, and empty items are skipped in both
# cases, so that no empty region is handed on. Returns an empty list when
# regions_list is not set.
sub read_region_list
{
    my ($opts) = @_;

    my @regions = ();
    if ( exists($$opts{regions_list}) )
    {
        if ( -e $$opts{regions_list} )
        {
            open(my $rgs,'<',$$opts{regions_list}) or error("$$opts{regions_list}: $!");
            while (my $line=<$rgs>)
            {
                # chomp alone would leave a trailing "\r" and keep blank lines
                $line =~ s/^\s+//;
                $line =~ s/\s+$//;
                if ( $line eq '' ) { next; }
                push @regions, $line;
            }
            close($rgs);
        }
        else
        {
            # "1,,2" or a leading comma must not produce an empty region
            @regions = grep { length($_) } split(/,/,$$opts{regions_list});
        }
    }
    return (@regions);
}
106
+
107
# Warns on STDERR when the header of a VCF older than version 4.1 defines any
# of the INFO tags PL, GL, AC, AF with a Number other than '.'.
#   $vcf .. the keys version and header->INFO-><tag>->Number are read
# Does nothing when the environment variable DONT_FIX_VCF40_AG_TAGS exists.
# Before printing the warning the variable is set to 1, so later calls return
# early.
sub check_AGtags_definition
{
    my ($vcf) = @_;

    return if $$vcf{version} >= 4.1;

    # Whatever is the value set to, the user takes the responsibility for the merging strategy used
    return if exists($ENV{DONT_FIX_VCF40_AG_TAGS});

    my @affected;
    for my $tag (qw(PL GL AC AF))
    {
        next unless exists($$vcf{header}{INFO}{$tag});
        next if $$vcf{header}{INFO}{$tag}{Number} eq '.';
        push @affected, $tag;
    }
    return unless @affected;

    $ENV{DONT_FIX_VCF40_AG_TAGS} = 1;
    my $tags = join(',',@affected);
    print STDERR
        "Warning: The $tags tag(s) will not be merged correctly for multiallelic sites.\n",
        " To be handled correctly, please redefine with Number=. or set the environment\n",
        " variable DONT_FIX_VCF40_AG_TAGS=0.\n";
}
130
+
131
# Reads the headers of all input files and prepares the merge.
#   $opts    .. hash ref; reads files, has_col_names and regions_list
#   $vcf_out .. the output Vcf object; its columns are read when has_col_names is set
# Stores into $opts:
#   vcfs    .. array ref of Vcf objects, one per input file (headers parsed, files closed).
#              Each gets {qual_weight} (its share of the total column count) and
#              {__col_names} (the output names of its columns from index 9 on)
#   cols    .. array ref of all output column names, in file order
#   regions .. array ref of regions: the user's list, or else every chromosome
#              reported by the input files, in order of first appearance
# Calls error() when a file has no column header or when the number of columns
# in the user supplied header does not match the input files.
sub init_cols
{
    my ($opts,$vcf_out) = @_;

    my $prefix;
    my @regions = read_region_list($opts);
    my @vcfs;
    my @cols;
    my %has_chrom;
    my %col_names;
    my $icol = 9;           # index of the next column to take from the user supplied header
    my $ncols_total = 0;

    if ( !$$opts{has_col_names} ) { $prefix = common_prefix($$opts{files}); }

    # Go through all files and read header, obtain list of chromosomes. The file names will be used for columns, unless
    # they were read from the header.
    for my $file (@{$$opts{files}})
    {
        my $vcf = Vcf->new(file=>$file);
        $vcf->parse_header();
        check_AGtags_definition($vcf);
        $vcf->close();

        # This must come before the first use of @{$$vcf{columns}}: dereferencing the
        # missing array under strict would die with a cryptic message instead of this one.
        if ( !exists($$vcf{columns}) ) { error("No header present? $file\n"); }

        push @vcfs, $vcf;

        # Precompute the weighting factor for the QUAL column
        my $ncols = scalar @{$$vcf{columns}} - 9;
        if ( $ncols<=0 ) { $ncols = 1; }
        $$vcf{qual_weight} = 1.0*$ncols;
        $ncols_total += $ncols;

        # Update the list of known chromosomes
        if ( !exists($$opts{regions_list}) )
        {
            my $chrms = $vcf->get_chromosomes();
            for my $chr (@$chrms)
            {
                if ( exists($has_chrom{$chr}) ) { next; }
                $has_chrom{$chr} = 1;
                push @regions, $chr;
            }
        }

        my $col_prefix = '';
        if ( !$$opts{has_col_names} )
        {
            # Make the column names nice - strip common prefix and the suffix .vcf.gz
            $col_prefix = $file;
            $col_prefix =~ s{^/*$prefix/*}{};
            $col_prefix =~ s/\.gz$//i;
            $col_prefix =~ s/\.vcf$//i;
            $col_prefix .= '_';
        }

        # Create good names for the columns in the merged vcf file
        my @vcf_cols = @{$$vcf{columns}};
        $$vcf{__col_names} = [];
        for my $col (@vcf_cols[9..$#vcf_cols])
        {
            my $col_name = $col;
            if ( $$opts{has_col_names} )
            {
                # The names come from the user supplied header, in order
                if ( $icol >= @{$$vcf_out{columns}} ) { error("Fewer columns in the header than in the VCF files total.\n"); }
                $col_name = $$vcf_out{columns}[$icol];
                $icol++;

                if ( exists($col_names{$col_name}) ) { error("The column names not unique in the header: $col_name\n"); }
            }
            else
            {
                # Keep the original name unless it is taken; then prepend the file-derived prefix
                if ( exists($col_names{$col_name}) ) { $col_name = $col_prefix.$col; }
                if ( exists($col_names{$col_name}) ) { warn("FIXME: the column name [$col_name] not unique.\n"); }
            }
            warn("Using column name '$col_name' for $file:$col\n");
            $col_names{$col_name} = 1;

            push @cols, $col_name;
            push @{$$vcf{__col_names}}, $col_name;
        }
    }

    if ( $$opts{has_col_names} && $icol!=@{$$vcf_out{columns}} ) { error("More columns in the header than in the VCF files total.\n"); }

    # QUAL weighting: turn the per-file column counts into fractions of the total
    for my $vcf (@vcfs)
    {
        $$vcf{qual_weight} /= $ncols_total;
    }

    $$opts{vcfs} = \@vcfs;
    $$opts{cols} = \@cols;
    $$opts{regions} = \@regions;
}
226
+
227
+
228
# Merges the input VCF files and prints the result to STDOUT.
#   $opts .. hash ref as returned by parse_params(); init_cols() adds vcfs, cols and regions
# The header is printed first. Then, region by region, all files are read in
# parallel: at every step the lowest buffered position is taken
# (get_min_position) and one output line is built from all files which have a
# record at that position. Files without a record there get the default GT
# for all their columns.
# The order of ALT (in files without samples), FILTER and FORMAT values is
# sorted, so that repeated runs on the same input give identical output
# regardless of perl's hash ordering.
sub merge_vcf_files
{
    my ($opts) = @_;

    # Create output VCF
    my $vcf_out;
    if ( $$opts{vcf_header} )
    {
        $vcf_out = Vcf->new(file=>$$opts{vcf_header});
        $vcf_out->parse_header();
        if ( $$vcf_out{columns} && @{$$vcf_out{columns}} ) { $$opts{has_col_names}=1; }
    }
    else
    {
        $vcf_out = Vcf->new();
    }

    init_cols($opts,$vcf_out);
    my @regions = @{$$opts{regions}};
    my @cols = @{$$opts{cols}};
    my @vcfs = @{$$opts{vcfs}};

    # Get the header of the output VCF ready
    $vcf_out->add_columns(@cols);
    if ( !$$vcf_out{has_header} )
    {
        for my $vcf (@vcfs)
        {
            # To get the missing fields filled by the default values
            for my $hline (@{$$vcf{header_lines}})
            {
                if ( $$hline{key} eq 'fileformat' ) { next; }
                $vcf_out->add_header_line($hline,silent=>1);
            }
        }
    }

    # List source files as "index:file" pairs; the SF INFO tag refers to these indexes
    my $source = join(',', map { "$_:$vcfs[$_]{file}" } 0..$#vcfs);
    $vcf_out->add_header_line({key=>'source',value=>join(' ',@{$$opts{args}})},append=>'timestamp');
    $vcf_out->add_header_line({key=>'sourceFiles',value=>$source},append=>'timestamp');
    $vcf_out->add_header_line({key=>'INFO',ID=>'SF',Number=>-1,Type=>'String',Description=>'Source File (index to sourceFiles, f when filtered)'});

    my $have_samples = @{$$vcf_out{columns}}>9 ? 1 : 0;

    $vcf_out->recalc_ac_an($have_samples ? 2 : 0);
    $vcf_out->add_header_line({key=>'INFO',ID=>'AC',Number=>-1,Type=>'Integer',Description=>'Allele count in genotypes'});
    $vcf_out->add_header_line({key=>'INFO',ID=>'AN',Number=>1,Type=>'Integer',Description=>'Total number of alleles in called genotypes'});
    print $vcf_out->format_header();

    # Go through all VCF files simultaneously and output each line, one region at a time.
    for my $region (@regions)
    {
        # Open files
        for my $vcf (@vcfs)
        {
            # Forget the record buffered for the previous region, then buffer the first one of this region
            delete($$vcf{last_line});
            $vcf->open(region=>$region,parse_header=>1);
            advance_position($vcf);
        }

        while (1)
        {
            my $pos = get_min_position(\@vcfs);
            if ( !defined $pos ) { last; }

            my %out;
            $out{POS} = $pos;
            $out{ID} = '.';
            $out{ALT} = [];
            $out{FORMAT} = [];
            my %format;
            my %info;
            my @src_files;
            my %filters;
            my (@quals,@qual_weights,$qual_weights_sum,%ac,$an);

            my %ref_alt_map = ();
            # Find out the REFs and ALTs: in VCFv4.0, the REFs can differ and ALTs must be converted
            for my $vcf (@vcfs)
            {
                my $line = $$vcf{last_line};
                if ( !$line or $pos ne $$line{POS} ) { next; }
                if ( !exists($out{CHROM}) ) { $out{CHROM} = $$line{CHROM}; }
                my $ref = $$line{REF};
                for my $alt (@{$$line{ALT}}) { $ref_alt_map{$ref}{$alt}=$alt; }
            }
            # Do the REF,ALT conversion only when necessary
            my $new_ref;
            if ( scalar keys %ref_alt_map > 1 )
            {
                $new_ref = $vcf_out->fill_ref_alt_mapping(\%ref_alt_map);
            }
            if ( !$have_samples )
            {
                # Do not lose information from the ALT column when samples are not present
                my %alts;
                for my $vcf (@vcfs)
                {
                    my $line = $$vcf{last_line};
                    if ( !$line or $pos ne $$line{POS} ) { next; }
                    my $ref = $$line{REF};
                    for my $alt (@{$$line{ALT}}) { $alts{$ref_alt_map{$ref}{$alt}}=1; }
                }
                # Sorted for reproducible output; the AC values below follow this order
                $out{ALT} = [ sort keys %alts ];
            }
            for (my $ivcf=0; $ivcf<@vcfs; $ivcf++)
            {
                my $vcf = $vcfs[$ivcf];
                my $line = $$vcf{last_line};

                # If this file does not have a record for this position, then for all its columns output undef gtype
                if ( !$line or $pos ne $$line{POS} )
                {
                    for (my $i=0; $i<@{$$vcf{__col_names}}; $i++)
                    {
                        my $name = $$vcf{__col_names}->[$i];
                        $out{gtypes}{$name}{GT} = $$vcf_out{defaults}{GT};
                    }
                    next;
                }

                # Check if the site has been filtered
                if ( scalar @{$$line{FILTER}}>1 or ($$line{FILTER}[0] ne $$vcf{filter_passed} && $$line{FILTER}[0] ne $$vcf{defaults}{default}) )
                {
                    push @src_files,$ivcf.'f';
                }
                else
                {
                    push @src_files,$ivcf;
                }

                # Collect information for the FILTER field
                for my $flt (@{$$line{FILTER}})
                {
                    if ( $flt eq $$vcf{filter_passed} )
                    {
                        $filters{$$vcf_out{filter_passed}} = 1;
                    }
                    elsif ( $flt ne $$vcf{defaults}{default} )
                    {
                        $filters{$flt} = 1;
                    }
                }

                # Collect information for the QUAL field
                if ( $$line{QUAL} ne $$vcf{defaults}{QUAL} && $$line{QUAL} ne $$vcf{defaults}{default} && $$line{QUAL}>0 )
                {
                    push @quals,$$line{QUAL};
                    push @qual_weights,$$vcf{qual_weight};
                    $qual_weights_sum += $$vcf{qual_weight};
                }

                # The first ID found wins
                if ( $$line{ID} ne '.' && $out{ID} eq '.' ) { $out{ID}=$$line{ID}; }

                # Remember the FORMAT fields
                for my $field (@{$$line{FORMAT}}) { $format{$field} = 1; }

                # VCF without genotypes: calculate AC,AN if present
                if ( !$have_samples )
                {
                    if ( exists($$line{INFO}{AN}) ) { $an += $$line{INFO}{AN}; }
                    if ( exists($$line{INFO}{AC}) )
                    {
                        my (@acs) = split(/,/,$$line{INFO}{AC});
                        for (my $i=0; $i<@acs; $i++)
                        {
                            # More AC values than ALT alleles: there is nothing to assign the rest to
                            if ( !defined $$line{ALT}[$i] ) { last; }
                            my $alt = $ref_alt_map{$$line{REF}}{$$line{ALT}[$i]};
                            $ac{$alt} += $acs[$i];
                        }
                    }
                }

                # Join the INFO field
                for my $inf (keys %{$$line{INFO}})
                {
                    # When conflicting INFO fields are present, use the first one
                    if ( exists($info{$inf}) ) { next; }
                    $info{$inf} = $$line{INFO}{$inf};
                }

                my $ref = $$line{REF};

                # The ALT column may change after the merge, take care of ALT dependent tags such as GL.
                if ( $have_samples )
                {
                    if ( defined $new_ref )
                    {
                        $vcf->parse_AGtags($line,\%ref_alt_map,$$line{REF});
                    }
                    else
                    {
                        $vcf->parse_AGtags($line);
                    }
                }

                # Now fill in the genotype information for each column
                for (my $i=0; $i<@{$$vcf{__col_names}}; $i++)
                {
                    my $ori_name = $$vcf{columns}->[$i+9];
                    my $out_name = $$vcf{__col_names}->[$i];

                    $out{gtypes}{$out_name} = $$line{gtypes}{$ori_name};

                    # This is to convert 0/1 to G/C
                    my ($alleles,$seps,$is_phased,$is_empty) = $vcf->parse_haplotype($line,$ori_name);
                    if ( defined $new_ref )
                    {
                        # Translate the alleles to the merged REF; alleles without a mapping become '.'
                        my @als;
                        for my $al (@$alleles)
                        {
                            push @als, exists($ref_alt_map{$ref}{$al}) ? $ref_alt_map{$ref}{$al} : '.';
                        }
                        $out{gtypes}{$out_name}{GT} = $vcf->format_haplotype(\@als,$seps);
                    }
                    else
                    {
                        $out{gtypes}{$out_name}{GT} = $vcf->format_haplotype($alleles,$seps);
                    }
                }
                $out{REF} = defined $new_ref ? $new_ref : $ref;

                # This file's record has been consumed, buffer its next one
                advance_position($vcf,$opts);
            }

            $out{INFO} = { %info };
            $out{INFO}{SF} = join(',',@src_files);

            # Output the QUAL information: the weighted mean over the files which have a QUAL
            my $qual;
            for (my $i=0; $i<@quals; $i++)
            {
                $qual += $quals[$i] * $qual_weights[$i] * (1.0 / $qual_weights_sum);
            }
            $out{QUAL} = defined $qual ? sprintf("%.2f",$qual) : $$vcf_out{defaults}{QUAL};

            # Output the FILTER information: remove PASS or missing value if some other information
            # is present.
            delete($filters{$$vcf_out{defaults}{default}});
            if ( exists($filters{$$vcf_out{filter_passed}}) && scalar keys %filters > 1 )
            {
                delete($filters{$$vcf_out{filter_passed}});
            }
            $out{FILTER} = [ sort keys %filters ];
            if ( !@{$out{FILTER}} ) { push @{$out{FILTER}},$$vcf_out{defaults}{default}; }

            # The GT field must come as first
            delete($format{GT});
            $out{FORMAT} = ['GT'];
            for my $key (sort keys %format) { push @{$out{FORMAT}},$key; }

            if ( $have_samples )
            {
                $vcf_out->format_genotype_strings(\%out);
            }
            else
            {
                if ( defined $an ) { $out{INFO}{AN}=$an; }
                if ( scalar keys %ac )
                {
                    my @acs;
                    for my $alt (@{$out{ALT}})
                    {
                        # Some of the files may not have AC, the AC count can be undefined in such a case.
                        push @acs, exists($ac{$alt}) ? $ac{$alt} : 0;
                    }
                    $out{INFO}{AC} = join(',',@acs);
                }
            }
            print $vcf_out->format_line(\%out);
        }
    }
}
505
+
506
+
507
# Buffers the next record of the file in $$vcf{last_line}.
#   $vcf  .. reader object; must provide next_data_hash()
#   $opts .. optional hash ref; the keys silent_dups and rm_dups are read
# Once the input is exhausted, last_line is set to the false value returned by
# next_data_hash() and later calls return immediately.
# A record with the same POS as the buffered one is reported on STDERR (unless
# silent_dups is set) and, with rm_dups, skipped. A record with a lower POS
# than the buffered one is an error.
sub advance_position
{
    my ($vcf,$opts) = @_;

    # A key which exists but holds a false value marks the end of the input
    return if exists($$vcf{last_line}) && !$$vcf{last_line};

    while (1)
    {
        my $record = $vcf->next_data_hash();
        if ( !$record )
        {
            $$vcf{last_line} = $record;
            return;
        }

        my $previous = $$vcf{last_line};
        if ( $previous )
        {
            if ( $$previous{POS} eq $$record{POS} )
            {
                if ( !$$opts{silent_dups} )
                {
                    print STDERR "The position appeared twice: $$vcf{file} .. $$record{CHROM}:$$record{POS}\n";
                }

                # The only reason to loop: when ignoring dups, read the next record instead
                next if $$opts{rm_dups};
            }
            elsif ( $$previous{POS} > $$record{POS} )
            {
                error("Wrong order: $$vcf{file} .. $$record{CHROM}:$$record{POS} comes after $$previous{CHROM}:$$previous{POS}\n");
            }
        }

        $$vcf{last_line} = $record;
        return;
    }
}
544
+
545
+
546
# Returns the lowest POS among the records buffered in {last_line} of the
# given readers, compared numerically.
#   $vcfs .. array ref of readers; those without a buffered record are skipped
# Returns undef when no reader has a buffered record.
sub get_min_position
{
    my ($vcfs) = @_;

    my $lowest;
    for my $reader (@$vcfs)
    {
        my $record = $$reader{last_line};
        next if !$record;

        # The first record seen sets the minimum, later ones replace it only when lower
        my $candidate = $$record{POS};
        if ( !defined $lowest || $lowest>$candidate ) { $lowest = $candidate; }
    }
    return $lowest;
}
576
+
577
+