ngs_server 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,310 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+ use Vcf;
10
+
11
# Entry point: parse the command line, then run exactly one of the three modes.
my $opts = parse_params();

# --check-columns takes precedence. Otherwise the mere presence of the
# --merge-sort option (whatever its value) selects the merge-sort mode.
my $mode = $$opts{check_columns} ? \&check_columns
         : exists($$opts{sort})  ? \&concat_merge
         :                         \&concat;
$mode->($opts);

exit;
26
+
27
+ #--------------------------------
28
+
29
# Report a problem or print the usage text; never returns.
#   error(@msg)  croaks with the given message.
#   error()      dies with the usage text.
sub error
{
    my @message = @_;
    croak @message if @message;

    my @usage = (
        "About: Convenience tool for concatenating VCF files. In the basic mode it does not \n",
        " do anything fancy except for a sanity check that all files have the same columns.\n",
        " When run with the -s option, it will perform a partial merge sort, looking at\n",
        " a limited number of open jobs simultaneously.\n",
        "Usage: vcf-concat [OPTIONS] A.vcf.gz B.vcf.gz C.vcf.gz > out.vcf\n",
        "Options:\n",
        " -c, --check-columns Do not concatenate, only check if the columns agree.\n",
        " -f, --files <file> Read the list of files from a file.\n",
        " -s, --merge-sort <int> Allow small overlaps in N consecutive files.\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
    die join('', @usage);
}
49
+
50
# Parse the command line (@ARGV is consumed) into an options hash reference:
#   files          array ref of input files, in the order given (always present)
#   sort           value of -s/--merge-sort (key present only when the option was given)
#   check_columns  1 when -c/--check-columns was given
# Leaves through error() on -h, on an unknown argument, on a missing -f value
# or when no input file was named.
sub parse_params
{
    my $opts = { files=>[] };

    # Test for definedness rather than truth, so that an argument such as "0"
    # or "" cannot silently end the option processing.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-s' || $arg eq '--merge-sort' ) { $$opts{sort}=shift(@ARGV); next; }
        if ( $arg eq '-c' || $arg eq '--check-columns' ) { $$opts{check_columns}=1; next; }
        if ( $arg eq '-f' || $arg eq '--files' )
        {
            my $list = shift(@ARGV);
            if ( !defined $list ) { error("Missing the file name after $arg. Run -h for help.\n"); }
            open(my $fh,'<',$list) or error("$list: $!");
            while (my $line=<$fh>)
            {
                chomp($line);
                # A blank line (typically a trailing one) is not a file name.
                if ( $line eq '' ) { next; }
                push @{$$opts{files}},$line;
            }
            close($fh);
            next;
        }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        # Anything else is accepted only if it names an existing file.
        if ( -e $arg ) { push @{$$opts{files}},$arg; next }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    if ( ! @{$$opts{files}} ) { error("No files to concat?\n") }
    return $opts;
}
76
+
77
# Compare the header columns of every input file with those of the first file.
# Mismatches are only reported with warn(); nothing is written to STDOUT.
sub check_columns
{
    my ($opts) = @_;
    my @expected;    # column names of the first file
    for my $file (@{$$opts{files}})
    {
        my $vcf = Vcf->new(file=>$file);
        $vcf->parse_header();

        if ( !@expected )
        {
            @expected = @{$$vcf{columns}};
        }
        else
        {
            my $found = $$vcf{columns};
            warn("Different number of columns in [$file].\n") if @expected != @$found;
            for my $i (0..$#expected)
            {
                next if $$found[$i] eq $expected[$i];
                # One report per file is enough.
                warn("The column names do not agree in [$file].\n");
                last;
            }
        }
        $vcf->close();
    }
}
101
+
102
# Plain concatenation: print the header of the first file, then the data
# lines of all files in the order given. Every file must have exactly the
# same columns as the first one, otherwise the run is aborted via error().
sub concat
{
    my ($opts) = @_;
    my @columns;    # column names of the first file
    for my $file (@{$$opts{files}})
    {
        my $vcf = Vcf->new(file=>$file);
        $vcf->parse_header();

        if ( @columns )
        {
            if ( @columns != @{$$vcf{columns}} ) { error("Different number of columns in [$file].\n"); }
            for (my $i=0; $i<@columns; $i++)
            {
                if ( $$vcf{columns}[$i] ne $columns[$i] ) { error("The column names do not agree in [$file].\n"); }
            }
        }
        else
        {
            @columns = @{$$vcf{columns}};
            print $vcf->format_header();
        }

        # Stop on end of input (undef) only, not on a line that merely evaluates to false.
        while (defined(my $line=$vcf->next_line()))
        {
            print $line;
        }

        # Release the reader before moving on, as check_columns() does.
        $vcf->close();
    }
}
130
+
131
# Collect the chromosome names reported by Vcf->get_chromosomes() for each of
# the given files. Returns an array ref without duplicates, in order of first
# appearance.
sub get_chromosomes
{
    my ($files) = @_;
    my (@ordered,%seen);
    for my $file (@$files)
    {
        my $vcf = Vcf->new(file=>$file);
        # Keep a name only the first time it is met.
        push @ordered, grep { !$seen{$_}++ } @{ $vcf->get_chromosomes() };
    }
    return \@ordered;
}
149
+
150
# Merge-sort mode (-s): for each chromosome found in the input files, run a
# Reader over all files and print the lines it yields. The header is printed
# once, by the first Reader; if there was no chromosome at all, the header of
# the first file is printed instead.
sub concat_merge
{
    my ($opts) = @_;
    my $files = $$opts{files};

    my $printed = 0;
    for my $chr (@{ get_chromosomes($files) })
    {
        my $reader = Reader->new(files=>$files,nsort=>$$opts{sort},seq=>$chr,header_printed=>$printed);
        $printed = 1;
        $reader->open_next();

        LINE:
        while (1)
        {
            my $line = $reader->next_line();
            if ( defined $line ) { print $line; next LINE; }

            # The leading file is exhausted: advance the window, or finish this chromosome.
            last LINE unless $reader->open_next();
        }
    }
    if ( !$printed )
    {
        my $vcf = Vcf->new(file=>$$files[0]);
        $vcf->parse_header();
        print $vcf->format_header();
    }
}
179
+
180
+ #---------------------------------
181
+
182
+ package Reader;
183
+
184
+ use strict;
185
+ use warnings;
186
+ use Carp;
187
+ use Vcf;
188
+
189
# Constructor. Takes key=>value pairs, which are stored in the object as given:
#   files   array ref of file names (required)
#   nsort   window size; defaults to 2 and is capped at the number of files
# The call sites in this file also pass seq and header_printed.
sub new
{
    my ($class,@args) = @_;
    my $self = bless( (@args ? {@args} : {}), (ref($class) || $class) );

    $self->throw("Expected the files option.\n") unless $$self{files};

    $$self{nsort} ||= 2;
    my $nfiles = scalar @{$$self{files}};
    $$self{nsort} = $nfiles if $$self{nsort} > $nfiles;

    # The window of file indexes and readers is built lazily by open_next().
    $$self{idxs} = undef;
    $$self{vcfs} = undef;
    return $self;
}
201
+
202
# Abort with the given message and a full stack trace (Carp::confess).
sub throw
{
    my $self = shift;
    confess @_;
}
207
+
208
# Print the formatted header of $vcf, unless a header has been printed already.
# The header_printed flag is set so that later calls print nothing.
sub print_header
{
    my ($self,$vcf) = @_;
    return if $$self{header_printed};
    print $vcf->format_header();
    $$self{header_printed} = 1;
}
215
+
216
+
217
# Open a VCF restricted to the region $$self{seq}, parse its header and check
# that its columns match those of the first file opened by this reader. When
# called for the first time, the VCF header is also printed. Returns the Vcf object.
sub open_vcf
{
    my ($self,$file) = @_;
    my $vcf = Vcf->new(file=>$file,region=>$$self{seq},print_header=>1);
    $vcf->parse_header();

    my $found = $$vcf{columns};
    if ( exists($$self{columns}) )
    {
        my $expected = $$self{columns};
        $self->throw("Different number of columns in [$file].\n") if @$expected != @$found;
        for my $i (0..$#$expected)
        {
            next if $$found[$i] eq $$expected[$i];
            $self->throw("The column names do not agree in [$file].\n");
        }
    }
    else
    {
        # First file: remember a copy of its columns as the reference.
        $$self{columns} = [ @$found ];
    }
    $self->print_header($vcf);
    return $vcf;
}
238
+
239
# Advance the window of simultaneously open files.
# First call: the window covers the file indexes 0 .. nsort-1.
# Later calls: the leading file is dropped and, if there is one, the file
# following the last one in the window is appended.
# Readers are then opened for the window slots which have none.
# Returns 1 while the window is non-empty, 0 once all files have been dropped.
sub open_next
{
    my ($self) = @_;

    if ( defined $$self{idxs} )
    {
        my $last = $$self{idxs}[-1];

        shift(@{$$self{idxs}});
        shift(@{$$self{vcfs}});

        # New file to be opened
        push @{$$self{idxs}}, $last+1 if $last+1 < @{$$self{files}};
    }
    else
    {
        $$self{idxs}[$_] = $_ for 0 .. $$self{nsort}-1;
    }

    my $nslots = @{$$self{idxs}};
    for my $slot (0..$nslots-1)
    {
        next if exists($$self{vcfs}[$slot]);
        $$self{vcfs}[$slot] = $self->open_vcf($$self{files}[ $$self{idxs}[$slot] ]);
    }
    return @{$$self{idxs}} ? 1 : 0;
}
272
+
273
# Return the next line in position order, or undef when the leading reader of
# the window is exhausted (the caller then advances the window with open_next).
# One line is taken from each open reader; the one with the smallest position
# is returned and all the others are pushed back with _unread_line(). On equal
# positions the earlier reader wins. All lines must be on the same chromosome.
sub next_line
{
    my ($self) = @_;
    my $readers = $$self{vcfs};

    my $best_vcf = $$readers[0];
    my $best = $best_vcf->next_line();
    return undef unless defined $best;

    my ($best_chr,$best_pos) = $best =~ /^(\S+)\t(\d+)/
        or $self->throw("Could not parse the line: $best\n");

    for my $i (1..$#$readers)
    {
        next unless exists($$readers[$i]);
        my $cand_vcf = $$readers[$i];
        my $cand = $cand_vcf->next_line();
        next unless defined $cand;

        my ($chr,$pos) = $cand =~ /^(\S+)\t(\d+)/
            or $self->throw("Could not parse the line: $cand\n");

        if ( $chr ne $best_chr ) { $self->throw("FIXME: When run with the -s option, only one chromosome can be present.\n"); }

        if ( $pos < $best_pos )
        {
            # A new minimum: the previous candidate goes back to its reader.
            $best_vcf->_unread_line($best);
            ($best_vcf,$best,$best_pos) = ($cand_vcf,$cand,$pos);
        }
        else
        {
            $cand_vcf->_unread_line($cand);
        }
    }
    return $best;
}
309
+
310
+
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/env perl
2
+
3
+ use strict;
4
+ use warnings;
5
+ use Carp;
6
+ use Vcf;
7
+ use FindBin;
8
+ use lib "$FindBin::Bin";
9
+ use FaSlice;
10
+
11
# Entry point: read the options, then convert the VCF given on STDIN.
my $opts = parse_params();
convert_file($opts);

exit(0);
15
+
16
+ #--------------------------------
17
+
18
# Report a problem or print the usage text; never returns.
#   error(@msg)  croaks with the given message.
#   error()      dies with the usage text.
sub error
{
    my @message = @_;
    croak @message if @message;

    my @usage = (
        "About: Convert between VCF versions, currently to VCFv4.0 only.\n",
        "Usage: cat in.vcf | vcf-convert [OPTIONS] > out.vcf\n",
        "Options:\n",
        " -r, --refseq <file> The reference sequence in samtools faindexed fasta file. (Not required with SNPs only.)\n",
        " -v, --version <string> 4.0\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
    die join('', @usage);
}
34
+
35
+
36
# Parse the command line (@ARGV is consumed) into an options hash reference:
#   version  output VCF version, '4.0' unless overridden with -v/--version
#   refseq   value of -r/--refseq, present only when the option was given
# Leaves through error() on -h and on any unknown argument.
sub parse_params
{
    my $opts = { version=>'4.0' };

    # Test for definedness rather than truth: an argument such as "0" or ""
    # must be reported as unknown, not silently end the option processing and
    # discard everything that follows it.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-r' || $arg eq '--refseq' ) { $$opts{refseq}=shift(@ARGV); next; }
        if ( $arg eq '-v' || $arg eq '--version' ) { $$opts{version}=shift(@ARGV); next; }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    return $opts;
}
48
+
49
+
50
# Read a VCF from STDIN and print it to STDOUT as VCFv4.0.
#   $opts  hash ref with
#            version  requested output version; anything but '4.0' is an error
#            refseq   fasta file for FaSlice; required once a deletion is met
# Per record: missing FORMAT and QUAL values and the "passed" FILTER value are
# translated from the input conventions to the output ones, and records with
# indels get their REF and ALT rewritten via Vcf::fill_ref_alt_mapping.
# Leaves through error() on unsupported input.
sub convert_file
{
    my ($opts) = @_;

    if ( $$opts{version} ne '4.0' ) { error("Sorry, 4.0 is currently the only supported output version.\n"); }

    # How to recognise a number. The sign of the exponent is optional, so that
    # "1e5" is accepted as well as "1e+5".
    my $FLOAT_RE = qr/^\-?\d+\.?\d*(?:[eE][+-]?\d+)?$/;

    my $vcf_in = Vcf->new(fh=>\*STDIN);
    my $vcf_out = Vcf->new(version=>'4.0');

    # Convert the header. The first input header line is not copied.
    # NOTE(review): presumably it is the ##fileformat line, which the output
    # object provides itself -- confirm against Vcf.pm.
    $vcf_in->parse_header();
    for (my $i=1; $i<@{$$vcf_in{header_lines}}; $i++)
    {
        $vcf_out->add_header_line($$vcf_in{header_lines}[$i]);
    }
    $vcf_out->add_columns(@{$$vcf_in{columns}});
    print $vcf_out->format_header();

    # Convert each data line
    my $fa;    # FaSlice reader, created when the first deletion is met
    while (my $x=$vcf_in->next_data_hash())
    {
        # Convert missing (default) FORMAT values
        for my $gt (values %{$$x{gtypes}})
        {
            for my $field (@{$$x{FORMAT}})
            {
                # Skip the GT tag, so that ploidy information is not lost ("./." would become ".")
                if ( $field eq 'GT' ) { next; }
                if ( $field eq 'FT' && $$gt{$field} eq $$vcf_in{filter_passed} ) { $$gt{$field}=$$vcf_out{filter_passed}; }
                if ( exists($$vcf_in{defaults}{$field}) && $$vcf_in{defaults}{$field} eq $$gt{$field} )
                {
                    $$gt{$field} = $$vcf_out{defaults}{$field};
                    next;
                }
                # A value equal to the default declared in the input header is dropped.
                if ( exists($$vcf_in{header}{FORMAT}{$field}{default}) && $$vcf_in{header}{FORMAT}{$field}{default} eq $$gt{$field} )
                {
                    delete($$gt{$field});
                    next;
                }
            }
        }

        # Change missing QUAL: In case they are numbers, do numeric comparison, as -1.0 is sometimes used instead of -1
        if ( $$x{QUAL} eq $$vcf_in{defaults}{QUAL}
            or ($$x{QUAL}=~$FLOAT_RE && $$vcf_in{defaults}{QUAL}=~$FLOAT_RE && $$x{QUAL}==$$vcf_in{defaults}{QUAL}) )
        {
            $$x{QUAL} = $$vcf_out{defaults}{QUAL};
        }
        for (my $i=0; $i<@{$$x{FILTER}}; $i++)
        {
            if ( $$x{FILTER}[$i] eq $$vcf_in{filter_passed} ) { $$x{FILTER}[$i] = $$vcf_out{filter_passed}; }
        }

        # Parse the ALT column and see if there are indels
        my $has_indel = 0;
        for my $alt (@{$$x{ALT}})
        {
            my ($type) = $vcf_in->event_type($x,$alt);
            if ( $type eq 's' or $type eq 'r' ) { next; }
            if ( $type ne 'i' ) { error("FIXME: expected indel at $$x{CHROM}:$$x{POS}\n"); }

            $has_indel = 1;
        }

        # If there is an indel, new REF and ALT must be changed
        if ( $has_indel )
        {
            # $map collects {ref}{alt} pairs for fill_ref_alt_mapping();
            # $alt_to_mapref remembers which pair each original ALT went into.
            my $map = {};
            my $alt_to_mapref = {};

            for my $alt (@{$$x{ALT}})
            {
                my ($type,$len,$ht) = $vcf_in->event_type($x,$alt);
                if ( $type eq 's' or $type eq 'r' )
                {
                    $$alt_to_mapref{$alt} = { ref=>$$x{REF}, alt=>$alt };
                    $$map{$$x{REF}}{$alt} = 1;
                    next;
                }

                if ( $type eq 'i' && $len>0 )
                {
                    # Insertion: the new ALT is REF followed by $ht
                    my $tmp = $$x{REF}.$ht;
                    $$alt_to_mapref{$alt} = { ref=>$$x{REF}, alt=>$tmp };
                    $$map{$$x{REF}}{$tmp} = 1;
                    next;
                }
                elsif ( $type eq 'i' && $len<0 )
                {
                    # Deletion: the new REF is taken from the reference sequence
                    if ( !$fa )
                    {
                        if ( !$$opts{refseq} ) { error("Indels present, missing the -r option.\n"); }
                        $fa = FaSlice->new(file=>$$opts{refseq},size=>1_000_000);
                    }
                    my $ref = $fa->get_slice($$x{CHROM},$$x{POS},$$x{POS}+abs($len));
                    my $first = substr($ref,0,1);

                    # Sanity check: $first comes from the fasta file, $$x{REF} from the VCF
                    if ( $$x{REF} ne $first )
                    {
                        error("Sanity check failed: the ref does not agree at $$x{CHROM}:$$x{POS} .. [$first] in .fa, [$$x{REF}] in .vcf\n");
                    }

                    $$alt_to_mapref{$alt} = { ref=>$ref, alt=>$$x{REF} };
                    $$map{$ref}{$$x{REF}} = 1;
                    next;
                }
                else
                {
                    error("Uh, FIXME: $$x{CHROM}:$$x{POS} [$type] [$len] [$ht]\n");
                }
            }

            # After this call the values of $map are read back as the new ALT strings.
            $$x{REF} = $vcf_out->fill_ref_alt_mapping($map);
            for (my $i=0; $i<@{$$x{ALT}}; $i++)
            {
                my $ori_ref = $$alt_to_mapref{$$x{ALT}[$i]}{ref};
                my $ori_alt = $$alt_to_mapref{$$x{ALT}[$i]}{alt};
                $$x{ALT}[$i] = $$map{$ori_ref}{$ori_alt};
            }
        }

        print $vcf_out->format_line($x);
    }
}
180
+
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Authors: Adam Auton, Petr Danecek
4
+ # (C) 2011
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+
10
# Entry point: read the options, then inspect or convert the input.
my $opts = parse_params();
fix_file($opts);

exit(0);
14
+
15
+ #--------------------------------
16
+
17
# Report a problem or print the usage text; never returns.
#   error(@msg)  confesses (message plus stack trace).
#   error()      dies with the usage text.
sub error
{
    my @message = @_;
    confess @message if @message;

    my @usage = (
        "About: Reads in a VCF file with any (commonly used) newline representation and outputs with the\n",
        " current system's newline representation.\n",
        "Usage: vcf-fix-newlines [OPTIONS]\n",
        "Options:\n",
        " -i, --info Report if the file is consistent with the current platform based.\n",
        " -h, -?, --help This help message.\n",
        "Example:\n",
        " vcf-fix-newlines -i file.vcf\n",
        " vcf-fix-newlines file.vcf.gz > out.vcf\n",
        " cat file.vcf | vcf-fix-newlines > out.vcf\n",
        "\n",
    );
    die join('', @usage);
}
34
+
35
# Parse the command line (@ARGV is consumed) into an options hash reference:
#   info  1 when -i/--info was given
#   vcf   the first argument naming an existing file
# Leaves through error() on -h and on any other argument.
sub parse_params
{
    my %opt;
    my %is_help = map { $_ => 1 } ('-?','-h','--help');

    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '--info' or $arg eq '-i' ) { $opt{info} = 1; }
        elsif ( $is_help{$arg} ) { error(); }
        elsif ( !exists($opt{vcf}) and -e $arg ) { $opt{vcf} = $arg; }
        else { error("Unknown parameter \"$arg\". Run -h for help.\n"); }
    }
    return \%opt;
}
47
+
48
# Detect the newline convention of the input and, unless it already is "\n",
# print the input to STDOUT with every newline replaced by "\n".
#   $opts  hash ref with
#            vcf   input file (read through `gunzip -c` when the name ends in .gz);
#                  STDIN is read when absent
#            info  when true, only report the detected convention
# The detected convention is always reported on STDERR.
# NOTE(review): when the input already uses "\n", nothing is written to STDOUT;
# this behaviour is kept as it was.
sub fix_file
{
    my ($opts) = @_;

    my $fh;
    if ( !$$opts{vcf} )
    {
        $fh = \*STDIN;
    }
    elsif ( $$opts{vcf}=~/\.gz$/i )
    {
        # List-form pipe open: the file name is handed to gunzip as a single
        # argument and never interpreted by a shell.
        open($fh,'-|','gunzip','-c',$$opts{vcf}) or error("gunzip -c $$opts{vcf}: $!\n");
    }
    else
    {
        open($fh,'<',$$opts{vcf}) or error("$$opts{vcf}: $!\n");
    }

    # Read a small 1kb sample
    binmode $fh or error("binmode: $!");
    my $buf = do { local $/ = \1024; <$fh> };
    if ( !defined $buf ) { error("No data read.\n"); }

    # Check the origin
    my ($in,$nl);
    if ( $buf=~/\015\012/ ) { $in = 'Windows'; $nl = "\015\012"; }
    elsif ( $buf=~/\015/ && $buf!~/\012/ ) { $in = 'Old Mac'; $nl = "\015"; }
    elsif ( $buf=~/\012/ && $buf!~/\015/ ) { $in = 'UNIX'; $nl = "\012"; }
    else
    {
        error("FIXME: Unable to determine the system which produced the file.\n");
    }
    warn("The file was generated on $in compatible system.\n");
    if ( $$opts{info} ) { close($fh); return; }
    if ( $nl eq "\n" ) { warn("No conversion needed.\n"); close($fh); return; }

    # Read the file and do the conversion. The sample is first completed up to
    # the next newline, so that a newline sequence split by the 1kb boundary is
    # converted too. The whole input may have fit into the sample, in which
    # case there is nothing to append.
    local $/ = $nl;
    my $rest = <$fh>;
    if ( defined $rest ) { $buf .= $rest; }
    $buf =~ s/\Q$nl\E/\n/g;
    print $buf;

    while (defined(my $line = <$fh>))
    {
        $line =~ s/\Q$nl\E/\n/g;
        print $line;
    }
    close($fh);
}
97
+