ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,310 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+ use Vcf;
10
+
11
# Pick the mode of operation from the parsed command line: column check only,
# merge-sorted concatenation (when -s/--merge-sort was given), or plain
# concatenation.
my $opts = parse_params();
if    ( $$opts{check_columns} ) { check_columns($opts); }
elsif ( exists($$opts{sort}) )  { concat_merge($opts); }
else                            { concat($opts); }

exit;
26
+
27
+ #--------------------------------
28
+
29
# Report a failure or print the usage text. Never returns.
#   error(@msg) .. croaks with the given message
#   error()     .. dies with the help text
sub error
{
    croak @_ if scalar @_;

    my @usage =
    (
        "About: Convenience tool for concatenating VCF files. In the basic mode it does not \n",
        " do anything fancy except for a sanity check that all files have the same columns.\n",
        " When run with the -s option, it will perform a partial merge sort, looking at\n",
        " a limited number of open jobs simultaneously.\n",
        "Usage: vcf-concat [OPTIONS] A.vcf.gz B.vcf.gz C.vcf.gz > out.vcf\n",
        "Options:\n",
        " -c, --check-columns Do not concatenate, only check if the columns agree.\n",
        " -f, --files <file> Read the list of files from a file.\n",
        " -s, --merge-sort <int> Allow small overlaps in N consecutive files.\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
    die join('',@usage);
}
49
+
50
# Parse @ARGV (consuming it) and return the options as a hash reference:
#   files         .. array ref of input files, from the command line and/or -f lists
#   sort          .. value given to -s/--merge-sort (key present only with -s)
#   check_columns .. 1 when -c/--check-columns was given
# Calls error() on unknown parameters, on -f without a file name or with an
# unreadable list, when no input files were given, and for -h/-?/--help.
sub parse_params
{
    my $opts = { files=>[] };
    # Test definedness, not truth, so that an argument such as "0" or "" does
    # not silently end the parsing and drop the remaining arguments.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-s' || $arg eq '--merge-sort' ) { $$opts{sort}=shift(@ARGV); next; }
        if ( $arg eq '-c' || $arg eq '--check-columns' ) { $$opts{check_columns}=1; next; }
        if ( $arg eq '-f' || $arg eq '--files' )
        {
            my $files = shift(@ARGV);
            if ( !defined $files ) { error("Missing the file name after $arg. Run -h for help.\n"); }
            open(my $fh,'<',$files) or error("$files: $!");
            while (my $line=<$fh>)
            {
                chomp($line);
                # An empty line (e.g. a trailing blank line) is not a file name
                if ( $line eq '' ) { next; }
                push @{$$opts{files}},$line;
            }
            close($fh);
            next;
        }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        # Anything else must be an existing input file
        if ( -e $arg ) { push @{$$opts{files}},$arg; next }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    if ( ! @{$$opts{files}} ) { error("No files to concat?\n") }
    return $opts;
}
76
+
77
# Compare the column names of every input file against those of the first
# file and warn (without aborting) about each file that differs.
#   $opts .. hash ref, reads {files}
sub check_columns
{
    my ($opts) = @_;
    my @expected;
    foreach my $file (@{$$opts{files}})
    {
        my $vcf = Vcf->new(file=>$file);
        $vcf->parse_header();

        if ( !@expected )
        {
            # Nothing to compare against yet, remember these columns
            @expected = @{$$vcf{columns}};
        }
        else
        {
            if ( scalar @expected != scalar @{$$vcf{columns}} ) { warn("Different number of columns in [$file].\n"); }
            for my $i (0..$#expected)
            {
                next if !( $$vcf{columns}[$i] ne $expected[$i] );
                # One warning per file is enough
                warn("The column names do not agree in [$file].\n");
                last;
            }
        }
        $vcf->close();
    }
}
101
+
102
# Plain concatenation: print the header of the first file followed by the
# lines of all files in the given order. Calls error() if a later file does
# not have the same columns as the first one.
#   $opts .. hash ref, reads {files}
sub concat
{
    my ($opts) = @_;
    my @expected;
    foreach my $file (@{$$opts{files}})
    {
        my $vcf = Vcf->new(file=>$file);
        $vcf->parse_header();

        if ( !@expected )
        {
            # No reference columns yet: remember them and output the header
            @expected = @{$$vcf{columns}};
            print $vcf->format_header();
        }
        else
        {
            if ( scalar @expected != scalar @{$$vcf{columns}} ) { error("Different number of columns in [$file].\n"); }
            for my $i (0..$#expected)
            {
                if ( $$vcf{columns}[$i] ne $expected[$i] ) { error("The column names do not agree in [$file].\n"); }
            }
        }

        my $line;
        while ($line=$vcf->next_line())
        {
            print $line;
        }
    }
}
130
+
131
# Collect the chromosome names reported by get_chromosomes() of each file.
#   $files .. array ref of file names
# Returns an array ref of unique names in the order of first appearance.
sub get_chromosomes
{
    my ($files) = @_;
    my (@ordered,%seen);
    foreach my $file (@$files)
    {
        my $vcf   = Vcf->new(file=>$file);
        my $names = $vcf->get_chromosomes();
        foreach my $name (@$names)
        {
            next if exists($seen{$name});
            $seen{$name} = 1;
            push @ordered,$name;
        }
    }
    return \@ordered;
}
149
+
150
# Merge-sorted concatenation (-s): for each chromosome found in the files,
# let a Reader walk through the files and print the lines it returns.
# If no chromosome was found at all, only the header of the first file
# is printed.
#   $opts .. hash ref, reads {files} and {sort}
sub concat_merge
{
    my ($opts) = @_;

    my $header_printed = 0;
    for my $chr (@{ get_chromosomes($$opts{files}) })
    {
        my $reader = Reader->new(files=>$$opts{files},nsort=>$$opts{sort},seq=>$chr,header_printed=>$header_printed);
        # Only the first reader is allowed to output the header
        $header_printed = 1;
        $reader->open_next();
        while (1)
        {
            my $line = $reader->next_line();
            if ( defined $line ) { print $line; next; }

            # The leading file is exhausted, move on to the next one
            last if !$reader->open_next();
        }
    }
    return if $header_printed;

    my $vcf = Vcf->new(file=>$$opts{files}[0]);
    $vcf->parse_header();
    print $vcf->format_header();
}
179
+
180
+ #---------------------------------
181
+
182
+ package Reader;
183
+
184
+ use strict;
185
+ use warnings;
186
+ use Carp;
187
+ use Vcf;
188
+
189
# Constructor. Recognised key=>value arguments:
#   files .. array ref of file names (required, throws otherwise)
#   nsort .. number of files kept open at once; defaults to 2 and is
#            capped at the number of files
# Any other keys given (e.g. seq, header_printed) are stored as they are.
sub new
{
    my ($class,@args) = @_;
    my $self = bless { @args }, ref($class) || $class;

    $self->throw("Expected the files option.\n") if !$$self{files};

    $$self{nsort} ||= 2;
    my $nfiles = scalar @{$$self{files}};
    if ( $$self{nsort} > $nfiles ) { $$self{nsort} = $nfiles; }

    # Filled in by the first call to open_next()
    $$self{idxs} = undef;
    $$self{vcfs} = undef;
    return $self;
}
201
+
202
# Die with the given message and a full stack backtrace (Carp::confess).
sub throw
{
    my $self = shift;
    confess @_;
}
207
+
208
# Print the formatted header of $vcf, but only if this reader has not
# printed (or been told not to print) a header before.
sub print_header
{
    my ($self,$vcf) = @_;
    return if $$self{header_printed};

    print $vcf->format_header();
    $$self{header_printed} = 1;
}
215
+
216
+
217
# Open a VCF restricted to the reader's sequence, parse its header and check
# that its column names agree with those of the files opened before. The VCF
# header is output unless one has been printed already.
#   $file .. name of the file to open
# Returns the Vcf object; throws when the columns differ.
sub open_vcf
{
    my ($self,$file) = @_;
    my $vcf = Vcf->new(file=>$file,region=>$$self{seq},print_header=>1);
    $vcf->parse_header();

    if ( exists($$self{columns}) )
    {
        my $ncols = scalar @{$$self{columns}};
        if ( $ncols != scalar @{$$vcf{columns}} ) { $self->throw("Different number of columns in [$file].\n"); }
        for my $i (0..$ncols-1)
        {
            next if $$vcf{columns}[$i] eq $$self{columns}[$i];
            $self->throw("The column names do not agree in [$file].\n");
        }
    }
    else
    {
        # First file: keep a private copy of its columns as the reference
        $$self{columns} = [ @{$$vcf{columns}} ];
    }

    $self->print_header($vcf);
    return $vcf;
}
238
+
239
# Advance the window of simultaneously open files. The first call opens the
# first {nsort} files; every later call drops the leading file and, if any
# file is left, appends the next one.
# Returns 1 while at least one file is in the window, 0 otherwise.
sub open_next
{
    my ($self) = @_;

    if ( defined $$self{idxs} )
    {
        my $next_idx = $$self{idxs}[-1] + 1;

        # Drop the leading file; the remaining ones keep their relative order
        shift(@{$$self{idxs}});
        shift(@{$$self{vcfs}});

        # New file to be opened, if there is one
        if ( $next_idx < @{$$self{files}} ) { push @{$$self{idxs}}, $next_idx; }
    }
    else
    {
        my $i = 0;
        while ( $i < $$self{nsort} )
        {
            $$self{idxs}[$i] = $i;
            $i++;
        }
    }

    # Open whatever is in the window and has no Vcf object yet
    for (my $slot=0; $slot<@{$$self{idxs}}; $slot++)
    {
        next if exists($$self{vcfs}[$slot]);
        my $file = $$self{files}[ $$self{idxs}[$slot] ];
        $$self{vcfs}[$slot] = $self->open_vcf($file);
    }

    return @{$$self{idxs}} ? 1 : 0;
}
272
+
273
# Return the line with the smallest position among the next lines of all
# open files; the lines which were read but not returned are pushed back to
# their files. Returns undef as soon as the leading file has no more lines.
# Throws on unparsable lines or when the files disagree on the chromosome.
sub next_line
{
    my ($self) = @_;

    my $best_vcf  = $$self{vcfs}[0];
    my $best_line = $best_vcf->next_line();
    return undef if !defined $best_line;

    my ($best_chr,$best_pos) = $best_line =~ /^(\S+)\t(\d+)/
        or $self->throw("Could not parse the line: $best_line\n");

    for (my $i=1; $i<@{$$self{vcfs}}; $i++)
    {
        next if !exists($$self{vcfs}[$i]);
        my $vcf  = $$self{vcfs}[$i];
        my $line = $vcf->next_line();
        next if !defined $line;

        my ($chr,$pos) = $line =~ /^(\S+)\t(\d+)/
            or $self->throw("Could not parse the line: $line\n");
        if ( $chr ne $best_chr ) { $self->throw("FIXME: When run with the -s option, only one chromosome can be present.\n"); }

        if ( $pos >= $best_pos )
        {
            # Not smaller than the current candidate: put it back
            $vcf->_unread_line($line);
            next;
        }

        # New candidate found, the previous one goes back to its file
        $best_vcf->_unread_line($best_line);
        ($best_vcf,$best_line,$best_pos) = ($vcf,$line,$pos);
    }
    return $best_line;
}
309
+
310
+
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/env perl
2
+
3
+ use strict;
4
+ use warnings;
5
+ use Carp;
6
+ use Vcf;
7
+ use FindBin;
8
+ use lib "$FindBin::Bin";
9
+ use FaSlice;
10
+
11
# Read the command line, then convert the VCF from STDIN to STDOUT.
my $opts = parse_params();
convert_file($opts);

exit(0);
15
+
16
+ #--------------------------------
17
+
18
# Report a failure or print the usage text. Never returns.
#   error(@msg) .. croaks with the given message
#   error()     .. dies with the help text
sub error
{
    croak @_ if scalar @_;

    my @usage =
    (
        "About: Convert between VCF versions, currently to VCFv4.0 only.\n",
        "Usage: cat in.vcf | vcf-convert [OPTIONS] > out.vcf\n",
        "Options:\n",
        " -r, --refseq <file> The reference sequence in samtools faindexed fasta file. (Not required with SNPs only.)\n",
        " -v, --version <string> 4.0\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
    die join('',@usage);
}
34
+
35
+
36
# Parse @ARGV (consuming it) and return the options as a hash reference:
#   refseq  .. value of -r/--refseq (key present only when given)
#   version .. value of -v/--version, defaults to '4.0'
# Calls error() on unknown parameters and for -h/-?/--help.
sub parse_params
{
    my $opts = { version=>'4.0' };
    # Test definedness, not truth, so that an argument such as "0" or "" is
    # reported as unknown instead of silently ending the parsing.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-r' || $arg eq '--refseq' ) { $$opts{refseq}=shift(@ARGV); next; }
        if ( $arg eq '-v' || $arg eq '--version' ) { $$opts{version}=shift(@ARGV); next; }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    return $opts;
}
48
+
49
+
50
# Read a VCF from STDIN and print it as VCFv4.0 to STDOUT.
#   $opts .. hash ref from parse_params(), reads {version} and {refseq}
#
# The header lines (all but the first one) and the column names are copied to
# a new 4.0 header. In each data line the values equal to the defaults of the
# input version (FORMAT fields other than GT, QUAL) and the "filter passed"
# marker are replaced by their 4.0 counterparts. When a line contains an
# indel, REF and ALT are rebuilt; for deletions the reference bases are read
# from the fasta file given by -r.
#
# Calls error() for an unsupported output version, for deletions without -r,
# when the reference does not agree with the VCF, and for event types other
# than 's', 'r' and 'i'.
sub convert_file
{
    my ($opts) = @_;

    if ( $$opts{version} ne '4.0' ) { error("Sorry, 4.0 is currently the only supported output version.\n"); }

    # How to recognise a number. The sign of the exponent is optional, so that
    # values such as 1e5 are compared numerically as well.
    my $FLOAT_RE = qr/^\-?\d+\.?\d*(?:[eE][+-]?\d+)?$/;

    my $vcf_in = Vcf->new(fh=>\*STDIN);
    my $vcf_out = Vcf->new(version=>'4.0');

    # Convert the header. The first input header line is not copied
    # (presumably the fileformat line of the input version - TODO confirm).
    $vcf_in->parse_header();
    for (my $i=1; $i<@{$$vcf_in{header_lines}}; $i++)
    {
        $vcf_out->add_header_line($$vcf_in{header_lines}[$i]);
    }
    $vcf_out->add_columns(@{$$vcf_in{columns}});
    print $vcf_out->format_header();

    # Convert each data line. The fasta reader is created on demand, only
    # when the first deletion is encountered.
    my $fa;
    while (my $x=$vcf_in->next_data_hash())
    {
        # Convert missing (default) FORMAT values
        for my $gt (values %{$$x{gtypes}})
        {
            for my $field (@{$$x{FORMAT}})
            {
                # Skip the GT tag, so that ploidy information is not lost ("./." would become ".")
                if ( $field eq 'GT' ) { next; }
                if ( $field eq 'FT' && $$gt{$field} eq $$vcf_in{filter_passed} ) { $$gt{$field}=$$vcf_out{filter_passed}; }
                if ( exists($$vcf_in{defaults}{$field}) && $$vcf_in{defaults}{$field} eq $$gt{$field} )
                {
                    $$gt{$field} = $$vcf_out{defaults}{$field};
                    next;
                }
                if ( exists($$vcf_in{header}{FORMAT}{$field}{default}) && $$vcf_in{header}{FORMAT}{$field}{default} eq $$gt{$field} )
                {
                    delete($$gt{$field});
                    next;
                }
            }
        }


        # Change missing QUAL: In case they are numbers, do numeric comparison, as -1.0 is sometimes used instead of -1
        if ( $$x{QUAL} eq $$vcf_in{defaults}{QUAL}
            or ($$x{QUAL}=~$FLOAT_RE && $$vcf_in{defaults}{QUAL}=~$FLOAT_RE && $$x{QUAL}==$$vcf_in{defaults}{QUAL}) )
        {
            $$x{QUAL} = $$vcf_out{defaults}{QUAL};
        }
        for (my $i=0; $i<@{$$x{FILTER}}; $i++)
        {
            if ( $$x{FILTER}[$i] eq $$vcf_in{filter_passed} ) { $$x{FILTER}[$i] = $$vcf_out{filter_passed}; }
        }

        # Parse the ALT column and see if there are indels
        my $has_indel = 0;
        for my $alt (@{$$x{ALT}})
        {
            my ($type,$len,$ht) = $vcf_in->event_type($x,$alt);
            if ( $type eq 's' or $type eq 'r' ) { next; }
            if ( $type ne 'i' ) { error("FIXME: expected indel at $$x{CHROM}:$$x{POS}\n"); }

            $has_indel = 1;
        }

        # If there is an indel, new REF and ALT must be changed
        if ( $has_indel )
        {
            # $map collects the ref=>alt pairs handed to fill_ref_alt_mapping(),
            # $alt_to_mapref remembers which pair each original ALT belongs to.
            my $map = {};
            my $alt_to_mapref = {};

            for my $alt (@{$$x{ALT}})
            {
                my ($type,$len,$ht) = $vcf_in->event_type($x,$alt);
                if ( $type eq 's' or $type eq 'r' )
                {
                    $$alt_to_mapref{$alt} = { ref=>$$x{REF}, alt=>$alt };
                    $$map{$$x{REF}}{$alt} = 1;
                    next;
                }

                if ( $type eq 'i' && $len>0 )
                {
                    # Positive length: the alternate is REF followed by $ht
                    my $tmp = $$x{REF}.$ht;
                    $$alt_to_mapref{$alt} = { ref=>$$x{REF}, alt=>$tmp };
                    $$map{$$x{REF}}{$tmp} = 1;
                    next;
                }
                elsif ( $type eq 'i' && $len<0 )
                {
                    # Negative length: the longer reference allele must be
                    # read from the fasta file
                    if ( !$fa )
                    {
                        if ( !$$opts{refseq} ) { error("Indels present, missing the -r option.\n"); }
                        $fa = FaSlice->new(file=>$$opts{refseq},size=>1_000_000);
                    }
                    my $ref = $fa->get_slice($$x{CHROM},$$x{POS},$$x{POS}+abs($len));
                    my $first = substr($ref,0,1);

                    # Sanity check: $first comes from the fasta file, $$x{REF} from the VCF
                    if ( $$x{REF} ne $first )
                    {
                        error("Sanity check failed: the ref does not agree at $$x{CHROM}:$$x{POS} .. [$first] in .fa, [$$x{REF}] in .vcf\n");
                    }

                    $$alt_to_mapref{$alt} = { ref=>$ref, alt=>$$x{REF} };
                    $$map{$ref}{$$x{REF}} = 1;
                    next;
                }
                else
                {
                    error("Uh, FIXME: $$x{CHROM}:$$x{POS} [$type] [$len] [$ht]\n");
                }
            }

            # fill_ref_alt_mapping() returns the new REF; the new ALT of each
            # pair is then read back from $map.
            $$x{REF} = $vcf_out->fill_ref_alt_mapping($map);
            for (my $i=0; $i<@{$$x{ALT}}; $i++)
            {
                my $ori_ref = $$alt_to_mapref{$$x{ALT}[$i]}{ref};
                my $ori_alt = $$alt_to_mapref{$$x{ALT}[$i]}{alt};
                $$x{ALT}[$i] = $$map{$ori_ref}{$ori_alt};
            }
        }

        print $vcf_out->format_line($x);
    }
}
180
+
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Authors: Adam Auton, Petr Danecek
4
+ # (C) 2011
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+
10
# Parse the command line, then run the newline check or conversion.
my $opts = parse_params();
fix_file($opts);

exit(0);
14
+
15
+ #--------------------------------
16
+
17
# Report a failure or print the usage text. Never returns.
#   error(@msg) .. dies with the given message and a stack backtrace
#   error()     .. dies with the help text
sub error
{
    confess @_ if scalar @_;

    my @usage =
    (
        "About: Reads in a VCF file with any (commonly used) newline representation and outputs with the\n",
        " current system's newline representation.\n",
        "Usage: vcf-fix-newlines [OPTIONS]\n",
        "Options:\n",
        " -i, --info Report if the file is consistent with the current platform based.\n",
        " -h, -?, --help This help message.\n",
        "Example:\n",
        " vcf-fix-newlines -i file.vcf\n",
        " vcf-fix-newlines file.vcf.gz > out.vcf\n",
        " cat file.vcf | vcf-fix-newlines > out.vcf\n",
        "\n",
    );
    die join('',@usage);
}
34
+
35
# Parse @ARGV (consuming it) and return the options as a hash reference:
#   info .. 1 when -i/--info was given
#   vcf  .. the first argument which is an existing file
# Calls error() on anything else and for -h/-?/--help.
sub parse_params
{
    my %is_help = map { $_ => 1 } ('-?','-h','--help');
    my $opts = {};
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-i' || $arg eq '--info' )
        {
            $$opts{info} = 1;
        }
        elsif ( $is_help{$arg} )
        {
            error();
        }
        elsif ( !exists($$opts{vcf}) && -e $arg )
        {
            # Only one input file is accepted
            $$opts{vcf} = $arg;
        }
        else
        {
            error("Unknown parameter \"$arg\". Run -h for help.\n");
        }
    }
    return $opts;
}
47
+
48
# Detect the newline convention of the input and, unless only a report was
# requested, print the input to STDOUT with "\n" line endings.
#   $opts .. hash ref from parse_params():
#            vcf  .. input file, gunzipped on the fly when the name ends
#                    with .gz; STDIN is read when not given
#            info .. only report the detected convention, convert nothing
# The detected convention is reported via warn(). When the input already
# uses "\n", nothing is printed. Calls error() when the input cannot be
# opened, is empty, or the convention cannot be determined.
sub fix_file
{
    my ($opts) = @_;

    my $fh = \*STDIN;
    if ( $$opts{vcf} )
    {
        # Open on a fresh handle rather than re-opening STDIN itself
        my $in;
        if ( $$opts{vcf}=~/\.gz$/i )
        {
            # List form of the pipe open: the file name is passed to gunzip
            # as a single argument and is never interpreted by a shell.
            open($in,'-|','gunzip','-c',$$opts{vcf}) or error("gunzip -c $$opts{vcf}: $!\n");
        }
        else
        {
            open($in,'<',$$opts{vcf}) or error("$$opts{vcf}: $!\n");
        }
        $fh = $in;
    }

    # Read a small 1kb sample
    binmode $fh or error("binmode: $!");
    local $/ = \1024;
    my $buf = <$fh>;
    if ( !defined $buf ) { error("No data read.\n"); }

    # The sample may end in the middle of a CR LF pair; read one more byte so
    # that a Windows file is not mistaken for an Old Mac one.
    if ( substr($buf,-1) eq "\015" )
    {
        $/ = \1;
        my $byte = <$fh>;
        if ( defined $byte ) { $buf .= $byte; }
    }

    # Check the origin
    my ($in,$nl);
    if ( $buf=~/\015\012/ ) { $in = 'Windows'; $nl = "\015\012"; }
    elsif ( $buf=~/\015/ && !($buf=~/\012/) ) { $in = 'Old Mac'; $nl = "\015"; }
    elsif ( $buf=~/\012/ && !($buf=~/\015/) ) { $in = 'UNIX'; $nl = "\012"; }
    else
    {
        error("FIXME: Unable to determine the system which produced the file.\n");
    }
    if ( defined $in ) { warn("The file was generated on $in compatible system.\n"); }
    if ( $$opts{info} ) { close($fh); return; }
    # NOTE(review): in this case the input is not copied to the output at all,
    # so "cat file.vcf | vcf-fix-newlines > out.vcf" leaves out.vcf empty.
    if ( $nl eq "\n" ) { warn("No conversion needed.\n"); close($fh); return; }

    # Read the file and do the conversion, one original line at a time.
    # The sample usually ends in the middle of a line, complete it first;
    # there is nothing more to read when the whole input fit into the sample.
    $/ = $nl;
    my $rest = <$fh>;
    if ( defined $rest ) { $buf .= $rest; }
    $buf =~ s/\Q$nl\E/\n/g;
    print $buf;

    while (defined($buf = <$fh>))
    {
        $buf =~ s/\Q$nl\E/\n/g;
        print $buf;
    }
    close($fh);
}
97
+