ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,286 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+ use Vcf;
10
+
11
+ my $opts = parse_params();
12
+ read_data($opts);
13
+
14
+ exit;
15
+
16
+ #--------------------------------
17
+
18
+ sub error
19
+ {
20
+ my (@msg) = @_;
21
+ if ( scalar @msg ) { confess @msg; }
22
+ die
23
+ "Usage: query-vcf [OPTIONS] file.vcf.gz\n",
24
+ "Options:\n",
25
+ " -c, --columns <NA001,NA002,..> List of comma-separated column names.\n",
26
+ " -f, --format <string> The default is '%CHROM:%POS\\t%REF[\\t%SAMPLE=%GT]\\n'\n",
27
+ " -l, --list-columns List columns.\n",
28
+ " -r, --region chr:from-to Retrieve the region. (Runs tabix.)\n",
29
+ " -h, -?, --help This help message.\n",
30
+ "Examples:\n",
31
+ " query-vcf file.vcf.gz 1:1000-2000 -c NA001,NA002,NA003\n",
32
+ " query-vcf file.vcf.gz -r 1:1000-2000 -f '%CHROM:%POS\\t%REF\\t%ALT[\\t%SAMPLE:%*=,]\\n'\n",
33
+ " query-vcf file.vcf.gz -f '[%GT\\t]%LINE\\n'\n",
34
+ " query-vcf file.vcf.gz -f '%CHROM\\t%POS\\t%INFO/DP\\t%FILTER\\n'\n",
35
+ "\n";
36
+ }
37
+
38
+
39
+ sub parse_params
40
+ {
41
+ my $opts = { columns=>'', format_string=>"%CHROM:%POS\t%REF[\t%SAMPLE=%GT]\n" };
42
+ while (my $arg=shift(@ARGV))
43
+ {
44
+ if ( $arg eq '-f' || $arg eq '--format' ) { $$opts{format_string}=shift(@ARGV); next }
45
+ if ( $arg eq '-c' || $arg eq '--columns' ) { $$opts{columns}=shift(@ARGV); next }
46
+ if ( $arg eq '-l' || $arg eq '--list-columns' ) { $$opts{list_columns}=1; next }
47
+ if ( $arg eq '-r' || $arg eq '--region' ) { $$opts{region}=shift(@ARGV); next }
48
+ if ( -e $arg or $arg=~m{^(?:ftp|http)://} ) { $$opts{file}=$arg; next; }
49
+ if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
50
+ if ( !exists($$opts{region}) && exists($$opts{file}) && ($arg=~/^[^:]+:[0-9,]+-[0-9,]+$/ or $arg=~/^[^\:]+$/) ) { $$opts{region}=$arg; next; }
51
+ error("Unknown parameter or non-existent file \"$arg\". Run -h for help.\n");
52
+ }
53
+ if ( !exists($$opts{file}) && exists($$opts{region}) ) { error("The region cannot be used when streaming the file.\n"); }
54
+ return $opts;
55
+ }
56
+
57
+ sub parse_format_string
58
+ {
59
+ my ($str,$hash) = @_;
60
+ my (@arr,%idx,$join1,$join2);
61
+ $str =~ s/\\n/\n/g;
62
+ $str =~ s/\\t/\t/g;
63
+ while ($str)
64
+ {
65
+ if ( !($str=~/%/) )
66
+ {
67
+ push @arr,$str;
68
+ last;
69
+ }
70
+
71
+ my $before = $`;
72
+ $str = $';
73
+
74
+ my $match;
75
+ if ( $str=~/^[*](.)(.)/ )
76
+ {
77
+ $match = '*'; $join1=$1; $join2=$2;
78
+ }
79
+ elsif ( $str=~m{([A-Za-z0-9/]+)} )
80
+ {
81
+ $match = $1;
82
+ }
83
+ else { error("FIXME: $str"); }
84
+
85
+ if ( $before ) { push @arr,$before; }
86
+ push @arr,'.'; # If the tag is not present in the VCF, a missing value ('.') will be printed instead.
87
+ $idx{$match} = $#arr;
88
+ $str = $';
89
+ }
90
+ $$hash{format} = \@arr;
91
+ $$hash{idx} = \%idx;
92
+ $$hash{join1} = $join1;
93
+ $$hash{join2} = $join2;
94
+ }
95
+
96
+ sub parse_format
97
+ {
98
+ my ($opts,$cols) = @_;
99
+
100
+ $$opts{before} = {};
101
+ $$opts{repeat} = {};
102
+ $$opts{after} = {};
103
+
104
+ my ($before,$repeat,$after);
105
+
106
+ my $str = $$opts{format_string};
107
+ $before = $str;
108
+
109
+ if ( $str=~/\[([^\]]+)\]/ )
110
+ {
111
+ $before = $`;
112
+ $repeat = $1;
113
+ $after = $';
114
+ }
115
+ if ( $before ) { parse_format_string($before,$$opts{before}); }
116
+ if ( $repeat ) { parse_format_string($repeat,$$opts{repeat}); }
117
+ if ( $after ) { parse_format_string($after,$$opts{after}); }
118
+ }
119
+
120
+ sub copy_array
121
+ {
122
+ my ($arr) = @_;
123
+ my @out;
124
+ for my $item (@$arr) { push @out,$item; }
125
+ return @out;
126
+ }
127
+
128
+ sub get_columns
129
+ {
130
+ my ($vcf) = @_;
131
+ my @cols = ();
132
+ my $ncols = @{$$vcf{columns}};
133
+ for (my $i=9; $i<$ncols; $i++)
134
+ {
135
+ push @cols, $$vcf{columns}[$i];
136
+ }
137
+ return \@cols;
138
+ }
139
+
140
+ sub list_columns
141
+ {
142
+ my ($opts) = @_;
143
+ my $cols = get_columns($$opts{vcf});
144
+ for my $col (@$cols) { print "$col\n"; }
145
+ }
146
+
147
+ sub read_data
148
+ {
149
+ my ($opts) = @_;
150
+
151
+ my %args = ( print_header=>1 );
152
+ if ( $$opts{region} ) { $args{region} = $$opts{region}; }
153
+ if ( exists($$opts{file}) ) { $args{file} = $$opts{file}; }
154
+ else { $args{fh} = \*STDIN; }
155
+
156
+ my $vcf = Vcf->new(%args);
157
+ $$opts{vcf} = $vcf;
158
+ $vcf->parse_header();
159
+
160
+ if ( $$opts{list_columns} ) { list_columns($opts); exit; }
161
+
162
+ my @cols = split(/,/,$$opts{columns});
163
+ if ( !@cols ) { @cols = @{get_columns($$opts{vcf})}; }
164
+
165
+
166
+ # The hash opts will be filled with the keys 'before','repeat','after' with formatting information
167
+ parse_format($opts);
168
+
169
+ while (my $line=$vcf->next_line())
170
+ {
171
+ my $x=$vcf->next_data_hash($line);
172
+
173
+ # Fill everything what comes before the repeat []
174
+ # Code repetition and not very nice, should be changed at some point...
175
+ if ( $$opts{before} )
176
+ {
177
+ my (@out) = copy_array($$opts{before}{format});
178
+
179
+ while (my ($colname,$idx) = each %{$$opts{before}{idx}})
180
+ {
181
+ if ( $colname eq 'LINE' ) { chomp($line); $out[$idx] = "$line"; next; }
182
+ if ( $colname eq 'ALT' ) { $out[$idx] = join(',',@{$$x{ALT}}); next; }
183
+ if ( $colname eq 'FILTER' ) { $out[$idx] = join(';',@{$$x{FILTER}}); next; }
184
+
185
+ if ( $colname=~m{INFO/(.+)} )
186
+ {
187
+ if ( exists($$x{INFO}{$1}) && !defined($$x{INFO}{$1}) )
188
+ {
189
+ # It is a flag
190
+ $out[$idx] = 'True';
191
+ }
192
+ else
193
+ {
194
+ $out[$idx] = $$x{INFO}{$1};
195
+ }
196
+ next;
197
+ }
198
+ if ( exists($$x{$colname}) ) { $out[$idx] = $$x{$colname}; }
199
+ }
200
+ for (my $i=0; $i<@out; $i++) {
201
+ if (!defined($out[$i])) { $out[$i]='.'; }
202
+ if( 'HASH' eq ref $out[$i] ) {
203
+ my $info_str = "{";
204
+ while( my ($k, $v) = each %{$out[$i]} ) {
205
+ if ($info_str ne "{") {$info_str .= ',';}
206
+ $info_str .= '"' . $k . '"' . ":" . '"' . $v . '"';
207
+ #print "key: $k, value: $v.\n";
208
+ }
209
+ $out[$i] = $info_str . "}";
210
+ }
211
+ }
212
+ print join('',@out);
213
+ }
214
+
215
+ # Fill the repeaty stuff (the sample columns)
216
+ if ( $$opts{repeat} )
217
+ {
218
+ for my $col (@cols)
219
+ {
220
+ my ($alleles,$seps,$is_phased,$is_empty) = $vcf->parse_haplotype($x,$col);
221
+ my (@out) = copy_array($$opts{repeat}{format});
222
+ while (my ($colname,$idx) = each %{$$opts{repeat}{idx}})
223
+ {
224
+ if ( exists($$x{gtypes}{$col}{$colname}) ) { $out[$idx] = $$x{gtypes}{$col}{$colname}; }
225
+ elsif ( exists($$x{$colname}) ) { $out[$idx] = $$x{$colname}; }
226
+ }
227
+ if ( exists($$opts{repeat}{idx}{SAMPLE}) ) { $out[$$opts{repeat}{idx}{SAMPLE}] = $col; }
228
+ if ( exists($$opts{repeat}{idx}{GT}) )
229
+ {
230
+ my $tmp = $$alleles[0];
231
+ for (my $i=0; $i<@$seps; $i++) { $tmp .= $$seps[$i].$$alleles[$i+1]; }
232
+ $out[$$opts{repeat}{idx}{GT}] = $tmp;
233
+ }
234
+ if ( exists($$opts{repeat}{idx}{'*'}) )
235
+ {
236
+ my $sep1 = $$opts{repeat}{join1};
237
+ my $sep2 = $$opts{repeat}{join2};
238
+ my @tmp;
239
+ while (my ($key,$value)=each(%{$$x{gtypes}{$col}}))
240
+ {
241
+ if ( $key eq 'GT' )
242
+ {
243
+ $value = $$alleles[0];
244
+ for (my $i=0; $i<@$seps; $i++) { $value .= $$seps[$i].$$alleles[$i+1]; }
245
+ }
246
+ push @tmp, $key.$sep1.$value;
247
+ }
248
+ my $idx = $$opts{repeat}{idx}{'*'};
249
+ $out[$idx] = join($sep2,@tmp);
250
+ }
251
+ for (my $i=0; $i<@out; $i++) { if (!defined($out[$i])) { $out[$i]='.'; } }
252
+ print join('',@out);
253
+ }
254
+ }
255
+
256
+ # Fill everything what comes after the repeat ([])
257
+ if ( $$opts{after} )
258
+ {
259
+ my (@out) = copy_array($$opts{after}{format});
260
+ while (my ($colname,$idx) = each %{$$opts{after}{idx}})
261
+ {
262
+ if ( $colname eq 'LINE' ) { chomp($line); $out[$idx] = $line; next; }
263
+ if ( $colname eq 'ALT' ) { $out[$idx] = join(',',@{$$x{ALT}}); next; }
264
+ if ( $colname eq 'FILTER' ) { $out[$idx] = join(';',@{$$x{FILTER}}); next; }
265
+ if ( $colname=~m{INFO/(.+)} )
266
+ {
267
+ if ( exists($$x{INFO}{$1}) && !defined($$x{INFO}{$1}) )
268
+ {
269
+ # It is a flag
270
+ $out[$idx] = 'True';
271
+ }
272
+ else
273
+ {
274
+ $out[$idx] = $$x{INFO}{$1};
275
+ }
276
+ next;
277
+ }
278
+ if ( exists($$x{$colname}) ) { $out[$idx] = $$x{$colname}; }
279
+ }
280
+ for (my $i=0; $i<@out; $i++) { if (!defined($out[$i])) { $out[$i]='.'; } }
281
+ print join('',@out);
282
+ }
283
+ }
284
+ }
285
+
286
+
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+ use Vcf;
10
+
11
+ my $opts = parse_params();
12
+ concat($opts);
13
+
14
+ exit;
15
+
16
+ #--------------------------------
17
+
18
+ sub error
19
+ {
20
+ my (@msg) = @_;
21
+ if ( scalar @msg )
22
+ {
23
+ croak @msg;
24
+ }
25
+ die
26
+ "About: Reorder columns to match the order in the template VCF.\n",
27
+ "Usage: vcf-shuffle-cols [OPTIONS] -t template.vcf.gz file.vcf.gz > out.vcf\n",
28
+ "Options:\n",
29
+ " -t, --template <file> The file with the correct order of the columns.\n",
30
+ " -h, -?, --help This help message.\n",
31
+ "\n";
32
+ }
33
+
34
+ sub parse_params
35
+ {
36
+ my $opts = {};
37
+ while (my $arg=shift(@ARGV))
38
+ {
39
+ if ( $arg eq '-t' || $arg eq '--template' ) { $$opts{template}=shift(@ARGV); next; }
40
+ if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
41
+ if ( -e $arg ) { $$opts{file}=$arg; next }
42
+ error("Unknown parameter \"$arg\". Run -h for help.\n");
43
+ }
44
+ if ( !exists($$opts{template}) ) { error("Missing the -t option.\n"); }
45
+ return $opts;
46
+ }
47
+
48
+ sub concat
49
+ {
50
+ my ($opts) = @_;
51
+ my $tmpl = Vcf->new(file=>$$opts{template});
52
+ $tmpl->parse_header();
53
+ $tmpl->close();
54
+
55
+ my $vcf = $$opts{file} ? Vcf->new(file=>$$opts{file}) : Vcf->new(fh=>\*STDIN);
56
+ $vcf->parse_header();
57
+
58
+ # Check if one-to-one correspondence can be found and create a mapping
59
+ my @new_to_old = ();
60
+ for my $tcol (@{$$tmpl{columns}})
61
+ {
62
+ if ( !exists($$vcf{has_column}{$tcol}) ) { error("TODO: the column names do not match\n"); }
63
+ }
64
+ for my $vcol (@{$$vcf{columns}})
65
+ {
66
+ if ( !exists($$tmpl{has_column}{$vcol}) ) { error("TODO: the column names do not match\n"); }
67
+ my $new = $$tmpl{has_column}{$vcol} - 1;
68
+ my $old = $$vcf{has_column}{$vcol} - 1;
69
+ $new_to_old[$new] = $old;
70
+ }
71
+
72
+
73
+ # Output the header with modified column order
74
+ my $ncols = @{$$tmpl{columns}} - 1;
75
+ my @cols = @{$$tmpl{columns}}[9..$ncols];
76
+ print $vcf->format_header(\@cols);
77
+
78
+ while (my $x=$vcf->next_data_array())
79
+ {
80
+ print $$x[0];
81
+ for (my $i=1; $i<=$ncols; $i++)
82
+ {
83
+ my $idx = $new_to_old[$i];
84
+ print "\t".$$x[$idx];
85
+ }
86
+ print "\n";
87
+ }
88
+ }
89
+
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+
10
+ my $opts = parse_params();
11
+ sort_vcf($opts);
12
+
13
+ exit;
14
+
15
+ #--------------------------------
16
+
17
+ sub error
18
+ {
19
+ my (@msg) = @_;
20
+ if ( scalar @msg )
21
+ {
22
+ croak @msg;
23
+ }
24
+ die
25
+ "Usage: vcf-sort > out.vcf\n",
26
+ " cat file.vcf | vcf-sort > out.vcf\n",
27
+ "Options:\n",
28
+ " -h, -?, --help This help message.\n",
29
+ "\n";
30
+ }
31
+
32
+ sub parse_params
33
+ {
34
+ my $opts = {};
35
+ while (my $arg=shift(@ARGV))
36
+ {
37
+ if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
38
+ if ( -e $arg ) { $$opts{file}=$arg; next }
39
+ error("Unknown parameter \"$arg\". Run -h for help.\n");
40
+ }
41
+ return $opts;
42
+ }
43
+
44
+ sub sort_vcf
45
+ {
46
+ my ($opts) = @_;
47
+
48
+ my $fh;
49
+ if ( exists($$opts{file}) )
50
+ {
51
+ if ( $$opts{file}=~/\.gz$/i )
52
+ {
53
+ open($fh,"gunzip -c $$opts{file} |") or error("$$opts{file}: $!");
54
+ }
55
+ else
56
+ {
57
+ open($fh,'<',$$opts{file}) or error("$$opts{file}: $!");
58
+ }
59
+ }
60
+ else { $fh = *STDIN; }
61
+
62
+ my $cmd = 'sort -k 1,1d -k 2,2n';
63
+ open(my $sort_fh,"| $cmd") or error("$cmd: $!");
64
+
65
+ my $unflushed = select(STDOUT);
66
+ $| = 1;
67
+ while (my $line=<$fh>)
68
+ {
69
+ if ( $line=~/^#/ ) { print $line; next; }
70
+ print $sort_fh $line;
71
+ last;
72
+ }
73
+ select($unflushed);
74
+ while (my $line=<$fh>)
75
+ {
76
+ print $sort_fh $line;
77
+ }
78
+ }
79
+
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+ use VcfStats;
10
+
11
+ my $opts = parse_params();
12
+ vcf_stats($opts);
13
+
14
+ exit;
15
+
16
+ #--------------------------------
17
+
18
+ sub error
19
+ {
20
+ my (@msg) = @_;
21
+ if ( scalar @msg )
22
+ {
23
+ croak @msg;
24
+ }
25
+ die
26
+ "Usage: vcf-stats [OPTIONS] file.vcf.gz\n",
27
+ "Options:\n",
28
+ " -d, --dump <file> Take an existing dump file and recreate the files (works with -p)\n",
29
+ " -f, --filters <filter1,filter2> List of filters such as column/field (any value), column/field=bin:max (cluster in bins),column/field=value (exact value)\n",
30
+ " -p, --prefix <dir/string> Prefix of output files. If slashes are present, directories will be created.\n",
31
+ " -s, --samples <list> Process only the listed samples, - for none. Excluding unwanted samples may increase performance considerably.\n",
32
+ " -h, -?, --help This help message.\n",
33
+ "\n",
34
+ "Examples:\n",
35
+ " # Calculate stats separately for the filter field, quality and non-indels\n",
36
+ " vcf-stats file.vcf.gz -f FILTER,QUAL=10:200,INFO/INDEL=False -p out/\n",
37
+ "\n",
38
+ " # Calculate stats for all samples\n",
39
+ " vcf-stats file.vcf.gz -f FORMAT/DP=10:200 -p out/\n",
40
+ "\n",
41
+ " # Calculate stats only for the sample NA00001\n",
42
+ " vcf-stats file.vcf.gz -f SAMPLE/NA00001/DP=1:200 -p out/\n",
43
+ "\n",
44
+ " vcf-stats file.vcf.gz > perl.dump\n",
45
+ "\n";
46
+ }
47
+
48
+
49
+ sub parse_params
50
+ {
51
+ my $opts = { filters=>{}, filter_param=>'' };
52
+ while (my $arg=shift(@ARGV))
53
+ {
54
+ if ( $arg eq '-d' || $arg eq '--dump' ) { $$opts{dump}=shift(@ARGV); next; }
55
+ if ( $arg eq '-f' || $arg eq '--filters' ) { $$opts{filter_param}=shift(@ARGV); next; }
56
+ if ( $arg eq '-p' || $arg eq '--prefix' ) { $$opts{prefix}=shift(@ARGV); next; }
57
+ if ( $arg eq '-s' || $arg eq '--samples' )
58
+ {
59
+ my $samples = shift(@ARGV);
60
+ $$opts{samples} = [ split(/,/,$samples) ];
61
+ next;
62
+ }
63
+ if ( -e $arg ) { $$opts{file} = $arg; next }
64
+ if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
65
+ error("Unknown parameter or nonexistent file: \"$arg\". Run -h for help.\n");
66
+ }
67
+ if ( exists($$opts{dump}) && !exists($$opts{prefix}) ) { error("Expected -p option with -d.\n"); }
68
+ return $opts;
69
+ }
70
+
71
+
72
+ sub init_filters
73
+ {
74
+ my ($opts,$vcf) = @_;
75
+
76
+ for my $filter (split(/,/,$$opts{filter_param}))
77
+ {
78
+ my ($key,$value) = split(/=/,$filter);
79
+
80
+ my $rec = { value=>$value, exact=>0, any=>0, bin=>0, is_flag=>0 };
81
+ if ( $key=~m{^INFO/} )
82
+ {
83
+ my $tag = $';
84
+ $$rec{tag} = $tag;
85
+ if ( exists($$vcf{header}{'INFO'}) && exists($$vcf{header}{'INFO'}{$tag}) && $$vcf{header}{'INFO'}{$tag}{Type} eq 'Flag' )
86
+ {
87
+ $$rec{is_flag} = 1;
88
+ $$rec{value} = $value eq 'False' ? 0 : 1;
89
+ $key = "INFO/$tag=". ($$rec{value} ? 'True':'False');
90
+ }
91
+ }
92
+ elsif ( $key eq 'INFO' )
93
+ {
94
+ # All INFO flags should be counted
95
+ for my $tag (keys %{$$vcf{header}{'INFO'}})
96
+ {
97
+ if ( $$vcf{header}{'INFO'}{$tag}{Type} ne 'Flag' ) { next; }
98
+ $$opts{filters}{"INFO/$tag=True"} = { %$rec, is_flag=>1, value=>1, tag=>$tag };
99
+ }
100
+ next;
101
+ }
102
+
103
+ if ( ! defined $value )
104
+ {
105
+ $$rec{any} = 1;
106
+ }
107
+ elsif ( $value=~/^(.+):(.+)$/ )
108
+ {
109
+ $$rec{bin} = 1;
110
+ $$rec{bin_size} = $1;
111
+ $$rec{max} = $2;
112
+ }
113
+ else
114
+ {
115
+ $$rec{exact} = 1;
116
+ }
117
+ $$opts{filters}{$key} = $rec;
118
+ }
119
+ }
120
+
121
+
122
+ sub vcf_stats
123
+ {
124
+ my ($opts) = @_;
125
+
126
+ if ( exists($$opts{dump}) )
127
+ {
128
+ # Use existing dump to recreate the files
129
+ my $vcf = VcfStats->new(file=>'/dev/null');
130
+ $$vcf{stats} = do $$opts{dump};
131
+ $vcf->save_stats($$opts{prefix});
132
+ return;
133
+ }
134
+
135
+ # Open the VCF file
136
+ my $vcf = $$opts{file} ? VcfStats->new(file=>$$opts{file}) : VcfStats->new(fh=>\*STDIN);
137
+ $vcf->parse_header();
138
+ init_filters($opts,$vcf);
139
+
140
+ # Include only requested samples
141
+ if ( exists $$opts{samples} )
142
+ {
143
+ my @include = ();
144
+ if ( scalar @{$$opts{samples}}>1 or $$opts{samples}[0] ne '-' )
145
+ {
146
+ for my $sample (@{$$opts{samples}}) { push @include,$sample; }
147
+ }
148
+ $vcf->set_samples(include=>\@include);
149
+ }
150
+
151
+ while (my $rec=$vcf->next_data_hash())
152
+ {
153
+ $vcf->collect_stats($rec,$$opts{filters});
154
+ }
155
+
156
+ $vcf->save_stats($$opts{prefix});
157
+ }
158
+
159
+
160
+