ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Notes:
4
+ # * The AA files can be downloaded from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments
5
+ # * The program runs samtools, therefore the AA files must be gzipped (not bzip2-compressed).
6
+ #
7
+ # support: pd3@sanger
8
+
9
+ use strict;
10
+ use warnings;
11
+ use Carp;
12
+ use Vcf;
13
+ use FindBin;
14
+ use lib "$FindBin::Bin";
15
+ use FaSlice;
16
+
17
+ my $opts = parse_params();
18
+ fill_aa($opts,$$opts{aa_file});
19
+
20
+ exit;
21
+
22
+ #--------------------------------
23
+
24
# Terminate the program.
#   error(@msg) .. raise @msg through Carp::confess (message plus stack trace)
#   error()     .. die with the usage text of fill-aa
sub error
{
    confess @_ if @_;

    my @usage =
    (
        "Usage: fill-aa [OPTIONS] < in.vcf >out.vcf\n",
        "Options:\n",
        " -a, --ancestral-allele <prefix> Prefix to ancestral allele chromosome files.\n",
        " -h, -?, --help This help message.\n",
        "Example:\n",
        " (zcat file.vcf | grep ^#; zcat file.vcf | grep -v ^# | sort -k 1,1d -k 2,2n;) | fill-aa -a human_ancestor_ 2>test.err | gzip -c >out.vcf.gz \n",
        "\n",
    );
    die join('', @usage);
}
37
+
38
+
39
# Parse @ARGV (consumed destructively) into an options hash.
# Returns a hash reference with the key:
#   aa_file .. value of -a/--ancestral-allele (mandatory)
# Calls error() on -h/-?/--help, on unknown parameters, on a missing -a
# option and on -a given without a value.
sub parse_params
{
    my $opts = {};

    # Test definedness rather than truth, so that an argument such as "0"
    # or "" is reported as unknown instead of silently ending the parsing.
    while ( defined(my $arg = shift(@ARGV)) )
    {
        if ( $arg eq '-a' || $arg eq '--ancestral-allele' )
        {
            # Without this check a trailing -a would store undef and still
            # satisfy the exists() test below.
            if ( !@ARGV ) { error("Missing value for the $arg option.\n"); }
            $$opts{aa_file} = shift(@ARGV);
            next;
        }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    if ( !exists($$opts{aa_file}) ) { error("Missing the -a option.\n") }
    return $opts;
}
51
+
52
+
53
# Read a VCF from STDIN and print it to STDOUT with the INFO tag AA set on
# every record; a summary of filled/unknown counts goes to STDERR.
#   $opts     .. options hash (not used here)
#   $aa_fname .. either an existing fasta file, or a prefix to which
#                "<chromosome>.fa.gz" is appended for each record
sub fill_aa
{
    my ($opts,$aa_fname) = @_;

    my ($n_unknown,$n_filled) = (0,0);

    my $vcf = Vcf->new(fh=>\*STDIN);
    $vcf->parse_header();
    my %aa_header =
    (
        key         => 'INFO',
        ID          => 'AA',
        Number      => 1,
        Type        => 'String',
        Description => 'Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README',
    );
    $vcf->add_header_line(\%aa_header);
    print $vcf->format_header();

    my $fa;
    while (my $rec = $vcf->next_data_hash())
    {
        my ($chr,$pos) = @$rec{qw(CHROM POS)};

        # Use the name as given when it is a file, otherwise fall back to
        # the per-chromosome file.
        my $fname = $aa_fname;
        if ( ! -e $fname )
        {
            if ( ! -e "$fname$chr.fa.gz" ) { error(qq[Neither "$fname" nor "$fname$chr.fa.gz" exists.\n]); }
            $fname = "$fname$chr.fa.gz";
        }

        # Open a new reader only when the file differs from the previous record's.
        unless ( $fa and $$fa{file} eq $fname )
        {
            $fa = FaSlice->new(file=>$fname, size=>100_000);
        }

        my $aa   = $fa->get_base($chr,$pos);
        my $info = $$vcf{columns}[7];
        if ( !$aa )
        {
            $$rec{$info}{AA} = '.';
            $n_unknown++;
        }
        else
        {
            $$rec{$info}{AA} = $aa;
            $n_filled++;
        }
        print $vcf->format_line($rec);
    }

    print STDERR
        "No AAs .. $n_unknown\n",
        "AAs filled .. $n_filled\n";
}
102
+
103
+
@@ -0,0 +1,56 @@
1
+ #!/usr/bin/env perl
2
+
3
+ use strict;
4
+ use warnings;
5
+ use Carp;
6
+ use Vcf;
7
+
8
+ my $opts = parse_params();
9
+ fill_an_ac($$opts{file});
10
+
11
+ exit;
12
+
13
+ #--------------------------------
14
+
15
# Terminate the program.
#   error(@msg) .. raise @msg through Carp::confess (message plus stack trace)
#   error()     .. die with the usage text of fill-an-ac
sub error
{
    confess @_ if @_;

    my @usage =
    (
        "Usage: fill-an-ac [OPTIONS] < in.vcf >out.vcf\n",
        "Options:\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
    die join('', @usage);
}
25
+
26
+
27
# Parse @ARGV (consumed destructively) into an options hash.
# Returns a hash reference with the optional key:
#   file .. the last argument which names an existing file
# Calls error() on -h/-?/--help and on any other argument.
sub parse_params
{
    my $opts = {};

    # Test definedness rather than truth: a false argument ("0" or "") used
    # to end the loop and silently drop every argument after it.
    while ( defined(my $arg = shift(@ARGV)) )
    {
        if ( -e $arg ) { $$opts{file} = $arg; next }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    return $opts;
}
38
+
39
+
40
# Print the VCF (from $file, or from STDIN when $file is false) to STDOUT
# with INFO header lines for AC and AN added and recalc_ac_an(2) enabled
# on the reader.
sub fill_an_ac
{
    my ($file) = @_;

    my @source = $file ? (file=>$file) : (fh=>\*STDIN);
    my $vcf    = Vcf->new(@source);
    $vcf->parse_header();

    my @headers =
    (
        {key=>'INFO',ID=>'AC',Number=>-1,Type=>'Integer',Description=>'Allele count in genotypes'},
        {key=>'INFO',ID=>'AN',Number=>1,Type=>'Integer',Description=>'Total number of alleles in called genotypes'},
    );
    for my $header (@headers)
    {
        $vcf->add_header_line($header);
    }
    print $vcf->format_header();
    $vcf->recalc_ac_an(2);

    while (my $rec = $vcf->next_data_hash())
    {
        my $formatted = $vcf->format_line($rec);
        print $formatted;
    }
}
56
+
@@ -0,0 +1,204 @@
1
+ #!/usr/bin/env perl
2
+
3
+ use strict;
4
+ use warnings;
5
+ use Carp;
6
+ use IPC::Open2;
7
+ use Vcf;
8
+
9
+ my $opts = parse_params();
10
+ fill_ref_md5($opts);
11
+
12
+ exit;
13
+
14
+ #--------------------------------
15
+
16
# Terminate the program.
#   error(@msg) .. raise @msg through Carp::confess (message plus stack trace)
#   error()     .. die with the about/usage text of fill-ref-md5
sub error
{
    confess @_ if @_;

    my @usage =
    (
        "About: The script computes MD5 sum of the reference sequence and inserts\n",
        " 'reference' and 'contig' tags into header as recommended by VCFv4.1.\n",
        " The VCF file must be compressed and tabix indexed, as it takes advantage\n",
        " of the lightning fast tabix reheader functionality.\n",
        "Usage: fill-ref-md5 [OPTIONS] in.vcf.gz out.vcf.gz\n",
        "Options:\n",
        " -d, --dictionary <file> Where to read/write computed MD5s. Opened in append mode, existing records are not touched.\n",
        " -i, --info <AS:xx,SP:xx,TX:xx> Optional info on reference assembly (AS), species (SP), taxonomy (TX)\n",
        " -r, --refseq <file> The reference sequence in fasta format indexed by samtools faidx\n",
        " -h, -?, --help This help message.\n",
        "Examples:\n",
        " fill-ref-md5 -i AS:NCBIM37,SP:\"Mus\\ Musculus\" -r NCBIM37_um.fa -d NCBIM37_um.fa.dict in.vcf.gz out.vcf.gz\n",
        "\n",
    );
    die join('', @usage);
}
35
+
36
+
37
# Parse @ARGV (consumed destructively) into an options hash.
# Returns a hash reference with the keys:
#   info       .. value of -i/--info (optional)
#   refseq     .. value of -r/--refseq     \ at least one of the two
#   dictionary .. value of -d/--dictionary / must be given
#   file       .. first argument naming an existing file (input VCF)
#   outfile    .. first free argument after the input VCF (output VCF)
# Calls error() on -h/-?/--help, on an option without a value, on unknown
# parameters and when a mandatory argument is missing.
sub parse_params
{
    # Options which consume the following argument, mapped to their hash key.
    my %takes_value =
    (
        '-i' => 'info',       '--info'       => 'info',
        '-r' => 'refseq',     '--refseq'     => 'refseq',
        '-d' => 'dictionary', '--dictionary' => 'dictionary',
    );

    my $opts = {};

    # Test definedness rather than truth, so that an argument such as "0"
    # does not silently end the parsing.
    while ( defined(my $arg = shift(@ARGV)) )
    {
        if ( exists($takes_value{$arg}) )
        {
            # A trailing option would otherwise store undef and still pass
            # the exists() checks below.
            if ( !@ARGV ) { error("Missing value for the $arg option.\n"); }
            $$opts{$takes_value{$arg}} = shift(@ARGV);
            next;
        }

        # Help is recognised before the positional arguments, otherwise a
        # "-h" following the input file would be taken as the output name.
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }

        if ( -e $arg && !exists($$opts{file}) ) { $$opts{file} = $arg; next }
        if ( exists($$opts{file}) && !exists($$opts{outfile}) ) { $$opts{outfile} = $arg; next }
        error("Unknown parameter \"$arg\" or non-existent file. Run -h for help.\n");
    }
    if ( !exists($$opts{refseq}) && !exists($$opts{dictionary}) ) { error("Expected one of -d or -r options\n"); }
    if ( !exists($$opts{file}) ) { error("No input VCF file given.\n"); }
    if ( !exists($$opts{outfile}) ) { error("No output VCF file given.\n"); }
    return $opts;
}
55
+
56
# Read a sequence dictionary.
#   $dict .. file name; when false or non-existent, an empty hash is returned
# Returns a hash reference keyed by sequence name (SN); each value is a hash
# of the tag=>value pairs found on the corresponding @SQ line.
# The first line must be exactly "@HD\tVN:1.0\tSO:unsorted"; every following
# line must have five tab-separated fields, and lines not starting with @SQ
# are skipped. Calls error() on anything which does not parse.
sub read_dict
{
    my ($dict) = @_;

    my %seqs;
    return \%seqs unless $dict and -e $dict;

    open(my $fh,'<',$dict) or error("$dict: $!");

    my $line = <$fh>;
    error("Could not parse $dict: $line") unless $line eq "\@HD\tVN:1.0\tSO:unsorted\n";

    while ($line = <$fh>)
    {
        chomp($line);

        # @SQ SN:5 LN:152537259 UR:file:/lustre/scratch102/projects/mouse/ref/NCBIM37_um.fa M5:f90804fb8fe9cb06076d51a710fb4563
        my ($tag,@items) = split(/\t/,$line);
        error("Could not parse $dict: $line") if @items != 4;
        next if $tag ne '@SQ';

        my %rec;
        for my $item (@items)
        {
            if ( $item !~ /^([^:]+):(.+)$/ ) { error("Could not parse $dict: [$item] [$line]"); }
            $rec{$1} = $2;
        }
        error("No SN in [$dict] [$line]?") unless exists($rec{SN});
        $seqs{$rec{SN}} = \%rec;
    }
    close($fh);

    return \%seqs;
}
88
+
89
# Compute the length and MD5 of one reference sequence and add it to the
# dictionary.
#   $opts .. options hash; $$opts{refseq} must be a fasta file readable by
#            "samtools faidx"
#   $dict .. dictionary hash, modified in place: $$dict{$chr} is set to
#            { dirty, SN, LN, UR, M5 } and the top-level 'dirty' flag is set
#   $chr  .. sequence name
# The MD5 is taken over the sequence lines with the line ends removed.
# Calls error() when no reference is given, when samtools cannot be run or
# fails, or when the sequence is empty or absent.
sub add_to_dictionary
{
    my ($opts,$dict,$chr) = @_;
    if ( !exists($$opts{refseq}) ) { error("The chromosome [$chr] not present in the dictionary and no reference sequence given.\n"); }

    # The core Digest::MD5 gives the same hex digest as piping the data
    # through an external md5sum, without a second child process to feed,
    # read back and reap.
    require Digest::MD5;
    my $md5 = Digest::MD5->new();
    my $len = 0;

    # List-form pipe open: the file and sequence names never pass through a
    # shell, so spaces and metacharacters in them are harmless.
    my @cmd = ('samtools','faidx',$$opts{refseq},$chr);
    open(my $refseq,'-|',@cmd) or error("@cmd: $!");

    # get rid of the first ">$chr" line.
    my $header = <$refseq>;
    while (my $line=<$refseq>)
    {
        chomp($line);
        $md5->add($line);
        $len += length($line);
    }
    # close() on a pipe is false when the command exited with a failure.
    my $cmd_ok = close($refseq);

    if ( !$len ) { error("The sequence [$chr] not present in $$opts{refseq}\n"); }
    if ( !$cmd_ok ) { error("The command failed: @cmd\n"); }

    $$dict{$chr} = { dirty=>1, SN=>$chr, LN=>$len, UR=>'file://'.$$opts{refseq}, M5=>$md5->hexdigest() };
    $$dict{dirty} = 1;
}
121
+
122
# Append the newly computed records to the dictionary file.
#   $opts .. options hash; nothing is done unless $$opts{dictionary} exists
#   $dict .. dictionary hash; nothing is done unless $$dict{dirty} is true
# The "@HD" header line is written only when the file did not exist before.
# Only entries which are hashes flagged 'dirty' are written, sorted by key.
sub write_dictionary
{
    my ($opts,$dict) = @_;
    return unless $$dict{dirty} and exists($$opts{dictionary});

    my $needs_header = -e $$opts{dictionary} ? 0 : 1;

    open(my $fh,'>>',$$opts{dictionary}) or error("$$opts{dictionary}: $!");
    if ( $needs_header ) { print $fh "\@HD\tVN:1.0\tSO:unsorted\n"; }

    my @dirty_keys = grep { ref($$dict{$_}) eq 'HASH' and $$dict{$_}{dirty} } sort keys %$dict;
    for my $key (@dirty_keys)
    {
        my ($sn,$ln,$ur,$m5) = @{$$dict{$key}}{qw(SN LN UR M5)};
        print $fh "\@SQ\tSN:$sn\tLN:$ln\tUR:$ur\tM5:$m5\n";
    }
    close($fh);
}
142
+
143
# Write the new VCF header into "$$opts{outfile}.header".
#   $opts   .. options hash; reads 'file' (input VCF), 'outfile', and the
#              optional 'info' and 'refseq'
#   $dict   .. dictionary hash keyed by sequence name with SN, LN and M5
#   $chroms .. array reference of sequence names; one 'contig' line each
# The header of the input VCF is extended by a 'reference' line (only when
# a reference sequence was given) and by the 'contig' lines, which also
# carry the key:value pairs from --info.
# Calls error() when --info cannot be parsed or the file cannot be written.
sub write_header
{
    my ($opts,$dict,$chroms) = @_;

    # Short tags accepted by --info and the header keys they stand for.
    my %long_name = ( AS=>'assembly', SP=>'species', TX=>'taxonomy' );

    my %info;
    if ( exists($$opts{info}) )
    {
        for my $item (split(/,/,$$opts{info}))
        {
            # Limit the split to two fields so that a value which itself
            # contains ':' is kept whole. The key is translated after the
            # split, so $$opts{info} is left unmodified and text inside the
            # values is never rewritten.
            my ($key,$value) = split(/:/,$item,2);
            if ( !defined $value or $value eq '' ) { error("Could not parse the info: [$item] [$$opts{info}]"); }
            if ( exists($long_name{$key}) ) { $key = $long_name{$key}; }
            $info{$key} = $value;
        }
    }

    my $vcf = Vcf->new(file=>$$opts{file});
    $vcf->parse_header();

    # The reference is optional when a dictionary is given; do not emit an
    # empty "file:" reference line in that case.
    if ( defined $$opts{refseq} )
    {
        my $uri = $$opts{refseq}=~m{^[^/:]+:} ? '' : 'file:';
        $vcf->add_header_line({key=>'reference', value=>"$uri$$opts{refseq}"});
    }
    for my $chrom (@$chroms)
    {
        my %line =
        (
            key    => 'contig',
            ID     => $$dict{$chrom}{SN},
            length => $$dict{$chrom}{LN},
            md5    => $$dict{$chrom}{M5},
            %info
        );
        $vcf->add_header_line(\%line);
    }

    my $header_file = "$$opts{outfile}.header";
    open(my $out,'>',$header_file) or error("$header_file: $!");
    print $out $vcf->format_header();
    # Buffered write errors (e.g. a full disk) surface only at close.
    close($out) or error("$header_file: $!");
}
183
+
184
# The main routine: list the sequences of the input VCF, complete the
# dictionary for those which are missing, write the new header to
# "$$opts{outfile}.header" and let tabix replace the header, writing the
# result to $$opts{outfile}.
#   $opts .. options hash as returned by parse_params
# The external commands are run without a shell (list-form pipe opens), so
# file names with spaces or shell metacharacters are passed on verbatim.
# Calls error() when either tabix command fails or the output cannot be
# written.
sub fill_ref_md5
{
    my ($opts) = @_;

    # List chromosomes
    open(my $list,'-|','tabix','-l',$$opts{file}) or error("tabix -l $$opts{file}: $!");
    my @chroms = <$list>;
    chomp(@chroms);
    if ( !close($list) ) { error("The command failed: tabix -l $$opts{file}\n"); }

    # Read dictionary
    my $dict = read_dict($$opts{dictionary});
    for my $chr (@chroms)
    {
        if ( !exists($$dict{$chr}) ) { add_to_dictionary($opts,$dict,$chr); }
    }
    write_dictionary($opts,$dict);
    write_header($opts,$dict,\@chroms);

    # Reheader. The output is compressed data, hence the binary copy.
    my $header = "$$opts{outfile}.header";
    open(my $in,'-|','tabix','-r',$header,$$opts{file}) or error("tabix -r $header $$opts{file}: $!");
    open(my $out,'>',$$opts{outfile}) or error("$$opts{outfile}: $!");
    binmode($in);
    binmode($out);
    my $buffer;
    while ( read($in,$buffer,65536) )
    {
        print $out $buffer or error("$$opts{outfile}: $!");
    }
    # close() on a pipe is false when the command exited with a failure.
    if ( !close($in) ) { error("The command failed: tabix -r $header $$opts{file}\n"); }
    close($out) or error("$$opts{outfile}: $!");
}
204
+
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # Author: petr.danecek@sanger
4
+ #
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Carp;
9
+ use Vcf;
10
+ use FaSlice;
11
+
12
+ my $opts = parse_params();
13
+ tab_to_vcf();
14
+
15
+ exit;
16
+
17
+ #--------------------------------
18
+
19
# Terminate the program.
#   error(@msg) .. raise @msg through Carp::confess (message plus stack trace)
#   error()     .. die with the usage text of tab-to-vcf
sub error
{
    confess @_ if @_;

    my @usage =
    (
        "Usage: tab-to-vcf [OPTIONS]\n",
        "Options:\n",
        " -i, --id <string> The column ID.\n",
        " -r, --ref <fasta-file> The reference sequence (optional).\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
    die join('', @usage);
}
31
+
32
+
33
+ sub parse_params
34
+ {
35
+ my $opts = {};
36
+ while (my $arg=shift(@ARGV))
37
+ {
38
+ if ( $arg eq '-i' || $arg eq '--id' ) { $$opts{id} = shift(@ARGV); next }
39
+ if ( $arg eq '-r' || $arg eq '--ref' ) { $$opts{refseq} = shift(@ARGV); next }
40
+ if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
41
+ error("Unknown parameter \"$arg\". Run -h for help.\n");
42
+ }
43
+ if ( !exists($$opts{id}) ) { error("Missing the -i option.\n") }
44
+ return $opts;
45
+ }
46
+
47
+
48
# Convert tab-delimited genotypes read from STDIN into a single-sample VCF
# printed to STDOUT. Reads the file-level $opts: 'id' names the sample
# column and the optional 'refseq' is the fasta file supplying REF bases.
# Input lines have the columns chromosome, position and a two-letter
# genotype such as "CT"; lines starting with '#' and lines whose genotype
# is '*' are skipped. Calls error() on lines which cannot be parsed.
sub tab_to_vcf
{
    my $refseq = $$opts{refseq} ? FaSlice->new(file=>$$opts{refseq},size=>1_000_000) : undef;

    my $id = $$opts{id};

    my $vcf_out = Vcf->new();
    $vcf_out->add_columns($id);
    $vcf_out->add_header_line({key=>'FORMAT',ID=>'GT',Number=>'1',Type=>'String',Description=>"Genotype"});
    print $vcf_out->format_header();

    while (my $line=<STDIN>)
    {
        if ( $line=~/^#/ ) { next; }

        # Without the chomp a '*' in the last column still carries the
        # newline and is not recognised.
        chomp($line);

        # 11 86881024 CT
        my ($chr,$pos,$snp) = split(/\t/,$line);
        if ( !defined $snp ) { error("Could not parse the line: $line\n"); }
        if ( $snp eq '*' ) { next; }

        if ( !($pos=~/^\d+$/) ) { error("Could not parse the line: $line\n"); }
        if ( !($snp=~/^([ACGT])([ACGT])$/) ) { error("Could not parse the line: $line\n"); }
        $snp = "$1/$2";

        # The reference is optional: calling get_base on an undefined
        # reader would abort the run. NOTE(review): 'N' is used as the
        # placeholder REF base when no reference is given - confirm that
        # this is the desired output.
        my $ref = $refseq ? $refseq->get_base($chr,$pos) : 'N';

        my %out;
        $out{CHROM}  = $chr;
        $out{POS}    = $pos;
        $out{ID}     = '.';
        $out{ALT}    = [];
        $out{REF}    = $ref;
        $out{QUAL}   = '.';
        $out{FILTER} = ['.'];
        $out{FORMAT} = ['GT'];
        $out{gtypes}{$id}{GT} = $snp;

        $vcf_out->format_genotype_strings(\%out);
        print $vcf_out->format_line(\%out);
    }
}
92
+