ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,681 @@
1
+ #
2
+ # Author: petr.danecek@sanger
3
+ #
4
+
5
+ =head1 NAME
6
+
7
+ VcfStats.pm. Module for collecting stats from VCF files.
8
+
9
+ =head1 SYNOPSIS
10
+
11
+ use VcfStats;
12
+
13
+ my $vstats = VcfStats->new(file=>'example.vcf.gz');
14
+ while (my $x=$vstats->next_data_hash())
15
+ {
16
+ $vstats->collect_stats($x);
17
+ }
18
+ $vstats->dump();
19
+
20
+ =cut
21
+
22
+ package VcfStats;
23
+ use strict;
24
+ use warnings;
25
+ use Carp;
26
+ use Data::Dumper;
27
+ use base qw(Vcf);
28
+
29
+ =head2 new
30
+
31
+ About : Creates new VcfStats.
32
+ Usage : my $vstats = VcfStats->new(file=>'my.vcf');
33
+ Args : See Vcf.pm
34
+
35
+ =cut
36
+
37
# Constructor.
#   Args    : Passed unchanged to the parent class constructor (see Vcf.pm).
#   Returns : The object built by the parent constructor, re-blessed into $class.
sub new
{
    my ($class,@args) = @_;
    my $self = $class->SUPER::new(@args);

    # For every entry of the object's `versions` list that the freshly built
    # object is an instance of, register that class as a base class of this
    # package, so that its methods stay reachable after the re-bless below.
    require base;
    for my $version (@{$$self{versions}})
    {
        if ( !$self->isa($version) ) { next; }

        # Same effect as `use base qw($version)`, but via a block eval:
        # nothing is compiled from a string at run time. As before, a failure
        # to load the class is deliberately ignored. $@ is localised so the
        # caller's error state is not clobbered.
        local $@;
        eval { base->import($version); };
    }
    bless($self,$class);
    return $self;
}
48
+
49
+
50
# Thin override: forwards the call, with a copy of its arguments, to the
# parent class implementation and returns whatever that returns.
sub parse_header {
    my $self      = shift;
    my @forwarded = @_;
    return $self->SUPER::parse_header(@forwarded);
}
55
+
56
+ =head2 get_stats_key
57
+
58
+ About : Creates relevant stats hash key, used by select_stats
59
+ Usage :
60
+ Args [1]: Hash with filter definition (value to match, range, etc.)
61
+ [2]: Prefix of the stat
62
+ [3]: Value of the filter
63
+
64
+ =cut
65
+
66
# Builds the stats hash key for one filter/value pair.
#   Args [1]: Hash with the filter definition (keys read here: exact, value,
#             any, bin, bin_size, max)
#        [2]: Prefix of the stat
#        [3]: Value of the filter
#   Returns : The key string, or nothing (undef in scalar context) when an
#             `exact` filter does not match the value and the record must be
#             skipped. Callers have to check for this.
sub get_stats_key
{
    my ($self,$filter,$key,$value) = @_;

    if ( $$filter{exact} )
    {
        # Previously this was a bare `next`, which left the sub through the
        # caller's loop (warning "Exiting subroutine via next", fatal when
        # called outside of a loop). Signal "skip" explicitly instead.
        if ( $value ne $$filter{value} ) { return; }
        return $key.'/'.$value;
    }
    if ( $value eq '.' ) { return $key.'/.'; }
    if ( $$filter{any} ) { return $key.'/'.$value; }
    if ( $$filter{bin} )
    {
        # Round down to the start of the bin; everything above max shares one bin.
        my $bin = int($value/$$filter{bin_size}) * $$filter{bin_size};
        if ( $bin>$$filter{max} ) { $bin=">$$filter{max}"; }
        return $key.'/'.$bin;
    }

    $self->throw("TODO: $key...\n");
    return;
}

=head2 select_stats

    About   : Selects relevant stats hashes
    Usage   : my ($mandatory,$samples) = $vstats->select_stats($rec,$filters);
    Args [1]: Hash record from next_data_hash
         [2]: Filters (optional). Hash keyed by FILTER, QUAL, INFO/..., FORMAT/tag
              or SAMPLE/sample/tag; the values are filter definitions as
              understood by get_stats_key.
    Returns : Reference to the array of stats hashes for the mandatory columns and
              reference to a hash of arrays of stats hashes keyed by sample name.

=cut

sub select_stats
{
    my ($self,$rec,$filters) = @_;

    # The 'all' pool is always selected.
    if ( !exists($$self{stats}{all}) ) { $$self{stats}{all}={}; }
    my @mandatory = ( $$self{stats}{all} );

    # Every sample present in the record gets its own pool.
    my %samples;
    for my $sample (keys %{$$rec{gtypes}})
    {
        if ( !exists($$self{stats}{samples}{$sample}) )
        {
            $$self{stats}{samples}{$sample} = {};
        }
        push @{$samples{$sample}}, $$self{stats}{samples}{$sample};
    }

    if ( !defined $filters ) { return (\@mandatory,\%samples); }

    while (my ($key,$filter) = each %$filters)
    {
        if ( $key eq 'FILTER' )
        {
            for my $value (@{$$rec{FILTER}})
            {
                my $stats_key = $self->get_stats_key($filter,$key,$value);
                if ( !defined $stats_key ) { next; }
                if ( !exists($$self{stats}{$stats_key}) ) { $$self{stats}{$stats_key}={}; }
                push @mandatory, $$self{stats}{$stats_key};
            }
        }
        elsif ( $key eq 'QUAL' )
        {
            my $stats_key = $self->get_stats_key($filter,$key,$$rec{QUAL});
            if ( !defined $stats_key ) { next; }
            if ( !exists($$self{stats}{$stats_key}) ) { $$self{stats}{$stats_key}={}; }
            push @mandatory, $$self{stats}{$stats_key};
        }
        elsif ( $key=~m{^INFO/} )
        {
            if ( $$filter{is_flag} )
            {
                # Flags are selected on presence (value true) or absence (value false).
                if ( $$filter{value} && !exists($$rec{INFO}{$$filter{tag}}) ) { next; }
                elsif ( !$$filter{value} && exists($$rec{INFO}{$$filter{tag}}) ) { next; }
                if ( !exists($$self{stats}{$key}) ) { $$self{stats}{$key}={}; }
                push @mandatory, $$self{stats}{$key};
                next;
            }
            elsif ( exists($$rec{INFO}{$$filter{tag}}) )
            {
                my $stats_key = $self->get_stats_key($filter,$key,$$rec{INFO}{$$filter{tag}});
                if ( !defined $stats_key ) { next; }
                if ( !exists($$self{stats}{$stats_key}) ) { $$self{stats}{$stats_key}={}; }
                push @mandatory, $$self{stats}{$stats_key};
            }
        }
        elsif ( $key=~m{^FORMAT/([^/]+)$} )
        {
            # Copy the capture: $1 should not be relied on across method calls.
            my $tag = $1;
            while (my ($sample,$hash) = each %{$$rec{gtypes}})
            {
                if ( !exists($$hash{$tag}) ) { next; }
                my $stats_key = $self->get_stats_key($filter,$tag,$$hash{$tag});
                if ( !defined $stats_key ) { next; }
                if ( !exists($$self{stats}{samples}{$sample}{user}{$stats_key}) ) { $$self{stats}{samples}{$sample}{user}{$stats_key}={}; }
                push @{$samples{$sample}}, $$self{stats}{samples}{$sample}{user}{$stats_key};
            }
        }
        elsif ( $key=~m{^SAMPLE/([^/]+)/([^/]+)$} )
        {
            my ($sample,$tag) = ($1,$2);
            # Test the sample first: a nested exists() on an unknown sample
            # would autovivify an empty genotype entry in the caller's record.
            if ( !exists($$rec{gtypes}{$sample}) ) { next; }
            if ( !exists($$rec{gtypes}{$sample}{$tag}) ) { next; }
            my $stats_key = $self->get_stats_key($filter,$tag,$$rec{gtypes}{$sample}{$tag});
            if ( !defined $stats_key ) { next; }
            if ( !exists($$self{stats}{samples}{$sample}{user}{$stats_key}) ) { $$self{stats}{samples}{$sample}{user}{$stats_key}={} }
            push @{$samples{$sample}}, $$self{stats}{samples}{$sample}{user}{$stats_key};
        }
        else { $self->throw("The feature currently not recognised: $key.\n"); }
    }
    return (\@mandatory,\%samples);
}
171
+
172
+ =head2 collect_stats
173
+
174
+ About : Collect stats
175
+ Usage : my $x=$vstats->next_data_hash(); $vstats->collect_stats($x);
176
+ Args :
177
+
178
+ =cut
179
+
180
# Collects all statistics for one record.
#   Args [1]: hash record from next_data_hash
#        [2]: filters, passed on to select_stats (optional)
# Updates the selected site-level pools, the per-sample pools, the
# stats->all->shared histogram and the per-sample `private` counter.
sub collect_stats {
    my ($self, $rec, $filters) = @_;

    # Substitution counts and user-defined pools for the mandatory columns.
    my ($site_pools, $pools_of) = $self->select_stats($rec, $filters);
    $self->collect_stats_mandatory($rec, $site_pools);

    # The same for each sample.
    for my $name (keys %$pools_of) {
        $self->collect_stats_sample($rec, $name, $pools_of->{$name});
    }

    my %label_for = (r => 'ref', s => 'snp', i => 'indel');

    # Per-sample genotype counters, plus bookkeeping for:
    #   - how many samples carry a non-reference allele (stats->all->shared)
    #   - a non-reference call seen in exactly one sample (sample->private)
    my $nonref_samples = 0;
    my $first_nonref;

    SAMPLE:
    for my $name (keys %$pools_of) {
        my $pools = $pools_of->{$name};
        my ($alleles, undef, $phased, $empty) = $self->parse_haplotype($rec, $name);
        next SAMPLE if $empty;

        my %seen_type;
        my $homozygous = 1;
        my $all_ref    = 1;
        for my $allele (@$alleles) {
            $homozygous = 0 if $alleles->[0] ne $allele;
            my ($type) = $self->event_type($rec, $allele);
            $seen_type{$type} = 1;
            $all_ref = 0 if $type ne 'r';
        }

        # Gather the names of all counters to bump for this call.
        my @counters = ('count');
        for my $type (keys %seen_type) {
            my $label = exists $label_for{$type} ? $label_for{$type} : 'other';
            push @counters, $label . '_count';
        }

        my $zygosity;
        if (!exists $seen_type{r}) {
            $zygosity = $homozygous ? 'hom_AA' : 'het_AA';
        }
        elsif ($homozygous) {
            $zygosity = 'hom_RR';
        }
        else {
            $zygosity = 'het_RA';
        }
        push @counters, $zygosity . '_count';
        push @counters, ($phased ? 'phased' : 'unphased');

        for my $counter (@counters) {
            $_->{$counter}++ for @$pools;
        }

        next SAMPLE if $all_ref;
        $nonref_samples++;
        $first_nonref = $name unless defined $first_nonref;
    }

    $self->{stats}{all}{shared}{$nonref_samples}++;
    if ($nonref_samples == 1) {
        for my $pool (@{ $pools_of->{$first_nonref} }) { $pool->{private}++; }
    }
}
250
+
251
+
252
+ =head2 collect_stats_mandatory
253
+
254
+ About : Collect stats based on mandatory columns
255
+ Usage : my $x=$vstats->next_data_hash(); $vstats->collect_stats_mandatory($x);
256
+ Args :
257
+
258
+ =cut
259
+
260
# Collects stats based on the mandatory columns of one record.
#   Args [1]: hash record from next_data_hash (read only)
#        [2]: array of stats hashes to update
# For every stats hash this increments `count`, `nalt_N` (N being the number
# of alternate alleles counted below) and one `<type>_count` per distinct
# event type returned by add_variant for the ALT alleles.
sub collect_stats_mandatory
{
    my ($self,$rec,$stats) = @_;

    # How many mono,bi,tri-allelic etc sites are there
    my $nalt = 0;
    if ( !scalar keys %{$$rec{gtypes}} )
    {
        # No genotypes: count the ALT column entries, '.' alone meaning none.
        $nalt = scalar @{$$rec{ALT}};
        if ( $nalt==1 && $$rec{ALT}[0] eq '.' ) { $nalt=0 }
    }
    elsif ( exists($$rec{INFO}{AC}) )
    {
        # Count the non-zero entries of the comma-separated INFO/AC value.
        for my $ac (split(/,/,$$rec{INFO}{AC}))
        {
            if ( $ac ) { $nalt++; }
        }
    }
    else
    {
        # Otherwise use the third value returned by calc_an_ac and count its
        # non-zero entries.
        my (undef,undef,$acs) = $self->calc_an_ac($$rec{gtypes});
        for my $ac (@$acs)
        {
            if ( $ac ) { $nalt++; }
        }
    }

    my %types;
    for my $alt (@{$$rec{ALT}})
    {
        # A missing ALT ('.') is registered as the reference allele. Work on a
        # copy: $alt aliases the element of the caller's record, and assigning
        # to it would silently rewrite $$rec{ALT}.
        my $allele = $alt eq '.' ? $$rec{REF} : $alt;
        my $type = $self->add_variant($rec,$allele,$stats);
        $types{$type} = 1;
    }

    # Increment counters
    for my $stat (@$stats)
    {
        $$stat{'nalt_'.$nalt}++;
        $$stat{count}++;
        for my $type (keys %types)
        {
            $$stat{$type.'_count'}++;
        }
    }
}
307
+
308
+
309
+ =head2 collect_stats_sample
310
+
311
+ About : Collect stats for given sample
312
+ Usage : my $x=$vstats->next_data_hash(); $vstats->collect_stats_sample($x,'NA0001');
313
+ Args [1] hash row from next_data_hash
314
+ [2] sample name
315
+ [3] stats to collect
316
+
317
+ =cut
318
+
319
# Registers the alleles of one sample in the given stats pools.
#   Args [1]: hash row from next_data_hash
#        [2]: sample name
#        [3]: array of stats hashes to update
# Throws when parse_haplotype reports more than two alleles.
sub collect_stats_sample {
    my ($self, $rec, $sample, $stats) = @_;

    my ($alleles) = $self->parse_haplotype($rec, $sample);
    $self->throw("FIXME: currently handling diploid data only (easy to fix)\n") if @$alleles > 2;

    # An allele equal to the one just before it is registered only once, so
    # only calls with two different alleles produce two registrations.
    my $last_seen;
    for my $allele (@$alleles) {
        my $repeated = defined $last_seen && $last_seen eq $allele;
        $self->add_variant($rec, $allele, $stats) unless $repeated;
        $last_seen = $allele;
    }
}
336
+
337
+
338
+ =head2 add_variant
339
+
340
+ About : Register mutation type in the selected pool
341
+ Usage : $vstats->add_variant('A','AT',$stats);
342
+ $vstats->add_variant($rec,'AT',$stats);
343
+ Args [1] Reference haplotype or VCF data line parsed by next_data_hash
344
+ [2] Variant haplotype
345
+ [3] Array of hash stats
346
+ Returns : The event type (snp,indel,ref)
347
+
348
+ =cut
349
+
350
# Registers one allele in each of the given stats pools.
#   Args [1]: reference haplotype, or a record hash from next_data_hash
#        [2]: variant haplotype ('.' is registered as 'missing')
#        [3]: array of stats hashes to update
#   Returns : name of the pool used: snp, indel, ref, missing or other
sub add_variant {
    my ($self, $ref, $alt, $stats) = @_;

    my $pool = 'other';
    my %tally;    # sub-keys inside the pool: substitutions for SNPs, length for indels

    if ($alt eq '.') {
        $pool = 'missing';
    }
    else {
        my ($type, $len) = $self->event_type($ref, $alt);
        if ($type eq 'r') {
            $pool = 'ref';
        }
        elsif ($type eq 'i') {
            $pool = 'indel';
            $tally{$len}++;
        }
        elsif ($type eq 's') {
            $pool = 'snp';

            # A SNP may come padded with context, e.g. GTTTTTTT>CTTTTTTT:
            # walk the reference and record every position that differs.
            my $ref_seq = ref($ref) eq 'HASH' ? $ref->{REF} : $ref;
            if (length($ref_seq) > 1) {
                for my $pos (0 .. length($ref_seq) - 1) {
                    my $from = substr($ref_seq, $pos, 1);
                    my $to   = substr($alt, $pos, 1);
                    $tally{ join('>', $from, $to) }++ if $from ne $to;
                }
            }
            else {
                $tally{ join('>', $ref_seq, $alt) }++;
            }
        }
    }

    for my $stat (@$stats) {
        if (%tally) {
            $stat->{$pool}{$_} += $tally{$_} for keys %tally;
        }
        else {
            $stat->{$pool}++;
        }
    }
    return $pool;
}
405
+
406
+
407
+ =head2 dump
408
+
409
+ About : Produce Data::Dumper dump of the collected stats
410
+ Usage :
411
+ Args :
412
+ Returns : The dump.
413
+
414
+ =cut
415
+
416
# Returns the Data::Dumper serialisation of everything stored under `stats`.
sub dump {
    my $self      = shift;
    my $collected = $self->{stats};
    return Dumper($collected);
}
421
+
422
+
423
# Sums transitions and transversions from a hash of substitution counts.
#   Args [1]: hash keyed by substitution ('A>G', 'C>A', ...) with counts as
#             values; keys other than the twelve listed below are ignored
#   Returns : (transitions, transversions, ts/tv ratio); the ratio is 0 when
#             there are no transversions
sub _calc_tstv
{
    my ($self,$stat) = @_;

    # The qw() lists must be wrapped in parentheses: using qw() itself as the
    # parentheses of a foreach list is a syntax error since perl 5.18.
    my $ts = 0;
    for my $mut (qw(A>G G>A C>T T>C))
    {
        if ( exists($$stat{$mut}) ) { $ts += $$stat{$mut}; }
    }
    my $tv = 0;
    for my $mut (qw(A>C C>A G>T T>G A>T T>A C>G G>C))
    {
        if ( exists($$stat{$mut}) ) { $tv += $$stat{$mut}; }
    }
    my $ratio = $tv ? $ts/$tv : 0;
    return ($ts,$tv,$ratio);
}
440
+
441
+
442
+ =head2 dump_tstv
443
+
444
+ About : Calculate transitions/transversions ratio and output string
445
+ Usage :
446
+ Args :
447
+ Returns : Formatted string
448
+
449
+ =cut
450
+
451
# Formats a transitions/transversions table.
#   Args [1]: hash of stats hashes; entries without a `snp` key are skipped
#   Returns : tab-separated text, one header line plus one line per entry,
#             entries sorted by key
sub dump_tstv {
    my ($self, $stats) = @_;

    my @rows = ("#Transitions\tTransversions\tts/tv\tSample\n");
    for my $name (sort keys %$stats) {
        next unless exists $stats->{$name}{snp};
        my ($ts, $tv, $ratio) = $self->_calc_tstv($stats->{$name}{snp});
        push @rows, sprintf("%d\t%d\t%.2f\t%s\n", $ts, $tv, $ratio, $name);
    }
    return join('', @rows);
}
464
+
465
+
466
+ =head2 dump_qual_tstv
467
+
468
+ About : Calculate marginal transitions/transversions ratios for QUAL/* stats
469
+ Usage :
470
+ Args :
471
+ Returns : Formatted string
472
+
473
+ =cut
474
+
475
# Formats the marginal ts/tv table for the QUAL/* stats pools.
#   Args [1]: accepted for compatibility, not used
#   Returns : tab-separated text: header line, then one line per QUAL/* pool
#             that has a `snp` entry. Non-numeric quality labels are printed
#             with a leading '#' and sorted as strings, numeric ones are
#             printed with two decimals and sorted numerically.
sub dump_qual_tstv
{
    my ($self,$file) = @_;
    my @values;
    for my $stat (keys %{$$self{stats}})
    {
        if ( !($stat=~m{^QUAL/(.+)}) ) { next; }
        my $qual = $1;
        # The quality record can be also of the form ">200" or "." (missing
        # quality). Exclude these from numeric comparison. The pattern accepts
        # plain decimal numbers only; a character class like [0-9.]+ would
        # let a lone "." through and print it as 0.00.
        if ( !($qual=~/^(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)$/) ) { $qual = "#$qual"; }
        my $count = $$self{stats}{$stat}{count};
        if ( !exists($$self{stats}{$stat}{snp}) ) { next; }
        my ($ts,$tv,$ratio) = $self->_calc_tstv($$self{stats}{$stat}{snp});
        push @values, [$qual,$count,$ratio];
    }
    my @svalues = sort { if ($$a[0]=~/^#/ or $$b[0]=~/^#/) { return $$a[0] cmp $$b[0]; } return $$a[0] <=> $$b[0]; } @values;
    my $out = "#Quality\tMarginal count\tMarginal Ts/Tv\n";
    for my $val (@svalues)
    {
        if ( $$val[0]=~/^#/ )
        {
            $out .= sprintf "%s\t%d\t%.2f\n", $$val[0],$$val[1],$$val[2];
        }
        else
        {
            $out .= sprintf "%.2f\t%d\t%.2f\n", $$val[0],$$val[1],$$val[2];
        }
    }
    return $out;
}
505
+
506
+
507
+ =head2 dump_counts
508
+
509
+ About :
510
+ Usage :
511
+ Args :
512
+ Returns : Formatted string
513
+
514
+ =cut
515
+
516
# Formats the `count` counters of all stats pools.
#   Returns : tab-separated text: header line, one line per top-level pool
#             having a `count` entry, then one "samples/<name>" line per
#             sample having one. Both parts are sorted by key.
# The stats structure is only read: nothing is created in it.
sub dump_counts
{
    my ($self) = @_;
    my $out = "#Count\tFilter\n";
    my $stats = $$self{stats} || {};
    for my $key (sort keys %$stats)
    {
        # 'samples' holds the per-sample pools keyed by sample name, it is
        # not a pool itself and is reported separately below.
        if ( $key eq 'samples' ) { next; }
        if ( !exists($$stats{$key}{count}) ) { next; }
        $out .= sprintf "%d\t%s\n", $$stats{$key}{count},$key;
    }
    # Do not dereference a missing 'samples' entry in place: keys %{...}
    # would create it as a side effect.
    my $samples = exists($$stats{samples}) ? $$stats{samples} : {};
    for my $key (sort keys %$samples)
    {
        if ( !exists($$samples{$key}{count}) ) { next; }
        $out .= sprintf "%d\tsamples/%s\n", $$samples{$key}{count},$key;
    }
    return $out;
}
532
+
533
# Formats the `snp_count` counters of all stats pools.
#   Returns : tab-separated text: header line, one line per top-level pool
#             having a `snp_count` entry, then one "samples/<name>" line per
#             sample having one. Both parts are sorted by key.
# The stats structure is only read: nothing is created in it.
sub dump_snp_counts
{
    my ($self) = @_;
    my $out = "#Count\tFilter\n";
    my $stats = $$self{stats} || {};
    for my $key (sort keys %$stats)
    {
        # 'samples' holds the per-sample pools keyed by sample name, it is
        # not a pool itself and is reported separately below.
        if ( $key eq 'samples' ) { next; }
        if ( !exists($$stats{$key}{snp_count}) ) { next; }
        $out .= sprintf "%d\t%s\n",$$stats{$key}{snp_count},$key;
    }
    # Do not dereference a missing 'samples' entry in place: keys %{...}
    # would create it as a side effect.
    my $samples = exists($$stats{samples}) ? $$stats{samples} : {};
    for my $key (sort keys %$samples)
    {
        if ( !exists($$samples{$key}{snp_count}) ) { next; }
        $out .= sprintf "%d\tsamples/%s\n", $$samples{$key}{snp_count},$key;
    }
    return $out;
}
549
+
550
# Formats the `indel_count` counters of all stats pools.
#   Returns : tab-separated text: header line, one line per top-level pool
#             having an `indel_count` entry, then one "samples/<name>" line
#             per sample having one. Both parts are sorted by key.
# The stats structure is only read: nothing is created in it.
sub dump_indel_counts
{
    my ($self) = @_;
    my $out = "#Count\tFilter\n";
    my $stats = $$self{stats} || {};
    for my $key (sort keys %$stats)
    {
        # 'samples' holds the per-sample pools keyed by sample name, it is
        # not a pool itself and is reported separately below.
        if ( $key eq 'samples' ) { next; }
        if ( !exists($$stats{$key}{indel_count}) ) { next; }
        $out .= sprintf "%d\t%s\n",$$stats{$key}{indel_count},$key;
    }
    # Do not dereference a missing 'samples' entry in place: keys %{...}
    # would create it as a side effect.
    my $samples = exists($$stats{samples}) ? $$stats{samples} : {};
    for my $key (sort keys %$samples)
    {
        if ( !exists($$samples{$key}{indel_count}) ) { next; }
        $out .= sprintf "%d\tsamples/%s\n", $$samples{$key}{indel_count},$key;
    }
    return $out;
}
566
+
567
# Formats the stats->all->shared histogram.
#   Returns : tab-separated text: header line, then one "<key>\t<value>" line
#             per histogram entry, keys in ascending numeric order.
# The stats structure is only read: nothing is created in it.
sub dump_shared_counts
{
    my ($self) = @_;
    my $out = "#Shared SNPs\tFrequency\n";
    # Walk down step by step: dereferencing $$self{stats}{all}{shared} inside
    # keys %{...} would create the missing levels as a side effect.
    my $all    = $$self{stats} && $$self{stats}{all} ? $$self{stats}{all} : {};
    my $shared = exists($$all{shared}) ? $$all{shared} : {};
    for my $key (sort {$a<=>$b} keys %$shared)
    {
        $out .= sprintf "%d\t%s\n", $key,$$shared{$key};
    }
    return $out;
}
577
+
578
# Formats the per-sample `private` counters.
#   Returns : tab-separated text: header line, then one line per sample that
#             has a `private` entry, sorted by sample name.
# The stats structure is only read: nothing is created in it.
sub dump_private_counts
{
    my ($self) = @_;
    my $out = "#Private SNPs\tSample\n";
    # Do not dereference a missing 'samples' entry in place: keys %{...}
    # would create it as a side effect.
    my $samples = $$self{stats} && exists($$self{stats}{samples}) ? $$self{stats}{samples} : {};
    for my $key (sort keys %$samples)
    {
        if ( !exists($$samples{$key}{private}) ) { next; }
        $out .= sprintf "%d\t%s\n", $$samples{$key}{private},$key;
    }
    return $out;
}
589
+
590
# Turns the user supplied prefix into the file name prefix and creates the
# directory part when there is one.
#   Args [1]: output prefix
#   Returns : the prefix to which the file suffixes are appended
#
#       prefix    ->  dir    returned prefix
#       -----------------------------------
#       out                  out
#       out/          out    out/out
#       a/out/        a/out  a/out/out
#       out/xxx       out    out/xxx
#
# Dies (via File::Path) when the directory cannot be created.
sub _init_path
{
    my ($self,$prefix) = @_;
    if ( $prefix=~m{/} )
    {
        my $dir = '';
        # Captures are used instead of the prematch variable, which slows
        # down every regex in the program on older perls.
        if ( $prefix=~m{^(.*)/[^/]+$}s ) { $dir=$1; }
        elsif ( $prefix=~m{^(.*)/([^/]+)/$}s ) { $dir = $1.'/'.$2; $prefix = $dir.'/'.$2; }
        elsif ( $prefix=~m{([^/]+)/?$} ) { $dir=$1; $prefix=$dir.'/'.$1; }
        # Compare against the empty string so that a directory named "0" is
        # created too. make_path replaces the former `mkdir -p $dir` shell
        # call: no shell sees the name, so spaces and metacharacters are safe.
        if ( $dir ne '' )
        {
            require File::Path;
            File::Path::make_path($dir);
        }
    }
    return $prefix;
}
610
+
611
# Returns the fixed explanatory text describing the counters found in the
# dumped stats: a leading newline, then name/description pairs separated by
# blank lines.
sub legend {
    my ($self) = @_;

    my @entries = (
        [ 'count',          'Number of positions with known genotype' ],
        [ 'nalt_X',         'Number of monoallelic (X=0), biallelic (X=1), etc. sites' ],
        [ 'ref, ref_count', 'Number of sites containing reference allele' ],
        [ 'shared',         'Number of sites having a non-reference allele in 0,1,2,etc samples' ],
        [ 'snp_count',      'Number of positions with SNPs' ],
    );
    return "\n" . join("\n", map { "$_->[0]\n$_->[1]\n" } @entries);
}
631
+
632
+
633
+
634
+ =head2 save_stats
635
+
636
+ About : Save all collected stats to files
637
+ Usage :
638
+ Args : The prefix of output files. Non-existent directories will be created.
639
+ Returns : N/A
640
+
641
+ =cut
642
+
643
# Saves all collected stats.
#   Args [1]: prefix of the output files (optional). Without a prefix the
#             Data::Dumper dump is printed to the selected output handle and
#             nothing is written to disk.
# With a prefix, one file per report is written as <path><suffix>, the path
# being what _init_path returns for the prefix.
sub save_stats {
    my ($self, $prefix) = @_;

    unless (defined $prefix) {
        print $self->dump();
        return;
    }

    my $base = $self->_init_path($prefix);

    # Suffix and producer of each report, in the order they are written.
    my @reports = (
        [ '.legend',    sub { $self->legend() } ],
        [ '.dump',      sub { $self->dump() } ],
        [ '.tstv',      sub { $self->dump_tstv($self->{stats}) } ],
        [ '.counts',    sub { $self->dump_counts() } ],
        [ '.snps',      sub { $self->dump_snp_counts() } ],
        [ '.indels',    sub { $self->dump_indel_counts() } ],
        [ '.qual-tstv', sub { $self->dump_qual_tstv } ],
        [ '.shared',    sub { $self->dump_shared_counts() } ],
        [ '.private',   sub { $self->dump_private_counts() } ],
    );
    for my $report (@reports) {
        my ($suffix, $producer) = @$report;
        $self->_write_file($base . $suffix, $producer->());
    }

    # The per-sample ts/tv table is written only when a `samples` entry exists.
    if (exists $self->{stats}{samples}) {
        $self->_write_file($base . '.samples-tstv', $self->dump_tstv($self->{stats}{samples}));
    }
}
669
+
670
+
671
# Writes a string to a file, replacing any previous content.
#   Args [1]: file name
#        [2]: text to write
#   Returns : 1 on success
# Throws (via $self->throw) when the file cannot be opened, written or
# closed. Output is buffered, so a failed write may only show up at close();
# that is why the result of close is checked as well.
sub _write_file
{
    my ($self,$fname,$text) = @_;
    open(my $fh,'>',$fname) or $self->throw("$fname: $!");
    print {$fh} $text or $self->throw("$fname: $!");
    close($fh) or $self->throw("$fname: $!");
    return 1;
}
678
+
679
+
680
+ 1;
681
+