ngs_server 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,681 @@
1
+ #
2
+ # Author: petr.danecek@sanger
3
+ #
4
+
5
+ =head1 NAME
6
+
7
+ VcfStats.pm. Module for collecting stats from VCF files.
8
+
9
+ =head1 SYNOPSIS
10
+
11
+ use VcfStats;
12
+
13
+ my $vstats = VcfStats->new(file=>'example.vcf.gz');
14
+ while (my $x=$vstats->next_data_hash())
15
+ {
16
+ $vstats->collect_stats($x);
17
+ }
18
+ $vstats->dump();
19
+
20
+ =cut
21
+
22
+ package VcfStats;
23
+ use strict;
24
+ use warnings;
25
+ use Carp;
26
+ use Data::Dumper;
27
+ use base qw(Vcf);
28
+
29
+ =head2 new
30
+
31
+ About : Creates new VcfStats.
32
+ Usage : my $vstats = VcfStats->new(file=>'my.vcf');
33
+ Args : See Vcf.pm
34
+
35
+ =cut
36
+
37
# Constructor. Object creation is delegated to the parent class; the result
# is then re-blessed into the requested class.
#   Args    : forwarded unchanged to SUPER::new
#   Returns : the object blessed into $class
sub new
{
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    # Every entry of the object's "versions" list which the object already
    # inherits from is added to this package's base classes at run time.
    foreach my $version (@{$$self{versions}})
    {
        next unless $self->isa($version);
        eval "use base qw($version)";
    }

    return bless($self,$class);
}
48
+
49
+
50
# Thin override: passes all arguments to the parent class' parse_header and
# hands its result straight back to the caller.
sub parse_header
{
    my $self = shift;
    return $self->SUPER::parse_header(@_);
}
55
+
56
+ =head2 get_stats_key
57
+
58
+ About : Creates relevant stats hash key, used by select_stats
59
+ Usage :
60
+ Args [1]: Hash with filter definition (value to match, range, etc.)
61
+ [2]: Prefix of the stat
62
+ [3]: Value of the filter
63
+
64
+ =cut
65
+
66
# Builds the stats hash key for one filter and one observed value.
#   Args [1]: Filter definition hash. Keys read here: exact, value, any,
#             bin, bin_size, max.
#        [2]: Prefix of the key
#        [3]: The observed value
#   Returns : "$key/$value" for exact and any filters, "$key/." when the value
#             is '.', and "$key/<bin>" for binned filters ("$key/>max" when the
#             bin exceeds the maximum).
#   Note    : When an exact filter does not match, the sub is left via "next":
#             the innermost loop enclosing the call moves on to its next
#             iteration and nothing is returned.
sub get_stats_key
{
    my ($self,$filter,$key,$value) = @_;

    my $stat_key;
    if ( $$filter{exact} )
    {
        if ( $value ne $$filter{value} )
        {
            # Deliberate non-local exit; select_stats calls this sub from
            # within its loops. Silence the "Exiting subroutine via next"
            # warning that would otherwise be raised for every mismatch.
            no warnings 'exiting';
            next;
        }
        $stat_key = $key.'/'.$value;
    }
    elsif ( $value eq '.' ) { $stat_key = $key.'/.'; }
    elsif ( $$filter{any} ) { $stat_key = $key.'/'.$value; }
    elsif ( $$filter{bin} )
    {
        # Fail with a readable message rather than "Illegal division by zero".
        if ( !$$filter{bin_size} ) { $self->throw("The bin_size of the filter $key must be a non-zero number.\n"); }

        # Round down to the lower edge of the bin.
        my $bin = int($value/$$filter{bin_size}) * $$filter{bin_size};
        if ( $bin>$$filter{max} ) { $bin=">$$filter{max}"; }
        $stat_key = $key.'/'.$bin;
    }
    else { $self->throw("TODO: $key...\n"); }
    return $stat_key;
}
87
+
88
+ =head2 select_stats
89
+
90
+ About : Selects relevant stats hashes
91
+ Usage :
92
+ Args [1]: Hash record from next_data_hash
93
+ [2]: Filters
94
+
95
+ =cut
96
+
97
# Picks the stats hashes in which the given record should be counted.
#   Args [1]: Hash record from next_data_hash
#        [2]: Optional hash of filters. Recognised keys: FILTER, QUAL,
#             INFO/<tag>, FORMAT/<tag>, SAMPLE/<sample>/<tag>.
#   Returns : (\@mandatory,\%samples) - array ref of site-level stats hashes
#             and hash ref mapping each sample name to an array of its stats
#             hashes. Missing stats hashes are created in $$self{stats}.
#   Note    : The loops surrounding the get_stats_key calls matter, because
#             get_stats_key may leave via "next" and thereby skip the rest of
#             the current iteration of the innermost loop.
sub select_stats
{
    my ($self,$rec,$filters) = @_;

    $$self{stats}{all} = {} unless exists($$self{stats}{all});
    my @mandatory = ( $$self{stats}{all} );

    my %samples;
    foreach my $sample (keys %{$$rec{gtypes}})
    {
        $$self{stats}{samples}{$sample} = {} unless exists($$self{stats}{samples}{$sample});
        push @{$samples{$sample}}, $$self{stats}{samples}{$sample};
    }

    return (\@mandatory,\%samples) unless defined $filters;

    while (my ($key,$filter) = each %$filters)
    {
        if ( $key eq 'FILTER' )
        {
            foreach my $value (@{$$rec{FILTER}})
            {
                my $stats_key = $self->get_stats_key($filter,$key,$value);
                $$self{stats}{$stats_key} = {} unless exists($$self{stats}{$stats_key});
                push @mandatory, $$self{stats}{$stats_key};
            }
        }
        elsif ( $key eq 'QUAL' )
        {
            my $stats_key = $self->get_stats_key($filter,$key,$$rec{QUAL});
            $$self{stats}{$stats_key} = {} unless exists($$self{stats}{$stats_key});
            push @mandatory, $$self{stats}{$stats_key};
        }
        elsif ( $key=~m{^INFO/} )
        {
            my $present = exists($$rec{INFO}{$$filter{tag}});
            if ( $$filter{is_flag} )
            {
                # A flag filter selects the record only when the presence of
                # the tag agrees with the requested value.
                next if ( $$filter{value} ? !$present : $present );
                $$self{stats}{$key} = {} unless exists($$self{stats}{$key});
                push @mandatory, $$self{stats}{$key};
            }
            elsif ( $present )
            {
                my $stats_key = $self->get_stats_key($filter,$key,$$rec{INFO}{$$filter{tag}});
                $$self{stats}{$stats_key} = {} unless exists($$self{stats}{$stats_key});
                push @mandatory, $$self{stats}{$stats_key};
            }
        }
        elsif ( my ($fmt_tag) = $key=~m{^FORMAT/([^/]+)$} )
        {
            foreach my $sample (keys %{$$rec{gtypes}})
            {
                my $hash = $$rec{gtypes}{$sample};
                next unless exists($$hash{$fmt_tag});
                my $stats_key = $self->get_stats_key($filter,$fmt_tag,$$hash{$fmt_tag});
                $$self{stats}{samples}{$sample}{user}{$stats_key} = {}
                    unless exists($$self{stats}{samples}{$sample}{user}{$stats_key});
                push @{$samples{$sample}}, $$self{stats}{samples}{$sample}{user}{$stats_key};
            }
        }
        elsif ( my ($smp_name,$smp_tag) = $key=~m{^SAMPLE/([^/]+)/([^/]+)$} )
        {
            next unless exists($$rec{gtypes}{$smp_name}{$smp_tag});
            my $stats_key = $self->get_stats_key($filter,$smp_tag,$$rec{gtypes}{$smp_name}{$smp_tag});
            $$self{stats}{samples}{$smp_name}{user}{$stats_key} = {}
                unless exists($$self{stats}{samples}{$smp_name}{user}{$stats_key});
            push @{$samples{$smp_name}}, $$self{stats}{samples}{$smp_name}{user}{$stats_key};
        }
        else { $self->throw("The feature currently not recognised: $key.\n"); }
    }
    return (\@mandatory,\%samples);
}
171
+
172
+ =head2 collect_stats
173
+
174
+ About : Collect stats
175
+ Usage : my $x=$vstats->next_data_hash(); $vstats->collect_stats($x);
176
+ Args :
177
+
178
+ =cut
179
+
180
# Collects all stats for one record: the site-level stats, the per-sample
# variant stats, the per-sample genotype counters and the shared/private
# counters.
#   Args [1]: Hash record from next_data_hash
#        [2]: Optional filters, handed on to select_stats
sub collect_stats
{
    my ($self,$rec,$filters) = @_;

    # Ts/Tv and custom numbers based on INFO, QUAL etc. for the mandatory columns
    my ($mandatory_stats,$sample_stats) = $self->select_stats($rec,$filters);
    $self->collect_stats_mandatory($rec,$mandatory_stats);

    # Ts/Tv for samples
    foreach my $sample (keys %$sample_stats)
    {
        $self->collect_stats_sample($rec,$sample,$$sample_stats{$sample});
    }

    my %type_keys = ( r=>'ref', s=>'snp', i=>'indel' );

    # Private calls and the number of shared SNPs:
    #   - samples->sample_name->count    .. the call is not empty
    #   - samples->all->shared           .. a nonref variant is present in N samples
    #   - samples->sample_name->private  .. a nonref variant is present in this sample only
    my $shared = 0;
    my $sample_name;
    foreach my $sample (keys %$sample_stats)
    {
        my $stats = $$sample_stats{$sample};
        my ($alleles,$seps,$is_phased,$is_empty) = $self->parse_haplotype($rec,$sample);
        next if $is_empty;

        my $is_hom = 1;
        my $is_ref = 1;
        my %types;
        foreach my $al (@$alleles)
        {
            $is_hom = 0 if $$alleles[0] ne $al;
            my ($type) = $self->event_type($rec,$al);
            $types{$type} = 1;
            $is_ref = 0 unless $type eq 'r';
        }

        # Gather the names of all counters to be incremented for this call.
        my @counters = ('count');
        foreach my $type (keys %types)
        {
            push @counters, (exists($type_keys{$type}) ? $type_keys{$type} : 'other').'_count';
        }
        my $zygosity = $is_hom ? 'hom_' : 'het_';
        my $pair     = !exists($types{r}) ? 'AA'
                     : $is_hom            ? 'RR'
                     :                      'RA';
        push @counters, $zygosity.$pair.'_count';
        push @counters, $is_phased ? 'phased' : 'unphased';

        foreach my $stat (@$stats)
        {
            $$stat{$_}++ for @counters;
        }

        next if $is_ref;
        $shared++;
        $sample_name = $sample unless defined $sample_name;
    }

    $$self{stats}{all}{shared}{$shared}++;
    if ( $shared==1 )
    {
        $$_{private}++ for @{$$sample_stats{$sample_name}};
    }
}
250
+
251
+
252
+ =head2 collect_stats_mandatory
253
+
254
+ About : Collect stats based on mandatory columns
255
+ Usage : my $x=$vstats->next_data_hash(); $vstats->collect_stats_mandatory($x);
256
+ Args :
257
+
258
+ =cut
259
+
260
# Collects the stats which are based on the mandatory columns.
#   Args [1]: Hash record from next_data_hash
#        [2]: Array ref of stats hashes to update
sub collect_stats_mandatory
{
    my ($self,$rec,$stats) = @_;

    # How many mono,bi,tri-allelic etc sites are there. Without genotypes
    # the ALT column is counted, otherwise the non-zero allele counts taken
    # from INFO/AC or, when absent, from calc_an_ac.
    my $nalt = 0;
    if ( !keys %{$$rec{gtypes}} )
    {
        my @alts = @{$$rec{ALT}};
        $nalt = ( @alts==1 && $alts[0] eq '.' ) ? 0 : scalar @alts;
    }
    elsif ( exists($$rec{INFO}{AC}) )
    {
        foreach my $count (split(/,/,$$rec{INFO}{AC}))
        {
            $nalt++ if $count;
        }
    }
    else
    {
        my ($an,$ac,$acs) = $self->calc_an_ac($$rec{gtypes});
        foreach my $count (@$acs)
        {
            $nalt++ if $count;
        }
    }

    my %types;
    foreach my $alt (@{$$rec{ALT}})
    {
        # NOTE: $alt aliases the array element, a missing ALT ('.') is
        # therefore replaced by REF in the record itself.
        $alt = $$rec{REF} if $alt eq '.';
        $types{ $self->add_variant($rec,$alt,$stats) } = 1;
    }

    # Increment counters
    my @counters = ( 'nalt_'.$nalt, 'count', map( $_.'_count', keys %types ) );
    foreach my $stat (@$stats)
    {
        $$stat{$_}++ for @counters;
    }
}
307
+
308
+
309
+ =head2 collect_stats_sample
310
+
311
+ About : Collect stats for given sample
312
+ Usage : my $x=$vstats->next_data_hash(); $vstats->collect_stats_sample($x,'NA0001');
313
+ Args [1] hash row from next_data_hash
314
+ [2] sample name
315
+ [3] stats to collect
316
+
317
+ =cut
318
+
319
# Registers the alleles of one sample in the given stats hashes.
#   Args [1]: Hash record from next_data_hash
#        [2]: Sample name
#        [3]: Array ref of stats hashes to update
#   Throws when the sample carries more than two alleles.
sub collect_stats_sample
{
    my ($self,$rec,$sample,$stats) = @_;

    my ($alleles) = $self->parse_haplotype($rec,$sample);
    $self->throw("FIXME: currently handling diploid data only (easy to fix)\n") if @$alleles > 2;

    # An allele equal to the preceding one is skipped, so that only
    # heterozygous calls are registered twice.
    my $last_seen;
    foreach my $al (@$alleles)
    {
        $self->add_variant($rec,$al,$stats) unless ( defined $last_seen and $last_seen eq $al );
        $last_seen = $al;
    }
}
336
+
337
+
338
+ =head2 add_variant
339
+
340
+ About : Register mutation type in the selected pool
341
+ Usage : $vstats->add_variant('A','AT',$stats);
342
+ $vstats->add_variant($rec,'AT',$stats);
343
+ Args [1] Reference haplotype or VCF data line parsed by next_data_hash
344
+ [2] Variant haplotype
345
+ [3] Array of hash stats
346
+ Returns : The event type (snp,indel,ref)
347
+
348
+ =cut
349
+
350
# Registers one variant in every stats hash of the selected pool.
#   Args [1]: Reference haplotype or VCF data line parsed by next_data_hash
#        [2]: Variant haplotype
#        [3]: Array ref of stats hashes
#   Returns : The key the event was counted under: snp, indel, ref, missing
#             or other.
sub add_variant
{
    my ($self,$ref,$alt,$stats) = @_;

    my $key_type = 'other';
    my %key_subt;    # sub-keys: substitutions for SNPs, the length for indels
    if ( $alt eq '.' )
    {
        $key_type = 'missing';
    }
    else
    {
        my ($type,$len) = $self->event_type($ref,$alt);
        if ( $type eq 'i' )
        {
            $key_type = 'indel';
            $key_subt{$len}++;
        }
        elsif ( $type eq 'r' )
        {
            $key_type = 'ref';
        }
        elsif ( $type eq 's' )
        {
            $key_type = 'snp';

            # The SNP can be encoded for example as GTTTTTTT>CTTTTTTT,
            # every differing position is registered separately.
            my $ref_str = ref($ref) eq 'HASH' ? $$ref{REF} : $ref;
            if ( length($ref_str) > 1 )
            {
                foreach my $pos (0 .. length($ref_str)-1)
                {
                    my $ref_nt = substr($ref_str,$pos,1);
                    my $alt_nt = substr($alt,$pos,1);
                    $key_subt{$ref_nt.'>'.$alt_nt}++ if $ref_nt ne $alt_nt;
                }
            }
            else
            {
                $key_subt{$ref_str.'>'.$alt}++;
            }
        }
    }

    foreach my $stat (@$stats)
    {
        # Without sub-keys the type itself is a plain counter.
        if ( !%key_subt ) { $$stat{$key_type}++; next; }
        $$stat{$key_type}{$_} += $key_subt{$_} for keys %key_subt;
    }
    return $key_type;
}
405
+
406
+
407
+ =head2 dump
408
+
409
+ About : Produce Data::Dumper dump of the collected stats
410
+ Usage :
411
+ Args :
412
+ Returns : The dump.
413
+
414
+ =cut
415
+
416
# Returns the Data::Dumper representation of the collected stats.
sub dump
{
    my $self  = shift;
    my $stats = $$self{stats};
    return Dumper($stats);
}
421
+
422
+
423
# Sums the transitions and transversions found in a hash of substitution
# counts.
#   Args [1]: Hash ref with keys of the form 'A>G' and counts as values
#   Returns : (transitions, transversions, ts/tv ratio); the ratio is 0 when
#             there are no transversions.
sub _calc_tstv
{
    my ($self,$stat) = @_;

    # The word lists are parenthesised: a bare qw() in place of the foreach
    # parentheses no longer compiles on Perl 5.18 and newer.
    my $ts = 0;
    for my $mut (qw(A>G G>A C>T T>C))
    {
        if ( exists($$stat{$mut}) ) { $ts += $$stat{$mut}; }
    }
    my $tv = 0;
    for my $mut (qw(A>C C>A G>T T>G A>T T>A C>G G>C))
    {
        if ( exists($$stat{$mut}) ) { $tv += $$stat{$mut}; }
    }
    my $ratio = $tv ? $ts/$tv : 0;
    return ($ts,$tv,$ratio);
}
440
+
441
+
442
+ =head2 dump_tstv
443
+
444
+ About : Calculate transitions/transversions ratio and output string
445
+ Usage :
446
+ Args :
447
+ Returns : Formatted string
448
+
449
+ =cut
450
+
451
# Formats the transitions/transversions table for a set of stats hashes.
#   Args [1]: Hash ref of stats hashes; entries without the 'snp' key are
#             skipped
#   Returns : Formatted string, one row per key in sorted order
sub dump_tstv
{
    my ($self,$stats) = @_;

    my @rows = ("#Transitions\tTransversions\tts/tv\tSample\n");
    foreach my $key (sort keys %$stats)
    {
        next unless exists($$stats{$key}{snp});
        push @rows, sprintf("%d\t%d\t%.2f\t%s\n", $self->_calc_tstv($$stats{$key}{snp}), $key);
    }
    return join('',@rows);
}
464
+
465
+
466
+ =head2 dump_qual_tstv
467
+
468
+ About : Calculate marginal transitions/transversions ratios for QUAL/* stats
469
+ Usage :
470
+ Args :
471
+ Returns : Formatted string
472
+
473
+ =cut
474
+
475
# Formats the marginal transitions/transversions ratios of the QUAL/* stats.
#   Args [1]: Not used
#   Returns : Formatted string. Rows with a numeric quality are sorted
#             numerically; non-numeric qualities (such as ">200" or the
#             missing value ".") are prefixed with '#' and compared as
#             strings.
sub dump_qual_tstv
{
    my ($self,$file) = @_;
    my @values;
    for my $stat (keys %{$$self{stats}})
    {
        if ( !($stat=~m{^QUAL/(.+)}) ) { next; }
        my $qual = $1;

        # Only plain decimal numbers may enter the numeric comparison. A lone
        # "." is not a number and must be excluded just like ">200".
        if ( !($qual=~/^(?:[0-9]+\.?[0-9]*|\.[0-9]+)$/) ) { $qual = "#$qual"; }

        my $count = $$self{stats}{$stat}{count};
        if ( !exists($$self{stats}{$stat}{snp}) ) { next; }
        my ($ts,$tv,$ratio) = $self->_calc_tstv($$self{stats}{$stat}{snp});
        push @values, [$qual,$count,$ratio];
    }

    my @svalues = sort
    {
        if ( $$a[0]=~/^#/ or $$b[0]=~/^#/ ) { return $$a[0] cmp $$b[0]; }
        return $$a[0] <=> $$b[0];
    } @values;

    my $out = "#Quality\tMarginal count\tMarginal Ts/Tv\n";
    for my $val (@svalues)
    {
        if ( $$val[0]=~/^#/ )
        {
            $out .= sprintf "%s\t%d\t%.2f\n", $$val[0],$$val[1],$$val[2];
        }
        else
        {
            $out .= sprintf "%.2f\t%d\t%.2f\n", $$val[0],$$val[1],$$val[2];
        }
    }
    return $out;
}
505
+
506
+
507
+ =head2 dump_counts
508
+
509
+ About :
510
+ Usage :
511
+ Args :
512
+ Returns : Formatted string
513
+
514
+ =cut
515
+
516
# Formats the 'count' counters: first those of the top-level stats hashes,
# then those of the individual samples (reported as samples/<name>).
#   Returns : Formatted string
sub dump_counts
{
    my $self = shift;

    my @rows = ("#Count\tFilter\n");
    foreach my $key (sort keys %{$$self{stats}})
    {
        next unless exists($$self{stats}{$key}{count});
        push @rows, sprintf("%d\t%s\n", $$self{stats}{$key}{count}, $key);
    }
    foreach my $sample (sort keys %{$$self{stats}{samples}})
    {
        next unless exists($$self{stats}{samples}{$sample}{count});
        push @rows, sprintf("%d\tsamples/%s\n", $$self{stats}{samples}{$sample}{count}, $sample);
    }
    return join('',@rows);
}
532
+
533
# Formats the 'snp_count' counters: first those of the top-level stats
# hashes, then those of the individual samples (reported as samples/<name>).
#   Returns : Formatted string
sub dump_snp_counts
{
    my $self = shift;
    my $out  = "#Count\tFilter\n";

    my @filters = grep { exists($$self{stats}{$_}{snp_count}) } sort keys %{$$self{stats}};
    $out .= sprintf("%d\t%s\n", $$self{stats}{$_}{snp_count}, $_) for @filters;

    my @samples = grep { exists($$self{stats}{samples}{$_}{snp_count}) } sort keys %{$$self{stats}{samples}};
    $out .= sprintf("%d\tsamples/%s\n", $$self{stats}{samples}{$_}{snp_count}, $_) for @samples;

    return $out;
}
549
+
550
# Formats the 'indel_count' counters: first those of the top-level stats
# hashes, then those of the individual samples (reported as samples/<name>).
#   Returns : Formatted string
sub dump_indel_counts
{
    my $self = shift;

    my $out = "#Count\tFilter\n";
    foreach my $key (sort keys %{$$self{stats}})
    {
        my $stat = $$self{stats}{$key};
        $out .= sprintf("%d\t%s\n", $$stat{indel_count}, $key) if exists($$stat{indel_count});
    }
    foreach my $key (sort keys %{$$self{stats}{samples}})
    {
        my $stat = $$self{stats}{samples}{$key};
        $out .= sprintf("%d\tsamples/%s\n", $$stat{indel_count}, $key) if exists($$stat{indel_count});
    }
    return $out;
}
566
+
567
# Formats the stats->all->shared counters, one row per key in ascending
# numeric order.
#   Returns : Formatted string
sub dump_shared_counts
{
    my $self = shift;

    my @nshared = sort { $a<=>$b } keys %{$$self{stats}{all}{shared}};
    return join('',
        "#Shared SNPs\tFrequency\n",
        map( sprintf("%d\t%s\n", $_, $$self{stats}{all}{shared}{$_}), @nshared ),
    );
}
577
+
578
# Formats the 'private' counters of the individual samples, sorted by sample
# name. Samples without the counter are left out.
#   Returns : Formatted string
sub dump_private_counts
{
    my $self = shift;

    my @rows = ("#Private SNPs\tSample\n");
    foreach my $sample (sort keys %{$$self{stats}{samples}})
    {
        next unless exists($$self{stats}{samples}{$sample}{private});
        push @rows, sprintf("%d\t%s\n", $$self{stats}{samples}{$sample}{private}, $sample);
    }
    return join('',@rows);
}
589
+
590
# Turns the user supplied prefix into the prefix of the output files and
# creates the directory it points to, if there is one.
#
#       prefix   ->  dir     prefix
#       ----------------------------
#       out                  out
#       out/         out     out/out
#       out/xxx      out     out/xxx
#
#   Args [1]: The prefix
#   Returns : The prefix to which the file suffixes are appended
#   Note    : A failure to create the directory is only warned about; the
#             subsequent attempt to write the files will report the error.
sub _init_path
{
    my ($self,$prefix) = @_;

    # A prefix without a slash needs no directory.
    if ( !($prefix=~m{/}) ) { return $prefix; }

    my $dir = '';
    if ( $prefix=~m{^(.*)/[^/]+$}s )
    {
        $dir = $1;
    }
    elsif ( $prefix=~m{^(.*)/([^/]+)/$}s )
    {
        $dir    = $1.'/'.$2;
        $prefix = $dir.'/'.$2;
    }
    elsif ( $prefix=~m{^([^/]+)/$}s )
    {
        $dir    = $1;
        $prefix = $dir.'/'.$1;
    }

    # The directory is created without involving the shell, so that names
    # with spaces or shell metacharacters are safe. The comparison with the
    # empty string keeps a directory named "0" working.
    if ( $dir ne '' && !-d $dir )
    {
        require File::Path;
        eval { File::Path::mkpath($dir); 1 } or warn "Could not create the directory $dir: $@";
    }
    return $prefix;
}
610
+
611
# Returns the text which describes the meaning of the collected counters.
sub legend
{
    my $self = shift;

    # The text begins with an empty line.
    my $text = <<'LEGEND';

count
Number of positions with known genotype

nalt_X
Number of monoallelic (X=0), biallelic (X=1), etc. sites

ref, ref_count
Number of sites containing reference allele

shared
Number of sites having a non-reference allele in 0,1,2,etc samples

snp_count
Number of positions with SNPs
LEGEND
    return $text;
}
631
+
632
+
633
+
634
+ =head2 save_stats
635
+
636
+ About : Save all collected stats to files
637
+ Usage :
638
+ Args : The prefix of output files. Non-existent directories will be created.
639
+ Returns : N/A
640
+
641
+ =cut
642
+
643
# Saves all collected stats to files, or prints the dump to the standard
# output when no prefix is given.
#   Args [1]: The prefix of the output files, passed through _init_path
#   Returns : N/A
sub save_stats
{
    my ($self,$prefix) = @_;

    unless ( defined $prefix )
    {
        print $self->dump();
        return;
    }

    my $path = $self->_init_path($prefix);

    # File suffix and the routine producing the content, in the order of writing.
    my @outputs =
    (
        [ '.legend',    sub { $self->legend() } ],
        [ '.dump',      sub { $self->dump() } ],
        [ '.tstv',      sub { $self->dump_tstv($$self{stats}) } ],
        [ '.counts',    sub { $self->dump_counts() } ],
        [ '.snps',      sub { $self->dump_snp_counts() } ],
        [ '.indels',    sub { $self->dump_indel_counts() } ],
        [ '.qual-tstv', sub { $self->dump_qual_tstv } ],
        [ '.shared',    sub { $self->dump_shared_counts() } ],
        [ '.private',   sub { $self->dump_private_counts() } ],
    );
    foreach my $output (@outputs)
    {
        my ($suffix,$content) = @$output;
        $self->_write_file($path.$suffix, $content->());
    }

    if ( exists($$self{stats}{samples}) )
    {
        $self->_write_file($path.'.samples-tstv',$self->dump_tstv($$self{stats}{samples}));
    }
}
669
+
670
+
671
# Writes the text to the file, replacing any previous content.
#   Args [1]: File name
#        [2]: The text
#   Returns : True on success
#   Throws when the file cannot be opened or written.
sub _write_file
{
    my ($self,$fname,$text) = @_;
    open(my $fh,'>',$fname) or $self->throw("$fname: $!");
    print {$fh} $text or $self->throw("$fname: $!");

    # Buffered output is flushed by close, write errors such as a full disk
    # frequently surface only here.
    close($fh) or $self->throw("$fname: $!");
    return 1;
}
678
+
679
+
680
+ 1;
681
+