ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,190 @@
1
+ /*
2
+ * vcf_entry.h
3
+ *
4
+ * Created on: Aug 19, 2009
5
+ * Author: Adam Auton
6
+ * ($Revision: 230 $)
7
+ */
8
+
9
+ #ifndef VCF_ENTRY_H_
10
+ #define VCF_ENTRY_H_
11
+
12
+ #include <algorithm>
13
+ #include <iostream>
14
+ #include <iterator>
15
+ #include <limits>
16
+ #include <map>
17
+ #include <set>
18
+ #include <sstream>
19
+ #include <string>
20
+ #include <vector>
21
+
22
+ #include <cassert>
23
+
24
+ #include "output_log.h"
25
+
26
+ using namespace std;
27
+
28
// Value types a field description can carry (stored in Field_description::Type).
enum Type_enum {Integer=0, Float=1, Character=2, String=3, Flag=4};

// Plain record describing one declared field: its ID, its number of entries,
// its value type and a free-text description. It is the mapped type of
// vcf_entry::INFO_map and vcf_entry::FORMAT_map.
//
// No destructor is declared on purpose: a user-declared destructor (even an
// empty one) suppresses the implicitly generated move operations, which would
// force both strings to be copied whenever an instance is moved.
class Field_description
{
public:
	std::string ID;
	int N_entries;
	Type_enum Type;
	std::string Description;

	// Default state: empty ID and description, zero entries, Integer type.
	Field_description() : ID(), N_entries(0), Type(Integer), Description() {}
};
41
+
42
+ class vcf_entry { // One VCF record: fixed columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT) plus per-individual genotype data, with flags recording which parts have been parsed.
43
+ public:
44
+ vcf_entry(const unsigned int N_indv);
45
+ vcf_entry(const unsigned int N_indv, const string &data_line);
46
+ ~vcf_entry();
47
+
48
+ const unsigned int N_indv; // loop bound used by the getters when walking the per-individual vectors
49
+
50
+ void parse_basic_entry(bool parse_ALT=false, bool parse_FILTER=false, bool parse_INFO=false); // defined outside this view
51
+ void parse_full_entry(bool parse_FORMAT=true); // called by read_indv_generic_entry() when fully_parsed is false
52
+ void parse_genotype_entry(unsigned int indv, bool GT=false, bool GQ=false, bool DP=false, bool FT=false);
53
+ void parse_genotype_entries(bool GT=false, bool GQ=false, bool DP=false, bool FT=false);
54
+
55
+ void reset(const string &vcf_data_line);
56
+
57
+ string get_CHROM() const; // returns the stored CHROM
58
+ void get_CHROM(string &out) const; // same value, written into out
59
+ int get_POS() const; // returns the stored POS
60
+ string get_ID() const; // "." when the stored ID is empty
61
+ string get_REF() const; // returns the stored REF
62
+ string get_ALT() const; // ALT alleles joined with ','; "." when there are none; asserts parsed_ALT
63
+ string get_ALT_allele(int allele_num) const; // 0-based index into ALT; "." when out of range
64
+ void get_allele(int allele_num, string &out) const; // 0 = REF, n>0 = ALT[n-1], anything else gives "."
65
+ void get_alleles_vector(vector<string> &out) const; // out = REF followed by every ALT allele
66
+ string get_FILTER() const; // "PASS" when passed_filters, "." when no filters are stored, else filters joined with ','
67
+ void get_FILTER_vector(vector<string> &out) const; // copy of FILTER
68
+ double get_QUAL() const; // returns the stored QUAL
69
+ string get_INFO(const set<string> &INFO_to_keep) const; // "key=value" pairs whose key is in INFO_to_keep, joined with ';'; "." when none match
70
+ string get_INFO_value(const string &key) const; // value of the first INFO pair with this key; "?" when absent
71
+ string get_FORMAT() const; // FORMAT ids joined with ':'
72
+ void get_indv_GENOTYPE_ids(unsigned int indv, pair<int, int> &out) const; // copy of GENOTYPE[indv]; asserts parsed_GT[indv]
73
+ void get_indv_GENOTYPE_strings(unsigned int indv, pair<string, string> &out) const; // both allele ids translated through get_allele()
74
+ char get_indv_PHASE(unsigned int indv) const; // PHASE[indv]; asserts parsed_GT[indv]
75
+ double get_indv_GQUALITY(unsigned int indv) const; // -1 when GQUALITY is empty; asserts parsed_GQ[indv]
76
+ int get_indv_DEPTH(unsigned int indv) const; // -1 when DEPTH is empty; asserts parsed_DP[indv]
77
+ void get_indv_GFILTER(unsigned int indv, string &out) const; // genotype filters joined with ';', or "." when there are none
78
+ void get_indv_GFILTER_vector(unsigned int indv, vector<string> &out) const; // copy of GFILTER[indv]; emptied when GFILTER is empty
79
+ int get_indv_ploidy(unsigned int indv) const; // ploidy[indv]; asserts parsed_GT[indv]
80
+
81
+ bool is_SNP() const; // REF and every ALT allele are one character long, with at least one ALT
82
+ bool is_biallelic_SNP() const; // REF is one character and there is exactly one one-character ALT
83
+ bool is_diploid(const vector<bool> &include_indv, const vector<bool> &include_genotype) const; // every included individual has ploidy 2
84
+
85
+ void read_indv_generic_entry(unsigned int indv, const string &FORMAT_id, string &out); // extracts the FORMAT_id field of GENOTYPE_str[indv]; out is "." when the id is unknown
86
+ bool FORMAT_id_exists(const string &FORMAT_id); // true when FORMAT_to_idx has this id; asserts parsed_FORMAT
87
+
88
+ unsigned int get_N_alleles() const; // ALT.size() + 1
89
+ unsigned int get_N_chr(const vector<bool> &include_indv, const vector<bool> &include_genotype) const; // sum of ploidy over included individuals
90
+
91
+ void set_CHROM(const string &in);
92
+ void set_POS(const int in);
93
+ void set_ID(const string &in);
94
+ void set_REF(const string &in);
95
+ void set_ALT(const string &in);
96
+ void set_QUAL(const double in);
97
+ void set_FILTER(const string &FILTER_str);
98
+ void set_FORMAT(const string &in); // called by read_indv_generic_entry() with FORMAT_str when parsed_FORMAT is false
99
+ void set_INFO(const string &INFO_str);
100
+
101
+ void add_ALT_allele(const string &in);
102
+ void add_FILTER_entry(const string &in);
103
+ void add_FORMAT_entry(const string &in, unsigned int pos);
104
+
105
+ void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const string &in);
106
+ void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const pair<string, string> &genotype, char phase);
107
+ void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const pair<int, int> &genotype, char phase);
108
+ void set_indv_GENOTYPE_alleles(unsigned int indv, const pair<string, string> &in);
109
+ void set_indv_GENOTYPE_alleles(unsigned int indv, char a1, char a2);
110
+ void set_indv_GENOTYPE_ids(unsigned int indv, const pair<int, int> &in);
111
+ void set_indv_PHASE(unsigned int indv, char in);
112
+ void set_indv_GQUALITY(unsigned int indv, double in);
113
+ void set_indv_DEPTH(unsigned int indv, int in);
114
+ void set_indv_GFILTER(unsigned int indv, const string &in);
115
+
116
+ void add_indv_GFILTER(unsigned int indv, const string &in);
117
+
118
+ static void add_INFO_descriptor(const string &in);
119
+ static void add_FILTER_descriptor(const string &in);
120
+ static void add_FORMAT_descriptor(const string &in);
121
+
122
+ void print(ostream &out);
123
+ void print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO=false);
124
+ void print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO, const vector<bool> &include_indv, const vector<bool> &include_genotype);
125
+
126
+ void filter_genotypes_by_depth(vector<bool> &include_genotype_out, int min_depth, int max_depth);
127
+ void filter_genotypes_by_quality(vector<bool> &include_genotype_out, double min_genotype_quality);
128
+ void filter_genotypes_by_filter_status(vector<bool> &include_genotype_out, const set<string> &filter_flags_to_remove, bool remove_all = false);
129
+
130
+ void get_allele_counts(vector<int> &out, unsigned int &N_non_missing_chr_out, const vector<bool> &include_indv, const vector<bool> &include_genotype) const; // per-allele tallies over included genotypes; allele id -1 is skipped as missing
131
+ void get_genotype_counts(const vector<bool> &include_indv, const vector<bool> &include_genotype, unsigned int &out_N_hom1, unsigned int &out_N_het, unsigned int &out_N_hom2) const; // counts 0/0, heterozygous and 1/1 genotypes; calls error() when there is more than one ALT
132
+
133
+ static double SNPHWE(int obs_hets, int obs_hom1, int obs_hom2);
134
+ private:
135
+ istringstream data_stream;
136
+
137
+ bool basic_parsed;
138
+ bool fully_parsed; // checked by read_indv_generic_entry() before it reads genotype text
139
+ bool parsed_ALT; // asserted by every getter that reads ALT
140
+ bool parsed_FILTER; // asserted by get_FILTER() and get_FILTER_vector()
141
+ bool parsed_INFO; // asserted by get_INFO() and get_INFO_value()
142
+ bool parsed_FORMAT; // asserted by get_FORMAT() and FORMAT_id_exists()
143
+
144
+ string CHROM;
145
+ int POS;
146
+ string ID;
147
+ string REF;
148
+ vector<string> ALT; // alternative alleles; allele id n>0 maps to ALT[n-1]
149
+ double QUAL;
150
+ vector<string> FILTER;
151
+ bool passed_filters; // when true get_FILTER() reports "PASS"
152
+ vector<pair<string, string> > INFO; // (key, value) pairs in stored order
153
+ vector<string> FORMAT;
154
+
155
+ vector< pair<int,int> > GENOTYPE; // per-individual allele ids; -1 is treated as missing by the counting getters
156
+ vector<int> ploidy; // per-individual ploidy, valid once parsed_GT[indv] is set
157
+ vector<char> PHASE;
158
+ vector<double> GQUALITY; // may be empty, in which case get_indv_GQUALITY() returns -1
159
+ vector<int> DEPTH; // may be empty, in which case get_indv_DEPTH() returns -1
160
+ vector< vector<string> > GFILTER; // per-individual genotype filter flags; may be empty
161
+
162
+ vector<bool> parsed_GT; // per-individual flags asserted by the genotype, phase and ploidy getters
163
+ vector<bool> parsed_GQ; // per-individual flag asserted by get_indv_GQUALITY()
164
+ vector<bool> parsed_DP; // per-individual flag asserted by get_indv_DEPTH()
165
+ vector<bool> parsed_FT; // per-individual flag asserted by the GFILTER getters
166
+
167
+ int GT_idx;
168
+ int GQ_idx;
169
+ int DP_idx;
170
+ int FT_idx;
171
+
172
+ string ALT_str, FILTER_str, INFO_str, FORMAT_str, QUAL_str; // FORMAT_str is handed to set_FORMAT() by read_indv_generic_entry()
173
+ vector<string> GENOTYPE_str; // per-individual ':'-separated genotype text, tokenised by read_indv_generic_entry()
174
+
175
+ map<string, unsigned int> FORMAT_to_idx; // FORMAT id -> position of that field within a ':'-separated genotype string
176
+
177
+ static map<string, Field_description> INFO_map;
178
+ static map<string, string> FILTER_map;
179
+ static map<string, Field_description> FORMAT_map;
180
+
181
+ static int str2int(const string &in, const int missing_value=-1);
182
+ static double str2double(const string &in, const double missing_value=-1.0);
183
+
184
+ static string int2str(const int in, const int missing_value=-1);
185
+ static string double2str(const double in, const double missing_value=-1.0);
186
+
187
+ static void tokenize(const string &in, char token, vector<string> &out);
188
+ };
189
+
190
+ #endif /* VCF_ENTRY_H_ */
@@ -0,0 +1,421 @@
1
+ /*
2
+ * vcf_entry_getters.cpp
3
+ *
4
+ * Created on: Nov 11, 2009
5
+ * Author: Adam Auton
6
+ * ($Revision: 230 $)
7
+ */
8
+
9
+ #include "vcf_entry.h"
10
+
11
+ // Return the CHROMosome name
12
+ string vcf_entry::get_CHROM() const
13
+ {
14
+ return CHROM;
15
+ }
16
+
17
+ // Return the CHROMosome name
18
+ void vcf_entry::get_CHROM(string &out) const
19
+ {
20
+ out = CHROM;
21
+ }
22
+
23
+ int vcf_entry::get_POS() const
24
+ {
25
+ return POS;
26
+ }
27
+
28
+ string vcf_entry::get_ID() const
29
+ {
30
+ if (ID.size() == 0)
31
+ return ".";
32
+ return ID;
33
+ }
34
+
35
+ string vcf_entry::get_REF() const
36
+ {
37
+ return REF;
38
+ }
39
+
40
+ string vcf_entry::get_ALT() const
41
+ {
42
+ assert(parsed_ALT == true);
43
+
44
+ string out;
45
+ if (ALT.size() == 0)
46
+ out = ".";
47
+ else
48
+ {
49
+ out = ALT[0];
50
+ for (unsigned int ui=1; ui<ALT.size(); ui++)
51
+ out += "," + ALT[ui];
52
+ }
53
+ return out;
54
+ }
55
+
56
+ bool vcf_entry::is_SNP() const
57
+ {
58
+ assert(parsed_ALT == true);
59
+
60
+ if (REF.size() != 1)
61
+ return false; // Reference isn't a single base
62
+
63
+ if (ALT.size() == 0)
64
+ return false; // No alternative allele
65
+
66
+ for (unsigned int ui=0; ui<ALT.size(); ui++)
67
+ if (ALT[ui].size() != 1)
68
+ return false; // Alternative allele isn't a single base
69
+
70
+ return true;
71
+ }
72
+
73
+ bool vcf_entry::is_biallelic_SNP() const
74
+ {
75
+ assert(parsed_ALT == true);
76
+
77
+ if (REF.size() != 1)
78
+ return false; // Reference isn't a single base
79
+
80
+ if (ALT.size() != 1)
81
+ return false; // Not biallelic
82
+
83
+ if (ALT[0].size() != 1)
84
+ return false; // Alternative allele isn't a single base
85
+
86
+ return true;
87
+ }
88
+
89
+ bool vcf_entry::is_diploid(const vector<bool> &include_indv, const vector<bool> &include_genotype) const
90
+ {
91
+ for (unsigned int ui=0; ui<N_indv; ui++)
92
+ {
93
+ if ((include_indv[ui] == true) && (include_genotype[ui] == true))
94
+ {
95
+ assert(parsed_GT[ui] == true);
96
+ if (ploidy[ui] != 2)
97
+ return false;
98
+ }
99
+ }
100
+ return true;
101
+ }
102
+
103
+ void vcf_entry::get_allele(int allele_num, string &out) const
104
+ {
105
+ assert(parsed_ALT == true);
106
+
107
+ if (allele_num == 0)
108
+ out = REF;
109
+ else if ((allele_num < 0) || (unsigned(allele_num - 1) >= ALT.size()))
110
+ out = ".";
111
+ else
112
+ out = ALT[allele_num-1];
113
+ }
114
+
115
+ string vcf_entry::get_ALT_allele(int allele_num) const
116
+ {
117
+ assert(parsed_ALT == true);
118
+
119
+ if ((allele_num < 0) || (unsigned(allele_num) >= ALT.size()))
120
+ return ".";
121
+ return ALT[allele_num];
122
+ }
123
+
124
+ void vcf_entry::get_alleles_vector(vector<string> &out) const
125
+ {
126
+ assert(parsed_ALT == true);
127
+ out.resize(ALT.size()+1);
128
+ out[0] = REF;
129
+ copy(ALT.begin(), ALT.end(), out.begin()+1);
130
+ }
131
+
132
+ double vcf_entry::get_QUAL() const
133
+ {
134
+ return QUAL;
135
+ }
136
+
137
+
138
+ string vcf_entry::get_FILTER() const
139
+ {
140
+ assert(parsed_FILTER == true);
141
+
142
+ ostringstream out;
143
+ if ((passed_filters == false) && (FILTER.size() == 0))
144
+ out << ".";
145
+ else if (passed_filters == true)
146
+ out << "PASS";
147
+ else
148
+ {
149
+ out << FILTER[0];
150
+ for (unsigned int ui=1; ui<FILTER.size(); ui++)
151
+ {
152
+ out << "," << FILTER[ui];
153
+ }
154
+ }
155
+ return out.str();
156
+ }
157
+
158
+ void vcf_entry::get_FILTER_vector(vector<string> &out) const
159
+ {
160
+ assert(parsed_FILTER == true);
161
+ out = FILTER;
162
+ }
163
+
164
+
165
+ string vcf_entry::get_INFO(const set<string> &INFO_to_keep) const
166
+ {
167
+ assert(parsed_INFO == true);
168
+
169
+ ostringstream out;
170
+ bool first=true;
171
+ if ((INFO.size() > 0) && (INFO_to_keep.size() > 0))
172
+ {
173
+ string key;
174
+ for (unsigned int ui=0; ui<INFO.size();ui++)
175
+ {
176
+ key = INFO[ui].first;
177
+ if (INFO_to_keep.find(key) != INFO_to_keep.end())
178
+ {
179
+ if (first != true)
180
+ out << ";";
181
+ out << key << "=" << INFO[ui].second;
182
+ first = false;
183
+ }
184
+ }
185
+ }
186
+
187
+ if (first == true)
188
+ { // Didn't find any INFO fields to keep
189
+ out.str(".");
190
+ }
191
+ return out.str();
192
+ }
193
+
194
+ string vcf_entry::get_INFO_value(const string &key) const
195
+ {
196
+ assert(parsed_INFO == true);
197
+
198
+ for (unsigned int ui=0; ui<INFO.size(); ui++)
199
+ {
200
+ if (INFO[ui].first == key)
201
+ return INFO[ui].second;
202
+ }
203
+ return "?";
204
+ }
205
+
206
+ string vcf_entry::get_FORMAT() const
207
+ {
208
+ assert(parsed_FORMAT == true);
209
+
210
+ string out;
211
+ bool first = true;
212
+ for (unsigned int ui=0; ui<FORMAT.size(); ui++)
213
+ {
214
+ if (first == false)
215
+ out += ":";
216
+ out += FORMAT[ui];
217
+ first = false;
218
+ }
219
+ return out;
220
+ }
221
+
222
+ // Return the alleles of a genotype as a pair of strings.
223
+ void vcf_entry::get_indv_GENOTYPE_strings(unsigned int indv, pair<string, string> &out) const
224
+ {
225
+ assert(parsed_GT[indv] == true);
226
+
227
+ static string out_allele1, out_allele2;
228
+
229
+ get_allele(GENOTYPE[indv].first, out_allele1);
230
+ get_allele(GENOTYPE[indv].second, out_allele2);
231
+ out = make_pair(out_allele1, out_allele2);
232
+ }
233
+
234
+
235
+ void vcf_entry::get_indv_GENOTYPE_ids(unsigned int indv, pair<int, int> &out) const
236
+ {
237
+ assert(parsed_GT[indv] == true);
238
+ out = GENOTYPE[indv];
239
+ }
240
+
241
+ char vcf_entry::get_indv_PHASE(unsigned int indv) const
242
+ {
243
+ assert(parsed_GT[indv] == true);
244
+ return PHASE[indv];
245
+ }
246
+
247
+ int vcf_entry::get_indv_DEPTH(unsigned int indv) const
248
+ {
249
+ assert(parsed_DP[indv] == true);
250
+ if (DEPTH.size() == 0)
251
+ return -1;
252
+ return DEPTH[indv];
253
+ }
254
+
255
+ double vcf_entry::get_indv_GQUALITY(unsigned int indv) const
256
+ {
257
+ assert(parsed_GQ[indv] == true);
258
+ if (GQUALITY.size() == 0)
259
+ return -1;
260
+ return GQUALITY[indv];
261
+ }
262
+
263
+ void vcf_entry::get_indv_GFILTER_vector(unsigned int indv, vector<string> &out) const
264
+ {
265
+ assert(parsed_FT[indv] == true);
266
+ if (GFILTER.size() > 0)
267
+ out = GFILTER[indv];
268
+ else
269
+ out.resize(0);
270
+ }
271
+
272
+ void vcf_entry::get_indv_GFILTER(unsigned int indv, string &out) const
273
+ {
274
+ assert(parsed_FT[indv] == true);
275
+
276
+ if ((GFILTER.size() > 0) && (GFILTER[indv].size()>0))
277
+ {
278
+ out="";
279
+ for (unsigned int ui=0; ui<GFILTER[indv].size(); ui++)
280
+ {
281
+ if (ui!=0)
282
+ out += ";";
283
+ out += GFILTER[indv][ui];
284
+ }
285
+ }
286
+ else
287
+ out = ".";
288
+ }
289
+
290
+ int vcf_entry::get_indv_ploidy(unsigned int indv) const
291
+ {
292
+ assert (parsed_GT[indv]==true);
293
+ return ploidy[indv];
294
+ }
295
+
296
+ void vcf_entry::read_indv_generic_entry(unsigned int indv, const string &FORMAT_id, string &out)
297
+ {
298
+ if (fully_parsed == false)
299
+ parse_full_entry(true);
300
+
301
+ if (parsed_FORMAT == false)
302
+ set_FORMAT(FORMAT_str);
303
+
304
+ out = ".";
305
+
306
+ if (FORMAT_to_idx.find(FORMAT_id) != FORMAT_to_idx.end())
307
+ {
308
+ unsigned int idx = FORMAT_to_idx[FORMAT_id];
309
+ static string tmpstr;
310
+ static istringstream ss;
311
+ ss.clear();
312
+ ss.str(GENOTYPE_str[indv]);
313
+
314
+ for (unsigned int ui=0; ui <= idx; ui++)
315
+ {
316
+ getline(ss, tmpstr, ':');
317
+ if (ui == idx)
318
+ {
319
+ out = tmpstr;
320
+ break;
321
+ }
322
+ if (!ss.good())
323
+ break;
324
+ }
325
+ }
326
+ }
327
+
328
+ bool vcf_entry::FORMAT_id_exists(const string &FORMAT_id)
329
+ {
330
+ assert(parsed_FORMAT == true);
331
+ if (FORMAT_to_idx.find(FORMAT_id) != FORMAT_to_idx.end())
332
+ return true;
333
+ return false;
334
+ }
335
+
336
+ unsigned int vcf_entry::get_N_alleles() const
337
+ {
338
+ assert(parsed_ALT == true);
339
+ return (ALT.size()+1);
340
+ }
341
+
342
+ unsigned int vcf_entry::get_N_chr(const vector<bool> &include_indv, const vector<bool> &include_genotype) const
343
+ {
344
+ unsigned int out=0;
345
+
346
+ for (unsigned int ui=0; ui<N_indv; ui++)
347
+ {
348
+ if ((include_indv[ui] == true) && (include_genotype[ui] == true))
349
+ {
350
+ assert(parsed_GT[ui] == true);
351
+ out += ploidy[ui];
352
+ }
353
+ }
354
+ return out;
355
+ }
356
+
357
+
358
+ // Return the frequency (counts) of each allele.
359
+ void vcf_entry::get_allele_counts(vector<int> &out, unsigned int &N_non_missing_chr_out, const vector<bool> &include_indv, const vector<bool> &include_genotype) const
360
+ {
361
+ pair<int,int> genotype;
362
+ vector<int> allele_counts(get_N_alleles(), 0);
363
+ N_non_missing_chr_out = 0;
364
+
365
+ for (unsigned int ui=0; ui<N_indv; ui++)
366
+ {
367
+ if ((include_indv[ui] == true) && (include_genotype[ui] == true))
368
+ {
369
+ assert(parsed_GT[ui] == true);
370
+ get_indv_GENOTYPE_ids(ui, genotype);
371
+ if (genotype.first != -1)
372
+ {
373
+ allele_counts[genotype.first]++;
374
+ N_non_missing_chr_out++;
375
+ }
376
+ if (genotype.second != -1)
377
+ {
378
+ allele_counts[genotype.second]++;
379
+ N_non_missing_chr_out++;
380
+ }
381
+ }
382
+ }
383
+ out = allele_counts;
384
+ }
385
+
386
+ // Return the counts of homozygote1, heterozygotes, and homozygote2
387
+ void vcf_entry::get_genotype_counts(const vector<bool> &include_indv, const vector<bool> &include_genotype, unsigned int &out_N_hom1, unsigned int &out_N_het, unsigned int &out_N_hom2) const
388
+ {
389
+ out_N_hom1 = 0; out_N_hom2 = 0; out_N_het = 0;
390
+ pair<int, int> genotype;
391
+ if (ALT.size() > 1)
392
+ error("Tried to return the genotype counts of a non-biallelic SNP", 99);
393
+
394
+ for (unsigned int ui=0; ui<N_indv; ui++)
395
+ {
396
+ if ((include_indv[ui] == true) && (include_genotype[ui] == true))
397
+ {
398
+ assert(parsed_GT[ui] == true);
399
+ get_indv_GENOTYPE_ids(ui, genotype);
400
+ if ((genotype.first != -1) && (genotype.second != -1))
401
+ {
402
+ if (genotype.first != genotype.second)
403
+ {
404
+ out_N_het++;
405
+ }
406
+ else if (genotype.first == 0)
407
+ {
408
+ out_N_hom1++;
409
+ }
410
+ else if (genotype.first == 1)
411
+ {
412
+ out_N_hom2++;
413
+ }
414
+ else
415
+ {
416
+ error("Unknown allele in genotype", 98);
417
+ }
418
+ }
419
+ }
420
+ }
421
+ }