ngs_server 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,154 @@
1
+ /*
2
+ * parameters.h
3
+ *
4
+ * Created on: Nov 11, 2009
5
+ * Author: Adam Auton
6
+ * ($Revision: 249 $)
7
+ */
8
+
9
+ // Class for reading in, checking and storing user parameters
10
+ #ifndef PARAMETERS_H_
11
+ #define PARAMETERS_H_
12
+
13
+ #include <cstdio>
14
+ #include <cstdlib>
15
+ #include <iostream>
16
+ #include <limits>
17
+ #include <string>
18
+ #include <vector>
19
+ #include <set>
20
+
21
+ #include "output_log.h"
22
+
23
+ using namespace std;
24
+
25
+ const string VCFTOOLS_VERSION="v0.1.7";
26
+
27
// Class for reading in, checking and storing user parameters.
//
// Every option is a plain public data member. The data members are kept in
// exactly the order of the original declaration, because member order fixes
// initialisation order and object layout.
class parameters
{
public:
    parameters(int argc, char *argv[]);
    ~parameters() {}

    void read_parameters();
    void print_help();
    void print_params();

    // Option fields (original declaration order preserved).
    bool            BED_exclude;
    string          BED_file;
    string          chr_to_exclude;
    string          chr_to_keep;
    bool            diff_discordance_matrix;
    string          diff_file;
    bool            diff_file_compressed;
    bool            diff_indv_discordance;
    bool            diff_site_discordance;
    bool            diff_switch_error;
    int             end_pos;
    bool            force_write_index;
    string          FORMAT_id_to_extract;
    string          fst_file;
    bool            fst_file_compressed;
    vector<string>  fst_populations;
    set<string>     geno_filter_flags_to_exclude;
    string          indv_exclude_file;
    string          indv_keep_file;
    set<string>     indv_to_exclude;
    set<string>     indv_to_keep;
    vector<string>  INFO_to_extract;
    bool            invert_mask;
    int             ld_bp_window_size;
    int             ld_snp_window_size;
    int             min_mac;
    double          min_maf;
    string          mask_file;
    int             max_alleles;
    int             max_genotype_depth;
    double          max_indv_mean_depth;
    int             max_mac;
    double          max_maf;
    double          max_mean_depth;
    int             max_missing_call_count;
    int             max_non_ref_ac;
    double          max_non_ref_af;
    int             max_N_indv;
    int             min_alleles;
    int             min_genotype_depth;
    double          min_genotype_quality;
    double          min_HWE_pvalue;
    double          min_indv_call_rate;
    double          min_indv_mean_depth;
    int             min_interSNP_distance;
    int             min_kept_mask_value;
    double          min_mean_depth;
    int             min_non_ref_ac;
    double          min_non_ref_af;
    double          min_quality;
    double          min_r2;
    double          min_site_call_rate;
    bool            output_012_matrix;
    bool            output_as_IMPUTE;
    bool            output_as_ldhat_phased;
    bool            output_as_ldhat_unphased;
    bool            output_BEAGLE_genotype_likelihoods;
    bool            output_counts;
    bool            output_filter_summary;
    bool            output_filtered_sites;
    bool            output_freq;
    bool            output_geno_depth;
    bool            output_geno_rsq;
    bool            output_hap_rsq;
    bool            output_het;
    bool            output_HWE;
    bool            output_indv_depth;
    bool            output_interchromosomal_rsq;
    bool            output_LROH;
    bool            output_missingness;
    int             output_N_PCA_SNP_loadings;
    bool            output_PCA;
    string          output_prefix;
    bool            output_relatedness;
    bool            output_singletons;
    bool            output_site_depth;
    bool            output_site_mean_depth;
    bool            output_site_pi;
    bool            output_site_quality;
    int             output_SNP_density_bin_size;
    int             output_Tajima_D_bin_size;
    int             output_TsTv_bin_size;
    bool            output_TsTv_by_count;
    bool            output_TsTv_by_qual;
    bool            phased_only;
    bool            PCA_no_normalisation;
    int             pi_window_size;
    bool            plink_output;
    bool            plink_tped_output;
    string          positions_file;
    bool            recode;
    set<string>     recode_INFO_to_keep;
    bool            recode_all_INFO;
    bool            remove_all_filtered_genotypes;
    bool            remove_all_filtered_sites;
    set<string>     site_filter_flags_to_exclude;
    set<string>     site_filter_flags_to_keep;
    set<string>     site_INFO_flags_to_keep;
    set<string>     site_INFO_flags_to_remove;
    string          snps_to_exclude_file;
    string          snps_to_keep_file;
    set<string>     snps_to_keep;
    int             start_pos;
    bool            suppress_allele_output;
    string          vcf_filename;
    bool            vcf_compressed;

private:
    void check_parameters();
    static void error(string err_msg, int code);

    // Note: this member shares its name with the constructor's argv parameter.
    vector<string> argv;

    string get_arg(unsigned int i);
};
152
+
153
+
154
+ #endif /* PARAMETERS_H_ */
@@ -0,0 +1,497 @@
1
+ /*
2
+ * vcf_entry.cpp
3
+ *
4
+ * Created on: Aug 19, 2009
5
+ * Author: Adam Auton
6
+ * ($Revision: 230 $)
7
+ */
8
+
9
+ #include "vcf_entry.h"
10
+
11
// Out-of-class definitions of vcf_entry's static lookup maps, keyed by string.
// One instance of each exists for the whole program, not one per entry.
map<string, Field_description> vcf_entry::INFO_map;
map<string, string> vcf_entry::FILTER_map;
map<string, Field_description> vcf_entry::FORMAT_map;
14
+
15
+ // Create a VCF on the basis of a data line.
16
+ vcf_entry::vcf_entry(const unsigned int N_indv, const string &line)
17
+ : N_indv(N_indv),
18
+ data_stream(line),
19
+ basic_parsed(false), fully_parsed(false),
20
+ parsed_ALT(false), parsed_FILTER(false),
21
+ parsed_INFO(false), parsed_FORMAT(false),
22
+ CHROM(""), POS(-1), REF(""), QUAL(-1),
23
+ passed_filters(false),
24
+ parsed_GT(N_indv, false), parsed_GQ(N_indv, false), parsed_DP(N_indv, false),
25
+ parsed_FT(N_indv, false),
26
+ ALT_str(""), FILTER_str(""), INFO_str(""), FORMAT_str(""), QUAL_str("")
27
+ {
28
+ }
29
+
30
+ // Create an empty VCF entry
31
+ vcf_entry::vcf_entry(const unsigned int N_indv)
32
+ : N_indv(N_indv),
33
+ data_stream("0\t0\t.\tN\t.\t.\t.\t."),
34
+ basic_parsed(false), fully_parsed(false),
35
+ parsed_ALT(false), parsed_FILTER(false),
36
+ parsed_INFO(false), parsed_FORMAT(false),
37
+ CHROM(""), POS(-1), REF(""), QUAL(-1),
38
+ passed_filters(false),
39
+ parsed_GT(N_indv, false), parsed_GQ(N_indv, false), parsed_DP(N_indv, false),
40
+ parsed_FT(N_indv, false),
41
+ ALT_str(""), FILTER_str(""), INFO_str(""), FORMAT_str(""), QUAL_str("")
42
+ {
43
+ }
44
+
45
+ vcf_entry::~vcf_entry() {}
46
+
47
+ // Reset the VCF entry object with a new data line
48
+ void vcf_entry::reset(const string &vcf_data_line)
49
+ {
50
+ basic_parsed = false;
51
+ fully_parsed = false;
52
+ parsed_ALT = false;
53
+ parsed_FILTER = false;
54
+ parsed_INFO = false;
55
+ parsed_FORMAT = false;
56
+
57
+ data_stream.clear();
58
+ data_stream.str(vcf_data_line);
59
+
60
+ fill(parsed_GT.begin(), parsed_GT.end(), false);
61
+ fill(parsed_GQ.begin(), parsed_GQ.end(), false);
62
+ fill(parsed_DP.begin(), parsed_DP.end(), false);
63
+ fill(parsed_FT.begin(), parsed_FT.end(), false);
64
+ }
65
+
66
+ // Tokenize the basic information in a VCF data line (at the tab level)
67
+ void vcf_entry::parse_basic_entry(bool parse_ALT, bool parse_FILTER, bool parse_INFO)
68
+ {
69
+ data_stream >> CHROM >> POS >> ID >> REF >> ALT_str >> QUAL_str >> FILTER_str >> INFO_str;
70
+ QUAL = str2double(QUAL_str);
71
+
72
+ // Convert to uppercase for consistency
73
+ // Note that VCF v4.1 allows mixtures of lower/upper case in REF and ALT.
74
+ // However, the spec specifically states that tools using VCF are not required
75
+ // to preserve the case.
76
+ std::transform(REF.begin(), REF.end(), REF.begin(), ::toupper);
77
+ std::transform(ALT_str.begin(), ALT_str.end(),ALT_str.begin(), ::toupper);
78
+
79
+ parsed_ALT = false;
80
+ parsed_FILTER = false;
81
+ parsed_INFO = false;
82
+ basic_parsed = true;
83
+
84
+ if (parse_ALT)
85
+ set_ALT(ALT_str);
86
+ if (parse_FILTER)
87
+ set_FILTER(FILTER_str);
88
+ if (parse_INFO)
89
+ set_INFO(INFO_str);
90
+ }
91
+
92
+ // Tokenize the genotype information (at the 'tab' level) in the VCF entry
93
+ void vcf_entry::parse_full_entry(bool parse_FORMAT)
94
+ {
95
+ if (basic_parsed == false)
96
+ parse_basic_entry();
97
+
98
+ data_stream >> FORMAT_str;
99
+
100
+ if (parse_FORMAT)
101
+ set_FORMAT(FORMAT_str);
102
+
103
+ string tmpstr; tmpstr.reserve(64);
104
+ GENOTYPE_str.resize(N_indv, tmpstr);
105
+
106
+ for (unsigned int ui=0; ui<N_indv; ui++)
107
+ data_stream >> GENOTYPE_str[ui];
108
+
109
+ // The following line copies the GENOTYPE fields from the stringstream into the GENOTYPE_str vector.
110
+ // Is actually slower than the above code.
111
+ //copy(istream_iterator<string>(data_stream), istream_iterator<string>(), GENOTYPE_str.begin());
112
+
113
+ fully_parsed = true;
114
+ }
115
+
116
+ // Tokenize a given genotype entry into it's component parts
117
+ void vcf_entry::parse_genotype_entry(unsigned int indv, bool GT, bool GQ, bool DP, bool FT)
118
+ {
119
+ if (fully_parsed == false)
120
+ parse_full_entry(true);
121
+
122
+ if (parsed_FORMAT == false)
123
+ set_FORMAT(FORMAT_str);
124
+
125
+ static string tmpstr;
126
+ static istringstream ss;
127
+ ss.clear(); ss.str(GENOTYPE_str[indv]);
128
+
129
+ int N_required = GT + GQ + DP + FT;
130
+ int N_got = 0;
131
+
132
+ int i=0;
133
+ while (getline(ss, tmpstr, ':'))
134
+ {
135
+ if (GT && (i == GT_idx)) // (FORMAT[ui] == "GT")
136
+ {
137
+ set_indv_GENOTYPE_and_PHASE(indv, tmpstr);
138
+ N_got++;
139
+ }
140
+ else if (GQ && (i == GQ_idx)) // (FORMAT[ui] == "GQ")
141
+ {
142
+ set_indv_GQUALITY(indv, str2double(tmpstr));
143
+ N_got++;
144
+ }
145
+ else if (DP && (i == DP_idx)) // (FORMAT[ui] == "DP")
146
+ {
147
+ set_indv_DEPTH(indv, str2int(tmpstr));
148
+ N_got++;
149
+ }
150
+ else if (FT && (i == FT_idx)) // (FORMAT[ui] == "FT")
151
+ {
152
+ set_indv_GFILTER(indv, tmpstr);
153
+ N_got++;
154
+ }
155
+
156
+ if (N_got == N_required)
157
+ break;
158
+ i++;
159
+ }
160
+
161
+ // Set missing return values if requested a value, but couldn't find it
162
+ if (GT && (parsed_GT[indv] == false))
163
+ {
164
+ set_indv_GENOTYPE_and_PHASE(indv, make_pair(-1,-1), '/');
165
+ }
166
+ if (GQ && (parsed_GQ[indv] == false))
167
+ {
168
+ set_indv_GQUALITY(indv, -1);
169
+ }
170
+ if (DP && (parsed_DP[indv] == false))
171
+ {
172
+ set_indv_DEPTH(indv, -1);
173
+ }
174
+ if (FT && (parsed_FT[indv] == false))
175
+ {
176
+ set_indv_GFILTER(indv, "");
177
+ }
178
+ }
179
+
180
+ // Read the VCF entry and fully populate the object
181
+ void vcf_entry::parse_genotype_entries(bool GT, bool GQ, bool DP, bool FT)
182
+ {
183
+ for (unsigned int ui=0; ui<N_indv; ui++)
184
+ parse_genotype_entry(ui, GT, GQ, DP, FT);
185
+ }
186
+
187
+ void vcf_entry::print(ostream &out)
188
+ {
189
+ vector<bool> include_indv(N_indv, true);
190
+ vector<bool> include_genotype(N_indv, true);
191
+ set<string> INFO_to_keep;
192
+ print(out, INFO_to_keep, false, include_indv, include_genotype);
193
+ }
194
+
195
+ void vcf_entry::print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO)
196
+ {
197
+ vector<bool> include_indv(N_indv, true);
198
+ vector<bool> include_genotype(N_indv, true);
199
+ print(out, INFO_to_keep, keep_all_INFO, include_indv, include_genotype);
200
+ }
201
+
202
+ // Output VCF entry to output stream
203
+ void vcf_entry::print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO, const vector<bool> &include_indv, const vector<bool> &include_genotype)
204
+ {
205
+ if (fully_parsed == false)
206
+ parse_full_entry();
207
+
208
+ out << get_CHROM() << '\t' << POS << '\t' << get_ID() << '\t' << REF << '\t' << get_ALT();
209
+
210
+ out << '\t' << double2str(QUAL);
211
+ out << '\t' << get_FILTER();
212
+ if (keep_all_INFO == false)
213
+ out << '\t' << get_INFO(INFO_to_keep);
214
+ else
215
+ out << '\t' << INFO_str;
216
+
217
+ pair<int, int> genotype;
218
+ string GFILTER_tmp;
219
+ if (FORMAT.size() > 0)
220
+ {
221
+ char PHASE;
222
+ out << '\t' << get_FORMAT();
223
+
224
+ for (unsigned int ui=0; ui<N_indv; ui++)
225
+ {
226
+ if (include_indv[ui] == false)
227
+ continue;
228
+ out << '\t';
229
+ for (int count=0; count<(int)FORMAT.size(); count++)
230
+ {
231
+ if (count == GT_idx) // (FORMAT[count] == "GT")
232
+ {
233
+ if (count != 0) out << ':';
234
+ if (include_genotype[ui] == true)
235
+ {
236
+ get_indv_GENOTYPE_ids(ui, genotype);
237
+ PHASE = get_indv_PHASE(ui);
238
+ if ((genotype.first != -1) && (genotype.second != -1))
239
+ out << int2str(genotype.first) << PHASE << int2str(genotype.second);
240
+ else if ((PHASE == '|') && (genotype.second == -1))
241
+ out << int2str(genotype.first); // Handle haploid case
242
+ else
243
+ out << int2str(genotype.first) << PHASE << int2str(genotype.second);
244
+ }
245
+ else
246
+ out << "./.";
247
+ }
248
+ else if (count == GQ_idx) //(FORMAT[count] == "GQ")
249
+ {
250
+ if (count != 0) out << ':';
251
+ out << double2str(get_indv_GQUALITY(ui));
252
+ }
253
+ else if (count == DP_idx) // (FORMAT[count] == "DP")
254
+ {
255
+ if (count != 0) out << ':';
256
+ out << int2str(get_indv_DEPTH(ui));
257
+ }
258
+ else if (count == FT_idx) // (FORMAT[count] == "FT")
259
+ {
260
+ if (count != 0) out << ':';
261
+ get_indv_GFILTER(ui, GFILTER_tmp);
262
+ out << GFILTER_tmp;
263
+ }
264
+ else
265
+ { // Unknown FORMAT so just replicate original output
266
+ if (count != 0) out << ':';
267
+ read_indv_generic_entry(ui, FORMAT[count], GFILTER_tmp);
268
+ out << GFILTER_tmp;
269
+ }
270
+ }
271
+ }
272
+ }
273
+ // out << endl;
274
+ out << '\n'; // endl flushes the buffer, which is slow. This (should be) quicker.
275
+ }
276
+
277
+ // Set the include_genotype flag on the basis of depth
278
+ void vcf_entry::filter_genotypes_by_depth(vector<bool> &include_genotype_out, int min_depth, int max_depth)
279
+ {
280
+ if (fully_parsed == false)
281
+ parse_full_entry();
282
+
283
+ //if (FORMAT_to_idx.find("DP") != FORMAT_to_idx.end())
284
+ if (DP_idx != -1)
285
+ { // Have depth info
286
+ int depth;
287
+ include_genotype_out.resize(N_indv, true);
288
+ for (unsigned int ui=0; ui<N_indv; ui++)
289
+ {
290
+ if (parsed_DP[ui] == false)
291
+ parse_genotype_entry(ui, false, false, true);
292
+ depth = get_indv_DEPTH(ui);
293
+ if ((depth < min_depth) || (depth > max_depth))
294
+ include_genotype_out[ui] = false;
295
+ }
296
+ }
297
+ }
298
+
299
+ // Filter specific genotypes by quality
300
+ void vcf_entry::filter_genotypes_by_quality(vector<bool> &include_genotype_out, double min_genotype_quality)
301
+ {
302
+ if (fully_parsed == false)
303
+ parse_full_entry();
304
+
305
+ //if (FORMAT_to_idx.find("GQ") != FORMAT_to_idx.end())
306
+ if (GQ_idx != -1)
307
+ { // Have quality info
308
+ double quality;
309
+ include_genotype_out.resize(N_indv, true);
310
+ for (unsigned int ui=0; ui<N_indv; ui++)
311
+ {
312
+ if (parsed_GQ[ui] == false)
313
+ parse_genotype_entry(ui, false, true);
314
+ quality = get_indv_GQUALITY(ui);
315
+ if (quality < min_genotype_quality)
316
+ include_genotype_out[ui] = false;
317
+ }
318
+ }
319
+ }
320
+
321
+ // Exclude genotypes with a filter flag.
322
+ void vcf_entry::filter_genotypes_by_filter_status(vector<bool> &include_genotype_out, const set<string> &filter_flags_to_remove, bool remove_all)
323
+ {
324
+ if (fully_parsed == false)
325
+ parse_full_entry();
326
+
327
+ string filter;
328
+ vector<string> GFILTERs;
329
+ //if (FORMAT_to_idx.find("FT") != FORMAT_to_idx.end())
330
+ if (FT_idx != -1)
331
+ { // Have GFilter info
332
+ include_genotype_out.resize(N_indv, true);
333
+ for (unsigned int ui=0; ui<N_indv; ui++)
334
+ {
335
+ if (parsed_FT[ui] == false)
336
+ parse_genotype_entry(ui, false, false, false, true);
337
+ get_indv_GFILTER_vector(ui, GFILTERs);
338
+
339
+ if ((remove_all == true) && (GFILTERs.size() > 0))
340
+ include_genotype_out[ui] = false;
341
+ else
342
+ {
343
+ for (unsigned int uj=0; uj<GFILTERs.size(); uj++)
344
+ if (filter_flags_to_remove.find(GFILTERs[uj]) != filter_flags_to_remove.end())
345
+ include_genotype_out[ui] = false;
346
+ }
347
+ }
348
+ }
349
+ }
350
+
351
+ /*
352
+ // This function implements an exact SNP test of Hardy-Weinberg
353
+ // Equilibrium as described in Wigginton, JE, Cutler, DJ, and
354
+ // Abecasis, GR (2005) A Note on Exact Tests of Hardy-Weinberg
355
+ // Equilibrium. American Journal of Human Genetics. 76: 887-893
356
+ //
357
+ // Written by Jan Wigginton
358
+ */
359
+ double vcf_entry::SNPHWE(int obs_hets, int obs_hom1, int obs_hom2)
360
+ {
361
+ if (obs_hom1 + obs_hom2 + obs_hets == 0 ) return 1;
362
+
363
+ if (obs_hom1 < 0 || obs_hom2 < 0 || obs_hets < 0)
364
+ error("Internal error: negative count in HWE test", 91);
365
+
366
+ int obs_homc = obs_hom1 < obs_hom2 ? obs_hom2 : obs_hom1;
367
+ int obs_homr = obs_hom1 < obs_hom2 ? obs_hom1 : obs_hom2;
368
+
369
+ int rare_copies = 2 * obs_homr + obs_hets;
370
+ int genotypes = obs_hets + obs_homc + obs_homr;
371
+
372
+ double * het_probs = (double *) malloc((size_t) (rare_copies + 1) * sizeof(double));
373
+ if (het_probs == NULL)
374
+ error("Internal error: SNP-HWE: Unable to allocate array", 90);
375
+
376
+ for (int i = 0; i <= rare_copies; i++)
377
+ het_probs[i] = 0.0;
378
+
379
+ /* start at midpoint */
380
+ int mid = rare_copies * (2 * genotypes - rare_copies) / (2 * genotypes);
381
+
382
+ /* check to ensure that midpoint and rare alleles have same parity */
383
+ if ((rare_copies & 1) ^ (mid & 1))
384
+ mid++;
385
+
386
+ int curr_hets = mid;
387
+ int curr_homr = (rare_copies - mid) / 2;
388
+ int curr_homc = genotypes - curr_hets - curr_homr;
389
+
390
+ het_probs[mid] = 1.0;
391
+ double sum = het_probs[mid];
392
+ for (curr_hets = mid; curr_hets > 1; curr_hets -= 2)
393
+ {
394
+ het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets * (curr_hets - 1.0) / (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
395
+ sum += het_probs[curr_hets - 2];
396
+
397
+ /* 2 fewer heterozygotes for next iteration -> add one rare, one common homozygote */
398
+ curr_homr++;
399
+ curr_homc++;
400
+ }
401
+
402
+ curr_hets = mid;
403
+ curr_homr = (rare_copies - mid) / 2;
404
+ curr_homc = genotypes - curr_hets - curr_homr;
405
+ for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2)
406
+ {
407
+ het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr * curr_homc /((curr_hets + 2.0) * (curr_hets + 1.0));
408
+ sum += het_probs[curr_hets + 2];
409
+
410
+ /* add 2 heterozygotes for next iteration -> subtract one rare, one common homozygote */
411
+ curr_homr--;
412
+ curr_homc--;
413
+ }
414
+
415
+ for (int i = 0; i <= rare_copies; i++)
416
+ het_probs[i] /= sum;
417
+
418
+ /* alternate p-value calculation for p_hi/p_lo
419
+ double p_hi = het_probs[obs_hets];
420
+ for (int i = obs_hets + 1; i <= rare_copies; i++)
421
+ p_hi += het_probs[i];
422
+
423
+ double p_lo = het_probs[obs_hets];
424
+ for (int i = obs_hets - 1; i >= 0; i--)
425
+ p_lo += het_probs[i];
426
+
427
+ double p_hi_lo = p_hi < p_lo ? 2.0 * p_hi : 2.0 * p_lo;
428
+ */
429
+
430
+ double p_hwe = 0.0;
431
+ /* p-value calculation for p_hwe */
432
+ for (int i = 0; i <= rare_copies; i++)
433
+ {
434
+ if (het_probs[i] > het_probs[obs_hets])
435
+ continue;
436
+ p_hwe += het_probs[i];
437
+ }
438
+
439
+ p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
440
+
441
+ free(het_probs);
442
+
443
+ return p_hwe;
444
+ }
445
+
446
+ int vcf_entry::str2int(const string &in, const int missing_value)
447
+ {
448
+ if ((in.size() == 0) || (in == "."))
449
+ return missing_value;
450
+ else
451
+ return atoi(in.c_str());
452
+ }
453
+
454
+ double vcf_entry::str2double(const string &in, const double missing_value)
455
+ {
456
+ if ((in.size() == 0) || (in == "."))
457
+ return missing_value;
458
+ else
459
+ return atof(in.c_str());
460
+ }
461
+
462
+ string vcf_entry::int2str(const int in, const int missing_value)
463
+ {
464
+ if (in == missing_value)
465
+ return ".";
466
+ else
467
+ {
468
+ static ostringstream out;
469
+ out.str(""); out.clear();
470
+ out << in;
471
+ return out.str();
472
+ }
473
+ }
474
+
475
+ string vcf_entry::double2str(const double in, const double missing_value)
476
+ {
477
+ if (in == missing_value)
478
+ return ".";
479
+ else
480
+ {
481
+ static ostringstream out;
482
+ out.str(""); out.clear();
483
+ out << in;
484
+ return out.str();
485
+ }
486
+ }
487
+
488
+ void vcf_entry::tokenize(const string &in, char token, vector<string> &out)
489
+ {
490
+ out.resize(0);
491
+ istringstream ss(in);
492
+ string tmp;
493
+ while( getline(ss, tmp, token) )
494
+ {
495
+ out.push_back(tmp);
496
+ }
497
+ }