ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,535 @@
1
+ /*
2
+ * parameters.cpp
3
+ *
4
+ * Created on: Nov 11, 2009
5
+ * Author: Adam Auton
6
+ * ($Revision: 249 $)
7
+ */
8
+
9
+ // Class for reading in, checking and storing user parameters
10
+
11
+ #include "parameters.h"
12
+
13
+ parameters::parameters(int argc, char *argv[])
14
+ {
15
+ string tmp;
16
+ for (int i=0; i<argc; i++)
17
+ {
18
+ tmp = argv[i];
19
+ this->argv.push_back(tmp);
20
+ }
21
+
22
+ BED_exclude = false;
23
+ BED_file = "";
24
+ chr_to_keep = "";
25
+ chr_to_exclude = "";
26
+ diff_discordance_matrix = false;
27
+ diff_file = "";
28
+ diff_file_compressed = false;
29
+ diff_indv_discordance = false;
30
+ diff_site_discordance = false;
31
+ diff_switch_error = false;
32
+ end_pos = numeric_limits<int>::max();
33
+ force_write_index = false;
34
+ fst_file = "";
35
+ fst_file_compressed = false;
36
+ indv_exclude_file = "";
37
+ indv_keep_file = "";
38
+ invert_mask = false;
39
+ recode_all_INFO = false;
40
+ ld_bp_window_size = numeric_limits<int>::max();
41
+ ld_snp_window_size = numeric_limits<int>::max();
42
+ min_mac = -1;
43
+ min_maf = -1.0;
44
+ mask_file = "";
45
+ max_alleles = numeric_limits<int>::max();
46
+ max_genotype_depth = numeric_limits<int>::max();
47
+ max_indv_mean_depth = numeric_limits<double>::max();
48
+ max_mac = numeric_limits<int>::max();
49
+ max_maf = numeric_limits<double>::max();
50
+ max_mean_depth = numeric_limits<double>::max();
51
+ max_missing_call_count = numeric_limits<int>::max();
52
+ max_non_ref_ac = numeric_limits<int>::max();
53
+ max_non_ref_af = numeric_limits<double>::max();
54
+ max_N_indv = -1;
55
+ min_alleles = -1;
56
+ min_genotype_depth = -1;
57
+ min_genotype_quality = -1.0;
58
+ min_HWE_pvalue = -1.0;
59
+ min_indv_call_rate = 0;
60
+ min_indv_mean_depth = -1.0;
61
+ min_interSNP_distance = -1;
62
+ min_kept_mask_value = 0;
63
+ min_mean_depth = -1.0;
64
+ min_quality = -1.0;
65
+ min_r2 = -1.0;
66
+ min_site_call_rate = 0;
67
+ min_non_ref_ac = -1;
68
+ min_non_ref_af = -1.0;
69
+ output_012_matrix = false;
70
+ output_as_IMPUTE = false;
71
+ output_as_ldhat_phased = false;
72
+ output_as_ldhat_unphased = false;
73
+ output_BEAGLE_genotype_likelihoods = false;
74
+ output_counts = false;
75
+ output_filter_summary = false;
76
+ output_filtered_sites = false;
77
+ output_freq = false;
78
+ output_geno_depth = false;
79
+ output_geno_rsq = false;
80
+ output_hap_rsq = false;
81
+ output_het = false;
82
+ output_HWE = false;
83
+ output_indv_depth = false;
84
+ output_interchromosomal_rsq = false;
85
+ output_LROH = false;
86
+ output_missingness = false;
87
+ output_N_PCA_SNP_loadings = -1;
88
+ output_PCA = false;
89
+ output_prefix="out";
90
+ output_relatedness = false;
91
+ output_singletons = false;
92
+ output_site_depth = false;
93
+ output_site_mean_depth = false;
94
+ output_site_pi=false;
95
+ output_site_quality = false;
96
+ output_SNP_density_bin_size = 0;
97
+ output_Tajima_D_bin_size = 0;
98
+ output_TsTv_bin_size = 0;
99
+ output_TsTv_by_count = false;
100
+ output_TsTv_by_qual = false;
101
+ phased_only = false;
102
+ PCA_no_normalisation = false;
103
+ pi_window_size = 0;
104
+ plink_output = false;
105
+ plink_tped_output = false;
106
+ positions_file = "";
107
+ recode = false;
108
+ remove_all_filtered_genotypes = false;
109
+ remove_all_filtered_sites = false;
110
+ snps_to_exclude_file = "";
111
+ snps_to_keep_file = "";
112
+ start_pos = -1;
113
+ suppress_allele_output = false;
114
+ vcf_filename="";
115
+ vcf_compressed = false;
116
+ }
117
+
118
+ void parameters::read_parameters()
119
+ {
120
+ unsigned int i=1;
121
+ string in_str;
122
+ while (i<argv.size())
123
+ {
124
+ in_str = argv[i];
125
+ if (in_str == "--vcf") { vcf_filename = get_arg(i+1); vcf_compressed = false; i++; } // VCF file to process
126
+ else if (in_str == "--012") output_012_matrix = true; // Output as 0/1/2 matrix
127
+ else if (in_str == "--BEAGLE-GL") { output_BEAGLE_genotype_likelihoods = true; min_alleles=2; max_alleles=2; } // Output as BEAGLE Genotype Likelihood format
128
+ else if (in_str == "--bed") { BED_file = get_arg(i+1); i++; BED_exclude=false; }
129
+ else if (in_str == "--chr") { chr_to_keep = get_arg(i+1); i++; } // Chromosome to process
130
+ else if (in_str == "--counts") output_counts = true; // Output per-site allele count statistics
131
+ else if (in_str == "--counts2") {output_counts = true; suppress_allele_output = true; } // Output per-site allele count statistics
132
+ else if (in_str == "--depth") output_indv_depth = true; // Output per-individual coverage statistics
133
+ else if (in_str == "--diff-discordance-matrix") { diff_discordance_matrix = true; } // Calculate some concensus statistics
134
+ else if (in_str == "--diff-indv-discordance") { diff_indv_discordance = true; } // Calculate some concensus statistics
135
+ else if (in_str == "--diff-site-discordance") { diff_site_discordance = true; } // Calculate some concensus statistics
136
+ else if (in_str == "--diff-switch-error") { diff_switch_error = true; } // Calculate some concensus statistics
137
+ else if (in_str == "--diff") { diff_file = get_arg(i+1); diff_file_compressed = false; i++; } // Calculate some concensus statistics
138
+ else if (in_str == "--exclude-bed") { BED_file = get_arg(i+1); i++; BED_exclude=true; }
139
+ else if (in_str == "--exclude") { snps_to_exclude_file = get_arg(i+1); i++; } // List of SNPs to exclude
140
+ else if (in_str == "--extract-FORMAT-info") { FORMAT_id_to_extract = get_arg(i+1); i++; }
141
+ else if (in_str == "--FILTER-summary") {output_filter_summary = true;}
142
+ else if (in_str == "--filtered-sites") {output_filtered_sites = true;}
143
+ else if (in_str == "--force-index-write") {force_write_index = true;}
144
+ else if (in_str == "--freq") output_freq = true; // Output per-site frequency statistics
145
+ else if (in_str == "--freq2") {output_freq = true; suppress_allele_output = true; } // Output per-site frequency statistics
146
+ else if (in_str == "--from-bp") { start_pos = atoi(get_arg(i+1).c_str()); i++; } // Start position
147
+ else if (in_str == "--fst") { fst_file = get_arg(i+1); fst_file_compressed = false; i++; }
148
+ else if (in_str == "--fst-pop") {fst_populations.push_back(get_arg(i+1)); i++; }
149
+ else if (in_str == "--geno-depth") output_geno_depth = true; // Output Depth for each genoptype
150
+ else if (in_str == "--geno-r2") { output_geno_rsq = true; min_alleles = 2; max_alleles = 2; } // Output pairwise LD (r^2)
151
+ else if (in_str == "--geno") { min_site_call_rate = atof(get_arg(i+1).c_str()); i++; } // Minimum per-site call rate
152
+ else if (in_str == "--get-INFO") { INFO_to_extract.push_back(get_arg(i+1)); i++; } // Add to list of INFO fields to extract
153
+ else if (in_str == "--gzdiff") { diff_file = get_arg(i+1); diff_file_compressed = true; i++; } // Calculate some concensus statistics
154
+ else if (in_str == "--gzfst") { fst_file = get_arg(i+1); fst_file_compressed = true; i++; }
155
+ else if (in_str == "--gzvcf") { vcf_filename = get_arg(i+1); vcf_compressed = true; i++; } // VCF file to process
156
+ else if (in_str == "--hap-r2") { output_hap_rsq = true; phased_only = true; min_alleles = 2; max_alleles = 2; } // Output pairwise LD (r^2)
157
+ else if (in_str == "--hardy") output_HWE = true; // Output HWE statistics
158
+ else if (in_str == "--het") output_het = true; // Output heterozygosity statistics
159
+ else if (in_str == "--hwe") { max_alleles = 2; min_HWE_pvalue = atof(get_arg(i+1).c_str()); i++; } // Minimum per-site HWE p-value
160
+ else if (in_str == "--IMPUTE") { output_as_IMPUTE = true; phased_only=true; min_site_call_rate=1.0; min_alleles=2; max_alleles=2; }// Output as IMPUTE format
161
+ else if (in_str == "--indv") { indv_to_keep.insert(get_arg(i+1)); i++; } // List of individuals to keep
162
+ else if (in_str == "--interchrom-geno-r2") { output_interchromosomal_rsq = true; min_alleles = 2; max_alleles = 2; } // Output pairwise LD (r^2)
163
+ else if (in_str == "--invert-mask") { mask_file = get_arg(i+1); i++; invert_mask = true; }
164
+ else if (in_str == "--keep-filtered") { site_filter_flags_to_keep.insert(get_arg(i+1)); i++; } // Remove a specific filter flag
165
+ else if (in_str == "--keep") { indv_keep_file = get_arg(i+1); i++; } // List of individuals to keep
166
+ else if (in_str == "--keep-INFO") { site_INFO_flags_to_keep.insert(get_arg(i+1)); i++; } // Filter sites by INFO flags
167
+ else if (in_str == "--keep-INFO-all") { recode_all_INFO=true; } // Old command (soon to be depreciated)
168
+ else if (in_str == "--ld-window-bp") { ld_bp_window_size = atoi(get_arg(i+1).c_str()); i++; } // Max bp distance for LD output
169
+ else if (in_str == "--ld-window") { ld_snp_window_size = atoi(get_arg(i+1).c_str()); i++; } // Max SNP distance for LD output
170
+ else if (in_str == "--ldhat-geno") { output_as_ldhat_unphased = true; }
171
+ else if (in_str == "--ldhat") { output_as_ldhat_phased = true; phased_only = true; } // Output as LDhat format
172
+ else if (in_str == "--LROH") {output_LROH = true;}
173
+ else if (in_str == "--mac") { min_mac = atoi(get_arg(i+1).c_str()); i++; } // Minimum Site MAC
174
+ else if (in_str == "--maf") { min_maf = atof(get_arg(i+1).c_str()); i++; } // Minimum Site MAF
175
+ else if (in_str == "--mask-min") { min_kept_mask_value = atoi(get_arg(i+1).c_str()); i++; }
176
+ else if (in_str == "--mask") { mask_file = get_arg(i+1); i++; invert_mask = false; }
177
+ else if (in_str == "--max-alleles") { max_alleles = atoi(get_arg(i+1).c_str()); i++; } // Maximum number of alleles per-site
178
+ else if (in_str == "--max-indv-meanDP") { max_indv_mean_depth = atof(get_arg(i+1).c_str()); i++; } // Maximum mean depth for an individual
179
+ else if (in_str == "--max-mac") { max_mac = atoi(get_arg(i+1).c_str()); i++; } // Maximum site MAC
180
+ else if (in_str == "--max-maf") { max_maf = atof(get_arg(i+1).c_str()); i++; } // Maximum Site MAF
181
+ else if (in_str == "--max-meanDP") { max_mean_depth = atof(get_arg(i+1).c_str()); i++; } // Site Maximum mean depth across individuals
182
+ else if (in_str == "--max-missing-count") { max_missing_call_count = atoi(get_arg(i+1).c_str()); i++; } // Site maximum missing genotypes
183
+ else if (in_str == "--max-non-ref-ac") { max_non_ref_ac = atoi(get_arg(i+1).c_str()); i++; } // Minimum Site non-ref AC
184
+ else if (in_str == "--max-non-ref-af") { max_non_ref_af = atof(get_arg(i+1).c_str()); i++; } // Minimum Site non-ref AF
185
+ else if (in_str == "--maxDP") { max_genotype_depth = atoi(get_arg(i+1).c_str()); i++; } // Maximum genotype depth
186
+ else if (in_str == "--max-indv") {max_N_indv = atoi(get_arg(i+1).c_str()); i++; }
187
+ else if (in_str == "--min-alleles") { min_alleles = atoi(get_arg(i+1).c_str()); i++; } // Minimum number of alleles per-site
188
+ else if (in_str == "--min-indv-meanDP") { min_indv_mean_depth = atof(get_arg(i+1).c_str()); i++; } // Minimum mean depth for an individual
189
+ else if (in_str == "--min-meanDP") { min_mean_depth = atof(get_arg(i+1).c_str()); i++; } // Site Minimum mean depth
190
+ else if (in_str == "--min-r2") { min_r2 = atof(get_arg(i+1).c_str()); i++; } // Min r^2 for LD output
191
+ else if (in_str == "--mind") { min_indv_call_rate = atof(get_arg(i+1).c_str()); i++; } // Minimum per-individual call rate
192
+ else if (in_str == "--minDP") { min_genotype_depth = atoi(get_arg(i+1).c_str()); i++; } // Minimum genotype depth
193
+ else if (in_str == "--minGQ") { min_genotype_quality = atof(get_arg(i+1).c_str()); i++; } // Minimum genotype quality
194
+ else if (in_str == "--minQ") { min_quality = atof(get_arg(i+1).c_str()); i++; } // Minimum per-site quality
195
+ else if (in_str == "--missing") output_missingness = true; // Output Individual and Site missingness summaries
196
+ else if (in_str == "--non-ref-ac") { min_non_ref_ac = atoi(get_arg(i+1).c_str()); i++; } // Minimum Site non-ref AC
197
+ else if (in_str == "--non-ref-af") { min_non_ref_af = atof(get_arg(i+1).c_str()); i++; } // Minimum Site non-ref AF
198
+ else if (in_str == "--not-chr") { chr_to_exclude = get_arg(i+1); i++; } // Chromosome to process
199
+ else if (in_str == "--out") { output_prefix = get_arg(i+1); i++; } // Output file prefix
200
+ else if (in_str == "--pca") { output_PCA = true; min_alleles=2; max_alleles=2; }
201
+ else if (in_str == "--pca-no-norm") { output_PCA = true; PCA_no_normalisation = true; min_alleles=2; max_alleles=2; }
202
+ else if (in_str == "--pca-snp-loadings") { output_N_PCA_SNP_loadings = atoi(get_arg(i+1).c_str()); i++; }
203
+ else if (in_str == "--phased") phased_only = true; // Keep only phased individuals / sites
204
+ else if (in_str == "--plink") plink_output = true; // Output as PLINK file
205
+ else if (in_str == "--plink-tped") plink_tped_output = true; // Output as PLINK tped file
206
+ else if (in_str == "--positions") { positions_file = get_arg(i+1); i++; }
207
+ else if (in_str == "--recode") recode = true; // Output VCF file
208
+ else if (in_str == "--recode-INFO-all") { recode_all_INFO=true; } // Specify INFO to keep when recoding
209
+ else if (in_str == "--recode-INFO") { recode_INFO_to_keep.insert(get_arg(i+1)); i++; } // Specify INFO to keep when recoding
210
+ else if (in_str == "--relatedness") { output_relatedness = true; } // Estimate relatedness between individuals
211
+ else if (in_str == "--remove-filtered-all") remove_all_filtered_sites = true; // Remove sites flagged as filtered
212
+ else if (in_str == "--remove-filtered-geno-all") remove_all_filtered_genotypes = true; // Remove genotypes flagged as filtered
213
+ else if (in_str == "--remove-filtered-geno") { geno_filter_flags_to_exclude.insert(get_arg(i+1)); i++; } // Remove genotypes flagged as filtered
214
+ else if (in_str == "--remove-filtered") { site_filter_flags_to_exclude.insert(get_arg(i+1)); i++; } // Remove a specific filter flag
215
+ else if (in_str == "--remove-indv") { indv_to_exclude.insert(get_arg(i+1)); i++; } // List of individuals to keep
216
+ else if (in_str == "--remove-INFO") { site_INFO_flags_to_remove.insert(get_arg(i+1)); i++; } // Filter sites by INFO flags
217
+ else if (in_str == "--remove") { indv_exclude_file = get_arg(i+1); i++; } // List of individuals to exclude
218
+ else if (in_str == "--singletons") output_singletons = true; // Output as 0/1/2 mat
219
+ else if (in_str == "--site-depth") output_site_depth = true; // Output Depth for each site
220
+ else if (in_str == "--site-mean-depth") output_site_mean_depth = true; // Output Mean Depth for each site
221
+ else if (in_str == "--site-pi") { output_site_pi = true; }
222
+ else if (in_str == "--site-quality") output_site_quality = true; // Output per-site qualities
223
+ else if (in_str == "--snp") { snps_to_keep.insert(get_arg(i+1)); i++; } // SNP to keep
224
+ else if (in_str == "--SNPdensity") { output_SNP_density_bin_size = atoi(get_arg(i+1).c_str()); i++; } // Output SNP density using Bin Size
225
+ else if (in_str == "--snps") { snps_to_keep_file = get_arg(i+1); i++; } // List of SNPs to keep
226
+ else if (in_str == "--TajimaD") { output_Tajima_D_bin_size = atoi(get_arg(i+1).c_str()); i++;}
227
+ else if (in_str == "--to-bp") { end_pos = atoi(get_arg(i+1).c_str()); i++; } // End position
228
+ else if (in_str == "--thin") { min_interSNP_distance = atoi(get_arg(i+1).c_str()); i++; } // Set minimum distance between SNPs
229
+ else if (in_str == "--TsTv") {output_TsTv_bin_size = atoi(get_arg(i+1).c_str()); i++;} // Output Ts/Tv stats
230
+ else if (in_str == "--TsTv-by-count") {output_TsTv_by_count = true; } // Output Ts/Tv stats
231
+ else if (in_str == "--TsTv-by-qual") {output_TsTv_by_qual = true; }
232
+ else if (in_str == "--window-pi") { pi_window_size = atoi(get_arg(i+1).c_str()); i++; }
233
+ else
234
+ error("Unknown option: " + string(in_str), 0);
235
+ i++;
236
+ }
237
+ check_parameters();
238
+ }
239
+
240
+ string parameters::get_arg(unsigned int i)
241
+ {
242
+ if (i>=argv.size())
243
+ error("Requested Missing Argument",76);
244
+ return argv[i];
245
+ }
246
+
247
+ void parameters::print_params()
248
+ {
249
+ parameters defaults(0, 0);
250
+
251
+ printLOG("Parameters as interpreted:\n");
252
+ if (vcf_filename != defaults.vcf_filename)
253
+ {
254
+ if (vcf_compressed == false)
255
+ printLOG("\t--vcf " + vcf_filename + "\n");
256
+ else
257
+ printLOG("\t--gzvcf " + vcf_filename + "\n");
258
+ }
259
+
260
+ if (chr_to_exclude != defaults.chr_to_exclude) printLOG("\t--not-chr " + chr_to_exclude + "\n");
261
+ if (chr_to_keep != defaults.chr_to_keep) printLOG("\t--chr " + chr_to_keep + "\n");
262
+ if (end_pos != defaults.end_pos) printLOG("\t--to-bp " + int2str(end_pos) + "\n");
263
+ if (force_write_index != defaults.force_write_index) printLOG("\t--force-index-write\n");
264
+ if (FORMAT_id_to_extract != defaults.FORMAT_id_to_extract) printLOG("\t--extract-FORMAT-info " + FORMAT_id_to_extract + "\n");
265
+ if (fst_file != defaults.fst_file)
266
+ {
267
+ if (vcf_compressed == false)
268
+ printLOG("\t--fst " + fst_file + "\n");
269
+ else
270
+ {
271
+ printLOG("\t--gzfst " + fst_file + "\n");
272
+ }
273
+ }
274
+ if (fst_populations.size() != 0)
275
+ {
276
+ for (unsigned int ui=0; ui<fst_populations.size(); ui++)
277
+ printLOG("\t--fst-pop " + fst_populations[ui] + "\n");
278
+ }
279
+ if (indv_exclude_file != defaults.indv_exclude_file) printLOG("\t--exclude " + indv_exclude_file + "\n");
280
+ if (indv_keep_file != defaults.indv_keep_file) printLOG("\t--keep " + indv_keep_file + "\n");
281
+ if (recode_all_INFO == true) printLOG("\t--recode-INFO-all\n");
282
+ if (ld_bp_window_size != defaults.ld_bp_window_size) printLOG("\t--ld-window-bp " + int2str(ld_bp_window_size) + "\n");
283
+ if (ld_snp_window_size != defaults.ld_snp_window_size) printLOG("\t--ld-window " + int2str(ld_snp_window_size) + "\n");
284
+ if (min_mac != defaults.min_mac) printLOG("\t--mac " + dbl2str(min_mac, 3) + "\n");
285
+ if (min_maf != defaults.min_maf) printLOG("\t--maf " + dbl2str(min_maf, 3) + "\n");
286
+ if (max_alleles != defaults.max_alleles) printLOG("\t--max-alleles " + int2str(max_alleles) + "\n");
287
+ if (max_genotype_depth != defaults.max_genotype_depth) printLOG("\t--maxDP " + dbl2str(max_genotype_depth, 3) + "\n");
288
+ if (max_indv_mean_depth != defaults.max_indv_mean_depth) printLOG("\t--max-indv-meanDP " + dbl2str(max_indv_mean_depth, 3) + "\n");
289
+ if (max_mac != defaults.max_mac) printLOG("\t--max-mac " + dbl2str(max_mac, 3) + "\n");
290
+ if (max_maf != defaults.max_maf) printLOG("\t--max-maf " + dbl2str(max_maf, 3) + "\n");
291
+ if (max_missing_call_count != defaults.max_missing_call_count) printLOG("\t--max-missing-count " + dbl2str(max_missing_call_count, 3) + "\n");
292
+ if (max_mean_depth != defaults.max_mean_depth) printLOG("\t--max-meanDP " + dbl2str(max_mean_depth, 3) + "\n");
293
+ if (max_non_ref_ac != defaults.max_non_ref_ac) printLOG("\t--max-non-ref-ac " + dbl2str(max_non_ref_ac, 3) + "\n");
294
+ if (max_non_ref_af != defaults.max_non_ref_af) printLOG("\t--max-non-ref-af " + dbl2str(max_non_ref_af, 3) + "\n");
295
+ if (max_N_indv != defaults.max_N_indv) printLOG("\t--max-indv " + int2str(max_N_indv) + "\n");
296
+ if (min_alleles != defaults.min_alleles) printLOG("\t--min-alleles " + int2str(min_alleles) + "\n");
297
+ if (min_genotype_depth != defaults.min_genotype_depth) printLOG("\t--minDP " + dbl2str(min_genotype_depth, 3) + "\n");
298
+ if (min_genotype_quality != defaults.min_genotype_quality) printLOG("\t--minGQ " + dbl2str(min_genotype_quality, 3) + "\n");
299
+ if (min_HWE_pvalue != defaults.min_HWE_pvalue) printLOG("\t--hwe " + dbl2str(min_HWE_pvalue, 3) + "\n");
300
+ if (min_indv_call_rate != defaults.min_indv_call_rate) printLOG("\t--mind " + dbl2str(min_indv_call_rate, 3) + "\n");
301
+ if (min_indv_mean_depth != defaults.min_indv_mean_depth) printLOG("\t--min-indv-meanDP " + dbl2str(min_indv_mean_depth, 3) + "\n");
302
+ if (min_interSNP_distance != defaults.min_interSNP_distance) printLOG("\t--thin " + int2str(min_interSNP_distance) + "\n");
303
+ if (min_kept_mask_value != defaults.min_kept_mask_value) printLOG("\t--mask-min " + int2str(min_kept_mask_value) + "\n");
304
+ if (min_mean_depth != defaults.min_mean_depth) printLOG("\t--min-meanDP " + dbl2str(min_mean_depth, 3) + "\n");
305
+ if (min_quality != defaults.min_quality) printLOG("\t--minQ " + dbl2str(min_quality, 3) + "\n");
306
+ if (min_r2 != defaults.min_r2) printLOG("\t--min-r2 " + dbl2str(min_r2, 3) + "\n");
307
+ if (min_site_call_rate != defaults.min_site_call_rate) printLOG("\t--geno " + dbl2str(min_site_call_rate, 3) + "\n");
308
+ if (min_non_ref_ac != defaults.min_non_ref_ac) printLOG("\t--non-ref-ac " + dbl2str(min_non_ref_ac, 3) + "\n");
309
+ if (min_non_ref_af != defaults.min_non_ref_af) printLOG("\t--non-ref-af " + dbl2str(min_non_ref_af, 3) + "\n");
310
+ if (output_012_matrix) printLOG("\t--012\n");
311
+ if (output_as_IMPUTE) printLOG("\t--IMPUTE\n");
312
+ if (output_BEAGLE_genotype_likelihoods) printLOG("\t--BEAGLE-GL\n");
313
+ if (output_counts && (suppress_allele_output==false)) printLOG("\t--counts\n");
314
+ if (output_counts && (suppress_allele_output==true)) printLOG("\t--counts2\n");
315
+ if (output_filter_summary) printLOG("\t--FILTER-summary\n");
316
+ if (output_filtered_sites != defaults.output_filtered_sites) printLOG("\t--filtered-sites\n");
317
+ if (output_freq && (suppress_allele_output==false)) printLOG("\t--freq\n");
318
+ if (output_freq && (suppress_allele_output==true)) printLOG("\t--freq2\n");
319
+ if (output_geno_depth) printLOG("\t--geno-depth\n");
320
+ if (output_geno_rsq) printLOG("\t--geno-r2\n");
321
+ if (output_hap_rsq) printLOG("\t--hap-r2\n");
322
+ if (output_het) printLOG("\t--het\n");
323
+ if (output_HWE) printLOG("\t--hardy\n");
324
+ if (output_indv_depth) printLOG("\t--depth\n");
325
+ if (output_interchromosomal_rsq) printLOG("\t--interchrom-geno-r2\n");
326
+ if (output_LROH != defaults.output_LROH) printLOG("\t--LROH\n");
327
+ if (output_missingness) printLOG("\t--missing\n");
328
+ if (output_PCA != defaults.output_PCA)
329
+ {
330
+ if (PCA_no_normalisation == false)
331
+ printLOG("\t--pca\n");
332
+ else
333
+ printLOG("\t--pca-no-norm\n");
334
+
335
+ if (output_N_PCA_SNP_loadings != defaults.output_N_PCA_SNP_loadings)
336
+ printLOG("\t--pca-snp-loadings " + int2str(output_N_PCA_SNP_loadings) + "\n");
337
+ }
338
+ if (output_prefix != defaults.output_prefix) printLOG("\t--out " + output_prefix + "\n");
339
+ if (output_relatedness) printLOG("\t--relatedness\n");
340
+ if (output_singletons) printLOG("\t--singletons\n");
341
+ if (output_site_depth) printLOG("\t--site-depth\n");
342
+ if (output_site_mean_depth) printLOG("\t--site-mean-depth\n");
343
+ if (output_site_pi != defaults.output_site_pi) printLOG("\t--site-pi\n");
344
+ if (output_site_quality) printLOG("\t--site-quality\n");
345
+ if (output_SNP_density_bin_size != defaults.output_SNP_density_bin_size) printLOG("\t--SNPdensity " + int2str(output_SNP_density_bin_size) + "\n");
346
+ if (output_TsTv_bin_size != defaults.output_TsTv_bin_size) printLOG("\t--TsTv " + int2str(output_TsTv_bin_size) + "\n");
347
+ if (output_TsTv_by_count) printLOG("\t--TsTv-by-count\n");
348
+ if (output_TsTv_by_qual) printLOG("\t--TsTv-by-qual\n");
349
+ if (phased_only) printLOG("\t--phased\n");
350
+ if (pi_window_size != defaults.pi_window_size) printLOG("\t--window-pi " + int2str(pi_window_size) + "\n");
351
+ if (plink_output) printLOG("\t--plink\n");
352
+ if (plink_tped_output) printLOG("\t--plink-tped\n");
353
+ if (positions_file != defaults.positions_file) printLOG("\t--positions " + positions_file + "\n");
354
+ if (recode) printLOG("\t--recode\n");
355
+ if (remove_all_filtered_genotypes) printLOG("\t--remove-filtered-geno-all\n");
356
+ if (remove_all_filtered_sites) printLOG("\t--remove-filtered-all\n");
357
+ if (snps_to_exclude_file != defaults.snps_to_exclude_file) printLOG("\t--exclude " + snps_to_exclude_file + "\n");
358
+ if (snps_to_keep_file != defaults.snps_to_keep_file) printLOG("\t--snps " + snps_to_keep_file + "\n");
359
+ if (start_pos != defaults.start_pos) printLOG("\t--from-bp " + int2str(start_pos) + "\n");
360
+ if (output_Tajima_D_bin_size != defaults.output_Tajima_D_bin_size) printLOG("\t--TajimaD " + int2str(output_Tajima_D_bin_size) + "\n");
361
+
362
+ if (output_as_ldhat_phased) printLOG("\t--ldhat\n");
363
+ if (output_as_ldhat_unphased) printLOG("\t--ldhat-geno\n");
364
+
365
+ if (site_filter_flags_to_exclude.size() > 0)
366
+ for (set<string>::iterator it=site_filter_flags_to_exclude.begin(); it != site_filter_flags_to_exclude.end(); ++it)
367
+ {
368
+ string tmp = *it;
369
+ printLOG("\t--remove-filtered " + tmp + "\n");
370
+ }
371
+
372
+ if (site_filter_flags_to_keep.size() > 0)
373
+ for (set<string>::iterator it=site_filter_flags_to_keep.begin(); it != site_filter_flags_to_keep.end(); ++it)
374
+ {
375
+ string tmp = *it;
376
+ printLOG("\t--keep-filtered " + tmp + "\n");
377
+ }
378
+
379
+ if (geno_filter_flags_to_exclude.size() > 0)
380
+ for (set<string>::iterator it=geno_filter_flags_to_exclude.begin(); it != geno_filter_flags_to_exclude.end(); ++it)
381
+ {
382
+ string tmp = *it;
383
+ printLOG("\t--remove-filtered-geno " + tmp + "\n");
384
+ }
385
+
386
+ if (INFO_to_extract.size() > 0)
387
+ for (unsigned int ui=0; ui<INFO_to_extract.size(); ui++)
388
+ printLOG("\t--get-INFO " + INFO_to_extract[ui] + "\n");
389
+
390
+ if (diff_file != defaults.diff_file)
391
+ {
392
+ if (vcf_compressed == false)
393
+ printLOG("\t--diff " + diff_file + "\n");
394
+ else
395
+ printLOG("\t--gzdiff " + diff_file + "\n");
396
+ if (diff_site_discordance == true) printLOG("\t--diff-site-discordance\n");
397
+ if (diff_indv_discordance == true) printLOG("\t--diff-indv-discordance\n");
398
+ if (diff_discordance_matrix == true) printLOG("\t--diff-discordance-matrix\n");
399
+ if (diff_switch_error == true) printLOG("\t--diff-switch-error\n");
400
+ }
401
+
402
+ if (recode_INFO_to_keep.size() > 0)
403
+ for (set<string>::iterator it=recode_INFO_to_keep.begin(); it != recode_INFO_to_keep.end(); ++it)
404
+ {
405
+ string tmp = *it;
406
+ printLOG("\t--recode-INFO " + tmp + "\n");
407
+ }
408
+
409
+ if (site_INFO_flags_to_remove.size() > 0)
410
+ for (set<string>::iterator it=site_INFO_flags_to_remove.begin(); it != site_INFO_flags_to_remove.end(); ++it)
411
+ {
412
+ string tmp = *it;
413
+ printLOG("\t--remove-INFO " + tmp + "\n");
414
+ }
415
+
416
+ if (site_INFO_flags_to_keep.size() > 0)
417
+ for (set<string>::iterator it=site_INFO_flags_to_keep.begin(); it != site_INFO_flags_to_keep.end(); ++it)
418
+ {
419
+ string tmp = *it;
420
+ printLOG("\t--keep-INFO " + tmp + "\n*** Note: --keep-INFO has changed. Are you sure you don't want --recode-INFO? ***\n");
421
+ }
422
+
423
+ if (BED_file != defaults.BED_file)
424
+ {
425
+ if (BED_exclude == false)
426
+ printLOG("\t--bed " + BED_file + "\n");
427
+ else
428
+ printLOG("\t--exclude-bed " + BED_file + "\n");
429
+ }
430
+
431
+ if (mask_file != defaults.mask_file)
432
+ {
433
+ if (invert_mask == false)
434
+ printLOG("\t--mask " + mask_file + "\n");
435
+ else
436
+ printLOG("\t--invert-mask " + mask_file + "\n");
437
+ }
438
+
439
+ if (snps_to_keep.size() > 0)
440
+ for (set<string>::iterator it=snps_to_keep.begin(); it != snps_to_keep.end(); ++it)
441
+ {
442
+ string tmp = *it;
443
+ printLOG("\t--snp " + tmp + "\n");
444
+ }
445
+
446
+ if (indv_to_keep.size() > 0)
447
+ for (set<string>::iterator it=indv_to_keep.begin(); it != indv_to_keep.end(); ++it)
448
+ {
449
+ string tmp = *it;
450
+ printLOG("\t--indv " + tmp + "\n");
451
+ }
452
+
453
+ if (indv_to_exclude.size() > 0)
454
+ for (set<string>::iterator it=indv_to_exclude.begin(); it != indv_to_exclude.end(); ++it)
455
+ {
456
+ string tmp = *it;
457
+ printLOG("\t--remove-indv " + tmp + "\n");
458
+ }
459
+
460
+ printLOG("\n");
461
+ }
462
+
463
+ void parameters::print_help()
464
+ {
465
+ unsigned int i;
466
+ string in_str;
467
+
468
+ if (argv.size() <= 1)
469
+ { // If there are no user parameters, display help.
470
+ argv.push_back("--?");
471
+ print_help();
472
+ }
473
+
474
+ for(i = 0; i < argv.size(); i++)
475
+ {
476
+ in_str = argv[i];
477
+ if ((in_str == "-h") || (in_str == "-?") || (in_str == "-help") || (in_str == "--?") || (in_str == "--help") || (in_str == "--h"))
478
+ {
479
+ cout << endl << "VCFtools (" << VCFTOOLS_VERSION << ")" << endl;
480
+ cout << "\u00A9 Adam Auton 2009" << endl << endl;
481
+ cout << "Process Variant Call Format files" << endl;
482
+ cout << endl;
483
+ cout << "For a list of options, please go to:" << endl;
484
+ cout << "\thttp://vcftools.sourceforge.net/options.html" << endl;
485
+ cout << endl;
486
+
487
+ exit(0);
488
+ }
489
+ }
490
+ }
491
+
492
+ // TODO: Rewrite this function to do some proper error checking.
493
+ void parameters::check_parameters()
494
+ {
495
+ parameters defaults(0, 0);
496
+ if (vcf_filename == "") error("VCF required.", 0);
497
+ if (end_pos < start_pos) error("End position must be greater than Start position.", 1);
498
+ if (((end_pos != numeric_limits<int>::max()) || (start_pos != -1)) && (chr_to_keep == "")) error("Require a chromosome when specifying a range.", 2);
499
+ if (max_maf < min_maf) error("Maximum MAF must be not be less than Minimum MAF.", 4);
500
+ if (max_mac < min_mac) error("Maximum MAC must be not be less than Minimum MAC.", 4);
501
+ if (min_maf != defaults.min_maf)
502
+ {
503
+ if ((min_maf < 0.0) || (min_maf > 1.0)) error("MAF must be between 0 and 1.", 4);
504
+ }
505
+ if (max_maf != defaults.max_maf)
506
+ {
507
+ if ((max_maf < 0.0) || (max_maf > 1.0)) error("Maximum MAF must be between 0 and 1.", 4);
508
+ }
509
+ if (min_non_ref_af != defaults.min_non_ref_af)
510
+ {
511
+ if ((min_non_ref_af < 0.0) || (min_non_ref_af > 1.0)) error("Non-Ref Allele Frequency must be between 0 and 1.", 4);
512
+ }
513
+ if (max_non_ref_af < min_non_ref_af) error("Maximum Non-Ref Allele Frequency must not be less that Minimum Non-Ref AF.", 4);
514
+ if (max_non_ref_ac < min_non_ref_ac) error("Maximum Non-Ref Allele Count must not be less that Minimum Non-Ref AC.", 4);
515
+ if ((min_site_call_rate > 1) || (min_indv_call_rate > 1)) error("Minimum Call rates cannot be greater than 1.", 5);
516
+ if (max_alleles < min_alleles) error("Max Number of Alleles must be greater than Min Number of Alleles.", 6);
517
+ if (max_mean_depth < min_mean_depth) error("Max Mean Depth must be greater the Min Mean Depth.", 7);
518
+ if (max_indv_mean_depth < min_indv_mean_depth) error("Max Indv Mean Depth must be greater the Min Indv Mean Depth.", 8);
519
+ if (max_genotype_depth < min_genotype_depth) error("Max Genotype Depth must be greater than Min Genotype Depth.", 9);
520
+ if (((output_as_ldhat_phased == true) || (output_as_ldhat_unphased)) && (chr_to_keep == "")) error("Require a chromosome (--chr) when outputting LDhat format.", 11);
521
+ if ((output_BEAGLE_genotype_likelihoods == true) && (chr_to_keep == "")) error("Require a chromosome (--chr) when outputting Beagle likelihoods.", 11);
522
+ if (min_kept_mask_value > 9) error("Min Mask value must be between 0 and 9.", 14);
523
+ if ((output_LROH == true) && (chr_to_keep == "")) error("Require a chromosome (--chr) when outputting LROH.", 11);
524
+ if ((chr_to_keep != "") && (chr_to_exclude != "") && (chr_to_keep == chr_to_exclude)) error("Chromosome to keep cannot match chromosome to exclude.",15);
525
+ if (output_TsTv_bin_size < 0) error("TsTv bin size must be > 0",16);
526
+ if (output_Tajima_D_bin_size < 0) error("Tajima D bin size must be > 0", 17);
527
+ if (pi_window_size < 0) error("Pi Window size must be > 0", 18);
528
+ if (output_SNP_density_bin_size < 0) error("SNP density bin size must be > 0", 18);
529
+ }
530
+
531
+ void parameters::error(string err_msg, int code)
532
+ {
533
+ printLOG("\n\nError: " + err_msg + "\n\n");
534
+ exit(code);
535
+ }