ngs_server 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,535 @@
1
+ /*
2
+ * parameters.cpp
3
+ *
4
+ * Created on: Nov 11, 2009
5
+ * Author: Adam Auton
6
+ * ($Revision: 249 $)
7
+ */
8
+
9
+ // Class for reading in, checking and storing user parameters
10
+
11
+ #include "parameters.h"
12
+
13
+ parameters::parameters(int argc, char *argv[])
14
+ {
15
+ string tmp;
16
+ for (int i=0; i<argc; i++)
17
+ {
18
+ tmp = argv[i];
19
+ this->argv.push_back(tmp);
20
+ }
21
+
22
+ BED_exclude = false;
23
+ BED_file = "";
24
+ chr_to_keep = "";
25
+ chr_to_exclude = "";
26
+ diff_discordance_matrix = false;
27
+ diff_file = "";
28
+ diff_file_compressed = false;
29
+ diff_indv_discordance = false;
30
+ diff_site_discordance = false;
31
+ diff_switch_error = false;
32
+ end_pos = numeric_limits<int>::max();
33
+ force_write_index = false;
34
+ fst_file = "";
35
+ fst_file_compressed = false;
36
+ indv_exclude_file = "";
37
+ indv_keep_file = "";
38
+ invert_mask = false;
39
+ recode_all_INFO = false;
40
+ ld_bp_window_size = numeric_limits<int>::max();
41
+ ld_snp_window_size = numeric_limits<int>::max();
42
+ min_mac = -1;
43
+ min_maf = -1.0;
44
+ mask_file = "";
45
+ max_alleles = numeric_limits<int>::max();
46
+ max_genotype_depth = numeric_limits<int>::max();
47
+ max_indv_mean_depth = numeric_limits<double>::max();
48
+ max_mac = numeric_limits<int>::max();
49
+ max_maf = numeric_limits<double>::max();
50
+ max_mean_depth = numeric_limits<double>::max();
51
+ max_missing_call_count = numeric_limits<int>::max();
52
+ max_non_ref_ac = numeric_limits<int>::max();
53
+ max_non_ref_af = numeric_limits<double>::max();
54
+ max_N_indv = -1;
55
+ min_alleles = -1;
56
+ min_genotype_depth = -1;
57
+ min_genotype_quality = -1.0;
58
+ min_HWE_pvalue = -1.0;
59
+ min_indv_call_rate = 0;
60
+ min_indv_mean_depth = -1.0;
61
+ min_interSNP_distance = -1;
62
+ min_kept_mask_value = 0;
63
+ min_mean_depth = -1.0;
64
+ min_quality = -1.0;
65
+ min_r2 = -1.0;
66
+ min_site_call_rate = 0;
67
+ min_non_ref_ac = -1;
68
+ min_non_ref_af = -1.0;
69
+ output_012_matrix = false;
70
+ output_as_IMPUTE = false;
71
+ output_as_ldhat_phased = false;
72
+ output_as_ldhat_unphased = false;
73
+ output_BEAGLE_genotype_likelihoods = false;
74
+ output_counts = false;
75
+ output_filter_summary = false;
76
+ output_filtered_sites = false;
77
+ output_freq = false;
78
+ output_geno_depth = false;
79
+ output_geno_rsq = false;
80
+ output_hap_rsq = false;
81
+ output_het = false;
82
+ output_HWE = false;
83
+ output_indv_depth = false;
84
+ output_interchromosomal_rsq = false;
85
+ output_LROH = false;
86
+ output_missingness = false;
87
+ output_N_PCA_SNP_loadings = -1;
88
+ output_PCA = false;
89
+ output_prefix="out";
90
+ output_relatedness = false;
91
+ output_singletons = false;
92
+ output_site_depth = false;
93
+ output_site_mean_depth = false;
94
+ output_site_pi=false;
95
+ output_site_quality = false;
96
+ output_SNP_density_bin_size = 0;
97
+ output_Tajima_D_bin_size = 0;
98
+ output_TsTv_bin_size = 0;
99
+ output_TsTv_by_count = false;
100
+ output_TsTv_by_qual = false;
101
+ phased_only = false;
102
+ PCA_no_normalisation = false;
103
+ pi_window_size = 0;
104
+ plink_output = false;
105
+ plink_tped_output = false;
106
+ positions_file = "";
107
+ recode = false;
108
+ remove_all_filtered_genotypes = false;
109
+ remove_all_filtered_sites = false;
110
+ snps_to_exclude_file = "";
111
+ snps_to_keep_file = "";
112
+ start_pos = -1;
113
+ suppress_allele_output = false;
114
+ vcf_filename="";
115
+ vcf_compressed = false;
116
+ }
117
+
118
+ void parameters::read_parameters()
119
+ {
120
+ unsigned int i=1;
121
+ string in_str;
122
+ while (i<argv.size())
123
+ {
124
+ in_str = argv[i];
125
+ if (in_str == "--vcf") { vcf_filename = get_arg(i+1); vcf_compressed = false; i++; } // VCF file to process
126
+ else if (in_str == "--012") output_012_matrix = true; // Output as 0/1/2 matrix
127
+ else if (in_str == "--BEAGLE-GL") { output_BEAGLE_genotype_likelihoods = true; min_alleles=2; max_alleles=2; } // Output as BEAGLE Genotype Likelihood format
128
+ else if (in_str == "--bed") { BED_file = get_arg(i+1); i++; BED_exclude=false; }
129
+ else if (in_str == "--chr") { chr_to_keep = get_arg(i+1); i++; } // Chromosome to process
130
+ else if (in_str == "--counts") output_counts = true; // Output per-site allele count statistics
131
+ else if (in_str == "--counts2") {output_counts = true; suppress_allele_output = true; } // Output per-site allele count statistics
132
+ else if (in_str == "--depth") output_indv_depth = true; // Output per-individual coverage statistics
133
+ else if (in_str == "--diff-discordance-matrix") { diff_discordance_matrix = true; } // Calculate some concensus statistics
134
+ else if (in_str == "--diff-indv-discordance") { diff_indv_discordance = true; } // Calculate some concensus statistics
135
+ else if (in_str == "--diff-site-discordance") { diff_site_discordance = true; } // Calculate some concensus statistics
136
+ else if (in_str == "--diff-switch-error") { diff_switch_error = true; } // Calculate some concensus statistics
137
+ else if (in_str == "--diff") { diff_file = get_arg(i+1); diff_file_compressed = false; i++; } // Calculate some concensus statistics
138
+ else if (in_str == "--exclude-bed") { BED_file = get_arg(i+1); i++; BED_exclude=true; }
139
+ else if (in_str == "--exclude") { snps_to_exclude_file = get_arg(i+1); i++; } // List of SNPs to exclude
140
+ else if (in_str == "--extract-FORMAT-info") { FORMAT_id_to_extract = get_arg(i+1); i++; }
141
+ else if (in_str == "--FILTER-summary") {output_filter_summary = true;}
142
+ else if (in_str == "--filtered-sites") {output_filtered_sites = true;}
143
+ else if (in_str == "--force-index-write") {force_write_index = true;}
144
+ else if (in_str == "--freq") output_freq = true; // Output per-site frequency statistics
145
+ else if (in_str == "--freq2") {output_freq = true; suppress_allele_output = true; } // Output per-site frequency statistics
146
+ else if (in_str == "--from-bp") { start_pos = atoi(get_arg(i+1).c_str()); i++; } // Start position
147
+ else if (in_str == "--fst") { fst_file = get_arg(i+1); fst_file_compressed = false; i++; }
148
+ else if (in_str == "--fst-pop") {fst_populations.push_back(get_arg(i+1)); i++; }
149
+ else if (in_str == "--geno-depth") output_geno_depth = true; // Output Depth for each genoptype
150
+ else if (in_str == "--geno-r2") { output_geno_rsq = true; min_alleles = 2; max_alleles = 2; } // Output pairwise LD (r^2)
151
+ else if (in_str == "--geno") { min_site_call_rate = atof(get_arg(i+1).c_str()); i++; } // Minimum per-site call rate
152
+ else if (in_str == "--get-INFO") { INFO_to_extract.push_back(get_arg(i+1)); i++; } // Add to list of INFO fields to extract
153
+ else if (in_str == "--gzdiff") { diff_file = get_arg(i+1); diff_file_compressed = true; i++; } // Calculate some concensus statistics
154
+ else if (in_str == "--gzfst") { fst_file = get_arg(i+1); fst_file_compressed = true; i++; }
155
+ else if (in_str == "--gzvcf") { vcf_filename = get_arg(i+1); vcf_compressed = true; i++; } // VCF file to process
156
+ else if (in_str == "--hap-r2") { output_hap_rsq = true; phased_only = true; min_alleles = 2; max_alleles = 2; } // Output pairwise LD (r^2)
157
+ else if (in_str == "--hardy") output_HWE = true; // Output HWE statistics
158
+ else if (in_str == "--het") output_het = true; // Output heterozygosity statistics
159
+ else if (in_str == "--hwe") { max_alleles = 2; min_HWE_pvalue = atof(get_arg(i+1).c_str()); i++; } // Minimum per-site HWE p-value
160
+ else if (in_str == "--IMPUTE") { output_as_IMPUTE = true; phased_only=true; min_site_call_rate=1.0; min_alleles=2; max_alleles=2; }// Output as IMPUTE format
161
+ else if (in_str == "--indv") { indv_to_keep.insert(get_arg(i+1)); i++; } // List of individuals to keep
162
+ else if (in_str == "--interchrom-geno-r2") { output_interchromosomal_rsq = true; min_alleles = 2; max_alleles = 2; } // Output pairwise LD (r^2)
163
+ else if (in_str == "--invert-mask") { mask_file = get_arg(i+1); i++; invert_mask = true; }
164
+ else if (in_str == "--keep-filtered") { site_filter_flags_to_keep.insert(get_arg(i+1)); i++; } // Remove a specific filter flag
165
+ else if (in_str == "--keep") { indv_keep_file = get_arg(i+1); i++; } // List of individuals to keep
166
+ else if (in_str == "--keep-INFO") { site_INFO_flags_to_keep.insert(get_arg(i+1)); i++; } // Filter sites by INFO flags
167
+ else if (in_str == "--keep-INFO-all") { recode_all_INFO=true; } // Old command (soon to be depreciated)
168
+ else if (in_str == "--ld-window-bp") { ld_bp_window_size = atoi(get_arg(i+1).c_str()); i++; } // Max bp distance for LD output
169
+ else if (in_str == "--ld-window") { ld_snp_window_size = atoi(get_arg(i+1).c_str()); i++; } // Max SNP distance for LD output
170
+ else if (in_str == "--ldhat-geno") { output_as_ldhat_unphased = true; }
171
+ else if (in_str == "--ldhat") { output_as_ldhat_phased = true; phased_only = true; } // Output as LDhat format
172
+ else if (in_str == "--LROH") {output_LROH = true;}
173
+ else if (in_str == "--mac") { min_mac = atoi(get_arg(i+1).c_str()); i++; } // Minimum Site MAC
174
+ else if (in_str == "--maf") { min_maf = atof(get_arg(i+1).c_str()); i++; } // Minimum Site MAF
175
+ else if (in_str == "--mask-min") { min_kept_mask_value = atoi(get_arg(i+1).c_str()); i++; }
176
+ else if (in_str == "--mask") { mask_file = get_arg(i+1); i++; invert_mask = false; }
177
+ else if (in_str == "--max-alleles") { max_alleles = atoi(get_arg(i+1).c_str()); i++; } // Maximum number of alleles per-site
178
+ else if (in_str == "--max-indv-meanDP") { max_indv_mean_depth = atof(get_arg(i+1).c_str()); i++; } // Maximum mean depth for an individual
179
+ else if (in_str == "--max-mac") { max_mac = atoi(get_arg(i+1).c_str()); i++; } // Maximum site MAC
180
+ else if (in_str == "--max-maf") { max_maf = atof(get_arg(i+1).c_str()); i++; } // Maximum Site MAF
181
+ else if (in_str == "--max-meanDP") { max_mean_depth = atof(get_arg(i+1).c_str()); i++; } // Site Maximum mean depth across individuals
182
+ else if (in_str == "--max-missing-count") { max_missing_call_count = atoi(get_arg(i+1).c_str()); i++; } // Site maximum missing genotypes
183
+ else if (in_str == "--max-non-ref-ac") { max_non_ref_ac = atoi(get_arg(i+1).c_str()); i++; } // Minimum Site non-ref AC
184
+ else if (in_str == "--max-non-ref-af") { max_non_ref_af = atof(get_arg(i+1).c_str()); i++; } // Minimum Site non-ref AF
185
+ else if (in_str == "--maxDP") { max_genotype_depth = atoi(get_arg(i+1).c_str()); i++; } // Maximum genotype depth
186
+ else if (in_str == "--max-indv") {max_N_indv = atoi(get_arg(i+1).c_str()); i++; }
187
+ else if (in_str == "--min-alleles") { min_alleles = atoi(get_arg(i+1).c_str()); i++; } // Minimum number of alleles per-site
188
+ else if (in_str == "--min-indv-meanDP") { min_indv_mean_depth = atof(get_arg(i+1).c_str()); i++; } // Minimum mean depth for an individual
189
+ else if (in_str == "--min-meanDP") { min_mean_depth = atof(get_arg(i+1).c_str()); i++; } // Site Minimum mean depth
190
+ else if (in_str == "--min-r2") { min_r2 = atof(get_arg(i+1).c_str()); i++; } // Min r^2 for LD output
191
+ else if (in_str == "--mind") { min_indv_call_rate = atof(get_arg(i+1).c_str()); i++; } // Minimum per-individual call rate
192
+ else if (in_str == "--minDP") { min_genotype_depth = atoi(get_arg(i+1).c_str()); i++; } // Minimum genotype depth
193
+ else if (in_str == "--minGQ") { min_genotype_quality = atof(get_arg(i+1).c_str()); i++; } // Minimum genotype quality
194
+ else if (in_str == "--minQ") { min_quality = atof(get_arg(i+1).c_str()); i++; } // Minimum per-site quality
195
+ else if (in_str == "--missing") output_missingness = true; // Output Individual and Site missingness summaries
196
+ else if (in_str == "--non-ref-ac") { min_non_ref_ac = atoi(get_arg(i+1).c_str()); i++; } // Minimum Site non-ref AC
197
+ else if (in_str == "--non-ref-af") { min_non_ref_af = atof(get_arg(i+1).c_str()); i++; } // Minimum Site non-ref AF
198
+ else if (in_str == "--not-chr") { chr_to_exclude = get_arg(i+1); i++; } // Chromosome to process
199
+ else if (in_str == "--out") { output_prefix = get_arg(i+1); i++; } // Output file prefix
200
+ else if (in_str == "--pca") { output_PCA = true; min_alleles=2; max_alleles=2; }
201
+ else if (in_str == "--pca-no-norm") { output_PCA = true; PCA_no_normalisation = true; min_alleles=2; max_alleles=2; }
202
+ else if (in_str == "--pca-snp-loadings") { output_N_PCA_SNP_loadings = atoi(get_arg(i+1).c_str()); i++; }
203
+ else if (in_str == "--phased") phased_only = true; // Keep only phased individuals / sites
204
+ else if (in_str == "--plink") plink_output = true; // Output as PLINK file
205
+ else if (in_str == "--plink-tped") plink_tped_output = true; // Output as PLINK tped file
206
+ else if (in_str == "--positions") { positions_file = get_arg(i+1); i++; }
207
+ else if (in_str == "--recode") recode = true; // Output VCF file
208
+ else if (in_str == "--recode-INFO-all") { recode_all_INFO=true; } // Specify INFO to keep when recoding
209
+ else if (in_str == "--recode-INFO") { recode_INFO_to_keep.insert(get_arg(i+1)); i++; } // Specify INFO to keep when recoding
210
+ else if (in_str == "--relatedness") { output_relatedness = true; } // Estimate relatedness between individuals
211
+ else if (in_str == "--remove-filtered-all") remove_all_filtered_sites = true; // Remove sites flagged as filtered
212
+ else if (in_str == "--remove-filtered-geno-all") remove_all_filtered_genotypes = true; // Remove genotypes flagged as filtered
213
+ else if (in_str == "--remove-filtered-geno") { geno_filter_flags_to_exclude.insert(get_arg(i+1)); i++; } // Remove genotypes flagged as filtered
214
+ else if (in_str == "--remove-filtered") { site_filter_flags_to_exclude.insert(get_arg(i+1)); i++; } // Remove a specific filter flag
215
+ else if (in_str == "--remove-indv") { indv_to_exclude.insert(get_arg(i+1)); i++; } // List of individuals to keep
216
+ else if (in_str == "--remove-INFO") { site_INFO_flags_to_remove.insert(get_arg(i+1)); i++; } // Filter sites by INFO flags
217
+ else if (in_str == "--remove") { indv_exclude_file = get_arg(i+1); i++; } // List of individuals to exclude
218
+ else if (in_str == "--singletons") output_singletons = true; // Output as 0/1/2 mat
219
+ else if (in_str == "--site-depth") output_site_depth = true; // Output Depth for each site
220
+ else if (in_str == "--site-mean-depth") output_site_mean_depth = true; // Output Mean Depth for each site
221
+ else if (in_str == "--site-pi") { output_site_pi = true; }
222
+ else if (in_str == "--site-quality") output_site_quality = true; // Output per-site qualities
223
+ else if (in_str == "--snp") { snps_to_keep.insert(get_arg(i+1)); i++; } // SNP to keep
224
+ else if (in_str == "--SNPdensity") { output_SNP_density_bin_size = atoi(get_arg(i+1).c_str()); i++; } // Output SNP density using Bin Size
225
+ else if (in_str == "--snps") { snps_to_keep_file = get_arg(i+1); i++; } // List of SNPs to keep
226
+ else if (in_str == "--TajimaD") { output_Tajima_D_bin_size = atoi(get_arg(i+1).c_str()); i++;}
227
+ else if (in_str == "--to-bp") { end_pos = atoi(get_arg(i+1).c_str()); i++; } // End position
228
+ else if (in_str == "--thin") { min_interSNP_distance = atoi(get_arg(i+1).c_str()); i++; } // Set minimum distance between SNPs
229
+ else if (in_str == "--TsTv") {output_TsTv_bin_size = atoi(get_arg(i+1).c_str()); i++;} // Output Ts/Tv stats
230
+ else if (in_str == "--TsTv-by-count") {output_TsTv_by_count = true; } // Output Ts/Tv stats
231
+ else if (in_str == "--TsTv-by-qual") {output_TsTv_by_qual = true; }
232
+ else if (in_str == "--window-pi") { pi_window_size = atoi(get_arg(i+1).c_str()); i++; }
233
+ else
234
+ error("Unknown option: " + string(in_str), 0);
235
+ i++;
236
+ }
237
+ check_parameters();
238
+ }
239
+
240
+ string parameters::get_arg(unsigned int i)
241
+ {
242
+ if (i>=argv.size())
243
+ error("Requested Missing Argument",76);
244
+ return argv[i];
245
+ }
246
+
247
+ void parameters::print_params()
248
+ {
249
+ parameters defaults(0, 0);
250
+
251
+ printLOG("Parameters as interpreted:\n");
252
+ if (vcf_filename != defaults.vcf_filename)
253
+ {
254
+ if (vcf_compressed == false)
255
+ printLOG("\t--vcf " + vcf_filename + "\n");
256
+ else
257
+ printLOG("\t--gzvcf " + vcf_filename + "\n");
258
+ }
259
+
260
+ if (chr_to_exclude != defaults.chr_to_exclude) printLOG("\t--not-chr " + chr_to_exclude + "\n");
261
+ if (chr_to_keep != defaults.chr_to_keep) printLOG("\t--chr " + chr_to_keep + "\n");
262
+ if (end_pos != defaults.end_pos) printLOG("\t--to-bp " + int2str(end_pos) + "\n");
263
+ if (force_write_index != defaults.force_write_index) printLOG("\t--force-index-write\n");
264
+ if (FORMAT_id_to_extract != defaults.FORMAT_id_to_extract) printLOG("\t--extract-FORMAT-info " + FORMAT_id_to_extract + "\n");
265
+ if (fst_file != defaults.fst_file)
266
+ {
267
+ if (vcf_compressed == false)
268
+ printLOG("\t--fst " + fst_file + "\n");
269
+ else
270
+ {
271
+ printLOG("\t--gzfst " + fst_file + "\n");
272
+ }
273
+ }
274
+ if (fst_populations.size() != 0)
275
+ {
276
+ for (unsigned int ui=0; ui<fst_populations.size(); ui++)
277
+ printLOG("\t--fst-pop " + fst_populations[ui] + "\n");
278
+ }
279
+ if (indv_exclude_file != defaults.indv_exclude_file) printLOG("\t--exclude " + indv_exclude_file + "\n");
280
+ if (indv_keep_file != defaults.indv_keep_file) printLOG("\t--keep " + indv_keep_file + "\n");
281
+ if (recode_all_INFO == true) printLOG("\t--recode-INFO-all\n");
282
+ if (ld_bp_window_size != defaults.ld_bp_window_size) printLOG("\t--ld-window-bp " + int2str(ld_bp_window_size) + "\n");
283
+ if (ld_snp_window_size != defaults.ld_snp_window_size) printLOG("\t--ld-window " + int2str(ld_snp_window_size) + "\n");
284
+ if (min_mac != defaults.min_mac) printLOG("\t--mac " + dbl2str(min_mac, 3) + "\n");
285
+ if (min_maf != defaults.min_maf) printLOG("\t--maf " + dbl2str(min_maf, 3) + "\n");
286
+ if (max_alleles != defaults.max_alleles) printLOG("\t--max-alleles " + int2str(max_alleles) + "\n");
287
+ if (max_genotype_depth != defaults.max_genotype_depth) printLOG("\t--maxDP " + dbl2str(max_genotype_depth, 3) + "\n");
288
+ if (max_indv_mean_depth != defaults.max_indv_mean_depth) printLOG("\t--max-indv-meanDP " + dbl2str(max_indv_mean_depth, 3) + "\n");
289
+ if (max_mac != defaults.max_mac) printLOG("\t--max-mac " + dbl2str(max_mac, 3) + "\n");
290
+ if (max_maf != defaults.max_maf) printLOG("\t--max-maf " + dbl2str(max_maf, 3) + "\n");
291
+ if (max_missing_call_count != defaults.max_missing_call_count) printLOG("\t--max-missing-count " + dbl2str(max_missing_call_count, 3) + "\n");
292
+ if (max_mean_depth != defaults.max_mean_depth) printLOG("\t--max-meanDP " + dbl2str(max_mean_depth, 3) + "\n");
293
+ if (max_non_ref_ac != defaults.max_non_ref_ac) printLOG("\t--max-non-ref-ac " + dbl2str(max_non_ref_ac, 3) + "\n");
294
+ if (max_non_ref_af != defaults.max_non_ref_af) printLOG("\t--max-non-ref-af " + dbl2str(max_non_ref_af, 3) + "\n");
295
+ if (max_N_indv != defaults.max_N_indv) printLOG("\t--max-indv " + int2str(max_N_indv) + "\n");
296
+ if (min_alleles != defaults.min_alleles) printLOG("\t--min-alleles " + int2str(min_alleles) + "\n");
297
+ if (min_genotype_depth != defaults.min_genotype_depth) printLOG("\t--minDP " + dbl2str(min_genotype_depth, 3) + "\n");
298
+ if (min_genotype_quality != defaults.min_genotype_quality) printLOG("\t--minGQ " + dbl2str(min_genotype_quality, 3) + "\n");
299
+ if (min_HWE_pvalue != defaults.min_HWE_pvalue) printLOG("\t--hwe " + dbl2str(min_HWE_pvalue, 3) + "\n");
300
+ if (min_indv_call_rate != defaults.min_indv_call_rate) printLOG("\t--mind " + dbl2str(min_indv_call_rate, 3) + "\n");
301
+ if (min_indv_mean_depth != defaults.min_indv_mean_depth) printLOG("\t--min-indv-meanDP " + dbl2str(min_indv_mean_depth, 3) + "\n");
302
+ if (min_interSNP_distance != defaults.min_interSNP_distance) printLOG("\t--thin " + int2str(min_interSNP_distance) + "\n");
303
+ if (min_kept_mask_value != defaults.min_kept_mask_value) printLOG("\t--mask-min " + int2str(min_kept_mask_value) + "\n");
304
+ if (min_mean_depth != defaults.min_mean_depth) printLOG("\t--min-meanDP " + dbl2str(min_mean_depth, 3) + "\n");
305
+ if (min_quality != defaults.min_quality) printLOG("\t--minQ " + dbl2str(min_quality, 3) + "\n");
306
+ if (min_r2 != defaults.min_r2) printLOG("\t--min-r2 " + dbl2str(min_r2, 3) + "\n");
307
+ if (min_site_call_rate != defaults.min_site_call_rate) printLOG("\t--geno " + dbl2str(min_site_call_rate, 3) + "\n");
308
+ if (min_non_ref_ac != defaults.min_non_ref_ac) printLOG("\t--non-ref-ac " + dbl2str(min_non_ref_ac, 3) + "\n");
309
+ if (min_non_ref_af != defaults.min_non_ref_af) printLOG("\t--non-ref-af " + dbl2str(min_non_ref_af, 3) + "\n");
310
+ if (output_012_matrix) printLOG("\t--012\n");
311
+ if (output_as_IMPUTE) printLOG("\t--IMPUTE\n");
312
+ if (output_BEAGLE_genotype_likelihoods) printLOG("\t--BEAGLE-GL\n");
313
+ if (output_counts && (suppress_allele_output==false)) printLOG("\t--counts\n");
314
+ if (output_counts && (suppress_allele_output==true)) printLOG("\t--counts2\n");
315
+ if (output_filter_summary) printLOG("\t--FILTER-summary\n");
316
+ if (output_filtered_sites != defaults.output_filtered_sites) printLOG("\t--filtered-sites\n");
317
+ if (output_freq && (suppress_allele_output==false)) printLOG("\t--freq\n");
318
+ if (output_freq && (suppress_allele_output==true)) printLOG("\t--freq2\n");
319
+ if (output_geno_depth) printLOG("\t--geno-depth\n");
320
+ if (output_geno_rsq) printLOG("\t--geno-r2\n");
321
+ if (output_hap_rsq) printLOG("\t--hap-r2\n");
322
+ if (output_het) printLOG("\t--het\n");
323
+ if (output_HWE) printLOG("\t--hardy\n");
324
+ if (output_indv_depth) printLOG("\t--depth\n");
325
+ if (output_interchromosomal_rsq) printLOG("\t--interchrom-geno-r2\n");
326
+ if (output_LROH != defaults.output_LROH) printLOG("\t--LROH\n");
327
+ if (output_missingness) printLOG("\t--missing\n");
328
+ if (output_PCA != defaults.output_PCA)
329
+ {
330
+ if (PCA_no_normalisation == false)
331
+ printLOG("\t--pca\n");
332
+ else
333
+ printLOG("\t--pca-no-norm\n");
334
+
335
+ if (output_N_PCA_SNP_loadings != defaults.output_N_PCA_SNP_loadings)
336
+ printLOG("\t--pca-snp-loadings " + int2str(output_N_PCA_SNP_loadings) + "\n");
337
+ }
338
+ if (output_prefix != defaults.output_prefix) printLOG("\t--out " + output_prefix + "\n");
339
+ if (output_relatedness) printLOG("\t--relatedness\n");
340
+ if (output_singletons) printLOG("\t--singletons\n");
341
+ if (output_site_depth) printLOG("\t--site-depth\n");
342
+ if (output_site_mean_depth) printLOG("\t--site-mean-depth\n");
343
+ if (output_site_pi != defaults.output_site_pi) printLOG("\t--site-pi\n");
344
+ if (output_site_quality) printLOG("\t--site-quality\n");
345
+ if (output_SNP_density_bin_size != defaults.output_SNP_density_bin_size) printLOG("\t--SNPdensity " + int2str(output_SNP_density_bin_size) + "\n");
346
+ if (output_TsTv_bin_size != defaults.output_TsTv_bin_size) printLOG("\t--TsTv " + int2str(output_TsTv_bin_size) + "\n");
347
+ if (output_TsTv_by_count) printLOG("\t--TsTv-by-count\n");
348
+ if (output_TsTv_by_qual) printLOG("\t--TsTv-by-qual\n");
349
+ if (phased_only) printLOG("\t--phased\n");
350
+ if (pi_window_size != defaults.pi_window_size) printLOG("\t--window-pi " + int2str(pi_window_size) + "\n");
351
+ if (plink_output) printLOG("\t--plink\n");
352
+ if (plink_tped_output) printLOG("\t--plink-tped\n");
353
+ if (positions_file != defaults.positions_file) printLOG("\t--positions " + positions_file + "\n");
354
+ if (recode) printLOG("\t--recode\n");
355
+ if (remove_all_filtered_genotypes) printLOG("\t--remove-filtered-geno-all\n");
356
+ if (remove_all_filtered_sites) printLOG("\t--remove-filtered-all\n");
357
+ if (snps_to_exclude_file != defaults.snps_to_exclude_file) printLOG("\t--exclude " + snps_to_exclude_file + "\n");
358
+ if (snps_to_keep_file != defaults.snps_to_keep_file) printLOG("\t--snps " + snps_to_keep_file + "\n");
359
+ if (start_pos != defaults.start_pos) printLOG("\t--from-bp " + int2str(start_pos) + "\n");
360
+ if (output_Tajima_D_bin_size != defaults.output_Tajima_D_bin_size) printLOG("\t--TajimaD " + int2str(output_Tajima_D_bin_size) + "\n");
361
+
362
+ if (output_as_ldhat_phased) printLOG("\t--ldhat\n");
363
+ if (output_as_ldhat_unphased) printLOG("\t--ldhat-geno\n");
364
+
365
+ if (site_filter_flags_to_exclude.size() > 0)
366
+ for (set<string>::iterator it=site_filter_flags_to_exclude.begin(); it != site_filter_flags_to_exclude.end(); ++it)
367
+ {
368
+ string tmp = *it;
369
+ printLOG("\t--remove-filtered " + tmp + "\n");
370
+ }
371
+
372
+ if (site_filter_flags_to_keep.size() > 0)
373
+ for (set<string>::iterator it=site_filter_flags_to_keep.begin(); it != site_filter_flags_to_keep.end(); ++it)
374
+ {
375
+ string tmp = *it;
376
+ printLOG("\t--keep-filtered " + tmp + "\n");
377
+ }
378
+
379
+ if (geno_filter_flags_to_exclude.size() > 0)
380
+ for (set<string>::iterator it=geno_filter_flags_to_exclude.begin(); it != geno_filter_flags_to_exclude.end(); ++it)
381
+ {
382
+ string tmp = *it;
383
+ printLOG("\t--remove-filtered-geno " + tmp + "\n");
384
+ }
385
+
386
+ if (INFO_to_extract.size() > 0)
387
+ for (unsigned int ui=0; ui<INFO_to_extract.size(); ui++)
388
+ printLOG("\t--get-INFO " + INFO_to_extract[ui] + "\n");
389
+
390
+ if (diff_file != defaults.diff_file)
391
+ {
392
+ if (vcf_compressed == false)
393
+ printLOG("\t--diff " + diff_file + "\n");
394
+ else
395
+ printLOG("\t--gzdiff " + diff_file + "\n");
396
+ if (diff_site_discordance == true) printLOG("\t--diff-site-discordance\n");
397
+ if (diff_indv_discordance == true) printLOG("\t--diff-indv-discordance\n");
398
+ if (diff_discordance_matrix == true) printLOG("\t--diff-discordance-matrix\n");
399
+ if (diff_switch_error == true) printLOG("\t--diff-switch-error\n");
400
+ }
401
+
402
+ if (recode_INFO_to_keep.size() > 0)
403
+ for (set<string>::iterator it=recode_INFO_to_keep.begin(); it != recode_INFO_to_keep.end(); ++it)
404
+ {
405
+ string tmp = *it;
406
+ printLOG("\t--recode-INFO " + tmp + "\n");
407
+ }
408
+
409
+ if (site_INFO_flags_to_remove.size() > 0)
410
+ for (set<string>::iterator it=site_INFO_flags_to_remove.begin(); it != site_INFO_flags_to_remove.end(); ++it)
411
+ {
412
+ string tmp = *it;
413
+ printLOG("\t--remove-INFO " + tmp + "\n");
414
+ }
415
+
416
+ if (site_INFO_flags_to_keep.size() > 0)
417
+ for (set<string>::iterator it=site_INFO_flags_to_keep.begin(); it != site_INFO_flags_to_keep.end(); ++it)
418
+ {
419
+ string tmp = *it;
420
+ printLOG("\t--keep-INFO " + tmp + "\n*** Note: --keep-INFO has changed. Are you sure you don't want --recode-INFO? ***\n");
421
+ }
422
+
423
+ if (BED_file != defaults.BED_file)
424
+ {
425
+ if (BED_exclude == false)
426
+ printLOG("\t--bed " + BED_file + "\n");
427
+ else
428
+ printLOG("\t--exclude-bed " + BED_file + "\n");
429
+ }
430
+
431
+ if (mask_file != defaults.mask_file)
432
+ {
433
+ if (invert_mask == false)
434
+ printLOG("\t--mask " + mask_file + "\n");
435
+ else
436
+ printLOG("\t--invert-mask " + mask_file + "\n");
437
+ }
438
+
439
+ if (snps_to_keep.size() > 0)
440
+ for (set<string>::iterator it=snps_to_keep.begin(); it != snps_to_keep.end(); ++it)
441
+ {
442
+ string tmp = *it;
443
+ printLOG("\t--snp " + tmp + "\n");
444
+ }
445
+
446
+ if (indv_to_keep.size() > 0)
447
+ for (set<string>::iterator it=indv_to_keep.begin(); it != indv_to_keep.end(); ++it)
448
+ {
449
+ string tmp = *it;
450
+ printLOG("\t--indv " + tmp + "\n");
451
+ }
452
+
453
+ if (indv_to_exclude.size() > 0)
454
+ for (set<string>::iterator it=indv_to_exclude.begin(); it != indv_to_exclude.end(); ++it)
455
+ {
456
+ string tmp = *it;
457
+ printLOG("\t--remove-indv " + tmp + "\n");
458
+ }
459
+
460
+ printLOG("\n");
461
+ }
462
+
463
+ void parameters::print_help()
464
+ {
465
+ unsigned int i;
466
+ string in_str;
467
+
468
+ if (argv.size() <= 1)
469
+ { // If there are no user parameters, display help.
470
+ argv.push_back("--?");
471
+ print_help();
472
+ }
473
+
474
+ for(i = 0; i < argv.size(); i++)
475
+ {
476
+ in_str = argv[i];
477
+ if ((in_str == "-h") || (in_str == "-?") || (in_str == "-help") || (in_str == "--?") || (in_str == "--help") || (in_str == "--h"))
478
+ {
479
+ cout << endl << "VCFtools (" << VCFTOOLS_VERSION << ")" << endl;
480
+ cout << "\u00A9 Adam Auton 2009" << endl << endl;
481
+ cout << "Process Variant Call Format files" << endl;
482
+ cout << endl;
483
+ cout << "For a list of options, please go to:" << endl;
484
+ cout << "\thttp://vcftools.sourceforge.net/options.html" << endl;
485
+ cout << endl;
486
+
487
+ exit(0);
488
+ }
489
+ }
490
+ }
491
+
492
+ // TODO: Rewrite this function to do some proper error checking.
493
+ void parameters::check_parameters()
494
+ {
495
+ parameters defaults(0, 0);
496
+ if (vcf_filename == "") error("VCF required.", 0);
497
+ if (end_pos < start_pos) error("End position must be greater than Start position.", 1);
498
+ if (((end_pos != numeric_limits<int>::max()) || (start_pos != -1)) && (chr_to_keep == "")) error("Require a chromosome when specifying a range.", 2);
499
+ if (max_maf < min_maf) error("Maximum MAF must be not be less than Minimum MAF.", 4);
500
+ if (max_mac < min_mac) error("Maximum MAC must be not be less than Minimum MAC.", 4);
501
+ if (min_maf != defaults.min_maf)
502
+ {
503
+ if ((min_maf < 0.0) || (min_maf > 1.0)) error("MAF must be between 0 and 1.", 4);
504
+ }
505
+ if (max_maf != defaults.max_maf)
506
+ {
507
+ if ((max_maf < 0.0) || (max_maf > 1.0)) error("Maximum MAF must be between 0 and 1.", 4);
508
+ }
509
+ if (min_non_ref_af != defaults.min_non_ref_af)
510
+ {
511
+ if ((min_non_ref_af < 0.0) || (min_non_ref_af > 1.0)) error("Non-Ref Allele Frequency must be between 0 and 1.", 4);
512
+ }
513
+ if (max_non_ref_af < min_non_ref_af) error("Maximum Non-Ref Allele Frequency must not be less that Minimum Non-Ref AF.", 4);
514
+ if (max_non_ref_ac < min_non_ref_ac) error("Maximum Non-Ref Allele Count must not be less that Minimum Non-Ref AC.", 4);
515
+ if ((min_site_call_rate > 1) || (min_indv_call_rate > 1)) error("Minimum Call rates cannot be greater than 1.", 5);
516
+ if (max_alleles < min_alleles) error("Max Number of Alleles must be greater than Min Number of Alleles.", 6);
517
+ if (max_mean_depth < min_mean_depth) error("Max Mean Depth must be greater the Min Mean Depth.", 7);
518
+ if (max_indv_mean_depth < min_indv_mean_depth) error("Max Indv Mean Depth must be greater the Min Indv Mean Depth.", 8);
519
+ if (max_genotype_depth < min_genotype_depth) error("Max Genotype Depth must be greater than Min Genotype Depth.", 9);
520
+ if (((output_as_ldhat_phased == true) || (output_as_ldhat_unphased)) && (chr_to_keep == "")) error("Require a chromosome (--chr) when outputting LDhat format.", 11);
521
+ if ((output_BEAGLE_genotype_likelihoods == true) && (chr_to_keep == "")) error("Require a chromosome (--chr) when outputting Beagle likelihoods.", 11);
522
+ if (min_kept_mask_value > 9) error("Min Mask value must be between 0 and 9.", 14);
523
+ if ((output_LROH == true) && (chr_to_keep == "")) error("Require a chromosome (--chr) when outputting LROH.", 11);
524
+ if ((chr_to_keep != "") && (chr_to_exclude != "") && (chr_to_keep == chr_to_exclude)) error("Chromosome to keep cannot match chromosome to exclude.",15);
525
+ if (output_TsTv_bin_size < 0) error("TsTv bin size must be > 0",16);
526
+ if (output_Tajima_D_bin_size < 0) error("Tajima D bin size must be > 0", 17);
527
+ if (pi_window_size < 0) error("Pi Window size must be > 0", 18);
528
+ if (output_SNP_density_bin_size < 0) error("SNP density bin size must be > 0", 18);
529
+ }
530
+
531
+ void parameters::error(string err_msg, int code)
532
+ {
533
+ printLOG("\n\nError: " + err_msg + "\n\n");
534
+ exit(code);
535
+ }