ngs_server 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (248) hide show
  1. data/bin/ngs_server +72 -50
  2. data/ext/bamtools/extconf.rb +3 -3
  3. data/ext/vcftools/Makefile +28 -0
  4. data/ext/vcftools/README.txt +36 -0
  5. data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
  6. data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
  7. data/ext/vcftools/cpp/.svn/entries +708 -0
  8. data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
  9. data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
  10. data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
  11. data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
  12. data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
  13. data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
  14. data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
  15. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
  16. data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
  17. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
  18. data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
  19. data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
  20. data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
  21. data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
  22. data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
  23. data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
  24. data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
  25. data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
  26. data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
  27. data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
  28. data/ext/vcftools/cpp/Makefile +46 -0
  29. data/ext/vcftools/cpp/dgeev.cpp +146 -0
  30. data/ext/vcftools/cpp/dgeev.h +43 -0
  31. data/ext/vcftools/cpp/output_log.cpp +79 -0
  32. data/ext/vcftools/cpp/output_log.h +34 -0
  33. data/ext/vcftools/cpp/parameters.cpp +535 -0
  34. data/ext/vcftools/cpp/parameters.h +154 -0
  35. data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
  36. data/ext/vcftools/cpp/vcf_entry.h +190 -0
  37. data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
  38. data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
  39. data/ext/vcftools/cpp/vcf_file.cpp +495 -0
  40. data/ext/vcftools/cpp/vcf_file.h +184 -0
  41. data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
  42. data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
  43. data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
  44. data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
  45. data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
  46. data/ext/vcftools/cpp/vcftools.cpp +107 -0
  47. data/ext/vcftools/cpp/vcftools.h +25 -0
  48. data/ext/vcftools/examples/.svn/all-wcprops +185 -0
  49. data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
  50. data/ext/vcftools/examples/.svn/entries +1048 -0
  51. data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
  52. data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
  53. data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
  54. data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
  55. data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
  56. data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
  57. data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
  58. data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
  59. data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
  60. data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
  61. data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
  62. data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
  63. data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
  64. data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
  65. data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
  66. data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
  67. data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
  68. data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
  69. data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
  70. data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
  71. data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
  72. data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
  73. data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
  74. data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
  75. data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
  76. data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
  77. data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
  78. data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
  79. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
  80. data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
  81. data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
  82. data/ext/vcftools/examples/annotate-test.vcf +37 -0
  83. data/ext/vcftools/examples/annotate.out +23 -0
  84. data/ext/vcftools/examples/annotate.txt +7 -0
  85. data/ext/vcftools/examples/annotate2.out +52 -0
  86. data/ext/vcftools/examples/annotate3.out +23 -0
  87. data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
  88. data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
  89. data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
  90. data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
  91. data/ext/vcftools/examples/cmp-test.out +53 -0
  92. data/ext/vcftools/examples/concat-a.vcf +21 -0
  93. data/ext/vcftools/examples/concat-b.vcf +13 -0
  94. data/ext/vcftools/examples/concat-c.vcf +19 -0
  95. data/ext/vcftools/examples/concat.out +39 -0
  96. data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
  97. data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
  98. data/ext/vcftools/examples/merge-test-a.vcf +17 -0
  99. data/ext/vcftools/examples/merge-test-b.vcf +17 -0
  100. data/ext/vcftools/examples/merge-test-c.vcf +15 -0
  101. data/ext/vcftools/examples/merge-test.vcf.out +31 -0
  102. data/ext/vcftools/examples/perl-api-1.pl +46 -0
  103. data/ext/vcftools/examples/query-test.out +6 -0
  104. data/ext/vcftools/examples/shuffle-test.vcf +12 -0
  105. data/ext/vcftools/examples/subset.SNPs.out +10 -0
  106. data/ext/vcftools/examples/subset.indels.out +18 -0
  107. data/ext/vcftools/examples/subset.vcf +21 -0
  108. data/ext/vcftools/examples/valid-3.3.vcf +30 -0
  109. data/ext/vcftools/examples/valid-4.0.vcf +34 -0
  110. data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
  111. data/ext/vcftools/examples/valid-4.1.vcf +37 -0
  112. data/ext/vcftools/extconf.rb +2 -0
  113. data/ext/vcftools/perl/.svn/all-wcprops +149 -0
  114. data/ext/vcftools/perl/.svn/entries +844 -0
  115. data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
  116. data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
  117. data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
  118. data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
  119. data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
  120. data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
  121. data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
  122. data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
  123. data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
  124. data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
  125. data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
  126. data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
  127. data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
  128. data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
  129. data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
  130. data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
  131. data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
  132. data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
  133. data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
  134. data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
  135. data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
  136. data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
  137. data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
  138. data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
  139. data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
  140. data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
  141. data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
  142. data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
  143. data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
  144. data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
  145. data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
  146. data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
  147. data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
  148. data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
  149. data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
  150. data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
  151. data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
  152. data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
  153. data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
  154. data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
  155. data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
  156. data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
  157. data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
  158. data/ext/vcftools/perl/ChangeLog +84 -0
  159. data/ext/vcftools/perl/FaSlice.pm +214 -0
  160. data/ext/vcftools/perl/Makefile +12 -0
  161. data/ext/vcftools/perl/Vcf.pm +2853 -0
  162. data/ext/vcftools/perl/VcfStats.pm +681 -0
  163. data/ext/vcftools/perl/fill-aa +103 -0
  164. data/ext/vcftools/perl/fill-an-ac +56 -0
  165. data/ext/vcftools/perl/fill-ref-md5 +204 -0
  166. data/ext/vcftools/perl/tab-to-vcf +92 -0
  167. data/ext/vcftools/perl/test.t +376 -0
  168. data/ext/vcftools/perl/vcf-annotate +1099 -0
  169. data/ext/vcftools/perl/vcf-compare +1193 -0
  170. data/ext/vcftools/perl/vcf-concat +310 -0
  171. data/ext/vcftools/perl/vcf-convert +180 -0
  172. data/ext/vcftools/perl/vcf-fix-newlines +97 -0
  173. data/ext/vcftools/perl/vcf-isec +660 -0
  174. data/ext/vcftools/perl/vcf-merge +577 -0
  175. data/ext/vcftools/perl/vcf-query +286 -0
  176. data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
  177. data/ext/vcftools/perl/vcf-sort +79 -0
  178. data/ext/vcftools/perl/vcf-stats +160 -0
  179. data/ext/vcftools/perl/vcf-subset +206 -0
  180. data/ext/vcftools/perl/vcf-to-tab +112 -0
  181. data/ext/vcftools/perl/vcf-validator +145 -0
  182. data/ext/vcftools/website/.svn/all-wcprops +41 -0
  183. data/ext/vcftools/website/.svn/entries +238 -0
  184. data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
  185. data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
  186. data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
  187. data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
  188. data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
  189. data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
  190. data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
  191. data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
  192. data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
  193. data/ext/vcftools/website/Makefile +6 -0
  194. data/ext/vcftools/website/README +2 -0
  195. data/ext/vcftools/website/VCF-poster.pdf +0 -0
  196. data/ext/vcftools/website/default.css +250 -0
  197. data/ext/vcftools/website/favicon.ico +0 -0
  198. data/ext/vcftools/website/favicon.png +0 -0
  199. data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
  200. data/ext/vcftools/website/img/.svn/entries +300 -0
  201. data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
  202. data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
  203. data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
  204. data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
  205. data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
  206. data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
  207. data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
  208. data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
  209. data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
  210. data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
  211. data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
  212. data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
  213. data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
  214. data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
  215. data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
  216. data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
  217. data/ext/vcftools/website/img/bg.gif +0 -0
  218. data/ext/vcftools/website/img/bgcode.gif +0 -0
  219. data/ext/vcftools/website/img/bgcontainer.gif +0 -0
  220. data/ext/vcftools/website/img/bgul.gif +0 -0
  221. data/ext/vcftools/website/img/header.gif +0 -0
  222. data/ext/vcftools/website/img/li.gif +0 -0
  223. data/ext/vcftools/website/img/quote.gif +0 -0
  224. data/ext/vcftools/website/img/search.gif +0 -0
  225. data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
  226. data/ext/vcftools/website/src/.svn/entries +300 -0
  227. data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
  228. data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
  229. data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
  230. data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
  231. data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
  232. data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
  233. data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
  234. data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
  235. data/ext/vcftools/website/src/docs.inc +202 -0
  236. data/ext/vcftools/website/src/index.inc +52 -0
  237. data/ext/vcftools/website/src/index.php +80 -0
  238. data/ext/vcftools/website/src/license.inc +27 -0
  239. data/ext/vcftools/website/src/links.inc +13 -0
  240. data/ext/vcftools/website/src/options.inc +654 -0
  241. data/ext/vcftools/website/src/perl_module.inc +249 -0
  242. data/ext/vcftools/website/src/specs.inc +18 -0
  243. data/lib/config.ru +9 -0
  244. data/lib/ngs_server/add.rb +9 -0
  245. data/lib/ngs_server/version.rb +1 -1
  246. data/lib/ngs_server.rb +55 -3
  247. data/ngs_server.gemspec +5 -2
  248. metadata +296 -6
@@ -0,0 +1,654 @@
1
+
2
+ <div class="item">
3
+ <h1>vcftools: Usage and Options</h1>
4
+ <p>The vcftools program is intended for analysis of <b>diploid SNP data</b> in VCF format.
5
+ The program is run from the command line, and the interface is
6
+ inspired by <a href="http://pngu.mgh.harvard.edu/%7Epurcell/plink/">PLINK</a>,
7
+ and so should be largely familiar to users of that package. Commands
8
+ take the
9
+ following form:</p>
10
+ <p class="codebox">vcftools --vcf file1.vcf --chr 20 --freq
11
+ </p>
12
+ <p>The above command tells vcftools to read in the file <i>file1.vcf</i>, extract
13
+ sites on chromosome 20, and calculate the allele frequency at each
14
+ site. The resulting allele frequency estimates
15
+ are stored in the output file, <i>out.freq</i>. As in the
16
+ above example, output from vcftools is mainly sent
17
+ to output files, as opposed to being shown on the screen.
18
+ </p>
19
+ <p>A description of each of the available options follows. <b>Note that some commands may only be
20
+ available in the latest version of vcftools. To obtain the latest version, you should use SVN to
21
+ checkout the latest code, as described on the <a href="http://vcftools.sourceforge.net/index.html">home page</a>.</b>
22
+ <br>
23
+ <br>
24
+ <br>
25
+ <a href="#basic">Basic&nbsp;Options</a><br>
26
+ <a href="#site_filter">Site Filter Options</a><br>
27
+ <a href="#indv_filter">Individual Filters</a><br>
28
+ <a href="#geno_filter">Genotype Filters</a><br>
29
+ <a href="#stats">Output Statistics</a><br>
30
+ <a href="#other_format">Output in Other Formats</a><br>
31
+ <a href="#misc">Miscellaneous</a><br>
32
+ <a href="#diff">File Comparison</a><br>
33
+ <a href="#development">Options still in development</a><br>
34
+ </p>
35
+ </div>
36
+ <div class="item">
37
+ <h1><a name="basic" class="Q">Basic&nbsp;Options</a></h1>
38
+ <ul>
39
+ <li>--vcf &lt;filename&gt;<br>
40
+ <br>
41
+ This option defines the VCF file to be processed. The files need to be decompressed prior to use with vcftools.
42
+ <b>vcftools expects files in VCF format v4.0, a specification of which can be found <a href="specs.html">here</a></b>.
43
+ <br>
44
+ <br>
45
+ </li>
46
+ <li>--gzvcf &lt;filename&gt;<br>
47
+ <br>
48
+ This option can be used in place of the --vcf option to read compressed (gzipped) VCF files directly. Note that this
49
+ option can be quite slow when used with large files.
50
+ <br>
51
+ <br>
52
+ </li>
53
+ <li>--out &lt;prefix&gt;<br>
54
+ <br>
55
+ This option defines the output filename prefix for all files generated
56
+ by vcftools. For example, if &lt;prefix&gt; is set to&nbsp;<span style="font-style: italic;">output_filename</span>,
57
+ then all output files will be of the form <span style="font-style: italic;">output_filename.*** </span>.
58
+ If this option is omitted, all output files will have the prefix 'out.'.<br>
59
+ <br>
60
+ </li>
61
+ </ul>
62
+ </div>
63
+ <div class="item">
64
+ <h1><a name="site_filter" class="Q">Site Filter Options</a></h1>
65
+ <ul>
66
+ <li>--chr &lt;chromosome&gt;<br>
67
+ <br>
68
+ Only process sites with a chromosome identifier matching
69
+ &lt;chromosome&gt;<br>
70
+ </li>
71
+ <li>--from-bp &lt;integer&gt;</li>
72
+ <li>--to-bp &lt;integer&gt;<br>
73
+ <br>
74
+ These options define the&nbsp;physical range of sites that will be
75
+ processed. Sites outside of this range will be excluded. These options
76
+ can only be used in conjunction with --chr.<br>
77
+ <br>
78
+ </li>
79
+ <li>--snp &lt;string&gt;<br>
80
+ <br>
81
+ Include SNP(s) with matching ID (e.g. a dbSNP rsID). This command can be used multiple
82
+ times in order to include more than one SNP.<br>
83
+ <br>
84
+ </li>
85
+ <li>--snps &lt;filename&gt;<br>
86
+ <br>
87
+ Include a list of SNPs given in a file. The file should contain a list
88
+ of SNP IDs (e.g. dbSNP rsIDs), with one ID per line.<br>
89
+ <br>
90
+ </li>
91
+ <li>--exclude &lt;filename&gt;<br>
92
+ <br>
93
+ Exclude a list of SNPs given in a file. The file should contain a list
94
+ of SNP IDs, with one ID per line.<br>
95
+ <br>
96
+ </li>
97
+ <li>--positions &lt;filename&gt;<br>
98
+ <br>
99
+ Include a set of sites on the basis of a
100
+ list of positions. Each line of the input file should contain a (tab-separated) chromosome and position.
101
+ The file should have a header line. Sites not included in the list are excluded.<br>
102
+ <br>
103
+ </li>
104
+ <li>--bed &lt;filename&gt;</li>
105
+ <li>--exclude-bed &lt;filename&gt;<br>
106
+ <br>
107
+ Include or exclude a set of sites on the basis of a
108
+ <a href="http://genome.ucsc.edu/FAQ/FAQformat#format1">BED</a> file. Only
109
+ the first three columns (chrom, chromStart and chromEnd) are required.
110
+ The BED file should have a header line. <br>
111
+ <br>
112
+ </li>
113
+ <li>--remove-filtered-all</li>
114
+ <li>--remove-filtered &lt;string&gt;</li>
115
+ <li>--keep-filtered &lt;string&gt;<br>
116
+ <br>
117
+ These options are used to filter sites on the basis of their FILTER flag.
118
+ The first option removes all sites with a FILTER flag.
119
+ The second option can be used to exclude sites with a specific filter flag.
120
+ The third option can be used to select sites on the basis of specific filter flags. The second and third options can be used multiple times to specify multiple FILTERs.
121
+ The --keep-filtered option is applied before the --remove-filtered option.<br>
122
+ <br>
123
+ </li>
124
+ <li>--remove-INFO &lt;string&gt;</li>
125
+ <li>--keep-INFO &lt;string&gt;<br>
126
+ <br>
127
+ These options are used to filter sites on the basis of INFO field flags.
128
+ The first option can be used to exclude sites with a specific INFO flag.
129
+ The second option can be used to select sites on the basis of specific INFO flags. These options can be used multiple times to specify multiple INFO flags.
130
+ The --keep-INFO option is applied before the --remove-INFO option. Note that <b>only INFO flags</b> can currently be used as filters
131
+ (i.e. there is currently no support for filtering by INFO field values). <br>
132
+ <br>
133
+ </li>
134
+ <li>--minQ &lt;float&gt;<br>
135
+ <br>
136
+ Include only sites with Quality above this threshold.<br>
137
+ <br>
138
+ </li>
139
+ <li>--min-meanDP &lt;float&gt;</li>
140
+ <li>--max-meanDP &lt;float&gt;<br>
141
+ <br>
142
+ Include sites with mean Depth within the thresholds defined by these
143
+ options.<br>
144
+ <br>
145
+ </li>
146
+ <li>--maf &lt;float&gt;</li>
147
+ <li>--max-maf &lt;float&gt;<br>
148
+ <br>
149
+ Include only sites with Minor Allele Frequency within the specified
150
+ range.<br>
151
+ <br>
152
+ </li>
153
+ <li>--non-ref-af &lt;float&gt;</li>
154
+ <li>--max-non-ref-af &lt;float&gt;<br>
155
+ <br>
156
+ Include only sites with all Non-Reference Allele Frequencies within the specified
157
+ range.<br>
158
+ <br>
159
+ </li>
160
+ <li>--mac &lt;int&gt;</li>
161
+ <li>--max-mac &lt;int&gt;<br>
162
+ <br>
163
+ Include only sites with Minor Allele Count within the specified
164
+ range.<br>
165
+ <br>
166
+ </li>
167
+ <li>--non-ref-ac &lt;float&gt;</li>
168
+ <li>--max-non-ref-ac &lt;float&gt;<br>
169
+ <br>
170
+ Include only sites with all Non-Reference Allele Counts within the specified
171
+ range.<br>
172
+ <br>
173
+ </li>
174
+ <li>--hwe &lt;float&gt;<br>
175
+ <br>
176
+ Assesses sites for Hardy-Weinberg Equilibrium using an exact test, as
177
+ defined by Wigginton, Cutler and Abecasis (2005). Sites with a p-value
178
+ below the threshold defined by this option are taken to be out of HWE,
179
+ and therefore excluded.<br>
180
+ <br>
181
+ </li>
182
+ <li>--geno &lt;float&gt;<br>
183
+ <br>
184
+ Exclude sites on the basis of the proportion of missing data (defined
185
+ to be between 0 and 1, where 1 indicates no missing data allowed).<br>
186
+ <br>
187
+ </li>
188
+ <li>--max-missing-count &lt;int&gt;<br>
189
+ <br>
190
+ Exclude sites with more than this number of missing chromosomes.<br>
191
+ <br>
192
+ </li>
193
+ <li>--min-alleles &lt;int&gt;</li>
194
+ <li>--max-alleles &lt;int&gt;<br>
195
+ <br>
196
+ Include only sites with a number of alleles within the specified range.
197
+ For example, to include only bi-allelic sites, one could use:<br>
198
+ <br>
199
+ <p class="codebox">vcftools --vcf file1.vcf --min-alleles 2 --max-alleles 2
200
+ </p>
201
+ <br><br>
202
+ </li>
203
+ <li>--thin &lt;int&gt;<br>
204
+ <br>
205
+ Thin sites so that no two sites are within the specified distance.<br>
206
+ <br>
207
+ </li>
208
+
209
+ <li>--mask &lt;filename&gt;</li>
210
+ <li>--invert-mask &lt;filename&gt;</li>
211
+ <li>--mask-min &lt;int&gt;<br>
212
+ <br>
213
+ Include sites on the basis of a FASTA-like file. The provided file contains a sequence of integer digits (between 0 and 9) for each position on a chromosome that specify if a site at that position
214
+ should be filtered or not. An example mask file would look like:<br>
215
+ <p class="codebox">&gt;1<br>0000011111222... </p>
216
+ In this example, sites in the VCF file located within the first 5 bases of the start of chromosome 1 would be kept, whereas sites at position 6 onwards would be filtered out.
217
+ The threshold integer that determines if sites are filtered or not is set using the --mask-min option, which defaults to 0.
218
+ The chromosomes contained in the mask file must be sorted in the same order as the VCF file.
219
+ The --mask option is used to specify the mask file to be used, whereas the --invert-mask option can be used to specify a mask file that will be inverted before being applied.
220
+ <br>
221
+ </li>
222
+
223
+ </ul>
224
+ </div>
225
+ <div class="item">
226
+ <h1><a name="indv_filter" class="Q">Individual Filters</a></h1>
227
+ <ul>
228
+ <li>--indv &lt;string&gt;<br>
229
+ <br>
230
+ Specify an individual to be kept in the analysis. This option can be used multiple times
231
+ to specify multiple individuals.
232
+ <br><br>
233
+ </li>
234
+ <li>--keep &lt;filename&gt;<br>
235
+ <br>
236
+ Provide a file containing a list of individuals to include in
237
+ subsequent analysis. Each individual ID (as defined in the VCF
238
+ headerline) should be included on a separate line.<br>
239
+ <br>
240
+ </li>
241
+ <li>--remove-indv &lt;string&gt;<br>
242
+ <br>
243
+ Specify an individual to be removed from the analysis. This option can be used multiple times
244
+ to specify multiple individuals. If the --indv option is also specified, then the --indv option is
245
+ executed before the --remove-indv option.
246
+ <br><br>
247
+ </li>
248
+ <li>--remove &lt;filename&gt;<br>
249
+ <br>
250
+ Provide a file containing a list of individuals to exclude in
251
+ subsequent analysis. Each individual ID (as defined in the VCF
252
+ headerline) should be included on a separate line. If both the --keep
253
+ and the --remove options are used, then the --keep option is executed before
254
+ the --remove option.<br>
255
+ <br>
256
+ </li>
257
+ <li>--min-indv-meanDP &lt;float&gt;</li>
258
+ <li>--max-indv-meanDP &lt;float&gt;<br>
259
+ <br>
260
+ Calculate the mean coverage on a per-individual basis. Only individuals
261
+ with coverage within the range specified by these options are included
262
+ in subsequent analyses.<br>
263
+ <br>
264
+ </li>
265
+ <li>--mind &lt;float&gt;<br>
266
+ <br>
267
+ Specify the minimum call rate threshold for each individual. <br>
268
+ <br>
269
+ </li>
270
+ <li>--phased<br>
271
+ <br>
272
+ First excludes all individuals having all genotypes unphased, and <span style="text-decoration: underline;">subsequently excludes
273
+ all sites with unphased genotypes</span>. The remaining
274
+ data&nbsp;therefore consists of&nbsp;phased data only.
275
+ </li>
276
+ <li>--max-indv &lt;int&gt;<br>
277
+ <br>
278
+ Randomly thins individuals so that only the specified number are retained.
279
+ </li>
280
+ </ul>
281
+ </div>
282
+ <div class="item">
283
+ <h1><a name="geno_filter" class="Q">Genotype Filters</a></h1>
284
+ <ul>
285
+ <li>--remove-filtered-geno-all</li>
286
+ <li>--remove-filtered-geno &lt;string&gt;<br>
287
+ <br>
288
+ The first option removes all genotypes with a FILTER flag. The second option can be used to exclude genotypes with a specific filter flag.<br>
289
+ <br>
290
+ </li>
291
+ <li>--minGQ &lt;float&gt;<br>
292
+ <br>
293
+ Exclude all genotypes with a quality below the threshold specified by
294
+ this option (GQ).<br>
295
+ <br>
296
+ </li>
297
+ <li>--minDP &lt;float&gt;<br>
298
+ <br>
299
+ Exclude all genotypes with a sequencing depth below that specified by
300
+ this option (DP)</li>
301
+ </ul>
302
+ </div>
303
+ <div class="item">
304
+ <h1><a name="stats" class="Q">Output Statistics</a></h1>
305
+ <ul>
306
+ <li>--freq</li>
307
+ <li>--counts</li>
308
+ <li>--freq2</li>
309
+ <li>--counts2<br>
310
+ <br>
311
+ Output per-site frequency information. The --freq option outputs the allele frequency in a
312
+ file with the suffix '.frq'. The --counts option outputs a similar file with the suffix '.frq.count',
313
+ that contains the raw allele counts at each site.<br>
314
+ The --freq2 and --counts2 options are used to suppress allele information in the output file. In this case,
315
+ the order of the freqs/counts depends on the numbering in the VCF file.<br>
316
+ <br>
317
+ </li>
318
+ <li>--depth<br>
319
+ <br>
320
+ Generates a file containing the mean depth per individual. This file
321
+ has the suffix '.idepth'.<br>
322
+ <br>
323
+ </li>
324
+ <li>--site-depth</li>
325
+ <li>--site-mean-depth<br>
326
+ <br>
327
+ Generates a file containing the depth per site. The --site-depth option
328
+ outputs the depth for each site summed across individuals. This file
329
+ has the suffix '.ldepth'. Likewise, the --site-mean-depth outputs the
330
+ mean depth for each site, and the output file has the suffix
331
+ '.ldepth.mean'.<br>
332
+ <br>
333
+ </li>
334
+ <li>--geno-depth<br>
335
+ <br>
336
+ Generates
337
+ a (possibly very large) file containing the depth for each genotype in
338
+ the VCF file. Missing entries are given the value -1. The file has the suffix '.gdepth'.<br>
339
+ <br>
340
+ </li>
341
+ <li>--site-quality<br>
342
+ <br>
343
+ Generates a file containing the per-site SNP quality, as found in the QUAL column of the VCF file. This file
344
+ has the suffix '.lqual'.<br>
345
+ <br>
346
+ </li>
347
+ <li>--het<br>
348
+ <br>
349
+ Calculates
350
+ a measure of heterozygosity on a per-individual basis. Specifically, the
351
+ inbreeding coefficient, F, is estimated&nbsp;for each individual
352
+ using
353
+ a method of moments. The resulting file has the suffix '.het'.<br>
354
+ <br>
355
+ </li>
356
+ <li>--hardy<br>
357
+ <br>
358
+ Reports a p-value for each&nbsp;site from a&nbsp;Hardy-Weinberg
359
+ Equilibrium&nbsp;test (as
360
+ defined by Wigginton, Cutler and Abecasis (2005)). The resulting file
361
+ (with suffix '.hwe') also contains the Observed numbers of Homozygotes
362
+ and Heterozygotes and the corresponding&nbsp;Expected numbers under
363
+ HWE.&nbsp;<br>
364
+ <br>
365
+ </li>
366
+ <li>--missing<br>
367
+ <br>
368
+ Generates
369
+ two files reporting the missingness on a per-individual and per-site
370
+ basis. The two files have suffixes '.imiss' and '.lmiss' respectively.<br>
371
+ <br>
372
+ </li>
373
+ <li>--hap-r2</li>
374
+ <li>--geno-r2</li>
375
+ <li>--ld-window &lt;int&gt;</li>
376
+ <li>--ld-window-bp &lt;int&gt;</li>
377
+ <li>--min-r2 &lt;float&gt;<br>
378
+ <br>
379
+ These options are used to report Linkage Disequilibrium (LD) statistics
380
+ as summarised by the r<sup>2</sup>, D, and D' statistics. The --hap-r2
381
+ option informs vcftools to output a file reporting the r<sup>2</sup>, D, and D'
382
+ statistics using phased haplotypes. These are the traditional measures of LD often reported in the population genetics literature.
383
+ If phased haplotypes are unavailable then the --geno-r2 option may be used, which calculates the squared correlation coefficient
384
+ between genotypes encoded as 0, 1 and 2 to represent the number of non-reference alleles in each individual. This is the same as the LD measure reported by PLINK. The D and D' statistics are only available for phased genotypes.
385
+ The haplotype version outputs a file with the suffix '.hap.ld', whereas the genotype version outputs a file with the suffix '.geno.ld'. The haplotype version implies the option --phased.
386
+ <br><br>The --ld-window option defines the maximum number of SNPs between the SNPs being tested for LD.
387
+ Likewise, the --ld-window-bp option can be used to define the maximum
388
+ physical separation (in base-pairs) of SNPs included in the LD calculation. Finally,
389
+ the --min-r2 option sets a minimum value for&nbsp;r<sup>2</sup>
390
+ below which the LD statistic is not reported.<br>
391
+ <br>
392
+ </li>
393
+ <li>--SNPdensity &lt;int&gt;<br>
394
+ <br>
395
+ Calculates the number and density of SNPs in bins of size defined by
396
+ this option. The resulting output file has the suffix '.snpden'.<br>
397
+ <br>
398
+ </li>
399
+ <li>--TsTv &lt;int&gt;<br>
400
+ <br>
401
+ Calculates the Transition / Transversion ratio in bins of size defined by
402
+ this option. Only uses bi-allelic SNPs. The resulting output file has the suffix '.TsTv'. A summary is also supplied in a file with the suffix '.TsTv.summary'.<br>
403
+ <br>
404
+ </li>
405
+ <li>--TsTv-by-count<br>
406
+ <br>
407
+ Calculates the Transition / Transversion ratio as a function of alternative allele count. Only uses bi-allelic SNPs.
408
+ The resulting output file has the suffix '.TsTv.count'. <br>
409
+ <br>
410
+ </li>
411
+ <li>--TsTv-by-qual<br>
412
+ <br>
413
+ Calculates the Transition / Transversion ratio as a function of SNP quality threshold. Only uses bi-allelic SNPs.
414
+ The resulting output file has the suffix '.TsTv.qual'. <br>
415
+ <br>
416
+ </li>
417
+ <li>--FILTER-summary<br>
418
+ <br>
419
+ Generates a summary of the number of SNPs and Ts/Tv ratio for each FILTER category. The output file
420
+ has the suffix '.FILTER.summary'. <br>
421
+ <br>
422
+ </li>
423
+ <li>--filtered-sites<br>
424
+ <br>
425
+ Creates two files listing sites that have been kept or removed after filtering. The first file, with
426
+ suffix '.kept.sites', lists sites kept by vcftools after filters have been applied. The second file,
427
+ with the suffix '.removed.sites', lists sites removed by the applied filters.<br>
428
+ <br>
429
+ </li>
430
+ <li>--singletons<br>
431
+ <br>
432
+ This option will generate a file detailing the location of singletons,
433
+ and the&nbsp;individual they occur in. The file reports both true
434
+ singletons, and private doubletons (i.e. SNPs where the minor allele
435
+ only occurs in a single individual and that individual is homozygotic
436
+ for that allele). The output file has the suffix '.singletons'.<br>
437
+ <br>
438
+ </li>
439
+ <li>--site-pi</li>
440
+ <li>--window-pi &lt;int&gt;<br>
441
+ <br>
442
+ These options are used to estimate levels of <a href="http://en.wikipedia.org/wiki/Nucleotide_diversity">nucleotide diversity</a>. The first
443
+ option does this on a per-site basis, and the output file has the suffix '.sites.pi'. The second option calculates the nucleotide diversity
444
+ in windows, with the window size defined in the option argument. <br>
445
+ <br>
446
+ </li>
447
+ </ul>
448
+ </div>
449
+ <div class="item">
450
+ <h1><a name="other_format" class="Q">Output in Other Formats</a></h1>
451
+ <ul>
452
+ <li>--012<br>
453
+ <br>
454
+ This option outputs the genotypes as a large matrix. Three files are
455
+ produced. The first, with suffix '.012', contains the genotypes of each
456
+ individual on a separate line. Genotypes are represented as 0, 1 and 2,
457
+ where the number represents the number of non-reference alleles. Missing
458
+ genotypes are represented by -1. The second file, with suffix '.012.indv'
459
+ details the individuals included in the main file. The third file, with
460
+ suffix '.012.pos' details the site locations included in the main file.<br>
461
+ <br>
462
+ </li>
463
+ <li>--IMPUTE<br>
464
+ <br>
465
+ This option outputs phased haplotypes in <a href="http://mathgen.stats.ox.ac.uk/impute/impute.html">IMPUTE</a>
466
+ reference-panel format. As IMPUTE requires phased data, using this
467
+ option also implies --phased. Unphased individuals and genotypes are
468
+ therefore excluded. Only bi-allelic sites are included in the output. Using this option generates three files. The IMPUTE
469
+ haplotype file has the suffix '.impute.hap', and the IMPUTE legend file
470
+ has the suffix '.impute.hap.legend'. The third file, with suffix
471
+ '.impute.hap.indv', details the individuals included in the haplotype
472
+ file, although this file is not needed by IMPUTE.<br>
473
+ <br>
474
+ </li>
475
+ <li>--ldhat<br>
476
+ <li>--ldhat-geno<br>
477
+ <br>
478
+ These options output data in <a href="http://www.stats.ox.ac.uk/%7Emcvean/LDhat/">LDhat</a>
479
+ format. Use of these options &nbsp;also requires the --chr option to
480
+ be used. The --ldhat option outputs phased data only, and therefore also implies --phased, leading to unphased
481
+ individuals and genotypes being excluded. Alternatively, the --ldhat-geno option treats all of the data as unphased, and therefore
482
+ outputs LDhat files in genotype/unphased format. In either case, two files are
483
+ generated with the suffixes '.ldhat.sites'
484
+ and '.ldhat.locs', which correspond to the
485
+ LDhat 'sites' and 'locs' input files respectively.<br>
486
+ <br>
487
+ </li>
488
+ <li>--BEAGLE-GL<br>
489
+ <br>
490
+ This option outputs genotype likelihood information for input into the
491
+ <a href="http://faculty.washington.edu/browning/beagle/beagle.html">BEAGLE</a> program.
492
+ This option requires the VCF file to contain the FORMAT GL tag, which can generally be output by
493
+ SNP callers such as <a href="http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit">the GATK</a>.
494
+ Use of this option requires a chromosome to be specified via the --chr option. The resulting output
495
+ file (with the suffix '.BEAGLE.GL') contains genotype likelihoods for biallelic sites, and is
496
+ suitable for input into BEAGLE via the 'like=' argument.<br>
497
+ <br>
498
+ </li>
499
+ <li>--plink<br>
500
+ <br>
501
+ This option outputs the genotype data in <a href="http://pngu.mgh.harvard.edu/%7Epurcell/plink/">PLINK</a>
502
+ PED format. Two files are generated, with suffixes '.ped' and '.map'. Note that only bi-allelic loci will be output.
503
+ Further details of these files can be found in the <a href="http://pngu.mgh.harvard.edu/%7Epurcell/plink/data.shtml#ped">PLINK
504
+ documentation</a>. <br>
505
+ <br>Note: This option can be very slow on large datasets. Using the --chr option to divide up the dataset is advised.<br>
506
+ <br>
507
+ </li>
508
+ <li>--plink-tped<br>
509
+ <br>
510
+ The --plink option above can be extremely slow on large datasets. An alternative that might be considerably quicker is
511
+ to output in the <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr">PLINK transposed format</a>.
512
+ This can be achieved using the --plink-tped option, which produces two files with suffixes '.tped' and '.tfam'.
513
+ <br>
514
+ <br>
515
+ </li>
516
+ <li>--recode<br>
517
+ <br>
518
+ The --recode option is used to generate a VCF file from the input VCF file
519
+ having applied the options specified by the user. The output file has
520
+ the suffix '.recode.vcf'.
521
+ <br><br>
522
+ By default, the INFO fields are removed from the output file, as the INFO values may be invalidated
523
+ by the recoding (e.g. the total depth may need to be recalculated if individuals are removed).
524
+ This default functionality can be overridden by using the --recode-INFO &lt;string&gt; option, where &lt;string&gt;
525
+ defines the INFO key to keep in the output file. The --recode-INFO flag can be used multiple times.
526
+ Alternatively, the option --recode-INFO-all can be used to retain all INFO fields.
527
+ </li>
528
+ </ul>
529
+ </div>
530
+ <div class="item">
531
+ <h1><a name="misc" class="Q">Miscellaneous</a></h1>
532
+ <ul>
533
+ <li>--extract-FORMAT-info &lt;string&gt;<br>
534
+ <br>
535
+ Extract information from the genotype fields in the VCF file relating to a specified FORMAT identifier. For example,
536
+ using the option '--extract-FORMAT-info GT' would extract all of the GT (i.e. Genotype) entries.
537
+ The resulting output file has the suffix '.&lt;FORMAT_ID&gt;.FORMAT'.
538
+ <br>
539
+ <br>
540
+ <li>--get-INFO &lt;string&gt;<br>
541
+ <br>
542
+ This option is used to extract information from the INFO field in the
543
+ VCF file. The &lt;string&gt; argument specifies the INFO tag to
544
+ be extracted, and the option can be used multiple times in order to
545
+ extract multiple INFO entries. The resulting file, with suffix '.INFO',
546
+ contains the required INFO information in a tab-separated table. For
547
+ example, to extract the NS and DB flags, one would use the command:
548
+ <br>
549
+ <br>
550
+ <p class="codebox">vcftools --vcf file1.vcf --get-INFO NS --get-INFO DB </p>
551
+ <br>
552
+ </li>
553
+ </ul>
554
+ </div>
555
+
556
+ <div class="item">
557
+ <h1><a name="diff" class="Q">VCF File Comparison Options</a></h1>
558
+ <p><b>The file comparison options are currently in a state of flux and likely buggy. If you find a bug,
559
+ please report it. Note that genotype-level filters are not supported in these options.</b><br>
560
+ </p>
561
+ <ul>
562
+ <li>--diff &lt;filename&gt;<br>
563
+ <li>--gzdiff &lt;filename&gt;<br>
564
+ <br>
565
+ Select a VCF file for comparison with the file specified by the --vcf option. Outputs two files
566
+ describing the sites and individuals common / unique to each file. These files have the suffixes
567
+ '.diff.sites_in_files' and '.diff.indv_in_files' respectively. The --gzdiff version can be used to
568
+ read compressed VCF files.
569
+ <br><br>
570
+ </li>
571
+ <li>--diff-site-discordance<br>
572
+ <br>
573
+ Used in conjunction with the --diff option to calculate discordance on a site by site basis. The
574
+ resulting output file has the suffix '.diff.sites'.
575
+ <br><br>
576
+ </li>
577
+ <li>--diff-indv-discordance<br>
578
+ <br>
579
+ Used in conjunction with the --diff option to calculate discordance on a per-individual basis. The
580
+ resulting output file has the suffix '.diff.indv'.
581
+ <br><br>
582
+ </li>
583
+ <li>--diff-discordance-matrix<br>
584
+ <br>
585
+ Used in conjunction with the --diff option to calculate a discordance matrix. This option only works
586
+ with bi-allelic loci with matching alleles that are present in both files. The resulting output file
587
+ has the suffix '.diff.discordance.matrix'.
588
+ <br><br>
589
+ </li>
590
+ <li>--diff-switch-error<br>
591
+ <br>
592
+ Used in conjunction with the --diff option to calculate phasing errors
593
+ (specifically '<a href="http://www.cell.com/AJHG/retrieve/pii/S0002929707619788">switch errors</a>').
594
+ This option generates two output files describing switch errors found between sites, and the average switch
595
+ error per individual. These two files have the suffixes '.diff.switch' and '.diff.indv.switch' respectively.
596
+ <br><br>
597
+ </li>
598
+
599
+ </ul>
600
+ </div>
601
+
602
+ <div class="item">
603
+ <h1><a name="development" class="Q">Options still in development</a></h1>
604
+ <p>The following options are yet to be finalised, are likely to contain bugs, and are likely to change in the future.<br>
605
+ </p>
606
+ <ul>
607
+ <li>--fst &lt;filename&gt;<br>
608
+ <li>--gzfst &lt;filename&gt;<br>
609
+ <br>
610
+ Calculate F<sub>ST</sub> for a pair of VCF files, with the second file being specified by this option.
611
+ F<sub>ST</sub> is currently calculated using the formula described in the supplementary material of the <a href="http://www.ncbi.nlm.nih.gov/pubmed/16255080">Phase I HapMap paper</a>.
612
+ Currently, only pairwise F<sub>ST</sub> calculations are supported, although this will likely change in the future.
613
+ The --gzfst option can be used to read compressed VCF files.
614
+ This option is likely to be deprecated in future releases, and it is recommended that you use the --fst-pop option instead.
615
+ <br>
616
+ <br>
617
+ </li>
618
+ <li>--fst-pop &lt;filename&gt;<br>
619
+ <br>
620
+ If you do not have separate files for each population, you can use this option to calculate F<sub>ST</sub>.
621
+ This option is used to provide a file which lists the individuals in each population, and is used multiple times to specify multiple populations.
622
+ Unlike the --fst option, this option can be used to calculate F<sub>ST</sub> for more than two populations.
623
+ <br>
624
+ <br>
625
+ </li>
626
+ <!--
627
+ <li>--pca <br>
628
+ <br>
629
+ Perform Principal Component Analysis.
630
+ <br>
631
+ <br>
632
+ </li>
633
+ -->
634
+ <li>--LROH <br>
635
+ <br>
636
+ Identify Long Runs of Homozygosity.
637
+ <br>
638
+ <br>
639
+ </li>
640
+ <li>--relatedness <br>
641
+ <br>
642
+ Output Individual Relatedness Statistics.
643
+ <br>
644
+ <br>
645
+ </li>
646
+ <li>--TajimaD &lt;int&gt; <br>
647
+ <br>
648
+ Output Tajima's D statistic in bins of size &lt;int&gt;.
649
+ <br>
650
+ <br>
651
+ </li>
652
+ </ul>
653
+ </div>
654
+