ngs_server 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ngs_server +72 -50
- data/ext/bamtools/extconf.rb +3 -3
- data/ext/vcftools/Makefile +28 -0
- data/ext/vcftools/README.txt +36 -0
- data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
- data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
- data/ext/vcftools/cpp/.svn/entries +708 -0
- data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
- data/ext/vcftools/cpp/Makefile +46 -0
- data/ext/vcftools/cpp/dgeev.cpp +146 -0
- data/ext/vcftools/cpp/dgeev.h +43 -0
- data/ext/vcftools/cpp/output_log.cpp +79 -0
- data/ext/vcftools/cpp/output_log.h +34 -0
- data/ext/vcftools/cpp/parameters.cpp +535 -0
- data/ext/vcftools/cpp/parameters.h +154 -0
- data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
- data/ext/vcftools/cpp/vcf_entry.h +190 -0
- data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
- data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
- data/ext/vcftools/cpp/vcf_file.cpp +495 -0
- data/ext/vcftools/cpp/vcf_file.h +184 -0
- data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
- data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
- data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
- data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
- data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
- data/ext/vcftools/cpp/vcftools.cpp +107 -0
- data/ext/vcftools/cpp/vcftools.h +25 -0
- data/ext/vcftools/examples/.svn/all-wcprops +185 -0
- data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
- data/ext/vcftools/examples/.svn/entries +1048 -0
- data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
- data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
- data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
- data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
- data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
- data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
- data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
- data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
- data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
- data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
- data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
- data/ext/vcftools/examples/annotate-test.vcf +37 -0
- data/ext/vcftools/examples/annotate.out +23 -0
- data/ext/vcftools/examples/annotate.txt +7 -0
- data/ext/vcftools/examples/annotate2.out +52 -0
- data/ext/vcftools/examples/annotate3.out +23 -0
- data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
- data/ext/vcftools/examples/cmp-test.out +53 -0
- data/ext/vcftools/examples/concat-a.vcf +21 -0
- data/ext/vcftools/examples/concat-b.vcf +13 -0
- data/ext/vcftools/examples/concat-c.vcf +19 -0
- data/ext/vcftools/examples/concat.out +39 -0
- data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
- data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
- data/ext/vcftools/examples/merge-test-a.vcf +17 -0
- data/ext/vcftools/examples/merge-test-b.vcf +17 -0
- data/ext/vcftools/examples/merge-test-c.vcf +15 -0
- data/ext/vcftools/examples/merge-test.vcf.out +31 -0
- data/ext/vcftools/examples/perl-api-1.pl +46 -0
- data/ext/vcftools/examples/query-test.out +6 -0
- data/ext/vcftools/examples/shuffle-test.vcf +12 -0
- data/ext/vcftools/examples/subset.SNPs.out +10 -0
- data/ext/vcftools/examples/subset.indels.out +18 -0
- data/ext/vcftools/examples/subset.vcf +21 -0
- data/ext/vcftools/examples/valid-3.3.vcf +30 -0
- data/ext/vcftools/examples/valid-4.0.vcf +34 -0
- data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
- data/ext/vcftools/examples/valid-4.1.vcf +37 -0
- data/ext/vcftools/extconf.rb +2 -0
- data/ext/vcftools/perl/.svn/all-wcprops +149 -0
- data/ext/vcftools/perl/.svn/entries +844 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
- data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
- data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
- data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
- data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
- data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
- data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
- data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
- data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
- data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
- data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
- data/ext/vcftools/perl/ChangeLog +84 -0
- data/ext/vcftools/perl/FaSlice.pm +214 -0
- data/ext/vcftools/perl/Makefile +12 -0
- data/ext/vcftools/perl/Vcf.pm +2853 -0
- data/ext/vcftools/perl/VcfStats.pm +681 -0
- data/ext/vcftools/perl/fill-aa +103 -0
- data/ext/vcftools/perl/fill-an-ac +56 -0
- data/ext/vcftools/perl/fill-ref-md5 +204 -0
- data/ext/vcftools/perl/tab-to-vcf +92 -0
- data/ext/vcftools/perl/test.t +376 -0
- data/ext/vcftools/perl/vcf-annotate +1099 -0
- data/ext/vcftools/perl/vcf-compare +1193 -0
- data/ext/vcftools/perl/vcf-concat +310 -0
- data/ext/vcftools/perl/vcf-convert +180 -0
- data/ext/vcftools/perl/vcf-fix-newlines +97 -0
- data/ext/vcftools/perl/vcf-isec +660 -0
- data/ext/vcftools/perl/vcf-merge +577 -0
- data/ext/vcftools/perl/vcf-query +286 -0
- data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
- data/ext/vcftools/perl/vcf-sort +79 -0
- data/ext/vcftools/perl/vcf-stats +160 -0
- data/ext/vcftools/perl/vcf-subset +206 -0
- data/ext/vcftools/perl/vcf-to-tab +112 -0
- data/ext/vcftools/perl/vcf-validator +145 -0
- data/ext/vcftools/website/.svn/all-wcprops +41 -0
- data/ext/vcftools/website/.svn/entries +238 -0
- data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
- data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
- data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
- data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
- data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
- data/ext/vcftools/website/Makefile +6 -0
- data/ext/vcftools/website/README +2 -0
- data/ext/vcftools/website/VCF-poster.pdf +0 -0
- data/ext/vcftools/website/default.css +250 -0
- data/ext/vcftools/website/favicon.ico +0 -0
- data/ext/vcftools/website/favicon.png +0 -0
- data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/img/.svn/entries +300 -0
- data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
- data/ext/vcftools/website/img/bg.gif +0 -0
- data/ext/vcftools/website/img/bgcode.gif +0 -0
- data/ext/vcftools/website/img/bgcontainer.gif +0 -0
- data/ext/vcftools/website/img/bgul.gif +0 -0
- data/ext/vcftools/website/img/header.gif +0 -0
- data/ext/vcftools/website/img/li.gif +0 -0
- data/ext/vcftools/website/img/quote.gif +0 -0
- data/ext/vcftools/website/img/search.gif +0 -0
- data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/src/.svn/entries +300 -0
- data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
- data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
- data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
- data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
- data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
- data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
- data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
- data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
- data/ext/vcftools/website/src/docs.inc +202 -0
- data/ext/vcftools/website/src/index.inc +52 -0
- data/ext/vcftools/website/src/index.php +80 -0
- data/ext/vcftools/website/src/license.inc +27 -0
- data/ext/vcftools/website/src/links.inc +13 -0
- data/ext/vcftools/website/src/options.inc +654 -0
- data/ext/vcftools/website/src/perl_module.inc +249 -0
- data/ext/vcftools/website/src/specs.inc +18 -0
- data/lib/config.ru +9 -0
- data/lib/ngs_server/add.rb +9 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +55 -3
- data/ngs_server.gemspec +5 -2
- metadata +296 -6
@@ -0,0 +1,654 @@
|
|
1
|
+
|
2
|
+
<div class="item">
|
3
|
+
<h1>vcftools: Usage and Options</h1>
|
4
|
+
<p>The vcftools program is intended for analysis of <b>diploid SNP data</b> in VCF format.
|
5
|
+
The program is run from the command line, and the interface is
|
6
|
+
inspired by <a href="http://pngu.mgh.harvard.edu/%7Epurcell/plink/">PLINK</a>,
|
7
|
+
and so should be largely familiar to users of that package. Commands
|
8
|
+
take the
|
9
|
+
following form:</p>
|
10
|
+
<p class="codebox">vcftools --vcf file1.vcf --chr 20 --freq
|
11
|
+
</p>
|
12
|
+
<p>The above command tells vcftools to read in the file <i>file1.vcf</i>, extract
|
13
|
+
sites on chromosome 20, and calculate the allele frequency at each
|
14
|
+
site. The resulting allele frequency estimates
|
15
|
+
are stored in the output file, <i>out.freq</i>. As in the
|
16
|
+
above example, output from vcftools is mainly sent
|
17
|
+
to output files, as opposed to being shown on the screen.
|
18
|
+
</p>
|
19
|
+
<p>A description of each of the available options follows. <b>Note that some commands may only be
|
20
|
+
available in the latest version of vcftools. To obtain the latest version, you should use SVN to
|
21
|
+
checkout the latest code, as described on the <a href="http://vcftools.sourceforge.net/index.html">home page</a>.</b>
|
22
|
+
<br>
|
23
|
+
<br>
|
24
|
+
<br>
|
25
|
+
<a href="#basic">Basic Options</a><br>
|
26
|
+
<a href="#site_filter">Site Filter Options</a><br>
|
27
|
+
<a href="#indv_filter">Individual Filters</a><br>
|
28
|
+
<a href="#geno_filter">Genotype Filters</a><br>
|
29
|
+
<a href="#stats">Output Statistics</a><br>
|
30
|
+
<a href="#other_format">Output in Other Formats</a><br>
|
31
|
+
<a href="#misc">Miscellaneous</a><br>
|
32
|
+
<a href="#diff">File Comparison</a><br>
|
33
|
+
<a href="#development">Options still in development</a><br>
|
34
|
+
</p>
|
35
|
+
</div>
|
36
|
+
<div class="item">
|
37
|
+
<h1><a name="basic" class="Q">Basic Options</a></h1>
|
38
|
+
<ul>
|
39
|
+
<li>--vcf <filename><br>
|
40
|
+
<br>
|
41
|
+
This option defines the VCF file to be processed. The files need to be decompressed prior to use with vcftools.
|
42
|
+
<b>vcftools expects files in VCF format v4.0, a specification of which can be found <a href="specs.html">here</a></b>.
|
43
|
+
<br>
|
44
|
+
<br>
|
45
|
+
</li>
|
46
|
+
<li>--gzvcf <filename><br>
|
47
|
+
<br>
|
48
|
+
This option can be used in place of the --vcf option to read compressed (gzipped) VCF files directly. Note that this
|
49
|
+
option can be quite slow when used with large files.
|
50
|
+
<br>
|
51
|
+
<br>
|
52
|
+
</li>
|
53
|
+
<li>--out <prefix><br>
|
54
|
+
<br>
|
55
|
+
This option defines the output filename prefix for all files generated
|
56
|
+
by vcftools. For example, if <prefix> is set to <span style="font-style: italic;">output_filename</span>,
|
57
|
+
then all output files will be of the form <span style="font-style: italic;">output_filename.*** </span>.
|
58
|
+
If this option is omitted, all output files will have the prefix 'out.'.<br>
|
59
|
+
<br>
|
60
|
+
</li>
|
61
|
+
</ul>
|
62
|
+
</div>
|
63
|
+
<div class="item">
|
64
|
+
<h1><a name="site_filter" class="Q">Site Filter Options</a></h1>
|
65
|
+
<ul>
|
66
|
+
<li>--chr <chromosome><br>
|
67
|
+
<br>
|
68
|
+
Only process sites with a chromosome identifier matching
|
69
|
+
<chromosome><br>
|
70
|
+
</li>
|
71
|
+
<li>--from-bp <integer></li>
|
72
|
+
<li>--to-bp <integer><br>
|
73
|
+
<br>
|
74
|
+
These options define the physical range of sites that will be
|
75
|
+
processed. Sites outside of this range will be excluded. These options
|
76
|
+
can only be used in conjunction with --chr.<br>
|
77
|
+
<br>
|
78
|
+
</li>
|
79
|
+
<li>--snp <string><br>
|
80
|
+
<br>
|
81
|
+
Include SNP(s) with matching ID (e.g. a dbSNP rsID). This command can be used multiple
|
82
|
+
times in order to include more than one SNP.<br>
|
83
|
+
<br>
|
84
|
+
</li>
|
85
|
+
<li>--snps <filename><br>
|
86
|
+
<br>
|
87
|
+
Include a list of SNPs given in a file. The file should contain a list
|
88
|
+
of SNP IDs (e.g. dbSNP rsIDs), with one ID per line.<br>
|
89
|
+
<br>
|
90
|
+
</li>
|
91
|
+
<li>--exclude <filename><br>
|
92
|
+
<br>
|
93
|
+
Exclude a list of SNPs given in a file. The file should contain a list
|
94
|
+
of SNP IDs, with one ID per line.<br>
|
95
|
+
<br>
|
96
|
+
</li>
|
97
|
+
<li>--positions <filename><br>
|
98
|
+
<br>
|
99
|
+
Include a set of sites on the basis of a
|
100
|
+
list of positions. Each line of the input file should contain a (tab-separated) chromosome and position.
|
101
|
+
The file should have a header line. Sites not included in the list are excluded.<br>
|
102
|
+
<br>
|
103
|
+
</li>
|
104
|
+
<li>--bed <filename><br>
|
105
|
+
<li>--exclude-bed <filename><br>
|
106
|
+
<br>
|
107
|
+
Include or exclude a set of sites on the basis of a
|
108
|
+
<a href="http://genome.ucsc.edu/FAQ/FAQformat#format1">BED</a> file. Only
|
109
|
+
the first three columns (chrom, chromStart and chromEnd) are required.
|
110
|
+
The BED file should have a header line. <br>
|
111
|
+
<br>
|
112
|
+
</li>
|
113
|
+
<li>--remove-filtered-all</li>
|
114
|
+
<li>--remove-filtered <string><br>
|
115
|
+
<li>--keep-filtered <string><br>
|
116
|
+
<br>
|
117
|
+
These options are used to filter sites on the basis of their FILTER flag.
|
118
|
+
The first option removes all sites with a FILTER flag.
|
119
|
+
The second option can be used to exclude sites with a specific filter flag.
|
120
|
+
The third option can be used to select sites on the basis of specific filter flags. The second and third options can be used multiple times to specify multiple FILTERs.
|
121
|
+
The --keep-filtered option is applied before the --remove-filtered option.<br>
|
122
|
+
<br>
|
123
|
+
</li>
|
124
|
+
<li>--remove-INFO <string><br>
|
125
|
+
<li>--keep-INFO <string><br>
|
126
|
+
<br>
|
127
|
+
These options are used to filter sites on the basis of INFO field flags.
|
128
|
+
The first option can be used to exclude sites with a specific INFO flag.
|
129
|
+
The second option can be used to select sites on the basis of specific INFO flags. These options can be used multiple times to specify multiple INFO flags.
|
130
|
+
The --keep-INFO option is applied before the --remove-INFO option. Note that <b>only INFO flags</b> can currently be used as filters
|
131
|
+
(i.e. there is currently no support for filtering by INFO field values). <br>
|
132
|
+
<br>
|
133
|
+
</li>
|
134
|
+
<li>--minQ <float><br>
|
135
|
+
<br>
|
136
|
+
Include only sites with Quality above this threshold.<br>
|
137
|
+
<br>
|
138
|
+
</li>
|
139
|
+
<li>--min-meanDP <float></li>
|
140
|
+
<li>--max-meanDP <float><br>
|
141
|
+
<br>
|
142
|
+
Include sites with mean Depth within the thresholds defined by these
|
143
|
+
options.<br>
|
144
|
+
<br>
|
145
|
+
</li>
|
146
|
+
<li>--maf <float></li>
|
147
|
+
<li>--max-maf <float><br>
|
148
|
+
<br>
|
149
|
+
Include only sites with Minor Allele Frequency within the specified
|
150
|
+
range.<br>
|
151
|
+
<br>
|
152
|
+
</li>
|
153
|
+
<li>--non-ref-af <float></li>
|
154
|
+
<li>--max-non-ref-af <float><br>
|
155
|
+
<br>
|
156
|
+
Include only sites with all Non-Reference Allele Frequencies within the specified
|
157
|
+
range.<br>
|
158
|
+
<br>
|
159
|
+
</li>
|
160
|
+
<li>--mac <int></li>
|
161
|
+
<li>--max-mac <int><br>
|
162
|
+
<br>
|
163
|
+
Include only sites with Minor Allele Count within the specified
|
164
|
+
range.<br>
|
165
|
+
<br>
|
166
|
+
</li>
|
167
|
+
<li>--non-ref-ac <float></li>
|
168
|
+
<li>--max-non-ref-ac <float><br>
|
169
|
+
<br>
|
170
|
+
Include only sites with all Non-Reference Allele Counts within the specified
|
171
|
+
range.<br>
|
172
|
+
<br>
|
173
|
+
</li>
|
174
|
+
<li>--hwe <float><br>
|
175
|
+
<br>
|
176
|
+
Assesses sites for Hardy-Weinberg Equilibrium using an exact test, as
|
177
|
+
defined by Wigginton, Cutler and Abecasis (2005). Sites with a p-value
|
178
|
+
below the threshold defined by this option are taken to be out of HWE,
|
179
|
+
and therefore excluded.<br>
|
180
|
+
<br>
|
181
|
+
</li>
|
182
|
+
<li>--geno <float><br>
|
183
|
+
<br>
|
184
|
+
Exclude sites on the basis of the proportion of missing data (defined
|
185
|
+
to be between 0 and 1, where 1 indicates no missing data allowed).<br>
|
186
|
+
<br>
|
187
|
+
</li>
|
188
|
+
<li>--max-missing-count <int><br>
|
189
|
+
<br>
|
190
|
+
Exclude sites with more than this number of missing chromosomes.<br>
|
191
|
+
<br>
|
192
|
+
</li>
|
193
|
+
<li>--min-alleles <int></li>
|
194
|
+
<li>--max-alleles <int><br>
|
195
|
+
<br>
|
196
|
+
Include only sites with a number of alleles within the specified range.
|
197
|
+
For example, to include only bi-allelic sites, one could use:<br>
|
198
|
+
<br>
|
199
|
+
<p class="codebox">vcftools --vcf file1.vcf --min-alleles 2 --max-alleles 2
|
200
|
+
</p>
|
201
|
+
<br><br>
|
202
|
+
</li>
|
203
|
+
<li>--thin <int><br>
|
204
|
+
<br>
|
205
|
+
Thin sites so that no two sites are within the specified distance.<br>
|
206
|
+
<br>
|
207
|
+
</li>
|
208
|
+
|
209
|
+
<li>--mask <filename><br>
|
210
|
+
<li>--invert-mask <filename><br>
|
211
|
+
<li>--mask-min <int><br>
|
212
|
+
<br>
|
213
|
+
Include sites on the basis of a FASTA-like file. The provided file contains a sequence of integer digits (between 0 and 9) for each position on a chromosome that specify if a site at that position
|
214
|
+
should be filtered or not. An example mask file would look like:<br>
|
215
|
+
<p class="codebox">>1<br>0000011111222... </p>
|
216
|
+
In this example, sites in the VCF file located within the first 5 bases of the start of chromosome 1 would be kept, whereas sites at position 6 onwards would be filtered out.
|
217
|
+
The threshold integer that determines if sites are filtered or not is set using the --mask-min option, which defaults to 0.
|
218
|
+
The chromosomes contained in the mask file must be sorted in the same order as the VCF file.
|
219
|
+
The --mask option is used to specify the mask file to be used, whereas the --invert-mask option can be used to specify a mask file that will be inverted before being applied.
|
220
|
+
<br>
|
221
|
+
</li>
|
222
|
+
|
223
|
+
</ul>
|
224
|
+
</div>
|
225
|
+
<div class="item">
|
226
|
+
<h1><a name="indv_filter" class="Q">Individual Filters</a></h1>
|
227
|
+
<ul>
|
228
|
+
<li>--indv <string><br>
|
229
|
+
<br>
|
230
|
+
Specify an individual to be kept in the analysis. This option can be used multiple times
|
231
|
+
to specify multiple individuals.
|
232
|
+
<br><br>
|
233
|
+
</li>
|
234
|
+
<li>--keep <filename><br>
|
235
|
+
<br>
|
236
|
+
Provide a file containing a list of individuals to include in
|
237
|
+
subsequent analysis. Each individual ID (as defined in the VCF
|
238
|
+
headerline) should be included on a separate line.<br>
|
239
|
+
<br>
|
240
|
+
</li>
|
241
|
+
<li>--remove-indv <string><br>
|
242
|
+
<br>
|
243
|
+
Specify an individual to be removed from the analysis. This option can be used multiple times
|
244
|
+
to specify multiple individuals. If the --indv option is also specified, then the --indv option is
|
245
|
+
executed before the --remove-indv option.
|
246
|
+
<br><br>
|
247
|
+
</li>
|
248
|
+
<li>--remove <filename><br>
|
249
|
+
<br>
|
250
|
+
Provide a file containing a list of individuals to exclude in
|
251
|
+
subsequent analysis. Each individual ID (as defined in the VCF
|
252
|
+
headerline) should be included on a separate line. If both the --keep
|
253
|
+
and the --remove options are used, then the --keep option is executed before
|
254
|
+
the --remove option.<br>
|
255
|
+
<br>
|
256
|
+
</li>
|
257
|
+
<li>--min-indv-meanDP <float></li>
|
258
|
+
<li>--max-indv-meanDP <float><br>
|
259
|
+
<br>
|
260
|
+
Calculate the mean coverage on a per-individual basis. Only individuals
|
261
|
+
with coverage within the range specified by these options are included
|
262
|
+
in subsequent analyses.<br>
|
263
|
+
<br>
|
264
|
+
</li>
|
265
|
+
<li>--mind <float><br>
|
266
|
+
<br>
|
267
|
+
Specify the minimum call rate threshold for each individual. <br>
|
268
|
+
<br>
|
269
|
+
</li>
|
270
|
+
<li>--phased<br>
|
271
|
+
<br>
|
272
|
+
First excludes all individuals having all genotypes unphased, and <span style="text-decoration: underline;">subsequently excludes
|
273
|
+
all sites with unphased genotypes</span>. The remaining
|
274
|
+
data therefore consists of phased data only.
|
275
|
+
</li>
|
276
|
+
<li>--max-indv <int><br>
|
277
|
+
<br>
|
278
|
+
Randomly thins individuals so that only the specified number are retained.
|
279
|
+
</li>
|
280
|
+
</ul>
|
281
|
+
</div>
|
282
|
+
<div class="item">
|
283
|
+
<h1><a name="geno_filter" class="Q">Genotype Filters</a></h1>
|
284
|
+
<ul>
|
285
|
+
<li>--remove-filtered-geno-all</li>
|
286
|
+
<li>--remove-filtered-geno <string><br>
|
287
|
+
<br>
|
288
|
+
The first option removes all genotypes with a FILTER flag. The second option can be used to exclude genotypes with a specific filter flag.<br>
|
289
|
+
<br>
|
290
|
+
</li>
|
291
|
+
<li>--minGQ <float><br>
|
292
|
+
<br>
|
293
|
+
Exclude all genotypes with a quality below the threshold specified by
|
294
|
+
this option (GQ).<br>
|
295
|
+
<br>
|
296
|
+
</li>
|
297
|
+
<li>--minDP <float><br>
|
298
|
+
<br>
|
299
|
+
Exclude all genotypes with a sequencing depth below that specified by
|
300
|
+
this option (DP)</li>
|
301
|
+
</ul>
|
302
|
+
</div>
|
303
|
+
<div class="item">
|
304
|
+
<h1><a name="stats" class="Q">Output Statistics</a></h1>
|
305
|
+
<ul>
|
306
|
+
<li>--freq</li>
|
307
|
+
<li>--counts</li>
|
308
|
+
<li>--freq2</li>
|
309
|
+
<li>--counts2<br>
|
310
|
+
<br>
|
311
|
+
Output per-site frequency information. The --freq option outputs the allele frequency in a
|
312
|
+
file with the suffix '.frq'. The --counts option outputs a similar file with the suffix '.frq.count',
|
313
|
+
that contains the raw allele counts at each site.<br>
|
314
|
+
The --freq2 and --counts2 options are used to suppress allele information in the output file. In this case,
|
315
|
+
the order of the freqs/counts depends on the numbering in the VCF file.<br>
|
316
|
+
<br>
|
317
|
+
</li>
|
318
|
+
<li>--depth<br>
|
319
|
+
<br>
|
320
|
+
Generates a file containing the mean depth per individual. This file
|
321
|
+
has the suffix '.idepth'.<br>
|
322
|
+
<br>
|
323
|
+
</li>
|
324
|
+
<li>--site-depth</li>
|
325
|
+
<li>--site-mean-depth<br>
|
326
|
+
<br>
|
327
|
+
Generates a file containing the depth per site. The --site-depth option
|
328
|
+
outputs the depth for each site summed across individuals. This file
|
329
|
+
has the suffix '.ldepth'. Likewise, the --site-mean-depth outputs the
|
330
|
+
mean depth for each site, and the output file has the suffix
|
331
|
+
'.ldepth.mean'.<br>
|
332
|
+
<br>
|
333
|
+
</li>
|
334
|
+
<li>--geno-depth<br>
|
335
|
+
<br>
|
336
|
+
Generates
|
337
|
+
a (possibly very large) file containing the depth for each genotype in
|
338
|
+
the VCF file. Missing entries are given the value -1. The file has the suffix '.gdepth'.<br>
|
339
|
+
<br>
|
340
|
+
</li>
|
341
|
+
<li>--site-quality<br>
|
342
|
+
<br>
|
343
|
+
Generates a file containing the per-site SNP quality, as found in the QUAL column of the VCF file. This file
|
344
|
+
has the suffix '.lqual'.<br>
|
345
|
+
<br>
|
346
|
+
</li>
|
347
|
+
<li>--het<br>
|
348
|
+
<br>
|
349
|
+
Calculates
|
350
|
+
a measure of heterozygosity on a per-individual basis. Specifically, the
|
351
|
+
inbreeding coefficient, F, is estimated for each individual
|
352
|
+
using
|
353
|
+
a method of moments. The resulting file has the suffix '.het'.<br>
|
354
|
+
<br>
|
355
|
+
</li>
|
356
|
+
<li>--hardy<br>
|
357
|
+
<br>
|
358
|
+
Reports a p-value for each site from a Hardy-Weinberg
|
359
|
+
Equilibrium test (as
|
360
|
+
defined by Wigginton, Cutler and Abecasis (2005)). The resulting file
|
361
|
+
(with suffix '.hwe') also contains the Observed numbers of Homozygotes
|
362
|
+
and Heterozygotes and the corresponding Expected numbers under
|
363
|
+
HWE. <br>
|
364
|
+
<br>
|
365
|
+
</li>
|
366
|
+
<li>--missing<br>
|
367
|
+
<br>
|
368
|
+
Generates
|
369
|
+
two files reporting the missingness on a per-individual and per-site
|
370
|
+
basis. The two files have suffixes '.imiss' and '.lmiss' respectively.<br>
|
371
|
+
<br>
|
372
|
+
</li>
|
373
|
+
<li>--hap-r2</li>
|
374
|
+
<li>--geno-r2</li>
|
375
|
+
<li>--ld-window <int></li>
|
376
|
+
<li>--ld-window-bp <int></li>
|
377
|
+
<li>--min-r2 <float><br>
|
378
|
+
<br>
|
379
|
+
These options are used to report Linkage Disequilibrium (LD) statistics
|
380
|
+
as summarised by the r<sup>2</sup>, D, and D' statistics. The --hap-r2
|
381
|
+
option informs vcftools to output a file reporting the r<sup>2</sup>, D, and D'
|
382
|
+
statistics using phased haplotypes. These are the traditional measures of LD often reported in the population genetics literature.
|
383
|
+
If phased haplotypes are unavailable then the --geno-r2 option may be used, which calculates the squared correlation coefficient
|
384
|
+
between genotypes encoded as 0, 1 and 2 to represent the number of non-reference alleles in each individual. This is the same as the LD measure reported by PLINK. The D and D' statistics are only available for phased genotypes.
|
385
|
+
The haplotype version outputs a file with the suffix '.hap.ld', whereas the genotype version outputs a file with the suffix '.geno.ld'. The haplotype version implies the option --phased.
|
386
|
+
<br><br>The --ld-window option defines the maximum number of SNPs between the SNPs being tested for LD.
|
387
|
+
Likewise, the --ld-window-bp option can be used to define the maximum
|
388
|
+
physical separation (in base-pairs) of SNPs included in the LD calculation. Finally,
|
389
|
+
the --min-r2 option sets a minimum value for r<sup>2</sup>
|
390
|
+
below which the LD statistic is not reported.<br>
|
391
|
+
<br>
|
392
|
+
</li>
|
393
|
+
<li>--SNPdensity <int><br>
|
394
|
+
<br>
|
395
|
+
Calculates the number and density of SNPs in bins of size defined by
|
396
|
+
this option. The resulting output file has the suffix '.snpden'.<br>
|
397
|
+
<br>
|
398
|
+
</li>
|
399
|
+
<li>--TsTv <int><br>
|
400
|
+
<br>
|
401
|
+
Calculates the Transition / Transversion ratio in bins of size defined by
|
402
|
+
this option. Only uses bi-allelic SNPs. The resulting output file has the suffix '.TsTv'. A summary is also supplied in a file with the suffix '.TsTv.summary'.<br>
|
403
|
+
<br>
|
404
|
+
</li>
|
405
|
+
<li>--TsTv-by-count<br>
|
406
|
+
<br>
|
407
|
+
Calculates the Transition / Transversion ratio as a function of alternative allele count. Only uses bi-allelic SNPs.
|
408
|
+
The resulting output file has the suffix '.TsTv.count'. <br>
|
409
|
+
<br>
|
410
|
+
</li>
|
411
|
+
<li>--TsTv-by-qual<br>
|
412
|
+
<br>
|
413
|
+
Calculates the Transition / Transversion ratio as a function of SNP quality threshold. Only uses bi-allelic SNPs.
|
414
|
+
The resulting output file has the suffix '.TsTv.qual'. <br>
|
415
|
+
<br>
|
416
|
+
</li>
|
417
|
+
<li>--FILTER-summary<br>
|
418
|
+
<br>
|
419
|
+
Generates a summary of the number of SNPs and Ts/Tv ratio for each FILTER category. The output file
|
420
|
+
has the suffix '.FILTER.summary'. <br>
|
421
|
+
<br>
|
422
|
+
</li>
|
423
|
+
<li>--filtered-sites<br>
|
424
|
+
<br>
|
425
|
+
Creates two files listing sites that have been kept or removed after filtering. The first file, with
|
426
|
+
suffix '.kept.sites', lists sites kept by vcftools after filters have been applied. The second file,
|
427
|
+
with the suffix '.removed.sites', lists sites removed by the applied filters.<br>
|
428
|
+
<br>
|
429
|
+
</li>
|
430
|
+
<li>--singletons<br>
|
431
|
+
<br>
|
432
|
+
This option will generate a file detailing the location of singletons,
|
433
|
+
and the individual they occur in. The file reports both true
|
434
|
+
singletons, and private doubletons (i.e. SNPs where the minor allele
|
435
|
+
only occurs in a single individual and that individual is homozygotic
|
436
|
+
for that allele). The output file has the suffix '.singletons'.<br>
|
437
|
+
<br>
|
438
|
+
</li>
|
439
|
+
<li>--site-pi</li>
|
440
|
+
<li>--window-pi <int><br>
|
441
|
+
<br>
|
442
|
+
These options are used to estimate levels of <a href="http://en.wikipedia.org/wiki/Nucleotide_diversity">nucleotide diversity</a>. The first
|
443
|
+
option does this on a per-site basis, and the output file has the suffix '.sites.pi'. The second option calculates the nucleotide diversity
|
444
|
+
in windows, with the window size defined in the option argument. <br>
|
445
|
+
<br>
|
446
|
+
</li>
|
447
|
+
</ul>
|
448
|
+
</div>
|
449
|
+
<div class="item">
|
450
|
+
<h1><a name="other_format" class="Q">Output in Other Formats</a></h1>
|
451
|
+
<ul>
|
452
|
+
<li>--012<br>
|
453
|
+
<br>
|
454
|
+
This option outputs the genotypes as a large matrix. Three files are
|
455
|
+
produced. The first, with suffix '.012', contains the genotypes of each
|
456
|
+
individual on a separate line. Genotypes are represented as 0, 1 and 2,
|
457
|
+
where the number represents the number of non-reference alleles. Missing
|
458
|
+
genotypes are represented by -1. The second file, with suffix '.012.indv'
|
459
|
+
details the individuals included in the main file. The third file, with
|
460
|
+
suffix '.012.pos' details the site locations included in the main file.<br>
|
461
|
+
<br>
|
462
|
+
</li>
|
463
|
+
<li>--IMPUTE<br>
|
464
|
+
<br>
|
465
|
+
This option outputs phased haplotypes in <a href="http://mathgen.stats.ox.ac.uk/impute/impute.html">IMPUTE</a>
|
466
|
+
reference-panel format. As IMPUTE requires phased data, using this
|
467
|
+
option also implies --phased. Unphased individuals and genotypes are
|
468
|
+
therefore excluded. Only bi-allelic sites are included in the output. Using this option generates three files. The IMPUTE
|
469
|
+
haplotype file has the suffix '.impute.hap', and the IMPUTE legend file
|
470
|
+
has the suffix '.impute.hap.legend'. The third file, with suffix
|
471
|
+
'.impute.hap.indv', details the individuals included in the haplotype
|
472
|
+
file, although this file is not needed by IMPUTE.<br>
|
473
|
+
<br>
|
474
|
+
</li>
|
475
|
+
<li>--ldhat<br>
|
476
|
+
<li>--ldhat-geno<br>
|
477
|
+
<br>
|
478
|
+
These options output data in <a href="http://www.stats.ox.ac.uk/%7Emcvean/LDhat/">LDhat</a>
|
479
|
+
format. Use of these options also requires the --chr option to
|
480
|
+
be used. The --ldhat option outputs phased data only, and therefore also implies --phased, leading to unphased
|
481
|
+
individuals and genotypes being excluded. Alternatively, the --ldhat-geno option treats all of the data as unphased, and therefore
|
482
|
+
outputs LDhat files in genotype/unphased format. In either case, two files are
|
483
|
+
generated with the suffixes '.ldhat.sites'
|
484
|
+
and '.ldhat.locs', which correspond to the
|
485
|
+
LDhat 'sites' and 'locs' input files respectively.<br>
|
486
|
+
<br>
|
487
|
+
</li>
|
488
|
+
<li>--BEAGLE-GL<br>
|
489
|
+
<br>
|
490
|
+
This option outputs genotype likelihood information for input into the
|
491
|
+
<a href="http://faculty.washington.edu/browning/beagle/beagle.html">BEAGLE</a> program.
|
492
|
+
This option requires the VCF file to contain the FORMAT GL tag, which can generally be output by
|
493
|
+
SNP callers such as <a href="http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit">the GATK</a>.
|
494
|
+
Use of this option requires a chromosome to be specified via the --chr option. The resulting output
|
495
|
+
file (with the suffix '.BEAGLE.GL') contains genotype likelihoods for biallelic sites, and is
|
496
|
+
suitable for input into BEAGLE via the 'like=' argument.<br>
|
497
|
+
<br>
|
498
|
+
</li>
|
499
|
+
<li>--plink<br>
|
500
|
+
<br>
|
501
|
+
This option outputs the genotype data in <a href="http://pngu.mgh.harvard.edu/%7Epurcell/plink/">PLINK</a>
|
502
|
+
PED format. Two files are generated, with suffixes '.ped' and '.map'. Note that only bi-allelic loci will be output.
|
503
|
+
Further details of these files can be found in the <a href="http://pngu.mgh.harvard.edu/%7Epurcell/plink/data.shtml#ped">PLINK
|
504
|
+
documentation</a>. <br>
|
505
|
+
<br>Note: This option can be very slow on large datasets. Using the --chr option to divide up the dataset is advised.<br>
|
506
|
+
<br>
|
507
|
+
</li>
|
508
|
+
<li>--plink-tped<br>
|
509
|
+
<br>
|
510
|
+
The --plink option above can be extremely slow on large datasets. An alternative that might be considerably quicker is
|
511
|
+
to output in the <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr">PLINK transposed format</a>.
|
512
|
+
This can be achieved using the --plink-tped option, which produces two files with suffixes '.tped' and '.tfam'.
|
513
|
+
<br>
|
514
|
+
<br>
|
515
|
+
</li>
|
516
|
+
<li>--recode<br>
|
517
|
+
<br>
|
518
|
+
The --recode option is used to generate a VCF file from the input VCF file
|
519
|
+
having applied the options specified by the user. The output file has
|
520
|
+
the suffix '.recode.vcf'.
|
521
|
+
<br><br>
|
522
|
+
By default, the INFO fields are removed from the output file, as the INFO values may be invalidated
|
523
|
+
by the recoding (e.g. the total depth may need to be recalculated if individuals are removed).
|
524
|
+
This default functionality can be overridden by using the --recode-INFO <string> option, where <string>
|
525
|
+
defines the INFO key to keep in the output file. The --recode-INFO flag can be used multiple times.
|
526
|
+
Alternatively, the option --recode-INFO-all can be used to retain all INFO fields.
|
527
|
+
</li>
|
528
|
+
</ul>
|
529
|
+
</div>
|
530
|
+
<div class="item">
|
531
|
+
<h1><a name="misc" class="Q">Miscellaneous</a></h1>
|
532
|
+
<ul>
|
533
|
+
<li>--extract-FORMAT-info <string><br>
|
534
|
+
<br>
|
535
|
+
Extract information from the genotype fields in the VCF file relating to a specified FORMAT identifier. For example,
|
536
|
+
using the option '--extract-FORMAT-info GT' would extract all of the GT (i.e. Genotype) entries.
|
537
|
+
The resulting output file has the suffix '.<FORMAT_ID>.FORMAT'.
|
538
|
+
<br>
|
539
|
+
<br>
|
540
|
+
<li>--get-INFO <string><br>
|
541
|
+
<br>
|
542
|
+
This option is used to extract information from the INFO field in the
|
543
|
+
VCF file. The <string> argument specifies the INFO tag to
|
544
|
+
be extracted, and the option can be used multiple times in order to
|
545
|
+
extract multiple INFO entries. The resulting file, with suffix '.INFO',
|
546
|
+
contains the required INFO information in a tab-separated table. For
|
547
|
+
example, to extract the NS and DB flags, one would use the command:
|
548
|
+
<br>
|
549
|
+
<br>
|
550
|
+
<p class="codebox">vcftools --vcf file1.vcf --get-INFO NS --get-INFO DB </p>
|
551
|
+
<br>
|
552
|
+
</li>
|
553
|
+
</ul>
|
554
|
+
</div>
|
555
|
+
|
556
|
+
<div class="item">
|
557
|
+
<h1><a name="diff" class="Q">VCF File Comparison Options</a></h1>
|
558
|
+
<p><b>The file comparison options are currently in a state of flux and likely buggy. If you find a bug,
|
559
|
+
please report it. Note that genotype-level filters are not supported in these options.</b><br>
|
560
|
+
</p>
|
561
|
+
<ul>
|
562
|
+
<li>--diff <filename><br>
|
563
|
+
<li>--gzdiff <filename><br>
|
564
|
+
<br>
|
565
|
+
Select a VCF file for comparison with the file specified by the --vcf option. Outputs two files
|
566
|
+
describing the sites and individuals common / unique to each file. These files have the suffixes
|
567
|
+
'.diff.sites_in_files' and '.diff.indv_in_files' respectively. The --gzdiff version can be used to
|
568
|
+
read compressed VCF files.
|
569
|
+
<br><br>
|
570
|
+
</li>
|
571
|
+
<li>--diff-site-discordance<br>
|
572
|
+
<br>
|
573
|
+
Used in conjunction with the --diff option to calculate discordance on a site by site basis. The
|
574
|
+
resulting output file has the suffix '.diff.sites'.
|
575
|
+
<br><br>
|
576
|
+
</li>
|
577
|
+
<li>--diff-indv-discordance<br>
|
578
|
+
<br>
|
579
|
+
Used in conjunction with the --diff option to calculate discordance on a per-individual basis. The
|
580
|
+
resulting output file has the suffix '.diff.indv'.
|
581
|
+
<br><br>
|
582
|
+
</li>
|
583
|
+
<li>--diff-discordance-matrix<br>
|
584
|
+
<br>
|
585
|
+
Used in conjunction with the --diff option to calculate a discordance matrix. This option only works
|
586
|
+
with bi-allelic loci with matching alleles that are present in both files. The resulting output file
|
587
|
+
has the suffix '.diff.discordance.matrix'.
|
588
|
+
<br><br>
|
589
|
+
</li>
|
590
|
+
<li>--diff-switch-error<br>
|
591
|
+
<br>
|
592
|
+
Used in conjunction with the --diff option to calculate phasing errors
|
593
|
+
(specifically '<a href="http://www.cell.com/AJHG/retrieve/pii/S0002929707619788">switch errors</a>').
|
594
|
+
This option generates two output files describing switch errors found between sites, and the average switch
|
595
|
+
error per individual. These two files have the suffixes '.diff.switch' and '.diff.indv.switch' respectively.
|
596
|
+
<br><br>
|
597
|
+
</li>
|
598
|
+
|
599
|
+
</ul>
|
600
|
+
</div>
|
601
|
+
|
602
|
+
<div class="item">
|
603
|
+
<h1><a name="development" class="Q">Options still in development</a></h1>
|
604
|
+
<p>The following options are yet to be finalised, are likely to contain bugs, and are likely to change in the future.<br>
|
605
|
+
</p>
|
606
|
+
<ul>
|
607
|
+
<li>--fst <filename><br>
|
608
|
+
<li>--gzfst <filename><br>
|
609
|
+
<br>
|
610
|
+
Calculate F<sub>ST</sub> for a pair of VCF files, with the second file being specified by this option.
|
611
|
+
F<sub>ST</sub> is currently calculated using the formula described in the supplementary material of the <a href="http://www.ncbi.nlm.nih.gov/pubmed/16255080">Phase I HapMap paper</a>.
|
612
|
+
Currently, only pairwise F<sub>ST</sub> calculations are supported, although this will likely change in the future.
|
613
|
+
The --gzfst option can be used to read compressed VCF files.
|
614
|
+
This option is likely to be deprecated in future releases, and it is recommended that you use the --fst-pop option instead.
|
615
|
+
<br>
|
616
|
+
<br>
|
617
|
+
</li>
|
618
|
+
<li>--fst-pop <filename><br>
|
619
|
+
<br>
|
620
|
+
If you do not have separate files for each population, you can use this option to calculate F<sub>ST</sub>.
|
621
|
+
This option is used to provide a file which lists the individuals in each population, and is used multiple times to specify multiple populations.
|
622
|
+
Unlike the --fst option, this option can be used to calculate F<sub>ST</sub> for more than two populations.
|
623
|
+
<br>
|
624
|
+
<br>
|
625
|
+
</li>
|
626
|
+
<!--
|
627
|
+
<li>--pca <br>
|
628
|
+
<br>
|
629
|
+
Perform Principal Component Analysis.
|
630
|
+
<br>
|
631
|
+
<br>
|
632
|
+
</li>
|
633
|
+
-->
|
634
|
+
<li>--LROH <br>
|
635
|
+
<br>
|
636
|
+
Identify Long Runs of Homozygosity.
|
637
|
+
<br>
|
638
|
+
<br>
|
639
|
+
</li>
|
640
|
+
<li>--relatedness <br>
|
641
|
+
<br>
|
642
|
+
Output Individual Relatedness Statistics.
|
643
|
+
<br>
|
644
|
+
<br>
|
645
|
+
</li>
|
646
|
+
<li>--TajimaD <int> <br>
|
647
|
+
<br>
|
648
|
+
Output Tajima's D statistic in bins of size <int>.
|
649
|
+
<br>
|
650
|
+
<br>
|
651
|
+
</li>
|
652
|
+
</ul>
|
653
|
+
</div>
|
654
|
+
|