ngs_server 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/ngs_server +72 -50
- data/ext/bamtools/extconf.rb +3 -3
- data/ext/vcftools/Makefile +28 -0
- data/ext/vcftools/README.txt +36 -0
- data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
- data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
- data/ext/vcftools/cpp/.svn/entries +708 -0
- data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
- data/ext/vcftools/cpp/Makefile +46 -0
- data/ext/vcftools/cpp/dgeev.cpp +146 -0
- data/ext/vcftools/cpp/dgeev.h +43 -0
- data/ext/vcftools/cpp/output_log.cpp +79 -0
- data/ext/vcftools/cpp/output_log.h +34 -0
- data/ext/vcftools/cpp/parameters.cpp +535 -0
- data/ext/vcftools/cpp/parameters.h +154 -0
- data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
- data/ext/vcftools/cpp/vcf_entry.h +190 -0
- data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
- data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
- data/ext/vcftools/cpp/vcf_file.cpp +495 -0
- data/ext/vcftools/cpp/vcf_file.h +184 -0
- data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
- data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
- data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
- data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
- data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
- data/ext/vcftools/cpp/vcftools.cpp +107 -0
- data/ext/vcftools/cpp/vcftools.h +25 -0
- data/ext/vcftools/examples/.svn/all-wcprops +185 -0
- data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
- data/ext/vcftools/examples/.svn/entries +1048 -0
- data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
- data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
- data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
- data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
- data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
- data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
- data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
- data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
- data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
- data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
- data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
- data/ext/vcftools/examples/annotate-test.vcf +37 -0
- data/ext/vcftools/examples/annotate.out +23 -0
- data/ext/vcftools/examples/annotate.txt +7 -0
- data/ext/vcftools/examples/annotate2.out +52 -0
- data/ext/vcftools/examples/annotate3.out +23 -0
- data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
- data/ext/vcftools/examples/cmp-test.out +53 -0
- data/ext/vcftools/examples/concat-a.vcf +21 -0
- data/ext/vcftools/examples/concat-b.vcf +13 -0
- data/ext/vcftools/examples/concat-c.vcf +19 -0
- data/ext/vcftools/examples/concat.out +39 -0
- data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
- data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
- data/ext/vcftools/examples/merge-test-a.vcf +17 -0
- data/ext/vcftools/examples/merge-test-b.vcf +17 -0
- data/ext/vcftools/examples/merge-test-c.vcf +15 -0
- data/ext/vcftools/examples/merge-test.vcf.out +31 -0
- data/ext/vcftools/examples/perl-api-1.pl +46 -0
- data/ext/vcftools/examples/query-test.out +6 -0
- data/ext/vcftools/examples/shuffle-test.vcf +12 -0
- data/ext/vcftools/examples/subset.SNPs.out +10 -0
- data/ext/vcftools/examples/subset.indels.out +18 -0
- data/ext/vcftools/examples/subset.vcf +21 -0
- data/ext/vcftools/examples/valid-3.3.vcf +30 -0
- data/ext/vcftools/examples/valid-4.0.vcf +34 -0
- data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
- data/ext/vcftools/examples/valid-4.1.vcf +37 -0
- data/ext/vcftools/extconf.rb +2 -0
- data/ext/vcftools/perl/.svn/all-wcprops +149 -0
- data/ext/vcftools/perl/.svn/entries +844 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
- data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
- data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
- data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
- data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
- data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
- data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
- data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
- data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
- data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
- data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
- data/ext/vcftools/perl/ChangeLog +84 -0
- data/ext/vcftools/perl/FaSlice.pm +214 -0
- data/ext/vcftools/perl/Makefile +12 -0
- data/ext/vcftools/perl/Vcf.pm +2853 -0
- data/ext/vcftools/perl/VcfStats.pm +681 -0
- data/ext/vcftools/perl/fill-aa +103 -0
- data/ext/vcftools/perl/fill-an-ac +56 -0
- data/ext/vcftools/perl/fill-ref-md5 +204 -0
- data/ext/vcftools/perl/tab-to-vcf +92 -0
- data/ext/vcftools/perl/test.t +376 -0
- data/ext/vcftools/perl/vcf-annotate +1099 -0
- data/ext/vcftools/perl/vcf-compare +1193 -0
- data/ext/vcftools/perl/vcf-concat +310 -0
- data/ext/vcftools/perl/vcf-convert +180 -0
- data/ext/vcftools/perl/vcf-fix-newlines +97 -0
- data/ext/vcftools/perl/vcf-isec +660 -0
- data/ext/vcftools/perl/vcf-merge +577 -0
- data/ext/vcftools/perl/vcf-query +286 -0
- data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
- data/ext/vcftools/perl/vcf-sort +79 -0
- data/ext/vcftools/perl/vcf-stats +160 -0
- data/ext/vcftools/perl/vcf-subset +206 -0
- data/ext/vcftools/perl/vcf-to-tab +112 -0
- data/ext/vcftools/perl/vcf-validator +145 -0
- data/ext/vcftools/website/.svn/all-wcprops +41 -0
- data/ext/vcftools/website/.svn/entries +238 -0
- data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
- data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
- data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
- data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
- data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
- data/ext/vcftools/website/Makefile +6 -0
- data/ext/vcftools/website/README +2 -0
- data/ext/vcftools/website/VCF-poster.pdf +0 -0
- data/ext/vcftools/website/default.css +250 -0
- data/ext/vcftools/website/favicon.ico +0 -0
- data/ext/vcftools/website/favicon.png +0 -0
- data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/img/.svn/entries +300 -0
- data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
- data/ext/vcftools/website/img/bg.gif +0 -0
- data/ext/vcftools/website/img/bgcode.gif +0 -0
- data/ext/vcftools/website/img/bgcontainer.gif +0 -0
- data/ext/vcftools/website/img/bgul.gif +0 -0
- data/ext/vcftools/website/img/header.gif +0 -0
- data/ext/vcftools/website/img/li.gif +0 -0
- data/ext/vcftools/website/img/quote.gif +0 -0
- data/ext/vcftools/website/img/search.gif +0 -0
- data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/src/.svn/entries +300 -0
- data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
- data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
- data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
- data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
- data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
- data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
- data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
- data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
- data/ext/vcftools/website/src/docs.inc +202 -0
- data/ext/vcftools/website/src/index.inc +52 -0
- data/ext/vcftools/website/src/index.php +80 -0
- data/ext/vcftools/website/src/license.inc +27 -0
- data/ext/vcftools/website/src/links.inc +13 -0
- data/ext/vcftools/website/src/options.inc +654 -0
- data/ext/vcftools/website/src/perl_module.inc +249 -0
- data/ext/vcftools/website/src/specs.inc +18 -0
- data/lib/config.ru +9 -0
- data/lib/ngs_server/add.rb +9 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +55 -3
- data/ngs_server.gemspec +5 -2
- metadata +296 -6
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* vcf_entry.h
|
|
3
|
+
*
|
|
4
|
+
* Created on: Aug 19, 2009
|
|
5
|
+
* Author: Adam Auton
|
|
6
|
+
* ($Revision: 230 $)
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#ifndef VCF_ENTRY_H_
|
|
10
|
+
#define VCF_ENTRY_H_
|
|
11
|
+
|
|
12
|
+
#include <algorithm>
|
|
13
|
+
#include <iostream>
|
|
14
|
+
#include <iterator>
|
|
15
|
+
#include <limits>
|
|
16
|
+
#include <map>
|
|
17
|
+
#include <set>
|
|
18
|
+
#include <sstream>
|
|
19
|
+
#include <string>
|
|
20
|
+
#include <vector>
|
|
21
|
+
|
|
22
|
+
#include <cassert>
|
|
23
|
+
|
|
24
|
+
#include "output_log.h"
|
|
25
|
+
|
|
26
|
+
using namespace std;
|
|
27
|
+
|
|
28
|
+
// Value type tag stored in Field_description::Type.
// The numeric values are assigned explicitly and must not be reordered.
enum Type_enum
{
	Integer = 0,
	Float = 1,
	Character = 2,
	String = 3,
	Flag = 4
};
|
|
29
|
+
|
|
30
|
+
class Field_description
|
|
31
|
+
{
|
|
32
|
+
public:
|
|
33
|
+
string ID;
|
|
34
|
+
int N_entries;
|
|
35
|
+
Type_enum Type;
|
|
36
|
+
string Description;
|
|
37
|
+
|
|
38
|
+
Field_description() : ID(""), N_entries(0), Type(Integer), Description("") {};
|
|
39
|
+
~Field_description() {};
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
class vcf_entry {
|
|
43
|
+
public:
|
|
44
|
+
vcf_entry(const unsigned int N_indv);
|
|
45
|
+
vcf_entry(const unsigned int N_indv, const string &data_line);
|
|
46
|
+
~vcf_entry();
|
|
47
|
+
|
|
48
|
+
const unsigned int N_indv;
|
|
49
|
+
|
|
50
|
+
void parse_basic_entry(bool parse_ALT=false, bool parse_FILTER=false, bool parse_INFO=false);
|
|
51
|
+
void parse_full_entry(bool parse_FORMAT=true);
|
|
52
|
+
void parse_genotype_entry(unsigned int indv, bool GT=false, bool GQ=false, bool DP=false, bool FT=false);
|
|
53
|
+
void parse_genotype_entries(bool GT=false, bool GQ=false, bool DP=false, bool FT=false);
|
|
54
|
+
|
|
55
|
+
void reset(const string &vcf_data_line);
|
|
56
|
+
|
|
57
|
+
string get_CHROM() const;
|
|
58
|
+
void get_CHROM(string &out) const;
|
|
59
|
+
int get_POS() const;
|
|
60
|
+
string get_ID() const;
|
|
61
|
+
string get_REF() const;
|
|
62
|
+
string get_ALT() const;
|
|
63
|
+
string get_ALT_allele(int allele_num) const;
|
|
64
|
+
void get_allele(int allele_num, string &out) const;
|
|
65
|
+
void get_alleles_vector(vector<string> &out) const;
|
|
66
|
+
string get_FILTER() const;
|
|
67
|
+
void get_FILTER_vector(vector<string> &out) const;
|
|
68
|
+
double get_QUAL() const;
|
|
69
|
+
string get_INFO(const set<string> &INFO_to_keep) const;
|
|
70
|
+
string get_INFO_value(const string &key) const;
|
|
71
|
+
string get_FORMAT() const;
|
|
72
|
+
void get_indv_GENOTYPE_ids(unsigned int indv, pair<int, int> &out) const;
|
|
73
|
+
void get_indv_GENOTYPE_strings(unsigned int indv, pair<string, string> &out) const;
|
|
74
|
+
char get_indv_PHASE(unsigned int indv) const;
|
|
75
|
+
double get_indv_GQUALITY(unsigned int indv) const;
|
|
76
|
+
int get_indv_DEPTH(unsigned int indv) const;
|
|
77
|
+
void get_indv_GFILTER(unsigned int indv, string &out) const;
|
|
78
|
+
void get_indv_GFILTER_vector(unsigned int indv, vector<string> &out) const;
|
|
79
|
+
int get_indv_ploidy(unsigned int indv) const;
|
|
80
|
+
|
|
81
|
+
bool is_SNP() const;
|
|
82
|
+
bool is_biallelic_SNP() const;
|
|
83
|
+
bool is_diploid(const vector<bool> &include_indv, const vector<bool> &include_genotype) const;
|
|
84
|
+
|
|
85
|
+
void read_indv_generic_entry(unsigned int indv, const string &FORMAT_id, string &out);
|
|
86
|
+
bool FORMAT_id_exists(const string &FORMAT_id);
|
|
87
|
+
|
|
88
|
+
unsigned int get_N_alleles() const;
|
|
89
|
+
unsigned int get_N_chr(const vector<bool> &include_indv, const vector<bool> &include_genotype) const;
|
|
90
|
+
|
|
91
|
+
void set_CHROM(const string &in);
|
|
92
|
+
void set_POS(const int in);
|
|
93
|
+
void set_ID(const string &in);
|
|
94
|
+
void set_REF(const string &in);
|
|
95
|
+
void set_ALT(const string &in);
|
|
96
|
+
void set_QUAL(const double in);
|
|
97
|
+
void set_FILTER(const string &FILTER_str);
|
|
98
|
+
void set_FORMAT(const string &in);
|
|
99
|
+
void set_INFO(const string &INFO_str);
|
|
100
|
+
|
|
101
|
+
void add_ALT_allele(const string &in);
|
|
102
|
+
void add_FILTER_entry(const string &in);
|
|
103
|
+
void add_FORMAT_entry(const string &in, unsigned int pos);
|
|
104
|
+
|
|
105
|
+
void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const string &in);
|
|
106
|
+
void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const pair<string, string> &genotype, char phase);
|
|
107
|
+
void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const pair<int, int> &genotype, char phase);
|
|
108
|
+
void set_indv_GENOTYPE_alleles(unsigned int indv, const pair<string, string> &in);
|
|
109
|
+
void set_indv_GENOTYPE_alleles(unsigned int indv, char a1, char a2);
|
|
110
|
+
void set_indv_GENOTYPE_ids(unsigned int indv, const pair<int, int> &in);
|
|
111
|
+
void set_indv_PHASE(unsigned int indv, char in);
|
|
112
|
+
void set_indv_GQUALITY(unsigned int indv, double in);
|
|
113
|
+
void set_indv_DEPTH(unsigned int indv, int in);
|
|
114
|
+
void set_indv_GFILTER(unsigned int indv, const string &in);
|
|
115
|
+
|
|
116
|
+
void add_indv_GFILTER(unsigned int indv, const string &in);
|
|
117
|
+
|
|
118
|
+
static void add_INFO_descriptor(const string &in);
|
|
119
|
+
static void add_FILTER_descriptor(const string &in);
|
|
120
|
+
static void add_FORMAT_descriptor(const string &in);
|
|
121
|
+
|
|
122
|
+
void print(ostream &out);
|
|
123
|
+
void print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO=false);
|
|
124
|
+
void print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO, const vector<bool> &include_indv, const vector<bool> &include_genotype);
|
|
125
|
+
|
|
126
|
+
void filter_genotypes_by_depth(vector<bool> &include_genotype_out, int min_depth, int max_depth);
|
|
127
|
+
void filter_genotypes_by_quality(vector<bool> &include_genotype_out, double min_genotype_quality);
|
|
128
|
+
void filter_genotypes_by_filter_status(vector<bool> &include_genotype_out, const set<string> &filter_flags_to_remove, bool remove_all = false);
|
|
129
|
+
|
|
130
|
+
void get_allele_counts(vector<int> &out, unsigned int &N_non_missing_chr_out, const vector<bool> &include_indv, const vector<bool> &include_genotype) const;
|
|
131
|
+
void get_genotype_counts(const vector<bool> &include_indv, const vector<bool> &include_genotype, unsigned int &out_N_hom1, unsigned int &out_N_het, unsigned int &out_N_hom2) const;
|
|
132
|
+
|
|
133
|
+
static double SNPHWE(int obs_hets, int obs_hom1, int obs_hom2);
|
|
134
|
+
private:
|
|
135
|
+
istringstream data_stream;
|
|
136
|
+
|
|
137
|
+
bool basic_parsed;
|
|
138
|
+
bool fully_parsed;
|
|
139
|
+
bool parsed_ALT;
|
|
140
|
+
bool parsed_FILTER;
|
|
141
|
+
bool parsed_INFO;
|
|
142
|
+
bool parsed_FORMAT;
|
|
143
|
+
|
|
144
|
+
string CHROM;
|
|
145
|
+
int POS;
|
|
146
|
+
string ID;
|
|
147
|
+
string REF;
|
|
148
|
+
vector<string> ALT;
|
|
149
|
+
double QUAL;
|
|
150
|
+
vector<string> FILTER;
|
|
151
|
+
bool passed_filters;
|
|
152
|
+
vector<pair<string, string> > INFO;
|
|
153
|
+
vector<string> FORMAT;
|
|
154
|
+
|
|
155
|
+
vector< pair<int,int> > GENOTYPE;
|
|
156
|
+
vector<int> ploidy;
|
|
157
|
+
vector<char> PHASE;
|
|
158
|
+
vector<double> GQUALITY;
|
|
159
|
+
vector<int> DEPTH;
|
|
160
|
+
vector< vector<string> > GFILTER;
|
|
161
|
+
|
|
162
|
+
vector<bool> parsed_GT;
|
|
163
|
+
vector<bool> parsed_GQ;
|
|
164
|
+
vector<bool> parsed_DP;
|
|
165
|
+
vector<bool> parsed_FT;
|
|
166
|
+
|
|
167
|
+
int GT_idx;
|
|
168
|
+
int GQ_idx;
|
|
169
|
+
int DP_idx;
|
|
170
|
+
int FT_idx;
|
|
171
|
+
|
|
172
|
+
string ALT_str, FILTER_str, INFO_str, FORMAT_str, QUAL_str;
|
|
173
|
+
vector<string> GENOTYPE_str;
|
|
174
|
+
|
|
175
|
+
map<string, unsigned int> FORMAT_to_idx;
|
|
176
|
+
|
|
177
|
+
static map<string, Field_description> INFO_map;
|
|
178
|
+
static map<string, string> FILTER_map;
|
|
179
|
+
static map<string, Field_description> FORMAT_map;
|
|
180
|
+
|
|
181
|
+
static int str2int(const string &in, const int missing_value=-1);
|
|
182
|
+
static double str2double(const string &in, const double missing_value=-1.0);
|
|
183
|
+
|
|
184
|
+
static string int2str(const int in, const int missing_value=-1);
|
|
185
|
+
static string double2str(const double in, const double missing_value=-1.0);
|
|
186
|
+
|
|
187
|
+
static void tokenize(const string &in, char token, vector<string> &out);
|
|
188
|
+
};
|
|
189
|
+
|
|
190
|
+
#endif /* VCF_ENTRY_H_ */
|
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* vcf_entry_getters.cpp
|
|
3
|
+
*
|
|
4
|
+
* Created on: Nov 11, 2009
|
|
5
|
+
* Author: Adam Auton
|
|
6
|
+
* ($Revision: 230 $)
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include "vcf_entry.h"
|
|
10
|
+
|
|
11
|
+
// Return the CHROMosome name
|
|
12
|
+
string vcf_entry::get_CHROM() const
|
|
13
|
+
{
|
|
14
|
+
return CHROM;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// Return the CHROMosome name
|
|
18
|
+
void vcf_entry::get_CHROM(string &out) const
|
|
19
|
+
{
|
|
20
|
+
out = CHROM;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
int vcf_entry::get_POS() const
|
|
24
|
+
{
|
|
25
|
+
return POS;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Returns the stored ID, or "." when the ID is empty.
string vcf_entry::get_ID() const
{
	return ID.empty() ? string(".") : ID;
}
|
|
34
|
+
|
|
35
|
+
// Accessor: returns a copy of the stored REF string.
string vcf_entry::get_REF() const
{
	return this->REF;
}
|
|
39
|
+
|
|
40
|
+
// Returns the ALT alleles joined with ','. With no ALT alleles the result is ".".
// Asserts that ALT has been parsed.
string vcf_entry::get_ALT() const
{
	assert(parsed_ALT == true);

	if (ALT.empty())
		return ".";

	string joined(ALT.front());
	for (vector<string>::const_iterator allele = ALT.begin() + 1; allele != ALT.end(); ++allele)
	{
		joined += ",";
		joined += *allele;
	}
	return joined;
}
|
|
55
|
+
|
|
56
|
+
bool vcf_entry::is_SNP() const
|
|
57
|
+
{
|
|
58
|
+
assert(parsed_ALT == true);
|
|
59
|
+
|
|
60
|
+
if (REF.size() != 1)
|
|
61
|
+
return false; // Reference isn't a single base
|
|
62
|
+
|
|
63
|
+
if (ALT.size() == 0)
|
|
64
|
+
return false; // No alternative allele
|
|
65
|
+
|
|
66
|
+
for (unsigned int ui=0; ui<ALT.size(); ui++)
|
|
67
|
+
if (ALT[ui].size() != 1)
|
|
68
|
+
return false; // Alternative allele isn't a single base
|
|
69
|
+
|
|
70
|
+
return true;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
bool vcf_entry::is_biallelic_SNP() const
|
|
74
|
+
{
|
|
75
|
+
assert(parsed_ALT == true);
|
|
76
|
+
|
|
77
|
+
if (REF.size() != 1)
|
|
78
|
+
return false; // Reference isn't a single base
|
|
79
|
+
|
|
80
|
+
if (ALT.size() != 1)
|
|
81
|
+
return false; // Not biallelic
|
|
82
|
+
|
|
83
|
+
if (ALT[0].size() != 1)
|
|
84
|
+
return false; // Alternative allele isn't a single base
|
|
85
|
+
|
|
86
|
+
return true;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
bool vcf_entry::is_diploid(const vector<bool> &include_indv, const vector<bool> &include_genotype) const
|
|
90
|
+
{
|
|
91
|
+
for (unsigned int ui=0; ui<N_indv; ui++)
|
|
92
|
+
{
|
|
93
|
+
if ((include_indv[ui] == true) && (include_genotype[ui] == true))
|
|
94
|
+
{
|
|
95
|
+
assert(parsed_GT[ui] == true);
|
|
96
|
+
if (ploidy[ui] != 2)
|
|
97
|
+
return false;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return true;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
void vcf_entry::get_allele(int allele_num, string &out) const
|
|
104
|
+
{
|
|
105
|
+
assert(parsed_ALT == true);
|
|
106
|
+
|
|
107
|
+
if (allele_num == 0)
|
|
108
|
+
out = REF;
|
|
109
|
+
else if ((allele_num < 0) || (unsigned(allele_num - 1) >= ALT.size()))
|
|
110
|
+
out = ".";
|
|
111
|
+
else
|
|
112
|
+
out = ALT[allele_num-1];
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Returns ALT[allele_num] (0 is the first ALT allele), or "." when the index
// is negative or past the end. Asserts that ALT has been parsed.
string vcf_entry::get_ALT_allele(int allele_num) const
{
	assert(parsed_ALT == true);

	const bool valid = (allele_num >= 0) && (unsigned(allele_num) < ALT.size());
	return valid ? ALT[allele_num] : string(".");
}
|
|
123
|
+
|
|
124
|
+
// Replaces the contents of out with REF followed by every ALT allele, in order.
// Asserts that ALT has been parsed.
void vcf_entry::get_alleles_vector(vector<string> &out) const
{
	assert(parsed_ALT == true);

	out.assign(1, REF);
	out.insert(out.end(), ALT.begin(), ALT.end());
}
|
|
131
|
+
|
|
132
|
+
double vcf_entry::get_QUAL() const
|
|
133
|
+
{
|
|
134
|
+
return QUAL;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
// Returns the FILTER column as text:
//   "PASS" when passed_filters is set,
//   "."    when it is not set and no filter names are stored,
//   otherwise the stored filter names joined with ','.
// Asserts that FILTER has been parsed.
// NOTE(review): get_indv_GFILTER joins its entries with ';' while this joins
// with ',' - confirm which separator is intended for the FILTER column.
string vcf_entry::get_FILTER() const
{
	assert(parsed_FILTER == true);

	if (passed_filters == true)
		return "PASS";
	if (FILTER.empty())
		return ".";

	string joined(FILTER[0]);
	for (vector<string>::size_type idx = 1; idx < FILTER.size(); ++idx)
	{
		joined += ",";
		joined += FILTER[idx];
	}
	return joined;
}
|
|
157
|
+
|
|
158
|
+
// Copies the stored filter names into out. Asserts that FILTER has been parsed.
void vcf_entry::get_FILTER_vector(vector<string> &out) const
{
	assert(parsed_FILTER == true);
	out.assign(FILTER.begin(), FILTER.end());
}
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
// Builds the INFO column text restricted to the keys listed in INFO_to_keep.
//
// Stored INFO entries are visited in order; each entry whose key is in
// INFO_to_keep is appended, separated by ';'. An entry with a non-empty value
// is written as "key=value". An entry whose stored value is empty is written
// as the bare key (the VCF flag form) instead of the malformed "key=".
// Returns "." when nothing was selected. Asserts that INFO has been parsed.
string vcf_entry::get_INFO(const set<string> &INFO_to_keep) const
{
	assert(parsed_INFO == true);

	ostringstream out;
	bool first = true;
	if (!INFO_to_keep.empty())
	{
		for (unsigned int ui=0; ui<INFO.size(); ui++)
		{
			// Bind by reference: no need to copy the key on every iteration.
			const string &key = INFO[ui].first;
			if (INFO_to_keep.find(key) == INFO_to_keep.end())
				continue;

			if (first != true)
				out << ";";
			out << key;
			if (!INFO[ui].second.empty())
				out << "=" << INFO[ui].second;
			first = false;
		}
	}

	if (first == true)
		return ".";	// Didn't find any INFO fields to keep
	return out.str();
}
|
|
193
|
+
|
|
194
|
+
// Returns the value of the first stored INFO entry whose key equals `key`,
// or "?" when no entry matches. Asserts that INFO has been parsed.
string vcf_entry::get_INFO_value(const string &key) const
{
	assert(parsed_INFO == true);

	typedef vector<pair<string, string> >::const_iterator info_iterator;
	for (info_iterator entry = INFO.begin(); entry != INFO.end(); ++entry)
	{
		if (entry->first == key)
			return entry->second;
	}
	return "?";
}
|
|
205
|
+
|
|
206
|
+
// Returns the stored FORMAT ids joined with ':' (empty string when there are none).
// Asserts that FORMAT has been parsed.
string vcf_entry::get_FORMAT() const
{
	assert(parsed_FORMAT == true);

	string joined;
	for (vector<string>::size_type idx = 0; idx < FORMAT.size(); ++idx)
	{
		if (idx != 0)
			joined += ":";
		joined += FORMAT[idx];
	}
	return joined;
}
|
|
221
|
+
|
|
222
|
+
// Return the alleles of a genotype as a pair of strings.
|
|
223
|
+
void vcf_entry::get_indv_GENOTYPE_strings(unsigned int indv, pair<string, string> &out) const
|
|
224
|
+
{
|
|
225
|
+
assert(parsed_GT[indv] == true);
|
|
226
|
+
|
|
227
|
+
static string out_allele1, out_allele2;
|
|
228
|
+
|
|
229
|
+
get_allele(GENOTYPE[indv].first, out_allele1);
|
|
230
|
+
get_allele(GENOTYPE[indv].second, out_allele2);
|
|
231
|
+
out = make_pair(out_allele1, out_allele2);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
void vcf_entry::get_indv_GENOTYPE_ids(unsigned int indv, pair<int, int> &out) const
|
|
236
|
+
{
|
|
237
|
+
assert(parsed_GT[indv] == true);
|
|
238
|
+
out = GENOTYPE[indv];
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
char vcf_entry::get_indv_PHASE(unsigned int indv) const
|
|
242
|
+
{
|
|
243
|
+
assert(parsed_GT[indv] == true);
|
|
244
|
+
return PHASE[indv];
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
int vcf_entry::get_indv_DEPTH(unsigned int indv) const
|
|
248
|
+
{
|
|
249
|
+
assert(parsed_DP[indv] == true);
|
|
250
|
+
if (DEPTH.size() == 0)
|
|
251
|
+
return -1;
|
|
252
|
+
return DEPTH[indv];
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
double vcf_entry::get_indv_GQUALITY(unsigned int indv) const
|
|
256
|
+
{
|
|
257
|
+
assert(parsed_GQ[indv] == true);
|
|
258
|
+
if (GQUALITY.size() == 0)
|
|
259
|
+
return -1;
|
|
260
|
+
return GQUALITY[indv];
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
void vcf_entry::get_indv_GFILTER_vector(unsigned int indv, vector<string> &out) const
|
|
264
|
+
{
|
|
265
|
+
assert(parsed_FT[indv] == true);
|
|
266
|
+
if (GFILTER.size() > 0)
|
|
267
|
+
out = GFILTER[indv];
|
|
268
|
+
else
|
|
269
|
+
out.resize(0);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
void vcf_entry::get_indv_GFILTER(unsigned int indv, string &out) const
|
|
273
|
+
{
|
|
274
|
+
assert(parsed_FT[indv] == true);
|
|
275
|
+
|
|
276
|
+
if ((GFILTER.size() > 0) && (GFILTER[indv].size()>0))
|
|
277
|
+
{
|
|
278
|
+
out="";
|
|
279
|
+
for (unsigned int ui=0; ui<GFILTER[indv].size(); ui++)
|
|
280
|
+
{
|
|
281
|
+
if (ui!=0)
|
|
282
|
+
out += ";";
|
|
283
|
+
out += GFILTER[indv][ui];
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
else
|
|
287
|
+
out = ".";
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
int vcf_entry::get_indv_ploidy(unsigned int indv) const
|
|
291
|
+
{
|
|
292
|
+
assert (parsed_GT[indv]==true);
|
|
293
|
+
return ploidy[indv];
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
void vcf_entry::read_indv_generic_entry(unsigned int indv, const string &FORMAT_id, string &out)
|
|
297
|
+
{
|
|
298
|
+
if (fully_parsed == false)
|
|
299
|
+
parse_full_entry(true);
|
|
300
|
+
|
|
301
|
+
if (parsed_FORMAT == false)
|
|
302
|
+
set_FORMAT(FORMAT_str);
|
|
303
|
+
|
|
304
|
+
out = ".";
|
|
305
|
+
|
|
306
|
+
if (FORMAT_to_idx.find(FORMAT_id) != FORMAT_to_idx.end())
|
|
307
|
+
{
|
|
308
|
+
unsigned int idx = FORMAT_to_idx[FORMAT_id];
|
|
309
|
+
static string tmpstr;
|
|
310
|
+
static istringstream ss;
|
|
311
|
+
ss.clear();
|
|
312
|
+
ss.str(GENOTYPE_str[indv]);
|
|
313
|
+
|
|
314
|
+
for (unsigned int ui=0; ui <= idx; ui++)
|
|
315
|
+
{
|
|
316
|
+
getline(ss, tmpstr, ':');
|
|
317
|
+
if (ui == idx)
|
|
318
|
+
{
|
|
319
|
+
out = tmpstr;
|
|
320
|
+
break;
|
|
321
|
+
}
|
|
322
|
+
if (!ss.good())
|
|
323
|
+
break;
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
bool vcf_entry::FORMAT_id_exists(const string &FORMAT_id)
|
|
329
|
+
{
|
|
330
|
+
assert(parsed_FORMAT == true);
|
|
331
|
+
if (FORMAT_to_idx.find(FORMAT_id) != FORMAT_to_idx.end())
|
|
332
|
+
return true;
|
|
333
|
+
return false;
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
unsigned int vcf_entry::get_N_alleles() const
|
|
337
|
+
{
|
|
338
|
+
assert(parsed_ALT == true);
|
|
339
|
+
return (ALT.size()+1);
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
unsigned int vcf_entry::get_N_chr(const vector<bool> &include_indv, const vector<bool> &include_genotype) const
|
|
343
|
+
{
|
|
344
|
+
unsigned int out=0;
|
|
345
|
+
|
|
346
|
+
for (unsigned int ui=0; ui<N_indv; ui++)
|
|
347
|
+
{
|
|
348
|
+
if ((include_indv[ui] == true) && (include_genotype[ui] == true))
|
|
349
|
+
{
|
|
350
|
+
assert(parsed_GT[ui] == true);
|
|
351
|
+
out += ploidy[ui];
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
return out;
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
// Return the frequency (counts) of each allele.
|
|
359
|
+
void vcf_entry::get_allele_counts(vector<int> &out, unsigned int &N_non_missing_chr_out, const vector<bool> &include_indv, const vector<bool> &include_genotype) const
|
|
360
|
+
{
|
|
361
|
+
pair<int,int> genotype;
|
|
362
|
+
vector<int> allele_counts(get_N_alleles(), 0);
|
|
363
|
+
N_non_missing_chr_out = 0;
|
|
364
|
+
|
|
365
|
+
for (unsigned int ui=0; ui<N_indv; ui++)
|
|
366
|
+
{
|
|
367
|
+
if ((include_indv[ui] == true) && (include_genotype[ui] == true))
|
|
368
|
+
{
|
|
369
|
+
assert(parsed_GT[ui] == true);
|
|
370
|
+
get_indv_GENOTYPE_ids(ui, genotype);
|
|
371
|
+
if (genotype.first != -1)
|
|
372
|
+
{
|
|
373
|
+
allele_counts[genotype.first]++;
|
|
374
|
+
N_non_missing_chr_out++;
|
|
375
|
+
}
|
|
376
|
+
if (genotype.second != -1)
|
|
377
|
+
{
|
|
378
|
+
allele_counts[genotype.second]++;
|
|
379
|
+
N_non_missing_chr_out++;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
out = allele_counts;
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
// Return the counts of homozygote1, heterozygotes, and homozygote2
|
|
387
|
+
void vcf_entry::get_genotype_counts(const vector<bool> &include_indv, const vector<bool> &include_genotype, unsigned int &out_N_hom1, unsigned int &out_N_het, unsigned int &out_N_hom2) const
|
|
388
|
+
{
|
|
389
|
+
out_N_hom1 = 0; out_N_hom2 = 0; out_N_het = 0;
|
|
390
|
+
pair<int, int> genotype;
|
|
391
|
+
if (ALT.size() > 1)
|
|
392
|
+
error("Tried to return the genotype counts of a non-biallelic SNP", 99);
|
|
393
|
+
|
|
394
|
+
for (unsigned int ui=0; ui<N_indv; ui++)
|
|
395
|
+
{
|
|
396
|
+
if ((include_indv[ui] == true) && (include_genotype[ui] == true))
|
|
397
|
+
{
|
|
398
|
+
assert(parsed_GT[ui] == true);
|
|
399
|
+
get_indv_GENOTYPE_ids(ui, genotype);
|
|
400
|
+
if ((genotype.first != -1) && (genotype.second != -1))
|
|
401
|
+
{
|
|
402
|
+
if (genotype.first != genotype.second)
|
|
403
|
+
{
|
|
404
|
+
out_N_het++;
|
|
405
|
+
}
|
|
406
|
+
else if (genotype.first == 0)
|
|
407
|
+
{
|
|
408
|
+
out_N_hom1++;
|
|
409
|
+
}
|
|
410
|
+
else if (genotype.first == 1)
|
|
411
|
+
{
|
|
412
|
+
out_N_hom2++;
|
|
413
|
+
}
|
|
414
|
+
else
|
|
415
|
+
{
|
|
416
|
+
error("Unknown allele in genotype", 98);
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
}
|