ngs_server 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ngs_server +72 -50
- data/ext/bamtools/extconf.rb +3 -3
- data/ext/vcftools/Makefile +28 -0
- data/ext/vcftools/README.txt +36 -0
- data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
- data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
- data/ext/vcftools/cpp/.svn/entries +708 -0
- data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
- data/ext/vcftools/cpp/Makefile +46 -0
- data/ext/vcftools/cpp/dgeev.cpp +146 -0
- data/ext/vcftools/cpp/dgeev.h +43 -0
- data/ext/vcftools/cpp/output_log.cpp +79 -0
- data/ext/vcftools/cpp/output_log.h +34 -0
- data/ext/vcftools/cpp/parameters.cpp +535 -0
- data/ext/vcftools/cpp/parameters.h +154 -0
- data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
- data/ext/vcftools/cpp/vcf_entry.h +190 -0
- data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
- data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
- data/ext/vcftools/cpp/vcf_file.cpp +495 -0
- data/ext/vcftools/cpp/vcf_file.h +184 -0
- data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
- data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
- data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
- data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
- data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
- data/ext/vcftools/cpp/vcftools.cpp +107 -0
- data/ext/vcftools/cpp/vcftools.h +25 -0
- data/ext/vcftools/examples/.svn/all-wcprops +185 -0
- data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
- data/ext/vcftools/examples/.svn/entries +1048 -0
- data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
- data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
- data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
- data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
- data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
- data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
- data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
- data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
- data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
- data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
- data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
- data/ext/vcftools/examples/annotate-test.vcf +37 -0
- data/ext/vcftools/examples/annotate.out +23 -0
- data/ext/vcftools/examples/annotate.txt +7 -0
- data/ext/vcftools/examples/annotate2.out +52 -0
- data/ext/vcftools/examples/annotate3.out +23 -0
- data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
- data/ext/vcftools/examples/cmp-test.out +53 -0
- data/ext/vcftools/examples/concat-a.vcf +21 -0
- data/ext/vcftools/examples/concat-b.vcf +13 -0
- data/ext/vcftools/examples/concat-c.vcf +19 -0
- data/ext/vcftools/examples/concat.out +39 -0
- data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
- data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
- data/ext/vcftools/examples/merge-test-a.vcf +17 -0
- data/ext/vcftools/examples/merge-test-b.vcf +17 -0
- data/ext/vcftools/examples/merge-test-c.vcf +15 -0
- data/ext/vcftools/examples/merge-test.vcf.out +31 -0
- data/ext/vcftools/examples/perl-api-1.pl +46 -0
- data/ext/vcftools/examples/query-test.out +6 -0
- data/ext/vcftools/examples/shuffle-test.vcf +12 -0
- data/ext/vcftools/examples/subset.SNPs.out +10 -0
- data/ext/vcftools/examples/subset.indels.out +18 -0
- data/ext/vcftools/examples/subset.vcf +21 -0
- data/ext/vcftools/examples/valid-3.3.vcf +30 -0
- data/ext/vcftools/examples/valid-4.0.vcf +34 -0
- data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
- data/ext/vcftools/examples/valid-4.1.vcf +37 -0
- data/ext/vcftools/extconf.rb +2 -0
- data/ext/vcftools/perl/.svn/all-wcprops +149 -0
- data/ext/vcftools/perl/.svn/entries +844 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
- data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
- data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
- data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
- data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
- data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
- data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
- data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
- data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
- data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
- data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
- data/ext/vcftools/perl/ChangeLog +84 -0
- data/ext/vcftools/perl/FaSlice.pm +214 -0
- data/ext/vcftools/perl/Makefile +12 -0
- data/ext/vcftools/perl/Vcf.pm +2853 -0
- data/ext/vcftools/perl/VcfStats.pm +681 -0
- data/ext/vcftools/perl/fill-aa +103 -0
- data/ext/vcftools/perl/fill-an-ac +56 -0
- data/ext/vcftools/perl/fill-ref-md5 +204 -0
- data/ext/vcftools/perl/tab-to-vcf +92 -0
- data/ext/vcftools/perl/test.t +376 -0
- data/ext/vcftools/perl/vcf-annotate +1099 -0
- data/ext/vcftools/perl/vcf-compare +1193 -0
- data/ext/vcftools/perl/vcf-concat +310 -0
- data/ext/vcftools/perl/vcf-convert +180 -0
- data/ext/vcftools/perl/vcf-fix-newlines +97 -0
- data/ext/vcftools/perl/vcf-isec +660 -0
- data/ext/vcftools/perl/vcf-merge +577 -0
- data/ext/vcftools/perl/vcf-query +286 -0
- data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
- data/ext/vcftools/perl/vcf-sort +79 -0
- data/ext/vcftools/perl/vcf-stats +160 -0
- data/ext/vcftools/perl/vcf-subset +206 -0
- data/ext/vcftools/perl/vcf-to-tab +112 -0
- data/ext/vcftools/perl/vcf-validator +145 -0
- data/ext/vcftools/website/.svn/all-wcprops +41 -0
- data/ext/vcftools/website/.svn/entries +238 -0
- data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
- data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
- data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
- data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
- data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
- data/ext/vcftools/website/Makefile +6 -0
- data/ext/vcftools/website/README +2 -0
- data/ext/vcftools/website/VCF-poster.pdf +0 -0
- data/ext/vcftools/website/default.css +250 -0
- data/ext/vcftools/website/favicon.ico +0 -0
- data/ext/vcftools/website/favicon.png +0 -0
- data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/img/.svn/entries +300 -0
- data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
- data/ext/vcftools/website/img/bg.gif +0 -0
- data/ext/vcftools/website/img/bgcode.gif +0 -0
- data/ext/vcftools/website/img/bgcontainer.gif +0 -0
- data/ext/vcftools/website/img/bgul.gif +0 -0
- data/ext/vcftools/website/img/header.gif +0 -0
- data/ext/vcftools/website/img/li.gif +0 -0
- data/ext/vcftools/website/img/quote.gif +0 -0
- data/ext/vcftools/website/img/search.gif +0 -0
- data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/src/.svn/entries +300 -0
- data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
- data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
- data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
- data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
- data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
- data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
- data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
- data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
- data/ext/vcftools/website/src/docs.inc +202 -0
- data/ext/vcftools/website/src/index.inc +52 -0
- data/ext/vcftools/website/src/index.php +80 -0
- data/ext/vcftools/website/src/license.inc +27 -0
- data/ext/vcftools/website/src/links.inc +13 -0
- data/ext/vcftools/website/src/options.inc +654 -0
- data/ext/vcftools/website/src/perl_module.inc +249 -0
- data/ext/vcftools/website/src/specs.inc +18 -0
- data/lib/config.ru +9 -0
- data/lib/ngs_server/add.rb +9 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +55 -3
- data/ngs_server.gemspec +5 -2
- metadata +296 -6
@@ -0,0 +1,190 @@
|
|
1
|
+
/*
|
2
|
+
* vcf_entry.h
|
3
|
+
*
|
4
|
+
* Created on: Aug 19, 2009
|
5
|
+
* Author: Adam Auton
|
6
|
+
* ($Revision: 230 $)
|
7
|
+
*/
|
8
|
+
|
9
|
+
#ifndef VCF_ENTRY_H_
|
10
|
+
#define VCF_ENTRY_H_
|
11
|
+
|
12
|
+
#include <algorithm>
|
13
|
+
#include <iostream>
|
14
|
+
#include <iterator>
|
15
|
+
#include <limits>
|
16
|
+
#include <map>
|
17
|
+
#include <set>
|
18
|
+
#include <sstream>
|
19
|
+
#include <string>
|
20
|
+
#include <vector>
|
21
|
+
|
22
|
+
#include <cassert>
|
23
|
+
|
24
|
+
#include "output_log.h"
|
25
|
+
|
26
|
+
using namespace std;
|
27
|
+
|
28
|
+
// Type tag stored in Field_description::Type.
enum Type_enum
{
	Integer = 0,
	Float = 1,
	Character = 2,
	String = 3,
	Flag = 4
};
|
29
|
+
|
30
|
+
class Field_description
|
31
|
+
{
|
32
|
+
public:
|
33
|
+
string ID;
|
34
|
+
int N_entries;
|
35
|
+
Type_enum Type;
|
36
|
+
string Description;
|
37
|
+
|
38
|
+
Field_description() : ID(""), N_entries(0), Type(Integer), Description("") {};
|
39
|
+
~Field_description() {};
|
40
|
+
};
|
41
|
+
|
42
|
+
class vcf_entry {
|
43
|
+
public:
|
44
|
+
vcf_entry(const unsigned int N_indv);
|
45
|
+
vcf_entry(const unsigned int N_indv, const string &data_line);
|
46
|
+
~vcf_entry();
|
47
|
+
|
48
|
+
const unsigned int N_indv;
|
49
|
+
|
50
|
+
void parse_basic_entry(bool parse_ALT=false, bool parse_FILTER=false, bool parse_INFO=false);
|
51
|
+
void parse_full_entry(bool parse_FORMAT=true);
|
52
|
+
void parse_genotype_entry(unsigned int indv, bool GT=false, bool GQ=false, bool DP=false, bool FT=false);
|
53
|
+
void parse_genotype_entries(bool GT=false, bool GQ=false, bool DP=false, bool FT=false);
|
54
|
+
|
55
|
+
void reset(const string &vcf_data_line);
|
56
|
+
|
57
|
+
string get_CHROM() const;
|
58
|
+
void get_CHROM(string &out) const;
|
59
|
+
int get_POS() const;
|
60
|
+
string get_ID() const;
|
61
|
+
string get_REF() const;
|
62
|
+
string get_ALT() const;
|
63
|
+
string get_ALT_allele(int allele_num) const;
|
64
|
+
void get_allele(int allele_num, string &out) const;
|
65
|
+
void get_alleles_vector(vector<string> &out) const;
|
66
|
+
string get_FILTER() const;
|
67
|
+
void get_FILTER_vector(vector<string> &out) const;
|
68
|
+
double get_QUAL() const;
|
69
|
+
string get_INFO(const set<string> &INFO_to_keep) const;
|
70
|
+
string get_INFO_value(const string &key) const;
|
71
|
+
string get_FORMAT() const;
|
72
|
+
void get_indv_GENOTYPE_ids(unsigned int indv, pair<int, int> &out) const;
|
73
|
+
void get_indv_GENOTYPE_strings(unsigned int indv, pair<string, string> &out) const;
|
74
|
+
char get_indv_PHASE(unsigned int indv) const;
|
75
|
+
double get_indv_GQUALITY(unsigned int indv) const;
|
76
|
+
int get_indv_DEPTH(unsigned int indv) const;
|
77
|
+
void get_indv_GFILTER(unsigned int indv, string &out) const;
|
78
|
+
void get_indv_GFILTER_vector(unsigned int indv, vector<string> &out) const;
|
79
|
+
int get_indv_ploidy(unsigned int indv) const;
|
80
|
+
|
81
|
+
bool is_SNP() const;
|
82
|
+
bool is_biallelic_SNP() const;
|
83
|
+
bool is_diploid(const vector<bool> &include_indv, const vector<bool> &include_genotype) const;
|
84
|
+
|
85
|
+
void read_indv_generic_entry(unsigned int indv, const string &FORMAT_id, string &out);
|
86
|
+
bool FORMAT_id_exists(const string &FORMAT_id);
|
87
|
+
|
88
|
+
unsigned int get_N_alleles() const;
|
89
|
+
unsigned int get_N_chr(const vector<bool> &include_indv, const vector<bool> &include_genotype) const;
|
90
|
+
|
91
|
+
void set_CHROM(const string &in);
|
92
|
+
void set_POS(const int in);
|
93
|
+
void set_ID(const string &in);
|
94
|
+
void set_REF(const string &in);
|
95
|
+
void set_ALT(const string &in);
|
96
|
+
void set_QUAL(const double in);
|
97
|
+
void set_FILTER(const string &FILTER_str);
|
98
|
+
void set_FORMAT(const string &in);
|
99
|
+
void set_INFO(const string &INFO_str);
|
100
|
+
|
101
|
+
void add_ALT_allele(const string &in);
|
102
|
+
void add_FILTER_entry(const string &in);
|
103
|
+
void add_FORMAT_entry(const string &in, unsigned int pos);
|
104
|
+
|
105
|
+
void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const string &in);
|
106
|
+
void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const pair<string, string> &genotype, char phase);
|
107
|
+
void set_indv_GENOTYPE_and_PHASE(unsigned int indv, const pair<int, int> &genotype, char phase);
|
108
|
+
void set_indv_GENOTYPE_alleles(unsigned int indv, const pair<string, string> &in);
|
109
|
+
void set_indv_GENOTYPE_alleles(unsigned int indv, char a1, char a2);
|
110
|
+
void set_indv_GENOTYPE_ids(unsigned int indv, const pair<int, int> &in);
|
111
|
+
void set_indv_PHASE(unsigned int indv, char in);
|
112
|
+
void set_indv_GQUALITY(unsigned int indv, double in);
|
113
|
+
void set_indv_DEPTH(unsigned int indv, int in);
|
114
|
+
void set_indv_GFILTER(unsigned int indv, const string &in);
|
115
|
+
|
116
|
+
void add_indv_GFILTER(unsigned int indv, const string &in);
|
117
|
+
|
118
|
+
static void add_INFO_descriptor(const string &in);
|
119
|
+
static void add_FILTER_descriptor(const string &in);
|
120
|
+
static void add_FORMAT_descriptor(const string &in);
|
121
|
+
|
122
|
+
void print(ostream &out);
|
123
|
+
void print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO=false);
|
124
|
+
void print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO, const vector<bool> &include_indv, const vector<bool> &include_genotype);
|
125
|
+
|
126
|
+
void filter_genotypes_by_depth(vector<bool> &include_genotype_out, int min_depth, int max_depth);
|
127
|
+
void filter_genotypes_by_quality(vector<bool> &include_genotype_out, double min_genotype_quality);
|
128
|
+
void filter_genotypes_by_filter_status(vector<bool> &include_genotype_out, const set<string> &filter_flags_to_remove, bool remove_all = false);
|
129
|
+
|
130
|
+
void get_allele_counts(vector<int> &out, unsigned int &N_non_missing_chr_out, const vector<bool> &include_indv, const vector<bool> &include_genotype) const;
|
131
|
+
void get_genotype_counts(const vector<bool> &include_indv, const vector<bool> &include_genotype, unsigned int &out_N_hom1, unsigned int &out_N_het, unsigned int &out_N_hom2) const;
|
132
|
+
|
133
|
+
static double SNPHWE(int obs_hets, int obs_hom1, int obs_hom2);
|
134
|
+
private:
|
135
|
+
istringstream data_stream;
|
136
|
+
|
137
|
+
bool basic_parsed;
|
138
|
+
bool fully_parsed;
|
139
|
+
bool parsed_ALT;
|
140
|
+
bool parsed_FILTER;
|
141
|
+
bool parsed_INFO;
|
142
|
+
bool parsed_FORMAT;
|
143
|
+
|
144
|
+
string CHROM;
|
145
|
+
int POS;
|
146
|
+
string ID;
|
147
|
+
string REF;
|
148
|
+
vector<string> ALT;
|
149
|
+
double QUAL;
|
150
|
+
vector<string> FILTER;
|
151
|
+
bool passed_filters;
|
152
|
+
vector<pair<string, string> > INFO;
|
153
|
+
vector<string> FORMAT;
|
154
|
+
|
155
|
+
vector< pair<int,int> > GENOTYPE;
|
156
|
+
vector<int> ploidy;
|
157
|
+
vector<char> PHASE;
|
158
|
+
vector<double> GQUALITY;
|
159
|
+
vector<int> DEPTH;
|
160
|
+
vector< vector<string> > GFILTER;
|
161
|
+
|
162
|
+
vector<bool> parsed_GT;
|
163
|
+
vector<bool> parsed_GQ;
|
164
|
+
vector<bool> parsed_DP;
|
165
|
+
vector<bool> parsed_FT;
|
166
|
+
|
167
|
+
int GT_idx;
|
168
|
+
int GQ_idx;
|
169
|
+
int DP_idx;
|
170
|
+
int FT_idx;
|
171
|
+
|
172
|
+
string ALT_str, FILTER_str, INFO_str, FORMAT_str, QUAL_str;
|
173
|
+
vector<string> GENOTYPE_str;
|
174
|
+
|
175
|
+
map<string, unsigned int> FORMAT_to_idx;
|
176
|
+
|
177
|
+
static map<string, Field_description> INFO_map;
|
178
|
+
static map<string, string> FILTER_map;
|
179
|
+
static map<string, Field_description> FORMAT_map;
|
180
|
+
|
181
|
+
static int str2int(const string &in, const int missing_value=-1);
|
182
|
+
static double str2double(const string &in, const double missing_value=-1.0);
|
183
|
+
|
184
|
+
static string int2str(const int in, const int missing_value=-1);
|
185
|
+
static string double2str(const double in, const double missing_value=-1.0);
|
186
|
+
|
187
|
+
static void tokenize(const string &in, char token, vector<string> &out);
|
188
|
+
};
|
189
|
+
|
190
|
+
#endif /* VCF_ENTRY_H_ */
|
@@ -0,0 +1,421 @@
|
|
1
|
+
/*
|
2
|
+
* vcf_entry_getters.cpp
|
3
|
+
*
|
4
|
+
* Created on: Nov 11, 2009
|
5
|
+
* Author: Adam Auton
|
6
|
+
* ($Revision: 230 $)
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include "vcf_entry.h"
|
10
|
+
|
11
|
+
// Return the CHROMosome name
|
12
|
+
string vcf_entry::get_CHROM() const
|
13
|
+
{
|
14
|
+
return CHROM;
|
15
|
+
}
|
16
|
+
|
17
|
+
// Return the CHROMosome name
|
18
|
+
void vcf_entry::get_CHROM(string &out) const
|
19
|
+
{
|
20
|
+
out = CHROM;
|
21
|
+
}
|
22
|
+
|
23
|
+
int vcf_entry::get_POS() const
|
24
|
+
{
|
25
|
+
return POS;
|
26
|
+
}
|
27
|
+
|
28
|
+
// Stored ID, or "." when no ID is set.
string vcf_entry::get_ID() const
{
	return ID.empty() ? string(".") : ID;
}
|
34
|
+
|
35
|
+
// Reference allele of this entry, returned by value.
string vcf_entry::get_REF() const
{
	return string(REF);
}
|
39
|
+
|
40
|
+
// Alternative alleles joined with ',' (or "." when there are none).
// Requires ALT to have been parsed.
string vcf_entry::get_ALT() const
{
	assert(parsed_ALT == true);

	if (ALT.empty())
		return ".";

	string joined;
	for (vector<string>::const_iterator allele = ALT.begin(); allele != ALT.end(); ++allele)
	{
		if (allele != ALT.begin())
			joined += ",";
		joined += *allele;
	}
	return joined;
}
|
55
|
+
|
56
|
+
bool vcf_entry::is_SNP() const
|
57
|
+
{
|
58
|
+
assert(parsed_ALT == true);
|
59
|
+
|
60
|
+
if (REF.size() != 1)
|
61
|
+
return false; // Reference isn't a single base
|
62
|
+
|
63
|
+
if (ALT.size() == 0)
|
64
|
+
return false; // No alternative allele
|
65
|
+
|
66
|
+
for (unsigned int ui=0; ui<ALT.size(); ui++)
|
67
|
+
if (ALT[ui].size() != 1)
|
68
|
+
return false; // Alternative allele isn't a single base
|
69
|
+
|
70
|
+
return true;
|
71
|
+
}
|
72
|
+
|
73
|
+
bool vcf_entry::is_biallelic_SNP() const
|
74
|
+
{
|
75
|
+
assert(parsed_ALT == true);
|
76
|
+
|
77
|
+
if (REF.size() != 1)
|
78
|
+
return false; // Reference isn't a single base
|
79
|
+
|
80
|
+
if (ALT.size() != 1)
|
81
|
+
return false; // Not biallelic
|
82
|
+
|
83
|
+
if (ALT[0].size() != 1)
|
84
|
+
return false; // Alternative allele isn't a single base
|
85
|
+
|
86
|
+
return true;
|
87
|
+
}
|
88
|
+
|
89
|
+
bool vcf_entry::is_diploid(const vector<bool> &include_indv, const vector<bool> &include_genotype) const
|
90
|
+
{
|
91
|
+
for (unsigned int ui=0; ui<N_indv; ui++)
|
92
|
+
{
|
93
|
+
if ((include_indv[ui] == true) && (include_genotype[ui] == true))
|
94
|
+
{
|
95
|
+
assert(parsed_GT[ui] == true);
|
96
|
+
if (ploidy[ui] != 2)
|
97
|
+
return false;
|
98
|
+
}
|
99
|
+
}
|
100
|
+
return true;
|
101
|
+
}
|
102
|
+
|
103
|
+
void vcf_entry::get_allele(int allele_num, string &out) const
|
104
|
+
{
|
105
|
+
assert(parsed_ALT == true);
|
106
|
+
|
107
|
+
if (allele_num == 0)
|
108
|
+
out = REF;
|
109
|
+
else if ((allele_num < 0) || (unsigned(allele_num - 1) >= ALT.size()))
|
110
|
+
out = ".";
|
111
|
+
else
|
112
|
+
out = ALT[allele_num-1];
|
113
|
+
}
|
114
|
+
|
115
|
+
// Returns ALT[allele_num] (zero-based into the ALT list), or "." when the
// index is out of range. Requires ALT to have been parsed.
string vcf_entry::get_ALT_allele(int allele_num) const
{
	assert(parsed_ALT == true);

	const bool in_range = (allele_num >= 0) && (unsigned(allele_num) < ALT.size());
	if (in_range)
		return ALT[allele_num];
	return ".";
}
|
123
|
+
|
124
|
+
// Fills out with REF followed by every ALT allele, in order.
// Requires ALT to have been parsed.
void vcf_entry::get_alleles_vector(vector<string> &out) const
{
	assert(parsed_ALT == true);

	out.clear();
	out.reserve(ALT.size()+1);
	out.push_back(REF);
	out.insert(out.end(), ALT.begin(), ALT.end());
}
|
131
|
+
|
132
|
+
double vcf_entry::get_QUAL() const
|
133
|
+
{
|
134
|
+
return QUAL;
|
135
|
+
}
|
136
|
+
|
137
|
+
|
138
|
+
// Returns the FILTER column as text:
//   "."    - filters not passed and no filter names recorded,
//   "PASS" - passed_filters is set,
//   otherwise the recorded filter names joined with ';'.
// The VCF format defines FILTER as a semicolon-separated list; joining with
// ',' (as done previously) produced a column that reads as one single
// filter name. This also matches get_indv_GFILTER, which joins with ';'.
// Requires FILTER to have been parsed.
string vcf_entry::get_FILTER() const
{
	assert(parsed_FILTER == true);

	if (passed_filters == true)
		return "PASS";
	if (FILTER.empty())
		return ".";

	string joined(FILTER[0]);
	for (unsigned int ui=1; ui<FILTER.size(); ui++)
	{
		joined += ";";
		joined += FILTER[ui];
	}
	return joined;
}
|
157
|
+
|
158
|
+
// Copies the recorded filter names into out.
// Requires FILTER to have been parsed.
void vcf_entry::get_FILTER_vector(vector<string> &out) const
{
	assert(parsed_FILTER == true);
	out.assign(FILTER.begin(), FILTER.end());
}
|
163
|
+
|
164
|
+
|
165
|
+
// Builds the INFO column restricted to the keys listed in INFO_to_keep,
// as "key=value" pairs joined with ';' in stored order. Returns "." when
// nothing is kept. Requires INFO to have been parsed.
string vcf_entry::get_INFO(const set<string> &INFO_to_keep) const
{
	assert(parsed_INFO == true);

	string kept;
	bool any_kept = false;

	if (!INFO.empty() && !INFO_to_keep.empty())
	{
		for (vector<pair<string, string> >::const_iterator item = INFO.begin(); item != INFO.end(); ++item)
		{
			if (INFO_to_keep.count(item->first) == 0)
				continue;	// key not requested

			if (any_kept)
				kept += ";";
			kept += item->first;
			kept += "=";
			kept += item->second;
			any_kept = true;
		}
	}

	// Didn't find any INFO fields to keep
	if (!any_kept)
		return ".";
	return kept;
}
|
193
|
+
|
194
|
+
// Returns the value stored for the first INFO item whose key equals key,
// or "?" when no such key exists. Requires INFO to have been parsed.
string vcf_entry::get_INFO_value(const string &key) const
{
	assert(parsed_INFO == true);

	vector<pair<string, string> >::const_iterator item = INFO.begin();
	while (item != INFO.end())
	{
		if (item->first == key)
			return item->second;
		++item;
	}
	return "?";
}
|
205
|
+
|
206
|
+
// Returns the FORMAT ids joined with ':' (empty string when there are none).
// Requires FORMAT to have been parsed.
string vcf_entry::get_FORMAT() const
{
	assert(parsed_FORMAT == true);

	string joined;
	for (vector<string>::const_iterator id = FORMAT.begin(); id != FORMAT.end(); ++id)
	{
		if (id != FORMAT.begin())
			joined += ":";
		joined += *id;
	}
	return joined;
}
|
221
|
+
|
222
|
+
// Return the alleles of a genotype as a pair of strings.
|
223
|
+
void vcf_entry::get_indv_GENOTYPE_strings(unsigned int indv, pair<string, string> &out) const
|
224
|
+
{
|
225
|
+
assert(parsed_GT[indv] == true);
|
226
|
+
|
227
|
+
static string out_allele1, out_allele2;
|
228
|
+
|
229
|
+
get_allele(GENOTYPE[indv].first, out_allele1);
|
230
|
+
get_allele(GENOTYPE[indv].second, out_allele2);
|
231
|
+
out = make_pair(out_allele1, out_allele2);
|
232
|
+
}
|
233
|
+
|
234
|
+
|
235
|
+
void vcf_entry::get_indv_GENOTYPE_ids(unsigned int indv, pair<int, int> &out) const
|
236
|
+
{
|
237
|
+
assert(parsed_GT[indv] == true);
|
238
|
+
out = GENOTYPE[indv];
|
239
|
+
}
|
240
|
+
|
241
|
+
char vcf_entry::get_indv_PHASE(unsigned int indv) const
|
242
|
+
{
|
243
|
+
assert(parsed_GT[indv] == true);
|
244
|
+
return PHASE[indv];
|
245
|
+
}
|
246
|
+
|
247
|
+
int vcf_entry::get_indv_DEPTH(unsigned int indv) const
|
248
|
+
{
|
249
|
+
assert(parsed_DP[indv] == true);
|
250
|
+
if (DEPTH.size() == 0)
|
251
|
+
return -1;
|
252
|
+
return DEPTH[indv];
|
253
|
+
}
|
254
|
+
|
255
|
+
double vcf_entry::get_indv_GQUALITY(unsigned int indv) const
|
256
|
+
{
|
257
|
+
assert(parsed_GQ[indv] == true);
|
258
|
+
if (GQUALITY.size() == 0)
|
259
|
+
return -1;
|
260
|
+
return GQUALITY[indv];
|
261
|
+
}
|
262
|
+
|
263
|
+
void vcf_entry::get_indv_GFILTER_vector(unsigned int indv, vector<string> &out) const
|
264
|
+
{
|
265
|
+
assert(parsed_FT[indv] == true);
|
266
|
+
if (GFILTER.size() > 0)
|
267
|
+
out = GFILTER[indv];
|
268
|
+
else
|
269
|
+
out.resize(0);
|
270
|
+
}
|
271
|
+
|
272
|
+
void vcf_entry::get_indv_GFILTER(unsigned int indv, string &out) const
|
273
|
+
{
|
274
|
+
assert(parsed_FT[indv] == true);
|
275
|
+
|
276
|
+
if ((GFILTER.size() > 0) && (GFILTER[indv].size()>0))
|
277
|
+
{
|
278
|
+
out="";
|
279
|
+
for (unsigned int ui=0; ui<GFILTER[indv].size(); ui++)
|
280
|
+
{
|
281
|
+
if (ui!=0)
|
282
|
+
out += ";";
|
283
|
+
out += GFILTER[indv][ui];
|
284
|
+
}
|
285
|
+
}
|
286
|
+
else
|
287
|
+
out = ".";
|
288
|
+
}
|
289
|
+
|
290
|
+
int vcf_entry::get_indv_ploidy(unsigned int indv) const
|
291
|
+
{
|
292
|
+
assert (parsed_GT[indv]==true);
|
293
|
+
return ploidy[indv];
|
294
|
+
}
|
295
|
+
|
296
|
+
void vcf_entry::read_indv_generic_entry(unsigned int indv, const string &FORMAT_id, string &out)
|
297
|
+
{
|
298
|
+
if (fully_parsed == false)
|
299
|
+
parse_full_entry(true);
|
300
|
+
|
301
|
+
if (parsed_FORMAT == false)
|
302
|
+
set_FORMAT(FORMAT_str);
|
303
|
+
|
304
|
+
out = ".";
|
305
|
+
|
306
|
+
if (FORMAT_to_idx.find(FORMAT_id) != FORMAT_to_idx.end())
|
307
|
+
{
|
308
|
+
unsigned int idx = FORMAT_to_idx[FORMAT_id];
|
309
|
+
static string tmpstr;
|
310
|
+
static istringstream ss;
|
311
|
+
ss.clear();
|
312
|
+
ss.str(GENOTYPE_str[indv]);
|
313
|
+
|
314
|
+
for (unsigned int ui=0; ui <= idx; ui++)
|
315
|
+
{
|
316
|
+
getline(ss, tmpstr, ':');
|
317
|
+
if (ui == idx)
|
318
|
+
{
|
319
|
+
out = tmpstr;
|
320
|
+
break;
|
321
|
+
}
|
322
|
+
if (!ss.good())
|
323
|
+
break;
|
324
|
+
}
|
325
|
+
}
|
326
|
+
}
|
327
|
+
|
328
|
+
bool vcf_entry::FORMAT_id_exists(const string &FORMAT_id)
|
329
|
+
{
|
330
|
+
assert(parsed_FORMAT == true);
|
331
|
+
if (FORMAT_to_idx.find(FORMAT_id) != FORMAT_to_idx.end())
|
332
|
+
return true;
|
333
|
+
return false;
|
334
|
+
}
|
335
|
+
|
336
|
+
unsigned int vcf_entry::get_N_alleles() const
|
337
|
+
{
|
338
|
+
assert(parsed_ALT == true);
|
339
|
+
return (ALT.size()+1);
|
340
|
+
}
|
341
|
+
|
342
|
+
unsigned int vcf_entry::get_N_chr(const vector<bool> &include_indv, const vector<bool> &include_genotype) const
|
343
|
+
{
|
344
|
+
unsigned int out=0;
|
345
|
+
|
346
|
+
for (unsigned int ui=0; ui<N_indv; ui++)
|
347
|
+
{
|
348
|
+
if ((include_indv[ui] == true) && (include_genotype[ui] == true))
|
349
|
+
{
|
350
|
+
assert(parsed_GT[ui] == true);
|
351
|
+
out += ploidy[ui];
|
352
|
+
}
|
353
|
+
}
|
354
|
+
return out;
|
355
|
+
}
|
356
|
+
|
357
|
+
|
358
|
+
// Return the frequency (counts) of each allele.
|
359
|
+
void vcf_entry::get_allele_counts(vector<int> &out, unsigned int &N_non_missing_chr_out, const vector<bool> &include_indv, const vector<bool> &include_genotype) const
|
360
|
+
{
|
361
|
+
pair<int,int> genotype;
|
362
|
+
vector<int> allele_counts(get_N_alleles(), 0);
|
363
|
+
N_non_missing_chr_out = 0;
|
364
|
+
|
365
|
+
for (unsigned int ui=0; ui<N_indv; ui++)
|
366
|
+
{
|
367
|
+
if ((include_indv[ui] == true) && (include_genotype[ui] == true))
|
368
|
+
{
|
369
|
+
assert(parsed_GT[ui] == true);
|
370
|
+
get_indv_GENOTYPE_ids(ui, genotype);
|
371
|
+
if (genotype.first != -1)
|
372
|
+
{
|
373
|
+
allele_counts[genotype.first]++;
|
374
|
+
N_non_missing_chr_out++;
|
375
|
+
}
|
376
|
+
if (genotype.second != -1)
|
377
|
+
{
|
378
|
+
allele_counts[genotype.second]++;
|
379
|
+
N_non_missing_chr_out++;
|
380
|
+
}
|
381
|
+
}
|
382
|
+
}
|
383
|
+
out = allele_counts;
|
384
|
+
}
|
385
|
+
|
386
|
+
// Return the counts of homozygote1, heterozygotes, and homozygote2
|
387
|
+
void vcf_entry::get_genotype_counts(const vector<bool> &include_indv, const vector<bool> &include_genotype, unsigned int &out_N_hom1, unsigned int &out_N_het, unsigned int &out_N_hom2) const
|
388
|
+
{
|
389
|
+
out_N_hom1 = 0; out_N_hom2 = 0; out_N_het = 0;
|
390
|
+
pair<int, int> genotype;
|
391
|
+
if (ALT.size() > 1)
|
392
|
+
error("Tried to return the genotype counts of a non-biallelic SNP", 99);
|
393
|
+
|
394
|
+
for (unsigned int ui=0; ui<N_indv; ui++)
|
395
|
+
{
|
396
|
+
if ((include_indv[ui] == true) && (include_genotype[ui] == true))
|
397
|
+
{
|
398
|
+
assert(parsed_GT[ui] == true);
|
399
|
+
get_indv_GENOTYPE_ids(ui, genotype);
|
400
|
+
if ((genotype.first != -1) && (genotype.second != -1))
|
401
|
+
{
|
402
|
+
if (genotype.first != genotype.second)
|
403
|
+
{
|
404
|
+
out_N_het++;
|
405
|
+
}
|
406
|
+
else if (genotype.first == 0)
|
407
|
+
{
|
408
|
+
out_N_hom1++;
|
409
|
+
}
|
410
|
+
else if (genotype.first == 1)
|
411
|
+
{
|
412
|
+
out_N_hom2++;
|
413
|
+
}
|
414
|
+
else
|
415
|
+
{
|
416
|
+
error("Unknown allele in genotype", 98);
|
417
|
+
}
|
418
|
+
}
|
419
|
+
}
|
420
|
+
}
|
421
|
+
}
|