ngs_server 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ngs_server +72 -50
- data/ext/bamtools/extconf.rb +3 -3
- data/ext/vcftools/Makefile +28 -0
- data/ext/vcftools/README.txt +36 -0
- data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
- data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
- data/ext/vcftools/cpp/.svn/entries +708 -0
- data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
- data/ext/vcftools/cpp/Makefile +46 -0
- data/ext/vcftools/cpp/dgeev.cpp +146 -0
- data/ext/vcftools/cpp/dgeev.h +43 -0
- data/ext/vcftools/cpp/output_log.cpp +79 -0
- data/ext/vcftools/cpp/output_log.h +34 -0
- data/ext/vcftools/cpp/parameters.cpp +535 -0
- data/ext/vcftools/cpp/parameters.h +154 -0
- data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
- data/ext/vcftools/cpp/vcf_entry.h +190 -0
- data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
- data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
- data/ext/vcftools/cpp/vcf_file.cpp +495 -0
- data/ext/vcftools/cpp/vcf_file.h +184 -0
- data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
- data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
- data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
- data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
- data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
- data/ext/vcftools/cpp/vcftools.cpp +107 -0
- data/ext/vcftools/cpp/vcftools.h +25 -0
- data/ext/vcftools/examples/.svn/all-wcprops +185 -0
- data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
- data/ext/vcftools/examples/.svn/entries +1048 -0
- data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
- data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
- data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
- data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
- data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
- data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
- data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
- data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
- data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
- data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
- data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
- data/ext/vcftools/examples/annotate-test.vcf +37 -0
- data/ext/vcftools/examples/annotate.out +23 -0
- data/ext/vcftools/examples/annotate.txt +7 -0
- data/ext/vcftools/examples/annotate2.out +52 -0
- data/ext/vcftools/examples/annotate3.out +23 -0
- data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
- data/ext/vcftools/examples/cmp-test.out +53 -0
- data/ext/vcftools/examples/concat-a.vcf +21 -0
- data/ext/vcftools/examples/concat-b.vcf +13 -0
- data/ext/vcftools/examples/concat-c.vcf +19 -0
- data/ext/vcftools/examples/concat.out +39 -0
- data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
- data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
- data/ext/vcftools/examples/merge-test-a.vcf +17 -0
- data/ext/vcftools/examples/merge-test-b.vcf +17 -0
- data/ext/vcftools/examples/merge-test-c.vcf +15 -0
- data/ext/vcftools/examples/merge-test.vcf.out +31 -0
- data/ext/vcftools/examples/perl-api-1.pl +46 -0
- data/ext/vcftools/examples/query-test.out +6 -0
- data/ext/vcftools/examples/shuffle-test.vcf +12 -0
- data/ext/vcftools/examples/subset.SNPs.out +10 -0
- data/ext/vcftools/examples/subset.indels.out +18 -0
- data/ext/vcftools/examples/subset.vcf +21 -0
- data/ext/vcftools/examples/valid-3.3.vcf +30 -0
- data/ext/vcftools/examples/valid-4.0.vcf +34 -0
- data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
- data/ext/vcftools/examples/valid-4.1.vcf +37 -0
- data/ext/vcftools/extconf.rb +2 -0
- data/ext/vcftools/perl/.svn/all-wcprops +149 -0
- data/ext/vcftools/perl/.svn/entries +844 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
- data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
- data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
- data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
- data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
- data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
- data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
- data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
- data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
- data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
- data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
- data/ext/vcftools/perl/ChangeLog +84 -0
- data/ext/vcftools/perl/FaSlice.pm +214 -0
- data/ext/vcftools/perl/Makefile +12 -0
- data/ext/vcftools/perl/Vcf.pm +2853 -0
- data/ext/vcftools/perl/VcfStats.pm +681 -0
- data/ext/vcftools/perl/fill-aa +103 -0
- data/ext/vcftools/perl/fill-an-ac +56 -0
- data/ext/vcftools/perl/fill-ref-md5 +204 -0
- data/ext/vcftools/perl/tab-to-vcf +92 -0
- data/ext/vcftools/perl/test.t +376 -0
- data/ext/vcftools/perl/vcf-annotate +1099 -0
- data/ext/vcftools/perl/vcf-compare +1193 -0
- data/ext/vcftools/perl/vcf-concat +310 -0
- data/ext/vcftools/perl/vcf-convert +180 -0
- data/ext/vcftools/perl/vcf-fix-newlines +97 -0
- data/ext/vcftools/perl/vcf-isec +660 -0
- data/ext/vcftools/perl/vcf-merge +577 -0
- data/ext/vcftools/perl/vcf-query +286 -0
- data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
- data/ext/vcftools/perl/vcf-sort +79 -0
- data/ext/vcftools/perl/vcf-stats +160 -0
- data/ext/vcftools/perl/vcf-subset +206 -0
- data/ext/vcftools/perl/vcf-to-tab +112 -0
- data/ext/vcftools/perl/vcf-validator +145 -0
- data/ext/vcftools/website/.svn/all-wcprops +41 -0
- data/ext/vcftools/website/.svn/entries +238 -0
- data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
- data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
- data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
- data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
- data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
- data/ext/vcftools/website/Makefile +6 -0
- data/ext/vcftools/website/README +2 -0
- data/ext/vcftools/website/VCF-poster.pdf +0 -0
- data/ext/vcftools/website/default.css +250 -0
- data/ext/vcftools/website/favicon.ico +0 -0
- data/ext/vcftools/website/favicon.png +0 -0
- data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/img/.svn/entries +300 -0
- data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
- data/ext/vcftools/website/img/bg.gif +0 -0
- data/ext/vcftools/website/img/bgcode.gif +0 -0
- data/ext/vcftools/website/img/bgcontainer.gif +0 -0
- data/ext/vcftools/website/img/bgul.gif +0 -0
- data/ext/vcftools/website/img/header.gif +0 -0
- data/ext/vcftools/website/img/li.gif +0 -0
- data/ext/vcftools/website/img/quote.gif +0 -0
- data/ext/vcftools/website/img/search.gif +0 -0
- data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/src/.svn/entries +300 -0
- data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
- data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
- data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
- data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
- data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
- data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
- data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
- data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
- data/ext/vcftools/website/src/docs.inc +202 -0
- data/ext/vcftools/website/src/index.inc +52 -0
- data/ext/vcftools/website/src/index.php +80 -0
- data/ext/vcftools/website/src/license.inc +27 -0
- data/ext/vcftools/website/src/links.inc +13 -0
- data/ext/vcftools/website/src/options.inc +654 -0
- data/ext/vcftools/website/src/perl_module.inc +249 -0
- data/ext/vcftools/website/src/specs.inc +18 -0
- data/lib/config.ru +9 -0
- data/lib/ngs_server/add.rb +9 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +55 -3
- data/ngs_server.gemspec +5 -2
- metadata +296 -6
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# Notes:
|
4
|
+
# * The AA files can be downloaded from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments
|
5
|
+
# * The program runs samtools, therefore the AA files must be gzipped (not b2zipped).
|
6
|
+
#
|
7
|
+
# support: pd3@sanger
|
8
|
+
|
9
|
+
use strict;
|
10
|
+
use warnings;
|
11
|
+
use Carp;
|
12
|
+
use Vcf;
|
13
|
+
use FindBin;
|
14
|
+
use lib "$FindBin::Bin";
|
15
|
+
use FaSlice;
|
16
|
+
|
17
|
+
my $opts = parse_params();
|
18
|
+
fill_aa($opts,$$opts{aa_file});
|
19
|
+
|
20
|
+
exit;
|
21
|
+
|
22
|
+
#--------------------------------
|
23
|
+
|
24
|
+
# Abort the program.
#
# With arguments: the arguments are passed to Carp's confess, which dies
# with a stack trace. Without arguments: dies with the fill-aa usage text.
sub error
{
    my @details = @_;
    confess @details if @details;

    my @usage = (
        "Usage: fill-aa [OPTIONS] < in.vcf >out.vcf\n",
        "Options:\n",
        " -a, --ancestral-allele <prefix> Prefix to ancestral allele chromosome files.\n",
        " -h, -?, --help This help message.\n",
        "Example:\n",
        " (zcat file.vcf | grep ^#; zcat file.vcf | grep -v ^# | sort -k 1,1d -k 2,2n;) | fill-aa -a human_ancestor_ 2>test.err | gzip -c >out.vcf.gz \n",
        "\n",
    );
    die @usage;
}
|
37
|
+
|
38
|
+
|
39
|
+
# Parse @ARGV (consuming it) into an options hash.
#
# Recognised arguments:
#   -a, --ancestral-allele <prefix>   stored under the key "aa_file"
#   -h, -?, --help                    print the usage text via error()
#
# Returns a hash reference. Calls error() for an unknown argument, or when
# -a is absent or was given without a value.
sub parse_params
{
    my $opts = {};
    # Test definedness rather than truth, so that an argument such as "0" or ""
    # is reported as unknown instead of silently ending the loop.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-a' || $arg eq '--ancestral-allele' ) { $$opts{aa_file} = shift(@ARGV); next }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    # A trailing "-a" stores undef under the key, so check the value itself
    # and not merely the existence of the key.
    if ( !defined($$opts{aa_file}) ) { error("Missing the -a option.\n") }
    return $opts;
}
|
51
|
+
|
52
|
+
|
53
|
+
# Read a VCF from STDIN, add an AA tag to the INFO column of every record
# and print the result to STDOUT.
#
#   $opts     - options hash reference (not read here)
#   $aa_fname - either an existing file, or a prefix that is completed to
#               "<prefix><chromosome>.fa.gz" for each record
#
# Records for which get_base returns a false value get AA='.'. The two
# counters are reported on STDERR at the end.
sub fill_aa
{
    my ($opts,$aa_fname) = @_;

    my %count = ( unknown=>0, filled=>0 );

    my $vcf = Vcf->new(fh=>\*STDIN);
    $vcf->parse_header();
    my %aa_header =
    (
        key         => 'INFO',
        ID          => 'AA',
        Number      => 1,
        Type        => 'String',
        Description => 'Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README',
    );
    $vcf->add_header_line(\%aa_header);
    print $vcf->format_header();

    my $fa;
    while (my $rec = $vcf->next_data_hash())
    {
        my ($chr,$pos) = @$rec{'CHROM','POS'};

        # Use the given name as-is when it is a file, otherwise treat it as
        # a per-chromosome prefix.
        my $fname = $aa_fname;
        unless ( -e $fname )
        {
            my $per_chrom = "$aa_fname$chr.fa.gz";
            if ( !-e $per_chrom ) { error(qq[Neither "$aa_fname" nor "$per_chrom" exists.\n]); }
            $fname = $per_chrom;
        }

        # Keep the current reader for as long as the file name is unchanged.
        unless ( $fa and $$fa{file} eq $fname )
        {
            $fa = FaSlice->new(file=>$fname, size=>100_000);
        }

        my $aa   = $fa->get_base($chr,$pos);
        my $info = $$vcf{columns}[7];
        if ( $aa )
        {
            $$rec{$info}{AA} = $aa;
            $count{filled}++;
        }
        else
        {
            $$rec{$info}{AA} = '.';
            $count{unknown}++;
        }
        print $vcf->format_line($rec);
    }

    print STDERR
        "No AAs .. $count{unknown}\n",
        "AAs filled .. $count{filled}\n";
}
|
102
|
+
|
103
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
|
3
|
+
use strict;
|
4
|
+
use warnings;
|
5
|
+
use Carp;
|
6
|
+
use Vcf;
|
7
|
+
|
8
|
+
my $opts = parse_params();
|
9
|
+
fill_an_ac($$opts{file});
|
10
|
+
|
11
|
+
exit;
|
12
|
+
|
13
|
+
#--------------------------------
|
14
|
+
|
15
|
+
# Abort the program.
#
# With arguments: the arguments are passed to Carp's confess, which dies
# with a stack trace. Without arguments: dies with the fill-an-ac usage text.
sub error
{
    my @details = @_;
    if ( @details ) { confess @details; }

    die join('',
        "Usage: fill-an-ac [OPTIONS] < in.vcf >out.vcf\n",
        "Options:\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
}
|
25
|
+
|
26
|
+
|
27
|
+
# Parse @ARGV (consuming it) into an options hash.
#
# An argument naming an existing path is stored under "file" (a later one
# replaces an earlier one); -?, -h and --help print the usage via error();
# anything else is reported through error() as an unknown parameter.
#
# Returns a hash reference, empty when no file was given.
sub parse_params
{
    my %parsed;
    while (my $arg = shift @ARGV)
    {
        if ( -e $arg )
        {
            $parsed{file} = $arg;
        }
        elsif ( grep { $arg eq $_ } ('-?','-h','--help') )
        {
            error();
        }
        else
        {
            error("Unknown parameter \"$arg\". Run -h for help.\n");
        }
    }
    return \%parsed;
}
|
38
|
+
|
39
|
+
|
40
|
+
# Copy a VCF to STDOUT with AC and AN INFO header lines added.
#
#   $file - input VCF; when false the VCF is read from STDIN
#
# recalc_ac_an(2) is requested on the Vcf object before the records are read.
# NOTE(review): the meaning of the value 2 is defined by Vcf.pm, which is not
# visible here.
sub fill_an_ac
{
    my ($file) = @_;

    my @source = $file ? (file=>$file) : (fh=>\*STDIN);
    my $vcf = Vcf->new(@source);
    $vcf->parse_header();

    my @info_tags =
    (
        {key=>'INFO',ID=>'AC',Number=>-1,Type=>'Integer',Description=>'Allele count in genotypes'},
        {key=>'INFO',ID=>'AN',Number=>1,Type=>'Integer',Description=>'Total number of alleles in called genotypes'},
    );
    for my $tag (@info_tags)
    {
        $vcf->add_header_line($tag);
    }

    print $vcf->format_header();
    $vcf->recalc_ac_an(2);

    while (my $rec=$vcf->next_data_hash())
    {
        my $formatted = $vcf->format_line($rec);
        print $formatted;
    }
}
|
56
|
+
|
@@ -0,0 +1,204 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
|
3
|
+
use strict;
|
4
|
+
use warnings;
|
5
|
+
use Carp;
|
6
|
+
use IPC::Open2;
|
7
|
+
use Vcf;
|
8
|
+
|
9
|
+
my $opts = parse_params();
|
10
|
+
fill_ref_md5($opts);
|
11
|
+
|
12
|
+
exit;
|
13
|
+
|
14
|
+
#--------------------------------
|
15
|
+
|
16
|
+
# Abort the program.
#
# With arguments: the arguments are passed to Carp's confess, which dies
# with a stack trace. Without arguments: dies with the fill-ref-md5
# description and usage text.
sub error
{
    my @details = @_;
    confess @details if scalar @details;

    my $about =
          "About: The script computes MD5 sum of the reference sequence and inserts\n"
        . " 'reference' and 'contig' tags into header as recommended by VCFv4.1.\n"
        . " The VCF file must be compressed and tabix indexed, as it takes advantage\n"
        . " of the lightning fast tabix reheader functionality.\n";
    my $usage =
          "Usage: fill-ref-md5 [OPTIONS] in.vcf.gz out.vcf.gz\n"
        . "Options:\n"
        . " -d, --dictionary <file> Where to read/write computed MD5s. Opened in append mode, existing records are not touched.\n"
        . " -i, --info <AS:xx,SP:xx,TX:xx> Optional info on reference assembly (AS), species (SP), taxonomy (TX)\n"
        . " -r, --refseq <file> The reference sequence in fasta format indexed by samtools faidx\n"
        . " -h, -?, --help This help message.\n";
    my $examples =
          "Examples:\n"
        . " fill-ref-md5 -i AS:NCBIM37,SP:\"Mus\\ Musculus\" -r NCBIM37_um.fa -d NCBIM37_um.fa.dict in.vcf.gz out.vcf.gz\n"
        . "\n";

    die $about, $usage, $examples;
}
|
35
|
+
|
36
|
+
|
37
|
+
# Parse @ARGV (consuming it) into an options hash.
#
# Recognised arguments:
#   -i, --info <string>        stored under "info"
#   -r, --refseq <file>        stored under "refseq"
#   -d, --dictionary <file>    stored under "dictionary"
#   -h, -?, --help             print the usage text via error()
#   first existing path        stored under "file"    (input VCF)
#   next remaining argument    stored under "outfile" (output VCF)
#
# Returns a hash reference. Calls error() when neither -r nor -d is given,
# when the input or output file is missing, or for any other argument.
sub parse_params
{
    my $opts = {};
    # Test definedness rather than truth, so that an argument such as "0" or ""
    # does not silently end the loop.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-i' || $arg eq '--info' ) { $$opts{info}=shift(@ARGV); next; }
        if ( $arg eq '-r' || $arg eq '--refseq' ) { $$opts{refseq}=shift(@ARGV); next; }
        if ( $arg eq '-d' || $arg eq '--dictionary' ) { $$opts{dictionary}=shift(@ARGV); next; }
        # Help must be recognised before the positional rules below, otherwise
        # "in.vcf.gz -h" would take "-h" as the output file name.
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        if ( -e $arg && !exists($$opts{file}) ) { $$opts{file} = $arg; next }
        if ( exists($$opts{file}) && !exists($$opts{outfile}) ) { $$opts{outfile} = $arg; next }
        error("Unknown parameter \"$arg\" or non-existent file. Run -h for help.\n");
    }
    if ( !exists($$opts{refseq}) && !exists($$opts{dictionary}) ) { error("Expected one of -d or -r options\n"); }
    if ( !exists($$opts{file}) ) { error("No input VCF file given.\n"); }
    if ( !exists($$opts{outfile}) ) { error("No output VCF file given.\n"); }
    return $opts;
}
|
55
|
+
|
56
|
+
# Read a sequence dictionary written by write_dictionary.
#
#   $dict - path of the dictionary file; may be undefined or non-existent
#           (any further arguments are ignored)
#
# Returns a hash reference keyed by sequence name (the SN field); each value
# is a hash of the tag/value pairs found on that @SQ line. An empty hash is
# returned when there is no dictionary file. Calls error() when the file
# cannot be opened, is empty, or does not have the expected layout.
sub read_dict
{
    my ($dict) = @_;
    my $out = {};
    if ( !$dict or !-e $dict ) { return $out }

    open(my $fh,'<',$dict) or error("$dict: $!");
    my $header=<$fh>;
    # An empty file yields undef here; report it plainly instead of comparing
    # and interpolating an undefined value.
    if ( !defined $header ) { error("Could not parse $dict: the file is empty\n"); }
    if ( $header ne "\@HD\tVN:1.0\tSO:unsorted\n" ) { error("Could not parse $dict: $header"); }

    while (my $line=<$fh>)
    {
        chomp($line);

        # @SQ SN:5 LN:152537259 UR:file:/lustre/scratch102/projects/mouse/ref/NCBIM37_um.fa M5:f90804fb8fe9cb06076d51a710fb4563
        my @items = split(/\t/,$line);
        if ( @items != 5 ) { error("Could not parse $dict: $line"); }
        my $type = shift(@items);
        if ( $type ne '@SQ' ) { next; }
        my $rec = {};
        for my $item (@items)
        {
            # Split on the first colon only; values such as UR contain colons.
            if ( !($item=~/^([^:]+):(.+)$/) ) { error("Could not parse $dict: [$item] [$line]"); }
            $$rec{$1} = $2;
        }
        if ( !exists($$rec{SN}) ) { error("No SN in [$dict] [$line]?"); }
        $$out{$$rec{SN}} = $rec;
    }
    close($fh);

    return $out;
}
|
88
|
+
|
89
|
+
# Compute the length and MD5 sum of one reference sequence and store them
# in the dictionary.
#
#   $opts - options hash; "refseq" must name the reference fasta
#   $dict - dictionary hash reference, modified in place
#   $chr  - sequence name to extract with "samtools faidx"
#
# The sequence lines are streamed, without line breaks, into an external
# md5sum process. On success $$dict{$chr} receives SN, LN, UR, M5 and a
# "dirty" marker, and $$dict{dirty} is set as well. Calls error() when no
# reference was given, when a command cannot be started, or when the
# sequence is not found.
sub add_to_dictionary
{
    my ($opts,$dict,$chr) = @_;
    if ( !exists($$opts{refseq}) ) { error("The chromosome [$chr] not present in the dictionary and no reference sequence given.\n"); }

    my ($md5_in,$md5_out,$pid);
    # open2 raises an exception on failure; the reason is in $@, not in $!.
    eval { $pid = open2($md5_out,$md5_in,'md5sum'); 1 } or error("md5sum: $@");

    # The list form of a pipe open runs the command without a shell, so file
    # and sequence names with spaces or shell metacharacters are passed intact.
    my $cmd = "samtools faidx $$opts{refseq} $chr";
    open(my $refseq,'-|','samtools','faidx',$$opts{refseq},$chr) or error("$cmd: $!");
    # get rid of the first ">$chr" line.
    <$refseq>;
    my $len = 0;
    while (my $line=<$refseq>)
    {
        chomp($line);
        print $md5_in $line;
        $len += length($line);
    }
    close($refseq);

    close($md5_in);
    my @md5 = <$md5_out>;
    close($md5_out);
    # open2 does not reap its child; without this each call leaves a zombie.
    waitpid($pid,0);

    if ( !$len ) { error("The sequence [$chr] not present in $$opts{refseq}\n"); }
    if ( !@md5 ) { error("md5sum produced no output for the sequence [$chr]\n"); }

    # Keep only the checksum, dropping everything from the first whitespace on.
    $md5[0] =~ s/\s+.*$//;
    chomp($md5[0]);

    $$dict{$chr} = { dirty=>1, SN=>$chr, LN=>$len, UR=>'file://'.$$opts{refseq}, M5=>$md5[0] };
    $$dict{dirty} = 1;
}
|
121
|
+
|
122
|
+
# Append newly computed records to the dictionary file.
#
#   $opts - options hash; "dictionary" is the file to append to
#   $dict - dictionary hash reference
#
# Nothing is written unless $$dict{dirty} is set and a dictionary file was
# requested. The @HD header line is written only when the file does not
# exist yet. Only entries that are hashes with a true "dirty" flag are
# appended, in sorted key order. Calls error() when the file cannot be
# opened or closed.
sub write_dictionary
{
    my ($opts,$dict) = @_;
    if ( !$$dict{dirty} or !exists($$opts{dictionary}) ) { return }

    my $needs_header = !-e $$opts{dictionary} ? 1 : 0;

    open(my $fh,'>>',$$opts{dictionary}) or error("$$opts{dictionary}: $!");
    if ( $needs_header ) { print $fh "\@HD\tVN:1.0\tSO:unsorted\n"; }
    for my $key (sort keys %$dict)
    {
        # Skips the top-level "dirty" marker and records read from the file.
        if ( ref($$dict{$key}) ne 'HASH' or !$$dict{$key}{dirty} ) { next; }
        my ($sn,$ln,$ur,$m5) = @{$$dict{$key}}{qw(SN LN UR M5)};
        print $fh "\@SQ\tSN:$sn\tLN:$ln\tUR:$ur\tM5:$m5\n";
    }
    # Buffered write errors surface at close; do not lose them silently.
    close($fh) or error("$$opts{dictionary}: $!");
}
|
142
|
+
|
143
|
+
# Write the new VCF header to "<outfile>.header".
#
#   $opts   - options hash; reads "file", "outfile" and optionally "info"
#             and "refseq"
#   $dict   - dictionary hash reference keyed by sequence name
#   $chroms - array reference of sequence names to emit contig lines for
#
# The header of the input VCF is extended with a "reference" line (only when
# a reference sequence was given) and one "contig" line per sequence carrying
# ID, length, md5 and any key/value pairs from the --info option. The info
# keys AS, SP and TX are expanded to assembly, species and taxonomy.
# Calls error() when an info item has no value or the file cannot be written.
sub write_header
{
    my ($opts,$dict,$chroms) = @_;

    my %long_name = ( AS=>'assembly', SP=>'species', TX=>'taxonomy' );
    my %info;
    if ( exists($$opts{info}) )
    {
        for my $item (split(/,/,$$opts{info}))
        {
            # Limit the split to two fields so that values containing ':'
            # are kept whole.
            my ($key,$value) = split(/:/,$item,2);
            if ( !defined $value ) { error("Could not parse the info: [$item] [$$opts{info}]"); }
            # Expand the short key of this item only; the caller's option
            # string is left untouched.
            if ( exists($long_name{$key}) ) { $key = $long_name{$key}; }
            $info{$key} = $value;
        }
    }

    my $vcf = Vcf->new(file=>$$opts{file});
    $vcf->parse_header();

    # The reference is optional when a dictionary is supplied; without it
    # there is nothing meaningful to put on the reference line.
    if ( defined $$opts{refseq} )
    {
        # Prepend "file:" unless the name already starts with a scheme.
        my $uri = $$opts{refseq}=~m{^[^/:]+:} ? '' : 'file:';
        $vcf->add_header_line({key=>'reference', value=>"$uri$$opts{refseq}"});
    }
    for my $chrom (@$chroms)
    {
        my %line =
        (
            key    => 'contig',
            ID     => $$dict{$chrom}{SN},
            length => $$dict{$chrom}{LN},
            md5    => $$dict{$chrom}{M5},
            %info
        );
        $vcf->add_header_line(\%line);
    }

    open(my $out,'>',"$$opts{outfile}.header") or error("$$opts{outfile}.header: $!");
    print $out $vcf->format_header();
    close($out) or error("$$opts{outfile}.header: $!");
}
|
183
|
+
|
184
|
+
# Top-level driver: produce a copy of the input VCF with a new header.
#
#   $opts - options hash; reads "file", "outfile" and "dictionary"
#
# Steps: list the sequences in the tabix-indexed input, load the dictionary,
# compute the entries that are missing, append them to the dictionary file,
# write the new header to "<outfile>.header", and let "tabix -r" replace the
# header while copying the data to the output file.
# Calls error() when either tabix command fails or the output cannot be
# written.
sub fill_ref_md5
{
    my ($opts) = @_;

    # List chromosomes. The list form of the pipe open runs tabix without a
    # shell, so file names are passed through unmodified.
    my $list_cmd = "tabix -l $$opts{file}";
    open(my $list,'-|','tabix','-l',$$opts{file}) or error("The command failed: $list_cmd\n");
    my @chroms = <$list>;
    close($list) or error("The command failed: $list_cmd\n");
    chomp(@chroms);

    # Read dictionary
    my $dict = read_dict($$opts{dictionary},\@chroms);
    for my $chr (@chroms)
    {
        if ( !exists($$dict{$chr}) ) { add_to_dictionary($opts,$dict,$chr); }
    }
    write_dictionary($opts,$dict);
    write_header($opts,$dict,\@chroms);

    # Reheader. The output of tabix is compressed binary data, hence the
    # binmode calls and the fixed-size reads.
    my $header       = "$$opts{outfile}.header";
    my $reheader_cmd = "tabix -r $header $$opts{file}";
    open(my $in,'-|','tabix','-r',$header,$$opts{file}) or error("The command failed: $reheader_cmd\n");
    open(my $out,'>',$$opts{outfile}) or error("$$opts{outfile}: $!");
    binmode($in);
    binmode($out);
    my $buf;
    while ( read($in,$buf,65536) )
    {
        print $out $buf;
    }
    # close on a pipe returns false when the command exits with a non-zero
    # status; previously such a failure went unnoticed.
    close($in) or error("The command failed: $reheader_cmd\n");
    close($out) or error("$$opts{outfile}: $!");
}
|
204
|
+
|
@@ -0,0 +1,92 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# Author: petr.danecek@sanger
|
4
|
+
#
|
5
|
+
|
6
|
+
use strict;
|
7
|
+
use warnings;
|
8
|
+
use Carp;
|
9
|
+
use Vcf;
|
10
|
+
use FaSlice;
|
11
|
+
|
12
|
+
my $opts = parse_params();
|
13
|
+
tab_to_vcf();
|
14
|
+
|
15
|
+
exit;
|
16
|
+
|
17
|
+
#--------------------------------
|
18
|
+
|
19
|
+
# Abort the program.
#
# With arguments: the arguments are passed to Carp's confess, which dies
# with a stack trace. Without arguments: dies with the tab-to-vcf usage text.
sub error
{
    my @details = @_;
    confess @details if @details;

    my @usage;
    push @usage, "Usage: tab-to-vcf [OPTIONS]\n";
    push @usage, "Options:\n";
    push @usage, " -i, --id <string> The column ID.\n";
    push @usage, " -r, --ref <fasta-file> The reference sequence (optional).\n";
    push @usage, " -h, -?, --help This help message.\n";
    push @usage, "\n";
    die @usage;
}
|
31
|
+
|
32
|
+
|
33
|
+
# Parse @ARGV (consuming it) into an options hash.
#
# Recognised arguments:
#   -i, --id <string>        stored under the key "id"
#   -r, --ref <fasta-file>   stored under the key "refseq"
#   -h, -?, --help           print the usage text via error()
#
# Returns a hash reference. Calls error() for an unknown argument, or when
# -i is absent or was given without a value.
sub parse_params
{
    my $opts = {};
    # Test definedness rather than truth, so that an argument such as "0" or ""
    # is reported as unknown instead of silently ending the loop.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-i' || $arg eq '--id' ) { $$opts{id} = shift(@ARGV); next }
        if ( $arg eq '-r' || $arg eq '--ref' ) { $$opts{refseq} = shift(@ARGV); next }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    # A trailing "-i" stores undef under the key, so check the value itself
    # and not merely the existence of the key.
    if ( !defined($$opts{id}) ) { error("Missing the -i option.\n") }
    return $opts;
}
|
46
|
+
|
47
|
+
|
48
|
+
# Convert tab-delimited genotypes read from STDIN into a one-sample VCF on
# STDOUT.
#
# Takes no arguments; the file-level $opts hash supplies "id" (the sample
# column name) and, optionally, "refseq" (reference fasta).
#
# Input lines have at least three tab-separated fields: chromosome, position
# and a two-letter genotype such as "CT". Lines starting with '#' and lines
# whose genotype is '*' are skipped. Calls error() for lines that cannot be
# parsed.
sub tab_to_vcf
{
    my $refseq = $$opts{refseq} ? FaSlice->new(file=>$$opts{refseq},size=>1_000_000) : undef;

    my $id = $$opts{id};

    my $vcf_out = Vcf->new();
    $vcf_out->add_columns($id);
    $vcf_out->add_header_line({key=>'FORMAT',ID=>'GT',Number=>'1',Type=>'String',Description=>"Genotype"});
    print $vcf_out->format_header();

    while (my $line=<STDIN>)
    {
        if ( $line=~/^#/ ) { next; }

        # 11 86881024 CT
        # Split a chomped copy so that the last field does not carry the
        # newline (otherwise a trailing '*' would never be recognised); the
        # original line is kept for the error messages.
        chomp(my $row = $line);
        my @items = split(/\t/,$row);
        if ( @items < 3 ) { error("Could not parse the line: $line"); }
        if ( $items[2] eq '*' ) { next; }

        my ($chr,$pos,$snp) = @items[0..2];

        if ( !($pos=~/^\d+$/) ) { error("Could not parse the line: $line"); }
        if ( !($snp=~/^([ACGT])([ACGT])$/) ) { error("Could not parse the line: $line"); }
        $snp = "$1/$2";

        my %out;
        $out{CHROM} = $chr;
        $out{POS} = $pos;
        $out{ID} = '.';
        $out{ALT} = [];
        # The reference is documented as optional; fall back to the unknown
        # base instead of calling a method on undef.
        # NOTE(review): confirm that REF=N is the desired output when no
        # reference sequence is given.
        $out{REF} = $refseq ? $refseq->get_base($chr,$pos) : 'N';
        $out{QUAL} = '.';
        $out{FILTER} = ['.'];
        $out{FORMAT} = ['GT'];
        $out{gtypes}{$id}{GT} = $snp;

        $vcf_out->format_genotype_strings(\%out);
        print $vcf_out->format_line(\%out);
    }
}
|
92
|
+
|