ngs_server 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ngs_server +72 -50
- data/ext/bamtools/extconf.rb +3 -3
- data/ext/vcftools/Makefile +28 -0
- data/ext/vcftools/README.txt +36 -0
- data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
- data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
- data/ext/vcftools/cpp/.svn/entries +708 -0
- data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
- data/ext/vcftools/cpp/Makefile +46 -0
- data/ext/vcftools/cpp/dgeev.cpp +146 -0
- data/ext/vcftools/cpp/dgeev.h +43 -0
- data/ext/vcftools/cpp/output_log.cpp +79 -0
- data/ext/vcftools/cpp/output_log.h +34 -0
- data/ext/vcftools/cpp/parameters.cpp +535 -0
- data/ext/vcftools/cpp/parameters.h +154 -0
- data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
- data/ext/vcftools/cpp/vcf_entry.h +190 -0
- data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
- data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
- data/ext/vcftools/cpp/vcf_file.cpp +495 -0
- data/ext/vcftools/cpp/vcf_file.h +184 -0
- data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
- data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
- data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
- data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
- data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
- data/ext/vcftools/cpp/vcftools.cpp +107 -0
- data/ext/vcftools/cpp/vcftools.h +25 -0
- data/ext/vcftools/examples/.svn/all-wcprops +185 -0
- data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
- data/ext/vcftools/examples/.svn/entries +1048 -0
- data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
- data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
- data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
- data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
- data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
- data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
- data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
- data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
- data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
- data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
- data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
- data/ext/vcftools/examples/annotate-test.vcf +37 -0
- data/ext/vcftools/examples/annotate.out +23 -0
- data/ext/vcftools/examples/annotate.txt +7 -0
- data/ext/vcftools/examples/annotate2.out +52 -0
- data/ext/vcftools/examples/annotate3.out +23 -0
- data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
- data/ext/vcftools/examples/cmp-test.out +53 -0
- data/ext/vcftools/examples/concat-a.vcf +21 -0
- data/ext/vcftools/examples/concat-b.vcf +13 -0
- data/ext/vcftools/examples/concat-c.vcf +19 -0
- data/ext/vcftools/examples/concat.out +39 -0
- data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
- data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
- data/ext/vcftools/examples/merge-test-a.vcf +17 -0
- data/ext/vcftools/examples/merge-test-b.vcf +17 -0
- data/ext/vcftools/examples/merge-test-c.vcf +15 -0
- data/ext/vcftools/examples/merge-test.vcf.out +31 -0
- data/ext/vcftools/examples/perl-api-1.pl +46 -0
- data/ext/vcftools/examples/query-test.out +6 -0
- data/ext/vcftools/examples/shuffle-test.vcf +12 -0
- data/ext/vcftools/examples/subset.SNPs.out +10 -0
- data/ext/vcftools/examples/subset.indels.out +18 -0
- data/ext/vcftools/examples/subset.vcf +21 -0
- data/ext/vcftools/examples/valid-3.3.vcf +30 -0
- data/ext/vcftools/examples/valid-4.0.vcf +34 -0
- data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
- data/ext/vcftools/examples/valid-4.1.vcf +37 -0
- data/ext/vcftools/extconf.rb +2 -0
- data/ext/vcftools/perl/.svn/all-wcprops +149 -0
- data/ext/vcftools/perl/.svn/entries +844 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
- data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
- data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
- data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
- data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
- data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
- data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
- data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
- data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
- data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
- data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
- data/ext/vcftools/perl/ChangeLog +84 -0
- data/ext/vcftools/perl/FaSlice.pm +214 -0
- data/ext/vcftools/perl/Makefile +12 -0
- data/ext/vcftools/perl/Vcf.pm +2853 -0
- data/ext/vcftools/perl/VcfStats.pm +681 -0
- data/ext/vcftools/perl/fill-aa +103 -0
- data/ext/vcftools/perl/fill-an-ac +56 -0
- data/ext/vcftools/perl/fill-ref-md5 +204 -0
- data/ext/vcftools/perl/tab-to-vcf +92 -0
- data/ext/vcftools/perl/test.t +376 -0
- data/ext/vcftools/perl/vcf-annotate +1099 -0
- data/ext/vcftools/perl/vcf-compare +1193 -0
- data/ext/vcftools/perl/vcf-concat +310 -0
- data/ext/vcftools/perl/vcf-convert +180 -0
- data/ext/vcftools/perl/vcf-fix-newlines +97 -0
- data/ext/vcftools/perl/vcf-isec +660 -0
- data/ext/vcftools/perl/vcf-merge +577 -0
- data/ext/vcftools/perl/vcf-query +286 -0
- data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
- data/ext/vcftools/perl/vcf-sort +79 -0
- data/ext/vcftools/perl/vcf-stats +160 -0
- data/ext/vcftools/perl/vcf-subset +206 -0
- data/ext/vcftools/perl/vcf-to-tab +112 -0
- data/ext/vcftools/perl/vcf-validator +145 -0
- data/ext/vcftools/website/.svn/all-wcprops +41 -0
- data/ext/vcftools/website/.svn/entries +238 -0
- data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
- data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
- data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
- data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
- data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
- data/ext/vcftools/website/Makefile +6 -0
- data/ext/vcftools/website/README +2 -0
- data/ext/vcftools/website/VCF-poster.pdf +0 -0
- data/ext/vcftools/website/default.css +250 -0
- data/ext/vcftools/website/favicon.ico +0 -0
- data/ext/vcftools/website/favicon.png +0 -0
- data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/img/.svn/entries +300 -0
- data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
- data/ext/vcftools/website/img/bg.gif +0 -0
- data/ext/vcftools/website/img/bgcode.gif +0 -0
- data/ext/vcftools/website/img/bgcontainer.gif +0 -0
- data/ext/vcftools/website/img/bgul.gif +0 -0
- data/ext/vcftools/website/img/header.gif +0 -0
- data/ext/vcftools/website/img/li.gif +0 -0
- data/ext/vcftools/website/img/quote.gif +0 -0
- data/ext/vcftools/website/img/search.gif +0 -0
- data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/src/.svn/entries +300 -0
- data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
- data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
- data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
- data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
- data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
- data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
- data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
- data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
- data/ext/vcftools/website/src/docs.inc +202 -0
- data/ext/vcftools/website/src/index.inc +52 -0
- data/ext/vcftools/website/src/index.php +80 -0
- data/ext/vcftools/website/src/license.inc +27 -0
- data/ext/vcftools/website/src/links.inc +13 -0
- data/ext/vcftools/website/src/options.inc +654 -0
- data/ext/vcftools/website/src/perl_module.inc +249 -0
- data/ext/vcftools/website/src/specs.inc +18 -0
- data/lib/config.ru +9 -0
- data/lib/ngs_server/add.rb +9 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +55 -3
- data/ngs_server.gemspec +5 -2
- metadata +296 -6
@@ -0,0 +1,310 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# Author: petr.danecek@sanger
|
4
|
+
#
|
5
|
+
|
6
|
+
use strict;
|
7
|
+
use warnings;
|
8
|
+
use Carp;
|
9
|
+
use Vcf;
|
10
|
+
|
11
|
+
# Entry point of vcf-concat: parse the command line and run exactly one mode.
#   --check-columns  -> only compare the column names of the inputs
#   --merge-sort     -> concatenate with a partial merge sort
#   otherwise        -> plain concatenation
my $opts = parse_params();

if    ( $$opts{check_columns} ) { check_columns($opts); }
elsif ( exists( $$opts{sort} ) ) { concat_merge($opts); }
else                            { concat($opts); }

exit;
|
26
|
+
|
27
|
+
#--------------------------------
|
28
|
+
|
29
|
+
# Abort the program.
#   error(@msg) - croak with the given message.
#   error()     - no message given: die with the usage text (the -h/--help path).
# Never returns.
sub error {
    my @message = @_;
    croak @message if @message;

    my @usage = (
        "About: Convenience tool for concatenating VCF files. In the basic mode it does not \n",
        " do anything fancy except for a sanity check that all files have the same columns.\n",
        " When run with the -s option, it will perform a partial merge sort, looking at\n",
        " a limited number of open jobs simultaneously.\n",
        "Usage: vcf-concat [OPTIONS] A.vcf.gz B.vcf.gz C.vcf.gz > out.vcf\n",
        "Options:\n",
        " -c, --check-columns Do not concatenate, only check if the columns agree.\n",
        " -f, --files <file> Read the list of files from a file.\n",
        " -s, --merge-sort <int> Allow small overlaps in N consecutive files.\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
    die @usage;
}
|
49
|
+
|
50
|
+
# Parse the command line (consumes @ARGV) into an options hash reference.
#
# Recognised arguments:
#   -s, --merge-sort <int>   next argument is stored in $$opts{sort}
#   -c, --check-columns      sets $$opts{check_columns}
#   -f, --files <file>       every non-empty line of <file> is appended to $$opts{files}
#   -h, -?, --help           error() without a message, i.e. the usage text
#   anything else            must be an existing path; appended to $$opts{files}
#
# Returns the hash reference. Calls error() for an unknown argument, a -f
# without a value or with an unreadable list, and when no input file was given.
sub parse_params {
    my $opts = { files => [] };

    # defined() rather than a truth test: an argument such as "0" or "" must
    # not silently end the parsing and drop the rest of the command line.
    while ( defined( my $arg = shift(@ARGV) ) ) {
        if ( $arg eq '-s' || $arg eq '--merge-sort' ) { $$opts{sort} = shift(@ARGV); next; }
        if ( $arg eq '-c' || $arg eq '--check-columns' ) { $$opts{check_columns} = 1; next; }
        if ( $arg eq '-f' || $arg eq '--files' ) {
            my $list = shift(@ARGV);
            if ( !defined $list ) { error("Missing the file name after $arg. Run -h for help.\n"); }
            open( my $fh, '<', $list ) or error("$list: $!");
            while ( my $line = <$fh> ) {
                chomp($line);

                # A blank line (typically the trailing one) is not a file name.
                next if $line eq '';
                push @{ $$opts{files} }, $line;
            }
            close($fh);
            next;
        }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        if ( -e $arg ) { push @{ $$opts{files} }, $arg; next; }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    if ( !@{ $$opts{files} } ) { error("No files to concat?\n"); }
    return $opts;
}
|
76
|
+
|
77
|
+
# Compare the header columns of every input file with those of the first file.
#
#   $opts - options hash reference; the files are taken from $$opts{files}.
#
# Disagreements are reported with warn() and processing continues with the
# next file. Each Vcf object is closed before the next one is opened.
sub check_columns {
    my ($opts) = @_;
    my @expected;

    for my $file ( @{ $$opts{files} } ) {
        my $vcf = Vcf->new( file => $file );
        $vcf->parse_header();

        if ( !@expected ) {
            # Nothing remembered yet: this file defines the reference columns.
            @expected = @{ $$vcf{columns} };
        }
        else {
            if ( @expected != @{ $$vcf{columns} } ) { warn("Different number of columns in [$file].\n"); }
            for ( my $i = 0; $i < @expected; $i++ ) {
                next if $$vcf{columns}[$i] eq $expected[$i];
                warn("The column names do not agree in [$file].\n");
                last;
            }
        }
        $vcf->close();
    }
}
|
101
|
+
|
102
|
+
# Plain concatenation: print the header of the first file, then the data
# lines of all files in the order given.
#
#   $opts - options hash reference; the files are taken from $$opts{files}.
#
# Every later file must have exactly the same columns as the first one,
# otherwise error() is called. Each Vcf object is closed once it has been
# streamed, as check_columns() does, so that handles are not kept open while
# the remaining files are processed.
sub concat {
    my ($opts) = @_;
    my @columns;

    for my $file ( @{ $$opts{files} } ) {
        my $vcf = Vcf->new( file => $file );
        $vcf->parse_header();

        if (@columns) {
            if ( @columns != @{ $$vcf{columns} } ) { error("Different number of columns in [$file].\n"); }
            for ( my $i = 0; $i < @columns; $i++ ) {
                if ( $$vcf{columns}[$i] ne $columns[$i] ) { error("The column names do not agree in [$file].\n"); }
            }
        }
        else {
            # First file: remember its columns and emit the only header of the output.
            @columns = @{ $$vcf{columns} };
            print $vcf->format_header();
        }

        while ( my $line = $vcf->next_line() ) {
            print $line;
        }
        $vcf->close();
    }
}
|
130
|
+
|
131
|
+
# Collect the chromosome names present in the given files.
#
#   $files - array reference of file names.
#
# Returns an array reference with every name reported by
# Vcf::get_chromosomes() listed once, in order of first appearance.
sub get_chromosomes {
    my ($files) = @_;
    my ( @ordered, %seen );

    for my $file (@$files) {
        my $vcf   = Vcf->new( file => $file );
        my $names = $vcf->get_chromosomes();

        # Keep only the names not met in this or an earlier file.
        for my $chr (@$names) {
            next if exists $seen{$chr};
            $seen{$chr} = 1;
            push @ordered, $chr;
        }
    }
    return \@ordered;
}
|
149
|
+
|
150
|
+
# Concatenation with a partial merge sort (-s): the chromosomes are processed
# one after another and, for each of them, a Reader merges the lines of a
# sliding window of input files.
#
#   $opts - options hash reference; uses $$opts{files} and $$opts{sort}.
#
# The header is printed once, by the first Reader; if there was no chromosome
# at all, the header of the first file is printed at the end.
sub concat_merge {
    my ($opts) = @_;

    my $header_done = 0;
    my $chroms      = get_chromosomes( $$opts{files} );

    for my $chr (@$chroms) {
        my $reader = Reader->new(
            files          => $$opts{files},
            nsort          => $$opts{sort},
            seq            => $chr,
            header_printed => $header_done,
        );
        $header_done = 1;
        $reader->open_next();

        LINE: while (1) {
            my $line = $reader->next_line();
            if ( defined $line ) {
                print $line;
                next LINE;
            }

            # The leading file of the window is exhausted: slide the window,
            # or stop when no file is left.
            last LINE if !$reader->open_next();
        }
    }

    if ( !$header_done ) {
        my $vcf = Vcf->new( file => $$opts{files}[0] );
        $vcf->parse_header();
        print $vcf->format_header();
    }
}
|
179
|
+
|
180
|
+
#---------------------------------
|
181
|
+
|
182
|
+
package Reader;
|
183
|
+
|
184
|
+
use strict;
|
185
|
+
use warnings;
|
186
|
+
use Carp;
|
187
|
+
use Vcf;
|
188
|
+
|
189
|
+
# Constructor of Reader.
#
# Arguments are key=>value pairs stored directly in the object:
#   files  - array reference of input files (required, otherwise throw())
#   nsort  - number of files merged at once; defaults to 2 and is capped
#            at the number of files
# Other keys (the caller in this file passes seq and header_printed) are kept
# as given. The idxs and vcfs slots start out undefined.
sub new {
    my ( $class, @args ) = @_;

    my $self = {@args};
    bless $self, ref($class) || $class;

    $self->throw("Expected the files option.\n") unless $$self{files};

    $$self{nsort} = 2 unless $$self{nsort};
    my $nfiles = scalar @{ $$self{files} };
    $$self{nsort} = $nfiles if $$self{nsort} > $nfiles;

    $$self{idxs} = undef;
    $$self{vcfs} = undef;
    return $self;
}
|
201
|
+
|
202
|
+
# Die with the given message and a full stack trace (Carp::confess).
sub throw {
    my $self = shift;
    confess @_;
}
|
207
|
+
|
208
|
+
# Print the formatted header of $vcf unless a header has been printed already;
# the fact is remembered in $$self{header_printed}.
sub print_header {
    my ( $self, $vcf ) = @_;
    return if $$self{header_printed};

    my $header = $vcf->format_header();
    print $header;
    $$self{header_printed} = 1;
}
|
215
|
+
|
216
|
+
|
217
|
+
# Open a VCF restricted to the region $$self{seq}, parse its header, check
# the column names against those of the first file opened and, when called
# for the first time, output the VCF header.
#
#   $file - name of the file to open.
#
# Returns the Vcf object; throws when the columns disagree.
sub open_vcf {
    my ( $self, $file ) = @_;

    my $vcf = Vcf->new( file => $file, region => $$self{seq}, print_header => 1 );
    $vcf->parse_header();

    if ( exists( $$self{columns} ) ) {
        my $known = $$self{columns};
        my $found = $$vcf{columns};

        if ( @$known != @$found ) { $self->throw("Different number of columns in [$file].\n"); }
        for ( my $i = 0; $i < @$known; $i++ ) {
            if ( $$found[$i] ne $$known[$i] ) { $self->throw("The column names do not agree in [$file].\n"); }
        }
    }
    else {
        # First file seen by this reader: keep a copy of its columns as the reference.
        $$self{columns} = [ @{ $$vcf{columns} } ];
    }

    $self->print_header($vcf);
    return $vcf;
}
|
238
|
+
|
239
|
+
# Advance the window of open files.
#
# On the first call the window is set to the first $$self{nsort} file indexes.
# On later calls the leading file is dropped from both idxs and vcfs and, if
# an unopened file follows the last index, it is appended. Every window slot
# without a Vcf object is then opened with open_vcf().
#
# Returns 1 while the window holds at least one file, 0 once it is empty.
sub open_next {
    my ($self) = @_;

    if ( defined $$self{idxs} ) {
        my $last_idx = $$self{idxs}[-1];

        shift( @{ $$self{idxs} } );
        shift( @{ $$self{vcfs} } );

        # New file to be opened
        my $candidate = $last_idx + 1;
        if ( $candidate < @{ $$self{files} } ) { push @{ $$self{idxs} }, $candidate; }
    }
    else {
        for ( my $n = 0; $n < $$self{nsort}; $n++ ) {
            $$self{idxs}[$n] = $n;
        }
    }

    for ( my $slot = 0; $slot < @{ $$self{idxs} }; $slot++ ) {
        next if exists( $$self{vcfs}[$slot] );
        my $file = $$self{files}[ $$self{idxs}[$slot] ];
        $$self{vcfs}[$slot] = $self->open_vcf($file);
    }

    return @{ $$self{idxs} } ? 1 : 0;
}
|
272
|
+
|
273
|
+
# Return the line with the smallest position among the next lines of the
# files in the window, or undef when the leading file of the window has no
# more lines (the caller then slides the window with open_next()).
#
# One line is read from each open file; all but the chosen one are pushed
# back with _unread_line(). On equal positions the earlier file wins. Throws
# when a line cannot be parsed or when the files disagree on the chromosome.
sub next_line {
    my ($self) = @_;

    my $best_vcf  = $$self{vcfs}[0];
    my $best_line = $best_vcf->next_line();
    if ( !defined $best_line ) { return undef; }

    if ( $best_line !~ /^(\S+)\t(\d+)/ ) { $self->throw("Could not parse the line: $best_line\n"); }
    my ( $best_chr, $best_pos ) = ( $1, $2 );

    for ( my $i = 1; $i < @{ $$self{vcfs} }; $i++ ) {
        next if !exists( $$self{vcfs}[$i] );

        my $vcf  = $$self{vcfs}[$i];
        my $line = $vcf->next_line();
        next if !defined $line;

        if ( $line !~ /^(\S+)\t(\d+)/ ) { $self->throw("Could not parse the line: $line\n"); }
        my ( $chr, $pos ) = ( $1, $2 );

        if ( $chr ne $best_chr ) { $self->throw("FIXME: When run with the -s option, only one chromosome can be present.\n"); }

        if ( $best_pos > $pos ) {
            # This file is ahead: return the previous candidate to its file.
            $best_vcf->_unread_line($best_line);
            ( $best_vcf, $best_line, $best_pos ) = ( $vcf, $line, $pos );
        }
        else {
            $vcf->_unread_line($line);
        }
    }
    return $best_line;
}
|
309
|
+
|
310
|
+
|
@@ -0,0 +1,180 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
|
3
|
+
use strict;
|
4
|
+
use warnings;
|
5
|
+
use Carp;
|
6
|
+
use Vcf;
|
7
|
+
use FindBin;
|
8
|
+
use lib "$FindBin::Bin";
|
9
|
+
use FaSlice;
|
10
|
+
|
11
|
+
# Entry point of vcf-convert: parse the command line, then convert the VCF
# read from STDIN and print the result to STDOUT.
my $opts = parse_params();
convert_file($opts);

exit 0;
|
15
|
+
|
16
|
+
#--------------------------------
|
17
|
+
|
18
|
+
# Abort the program.
#   error(@msg) - croak with the given message.
#   error()     - no message given: die with the usage text (the -h/--help path).
# Never returns.
sub error {
    my @message = @_;
    croak @message if @message;

    my @usage = (
        "About: Convert between VCF versions, currently to VCFv4.0 only.\n",
        "Usage: cat in.vcf | vcf-convert [OPTIONS] > out.vcf\n",
        "Options:\n",
        " -r, --refseq <file> The reference sequence in samtools faindexed fasta file. (Not required with SNPs only.)\n",
        " -v, --version <string> 4.0\n",
        " -h, -?, --help This help message.\n",
        "\n",
    );
    die @usage;
}
|
34
|
+
|
35
|
+
|
36
|
+
# Parse the command line (consumes @ARGV) into an options hash reference.
#
# Recognised arguments:
#   -r, --refseq <file>      next argument is stored in $$opts{refseq}
#   -v, --version <string>   next argument is stored in $$opts{version} (default '4.0')
#   -h, -?, --help           error() without a message, i.e. the usage text
#
# Returns the hash reference; any other argument ends in error().
sub parse_params {
    my $opts = { version => '4.0' };

    # defined() rather than a truth test: an argument such as "0" or "" must
    # be reported as unknown, not silently end the parsing and drop the
    # options that follow it.
    while ( defined( my $arg = shift(@ARGV) ) ) {
        if ( $arg eq '-r' || $arg eq '--refseq' )  { $$opts{refseq}  = shift(@ARGV); next; }
        if ( $arg eq '-v' || $arg eq '--version' ) { $$opts{version} = shift(@ARGV); next; }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    return $opts;
}
|
48
|
+
|
49
|
+
|
50
|
+
# Convert the VCF read from STDIN to VCFv4.0 and print it to STDOUT.
#
#   $opts - options hash reference from parse_params(): {version} must be
#           '4.0'; {refseq} (a fasta file) is needed only once a deletion
#           is encountered.
#
# For every record, per-sample FORMAT values, QUAL and FILTER entries equal to
# the input object's default / "passed" markers are replaced by those of the
# output object. If any ALT allele is an indel, REF and ALT are rebuilt with
# $vcf_out->fill_ref_alt_mapping().
#
# Calls error() for an unsupported output version, an unexpected event type,
# a deletion without -r, and a REF base which disagrees with the fasta file.
sub convert_file {
    my ($opts) = @_;

    if ( $$opts{version} ne '4.0' ) { error("Sorry, 4.0 is currently the only supported output version.\n"); }

    # How to recognise a number. The sign of the exponent is optional, so
    # that values such as "1e5" are recognised as well.
    my $FLOAT_RE = qr/^\-?\d+\.?\d*(?:[eE][+-]?\d+)?$/;

    my $vcf_in  = Vcf->new( fh => \*STDIN );
    my $vcf_out = Vcf->new( version => '4.0' );

    # Convert the header. The first header line is not copied.
    # NOTE(review): presumably it is the fileformat line, which $vcf_out
    # supplies for its own version - confirm against Vcf.pm.
    $vcf_in->parse_header();
    for ( my $i = 1; $i < @{ $$vcf_in{header_lines} }; $i++ ) {
        $vcf_out->add_header_line( $$vcf_in{header_lines}[$i] );
    }
    $vcf_out->add_columns( @{ $$vcf_in{columns} } );
    print $vcf_out->format_header();

    # Convert each data line. The reference reader is created lazily, on the first deletion.
    my $fa;
    while ( my $x = $vcf_in->next_data_hash() ) {

        # Convert missing (default) FORMAT values
        for my $gt ( values %{ $$x{gtypes} } ) {
            for my $field ( @{ $$x{FORMAT} } ) {

                # Skip the GT tag, so that ploidy information is not lost ("./." would become ".")
                if ( $field eq 'GT' ) { next; }
                if ( $field eq 'FT' && $$gt{$field} eq $$vcf_in{filter_passed} ) { $$gt{$field} = $$vcf_out{filter_passed}; }
                if ( exists( $$vcf_in{defaults}{$field} ) && $$vcf_in{defaults}{$field} eq $$gt{$field} ) {
                    $$gt{$field} = $$vcf_out{defaults}{$field};
                    next;
                }
                if ( exists( $$vcf_in{header}{FORMAT}{$field}{default} ) && $$vcf_in{header}{FORMAT}{$field}{default} eq $$gt{$field} ) {
                    delete( $$gt{$field} );
                    next;
                }
            }
        }

        # Change missing QUAL: In case they are numbers, do numeric comparison, as -1.0 is sometimes used instead of -1
        if ( $$x{QUAL} eq $$vcf_in{defaults}{QUAL}
            or ( $$x{QUAL} =~ $FLOAT_RE && $$vcf_in{defaults}{QUAL} =~ $FLOAT_RE && $$x{QUAL} == $$vcf_in{defaults}{QUAL} ) )
        {
            $$x{QUAL} = $$vcf_out{defaults}{QUAL};
        }
        for ( my $i = 0; $i < @{ $$x{FILTER} }; $i++ ) {
            if ( $$x{FILTER}[$i] eq $$vcf_in{filter_passed} ) { $$x{FILTER}[$i] = $$vcf_out{filter_passed}; }
        }

        # Parse the ALT column and see if there are indels
        my $has_indel = 0;
        for my $alt ( @{ $$x{ALT} } ) {
            my ( $type, $len, $ht ) = $vcf_in->event_type( $x, $alt );
            if ( $type eq 's' or $type eq 'r' ) { next; }
            if ( $type ne 'i' ) { error("FIXME: expected indel at $$x{CHROM}:$$x{POS}\n"); }

            $has_indel = 1;
        }

        # If there is an indel, new REF and ALT must be changed
        if ($has_indel) {
            my $map           = {};    # ref => { alt => 1 }, handed to fill_ref_alt_mapping()
            my $alt_to_mapref = {};    # original ALT string => its { ref, alt } pair in $map

            for my $alt ( @{ $$x{ALT} } ) {
                my ( $type, $len, $ht ) = $vcf_in->event_type( $x, $alt );
                if ( $type eq 's' or $type eq 'r' ) {
                    $$alt_to_mapref{$alt} = { ref => $$x{REF}, alt => $alt };
                    $$map{ $$x{REF} }{$alt} = 1;
                    next;
                }

                if ( $type eq 'i' && $len > 0 ) {
                    # Insertion: the new ALT is REF followed by the inserted sequence.
                    my $tmp = $$x{REF} . $ht;
                    $$alt_to_mapref{$alt} = { ref => $$x{REF}, alt => $tmp };
                    $$map{ $$x{REF} }{$tmp} = 1;
                    next;
                }
                elsif ( $type eq 'i' && $len < 0 ) {
                    # Deletion: the deleted bases have to be read from the reference.
                    if ( !$fa ) {
                        if ( !$$opts{refseq} ) { error("Indels present, missing the -r option.\n"); }
                        $fa = FaSlice->new( file => $$opts{refseq}, size => 1_000_000 );
                    }
                    my $ref   = $fa->get_slice( $$x{CHROM}, $$x{POS}, $$x{POS} + abs($len) );
                    my $first = substr( $ref, 0, 1 );

                    # Sanity check: $first comes from the fasta file, $$x{REF} from the VCF.
                    if ( $$x{REF} ne $first ) {
                        error("Sanity check failed: the ref does not agree at $$x{CHROM}:$$x{POS} .. [$first] in .fa, [$$x{REF}] in .vcf\n");
                    }

                    $$alt_to_mapref{$alt} = { ref => $ref, alt => $$x{REF} };
                    $$map{$ref}{ $$x{REF} } = 1;
                    next;
                }
                else {
                    error("Uh, FIXME: $$x{CHROM}:$$x{POS} [$type] [$len] [$ht]\n");
                }
            }

            $$x{REF} = $vcf_out->fill_ref_alt_mapping($map);
            for ( my $i = 0; $i < @{ $$x{ALT} }; $i++ ) {
                my $ori_ref = $$alt_to_mapref{ $$x{ALT}[$i] }{ref};
                my $ori_alt = $$alt_to_mapref{ $$x{ALT}[$i] }{alt};
                $$x{ALT}[$i] = $$map{$ori_ref}{$ori_alt};
            }
        }

        print $vcf_out->format_line($x);
    }
}
|
180
|
+
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# Authors: Adam Auton, Petr Danecek
|
4
|
+
# (C) 2011
|
5
|
+
|
6
|
+
use strict;
|
7
|
+
use warnings;
|
8
|
+
use Carp;
|
9
|
+
|
10
|
+
# Entry point of vcf-fix-newlines: parse the command line, then report on or
# convert the newlines of the input.
my $opts = parse_params();
fix_file($opts);

exit 0;
|
14
|
+
|
15
|
+
#--------------------------------
|
16
|
+
|
17
|
+
# Abort the program.
#   error(@msg) - confess with the given message (includes a stack trace).
#   error()     - no message given: die with the usage text (the -h/--help path).
# Never returns.
sub error {
    my @message = @_;
    confess @message if @message;

    my @usage = (
        "About: Reads in a VCF file with any (commonly used) newline representation and outputs with the\n",
        " current system's newline representation.\n",
        "Usage: vcf-fix-newlines [OPTIONS]\n",
        "Options:\n",
        " -i, --info Report if the file is consistent with the current platform based.\n",
        " -h, -?, --help This help message.\n",
        "Example:\n",
        " vcf-fix-newlines -i file.vcf\n",
        " vcf-fix-newlines file.vcf.gz > out.vcf\n",
        " cat file.vcf | vcf-fix-newlines > out.vcf\n",
        "\n",
    );
    die @usage;
}
|
34
|
+
|
35
|
+
# Parse the command line (consumes @ARGV) into an options hash reference.
#
# Recognised arguments:
#   -i, --info       sets {info}
#   -h, -?, --help   error() without a message, i.e. the usage text
#   <file>           the first argument naming an existing path is stored in {vcf}
#
# Returns the hash reference; anything else ends in error().
sub parse_params {
    my %parsed;

    while ( defined( my $arg = shift(@ARGV) ) ) {
        if ( $arg eq '-i' || $arg eq '--info' ) {
            $parsed{info} = 1;
        }
        elsif ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) {
            error();
        }
        elsif ( !exists( $parsed{vcf} ) && -e $arg ) {
            $parsed{vcf} = $arg;
        }
        else {
            error("Unknown parameter \"$arg\". Run -h for help.\n");
        }
    }
    return \%parsed;
}
|
47
|
+
|
48
|
+
# Detect the newline convention of the input and, unless it already is "\n",
# print the input to the currently selected output handle with every newline
# replaced by "\n".
#
#   $opts - options hash reference from parse_params():
#             {vcf}  input file; names ending in .gz are read through
#                    "gunzip -c". Without it, STDIN is read.
#             {info} only report the detected convention, convert nothing.
#
# The convention is detected on the first 1024 bytes and reported with warn().
# Calls error() when the input cannot be opened, is empty, or when the
# convention cannot be determined from the sample.
sub fix_file {
    my ($opts) = @_;

    my $fh;
    if ( defined $$opts{vcf} && $$opts{vcf} ne '' ) {
        if ( $$opts{vcf} =~ /\.gz$/i ) {
            # List form of the pipe open: the file name reaches gunzip as one
            # argument and is never interpreted by a shell, so names with
            # spaces or shell metacharacters are safe.
            open( $fh, '-|', 'gunzip', '-c', $$opts{vcf} ) or error("gunzip -c $$opts{vcf}: $!\n");
        }
        else {
            open( $fh, '<', $$opts{vcf} ) or error("$$opts{vcf}: $!\n");
        }
    }
    else {
        $fh = \*STDIN;
    }

    # Read a small 1kb sample
    binmode $fh or error("binmode: $!");
    my $buf = do { local $/ = \1024; <$fh> };
    if ( !defined $buf ) { error("No data read.\n"); }

    # Check the origin
    my ( $in, $nl );
    if    ( $buf =~ /\015\012/ )               { $in = 'Windows'; $nl = "\015\012"; }
    elsif ( $buf =~ /\015/ && $buf !~ /\012/ ) { $in = 'Old Mac'; $nl = "\015"; }
    elsif ( $buf =~ /\012/ && $buf !~ /\015/ ) { $in = 'UNIX';    $nl = "\012"; }
    else {
        error("FIXME: Unable to determine the system which produced the file.\n");
    }
    warn("The file was generated on $in compatible system.\n");
    if ( $$opts{info} ) { close($fh); return; }
    if ( $nl eq "\n" ) { warn("No conversion needed.\n"); close($fh); return; }

    # Read the file and do the conversion, one input line at a time.
    local $/ = $nl;

    # Complete the line cut by the end of the sample before substituting, so
    # that a CR LF pair split at the 1kb boundary is still seen as a whole.
    # Inputs no longer than the sample have nothing left to read.
    my $rest = <$fh>;
    if ( defined $rest ) { $buf .= $rest; }
    $buf =~ s/\Q$nl\E/\n/g;
    print $buf;

    while ( defined( $buf = <$fh> ) ) {
        $buf =~ s/\Q$nl\E/\n/g;
        print $buf;
    }
    close($fh);
}
|
97
|
+
|