ngs_server 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/ngs_server +72 -50
- data/ext/bamtools/extconf.rb +3 -3
- data/ext/vcftools/Makefile +28 -0
- data/ext/vcftools/README.txt +36 -0
- data/ext/vcftools/cpp/.svn/all-wcprops +125 -0
- data/ext/vcftools/cpp/.svn/dir-prop-base +6 -0
- data/ext/vcftools/cpp/.svn/entries +708 -0
- data/ext/vcftools/cpp/.svn/text-base/Makefile.svn-base +46 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.cpp.svn-base +146 -0
- data/ext/vcftools/cpp/.svn/text-base/dgeev.h.svn-base +43 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.cpp.svn-base +79 -0
- data/ext/vcftools/cpp/.svn/text-base/output_log.h.svn-base +34 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.cpp.svn-base +535 -0
- data/ext/vcftools/cpp/.svn/text-base/parameters.h.svn-base +154 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.cpp.svn-base +497 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry.h.svn-base +190 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_getters.cpp.svn-base +421 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_entry_setters.cpp.svn-base +482 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.cpp.svn-base +495 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file.h.svn-base +184 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_diff.cpp.svn-base +1282 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_filters.cpp.svn-base +1215 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_format_convert.cpp.svn-base +1138 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_index.cpp.svn-base +171 -0
- data/ext/vcftools/cpp/.svn/text-base/vcf_file_output.cpp.svn-base +3012 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.cpp.svn-base +107 -0
- data/ext/vcftools/cpp/.svn/text-base/vcftools.h.svn-base +25 -0
- data/ext/vcftools/cpp/Makefile +46 -0
- data/ext/vcftools/cpp/dgeev.cpp +146 -0
- data/ext/vcftools/cpp/dgeev.h +43 -0
- data/ext/vcftools/cpp/output_log.cpp +79 -0
- data/ext/vcftools/cpp/output_log.h +34 -0
- data/ext/vcftools/cpp/parameters.cpp +535 -0
- data/ext/vcftools/cpp/parameters.h +154 -0
- data/ext/vcftools/cpp/vcf_entry.cpp +497 -0
- data/ext/vcftools/cpp/vcf_entry.h +190 -0
- data/ext/vcftools/cpp/vcf_entry_getters.cpp +421 -0
- data/ext/vcftools/cpp/vcf_entry_setters.cpp +482 -0
- data/ext/vcftools/cpp/vcf_file.cpp +495 -0
- data/ext/vcftools/cpp/vcf_file.h +184 -0
- data/ext/vcftools/cpp/vcf_file_diff.cpp +1282 -0
- data/ext/vcftools/cpp/vcf_file_filters.cpp +1215 -0
- data/ext/vcftools/cpp/vcf_file_format_convert.cpp +1138 -0
- data/ext/vcftools/cpp/vcf_file_index.cpp +171 -0
- data/ext/vcftools/cpp/vcf_file_output.cpp +3012 -0
- data/ext/vcftools/cpp/vcftools.cpp +107 -0
- data/ext/vcftools/cpp/vcftools.h +25 -0
- data/ext/vcftools/examples/.svn/all-wcprops +185 -0
- data/ext/vcftools/examples/.svn/dir-prop-base +6 -0
- data/ext/vcftools/examples/.svn/entries +1048 -0
- data/ext/vcftools/examples/.svn/prop-base/perl-api-1.pl.svn-base +5 -0
- data/ext/vcftools/examples/.svn/text-base/annotate-test.vcf.svn-base +37 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/annotate.txt.svn-base +7 -0
- data/ext/vcftools/examples/.svn/text-base/annotate2.out.svn-base +52 -0
- data/ext/vcftools/examples/.svn/text-base/annotate3.out.svn-base +23 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-a.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b-3.3.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test-b.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/cmp-test.out.svn-base +53 -0
- data/ext/vcftools/examples/.svn/text-base/concat-a.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/concat-b.vcf.svn-base +13 -0
- data/ext/vcftools/examples/.svn/text-base/concat-c.vcf.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/concat.out.svn-base +39 -0
- data/ext/vcftools/examples/.svn/text-base/invalid-4.0.vcf.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/isec-n2-test.vcf.out.svn-base +19 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-a.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-b.vcf.svn-base +17 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test-c.vcf.svn-base +15 -0
- data/ext/vcftools/examples/.svn/text-base/merge-test.vcf.out.svn-base +31 -0
- data/ext/vcftools/examples/.svn/text-base/perl-api-1.pl.svn-base +46 -0
- data/ext/vcftools/examples/.svn/text-base/query-test.out.svn-base +6 -0
- data/ext/vcftools/examples/.svn/text-base/shuffle-test.vcf.svn-base +12 -0
- data/ext/vcftools/examples/.svn/text-base/subset.SNPs.out.svn-base +10 -0
- data/ext/vcftools/examples/.svn/text-base/subset.indels.out.svn-base +18 -0
- data/ext/vcftools/examples/.svn/text-base/subset.vcf.svn-base +21 -0
- data/ext/vcftools/examples/.svn/text-base/valid-3.3.vcf.svn-base +30 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.stats.svn-base +104 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.0.vcf.svn-base +34 -0
- data/ext/vcftools/examples/.svn/text-base/valid-4.1.vcf.svn-base +37 -0
- data/ext/vcftools/examples/annotate-test.vcf +37 -0
- data/ext/vcftools/examples/annotate.out +23 -0
- data/ext/vcftools/examples/annotate.txt +7 -0
- data/ext/vcftools/examples/annotate2.out +52 -0
- data/ext/vcftools/examples/annotate3.out +23 -0
- data/ext/vcftools/examples/cmp-test-a-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-a.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b-3.3.vcf +12 -0
- data/ext/vcftools/examples/cmp-test-b.vcf +12 -0
- data/ext/vcftools/examples/cmp-test.out +53 -0
- data/ext/vcftools/examples/concat-a.vcf +21 -0
- data/ext/vcftools/examples/concat-b.vcf +13 -0
- data/ext/vcftools/examples/concat-c.vcf +19 -0
- data/ext/vcftools/examples/concat.out +39 -0
- data/ext/vcftools/examples/invalid-4.0.vcf +31 -0
- data/ext/vcftools/examples/isec-n2-test.vcf.out +19 -0
- data/ext/vcftools/examples/merge-test-a.vcf +17 -0
- data/ext/vcftools/examples/merge-test-b.vcf +17 -0
- data/ext/vcftools/examples/merge-test-c.vcf +15 -0
- data/ext/vcftools/examples/merge-test.vcf.out +31 -0
- data/ext/vcftools/examples/perl-api-1.pl +46 -0
- data/ext/vcftools/examples/query-test.out +6 -0
- data/ext/vcftools/examples/shuffle-test.vcf +12 -0
- data/ext/vcftools/examples/subset.SNPs.out +10 -0
- data/ext/vcftools/examples/subset.indels.out +18 -0
- data/ext/vcftools/examples/subset.vcf +21 -0
- data/ext/vcftools/examples/valid-3.3.vcf +30 -0
- data/ext/vcftools/examples/valid-4.0.vcf +34 -0
- data/ext/vcftools/examples/valid-4.0.vcf.stats +104 -0
- data/ext/vcftools/examples/valid-4.1.vcf +37 -0
- data/ext/vcftools/extconf.rb +2 -0
- data/ext/vcftools/perl/.svn/all-wcprops +149 -0
- data/ext/vcftools/perl/.svn/entries +844 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-aa.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-an-ac.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/fill-ref-md5.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/tab-to-vcf.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/test.t.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-annotate.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-compare.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-concat.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-convert.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-fix-newlines.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-isec.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-merge.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-query.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-shuffle-cols.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-sort.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-stats.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-subset.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-to-tab.svn-base +5 -0
- data/ext/vcftools/perl/.svn/prop-base/vcf-validator.svn-base +5 -0
- data/ext/vcftools/perl/.svn/text-base/ChangeLog.svn-base +84 -0
- data/ext/vcftools/perl/.svn/text-base/FaSlice.pm.svn-base +214 -0
- data/ext/vcftools/perl/.svn/text-base/Makefile.svn-base +12 -0
- data/ext/vcftools/perl/.svn/text-base/Vcf.pm.svn-base +2853 -0
- data/ext/vcftools/perl/.svn/text-base/VcfStats.pm.svn-base +681 -0
- data/ext/vcftools/perl/.svn/text-base/fill-aa.svn-base +103 -0
- data/ext/vcftools/perl/.svn/text-base/fill-an-ac.svn-base +56 -0
- data/ext/vcftools/perl/.svn/text-base/fill-ref-md5.svn-base +204 -0
- data/ext/vcftools/perl/.svn/text-base/tab-to-vcf.svn-base +92 -0
- data/ext/vcftools/perl/.svn/text-base/test.t.svn-base +376 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-annotate.svn-base +1099 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-compare.svn-base +1193 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-concat.svn-base +310 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-convert.svn-base +180 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-fix-newlines.svn-base +97 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-isec.svn-base +660 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-merge.svn-base +577 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-query.svn-base +272 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-shuffle-cols.svn-base +89 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-sort.svn-base +79 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-stats.svn-base +160 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-subset.svn-base +206 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-to-tab.svn-base +112 -0
- data/ext/vcftools/perl/.svn/text-base/vcf-validator.svn-base +145 -0
- data/ext/vcftools/perl/ChangeLog +84 -0
- data/ext/vcftools/perl/FaSlice.pm +214 -0
- data/ext/vcftools/perl/Makefile +12 -0
- data/ext/vcftools/perl/Vcf.pm +2853 -0
- data/ext/vcftools/perl/VcfStats.pm +681 -0
- data/ext/vcftools/perl/fill-aa +103 -0
- data/ext/vcftools/perl/fill-an-ac +56 -0
- data/ext/vcftools/perl/fill-ref-md5 +204 -0
- data/ext/vcftools/perl/tab-to-vcf +92 -0
- data/ext/vcftools/perl/test.t +376 -0
- data/ext/vcftools/perl/vcf-annotate +1099 -0
- data/ext/vcftools/perl/vcf-compare +1193 -0
- data/ext/vcftools/perl/vcf-concat +310 -0
- data/ext/vcftools/perl/vcf-convert +180 -0
- data/ext/vcftools/perl/vcf-fix-newlines +97 -0
- data/ext/vcftools/perl/vcf-isec +660 -0
- data/ext/vcftools/perl/vcf-merge +577 -0
- data/ext/vcftools/perl/vcf-query +286 -0
- data/ext/vcftools/perl/vcf-shuffle-cols +89 -0
- data/ext/vcftools/perl/vcf-sort +79 -0
- data/ext/vcftools/perl/vcf-stats +160 -0
- data/ext/vcftools/perl/vcf-subset +206 -0
- data/ext/vcftools/perl/vcf-to-tab +112 -0
- data/ext/vcftools/perl/vcf-validator +145 -0
- data/ext/vcftools/website/.svn/all-wcprops +41 -0
- data/ext/vcftools/website/.svn/entries +238 -0
- data/ext/vcftools/website/.svn/prop-base/VCF-poster.pdf.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.ico.svn-base +5 -0
- data/ext/vcftools/website/.svn/prop-base/favicon.png.svn-base +5 -0
- data/ext/vcftools/website/.svn/text-base/Makefile.svn-base +6 -0
- data/ext/vcftools/website/.svn/text-base/README.svn-base +2 -0
- data/ext/vcftools/website/.svn/text-base/VCF-poster.pdf.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/default.css.svn-base +250 -0
- data/ext/vcftools/website/.svn/text-base/favicon.ico.svn-base +0 -0
- data/ext/vcftools/website/.svn/text-base/favicon.png.svn-base +0 -0
- data/ext/vcftools/website/Makefile +6 -0
- data/ext/vcftools/website/README +2 -0
- data/ext/vcftools/website/VCF-poster.pdf +0 -0
- data/ext/vcftools/website/default.css +250 -0
- data/ext/vcftools/website/favicon.ico +0 -0
- data/ext/vcftools/website/favicon.png +0 -0
- data/ext/vcftools/website/img/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/img/.svn/entries +300 -0
- data/ext/vcftools/website/img/.svn/prop-base/bg.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcode.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgcontainer.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/bgul.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/header.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/li.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/quote.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/prop-base/search.gif.svn-base +5 -0
- data/ext/vcftools/website/img/.svn/text-base/bg.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcode.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgcontainer.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/bgul.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/header.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/li.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/quote.gif.svn-base +0 -0
- data/ext/vcftools/website/img/.svn/text-base/search.gif.svn-base +0 -0
- data/ext/vcftools/website/img/bg.gif +0 -0
- data/ext/vcftools/website/img/bgcode.gif +0 -0
- data/ext/vcftools/website/img/bgcontainer.gif +0 -0
- data/ext/vcftools/website/img/bgul.gif +0 -0
- data/ext/vcftools/website/img/header.gif +0 -0
- data/ext/vcftools/website/img/li.gif +0 -0
- data/ext/vcftools/website/img/quote.gif +0 -0
- data/ext/vcftools/website/img/search.gif +0 -0
- data/ext/vcftools/website/src/.svn/all-wcprops +53 -0
- data/ext/vcftools/website/src/.svn/entries +300 -0
- data/ext/vcftools/website/src/.svn/text-base/docs.inc.svn-base +202 -0
- data/ext/vcftools/website/src/.svn/text-base/index.inc.svn-base +52 -0
- data/ext/vcftools/website/src/.svn/text-base/index.php.svn-base +80 -0
- data/ext/vcftools/website/src/.svn/text-base/license.inc.svn-base +27 -0
- data/ext/vcftools/website/src/.svn/text-base/links.inc.svn-base +13 -0
- data/ext/vcftools/website/src/.svn/text-base/options.inc.svn-base +654 -0
- data/ext/vcftools/website/src/.svn/text-base/perl_module.inc.svn-base +249 -0
- data/ext/vcftools/website/src/.svn/text-base/specs.inc.svn-base +18 -0
- data/ext/vcftools/website/src/docs.inc +202 -0
- data/ext/vcftools/website/src/index.inc +52 -0
- data/ext/vcftools/website/src/index.php +80 -0
- data/ext/vcftools/website/src/license.inc +27 -0
- data/ext/vcftools/website/src/links.inc +13 -0
- data/ext/vcftools/website/src/options.inc +654 -0
- data/ext/vcftools/website/src/perl_module.inc +249 -0
- data/ext/vcftools/website/src/specs.inc +18 -0
- data/lib/config.ru +9 -0
- data/lib/ngs_server/add.rb +9 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +55 -3
- data/ngs_server.gemspec +5 -2
- metadata +296 -6
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
#!/usr/bin/env perl
|
|
2
|
+
#
|
|
3
|
+
# Author: petr.danecek@sanger
|
|
4
|
+
#
|
|
5
|
+
|
|
6
|
+
use strict;
|
|
7
|
+
use warnings;
|
|
8
|
+
use Carp;
|
|
9
|
+
use Vcf;
|
|
10
|
+
|
|
11
|
+
# Program entry point: parse the command line and hand the resulting
# options straight to the merge routine, which prints the merged VCF.
merge_vcf_files(parse_params());

exit 0;
|
|
15
|
+
|
|
16
|
+
#--------------------------------
|
|
17
|
+
|
|
18
|
+
# Terminate the program with an error message or with the usage text.
#
# Arguments:
#   @msg .. optional message fragments.
#
# When at least one fragment is given, the fragments are concatenated and
# raised with croak(). When called without arguments, the usage text is
# raised with die(); this is how the help options are served.
# The sub never returns.
sub error
{
    my (@msg) = @_;
    if ( scalar @msg ) { croak join('',@msg); }

    # The usage line names the script as it is shipped (vcf-merge).
    die
        "About: Merge the bgzipped and tabix indexed VCF files. (E.g. bgzip file.vcf; tabix -p vcf file.vcf.gz)\n",
        "Usage: vcf-merge [OPTIONS] file1.vcf file2.vcf.gz ... > out.vcf\n",
        "Options:\n",
        " -c, --chromosomes <list|file> Same as -r, left for backward compatibility. Please do not use as it will be dropped in the future.\n",
        " -d, --remove-duplicates If there should be two consecutive rows with the same chr:pos, print only the first one.\n",
        " -H, --vcf-header <file> Use the VCF header\n",
        " -h, -?, --help This help message.\n",
        " -r, --regions <list|file> Do only the given regions (comma-separated list or one region per line in a file).\n",
        " -s, --silent Try to be a bit more silent, no warnings about duplicate lines.\n",
        "\n";
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Parse @ARGV into an options hash. @ARGV is consumed in the process.
#
# Returns a hash reference with the keys:
#   args         .. [$0, original @ARGV], kept for the output header
#   rm_dups      .. set by -d, --remove-duplicates
#   silent_dups  .. set by -s, --silent
#   vcf_header   .. value of -H, --vcf-header
#   regions_list .. value of -r, --regions (or the legacy -c, --chromosomes)
#   files        .. array of the existing files named on the command line
#
# Calls error() for help requests, unknown arguments, an option with a
# missing value, or when no input file was given.
sub parse_params
{
    my $opts = { args=>[$0, @ARGV] };

    # Options which consume the following argument, and the key they fill.
    my %takes_value =
    (
        '-H' => 'vcf_header',   '--vcf-header'  => 'vcf_header',
        '-c' => 'regions_list', '--chromosomes' => 'regions_list',
        '-r' => 'regions_list', '--regions'     => 'regions_list',
    );

    # Test for definedness rather than truth: an argument such as "0"
    # (e.g. a file named 0) must not silently end the parsing.
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-d' || $arg eq '--remove-duplicates' ) { $$opts{rm_dups}=1; next; }
        if ( $arg eq '-s' || $arg eq '--silent' ) { $$opts{silent_dups}=1; next; }
        if ( exists($takes_value{$arg}) )
        {
            # Without this check a trailing option would store undef, which
            # for the regions later results in an empty merge with no error.
            if ( !@ARGV ) { error("Missing value for the option \"$arg\". Run -? for help.\n"); }
            $$opts{$takes_value{$arg}} = shift(@ARGV);
            next;
        }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        if ( -e $arg ) { push @{$$opts{files}},$arg; next; }
        error("Unknown parameter or non-existent file \"$arg\". Run -? for help.\n");
    }
    if ( !exists($$opts{files}) ) { error() }
    return $opts;
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Returns the common prefix of the files.
#
# Arguments:
#   $files .. array reference of file names (paths)
#
# Each path is split on runs of '/' and the leading components shared by
# all paths are collected. The return value is a regular expression
# fragment: the shared components joined by '/+'. Every component is
# passed through quotemeta so that characters such as '.' or '+' in a
# directory name match only themselves when the fragment is interpolated
# into a pattern. An empty string is returned when nothing is shared.
sub common_prefix
{
    my ($files) = @_;
    my @paths;
    my $len = -1;
    for my $file (@$files)
    {
        my @path = split(m{/+},$file);
        # Only as many components as the shortest path has can be shared.
        if ( $len<0 || $len>scalar @path ) { $len=scalar @path; }
        push @paths, \@path;
    }
    my @common;
    for (my $i=0; $i<$len; $i++)
    {
        my $identical=1;
        for (my $ifile=1; $ifile<scalar @paths; $ifile++)
        {
            if ( $paths[$ifile]->[$i] ne $paths[0]->[$i] ) { $identical=0; last; }
        }
        if ( !$identical ) { last; }
        push @common, $paths[0]->[$i];
    }
    return join('/+', map { quotemeta($_) } @common);
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Build the list of regions requested by the user.
#
# Arguments:
#   $opts .. options hash; only the key regions_list is read
#
# When regions_list names an existing file, the file is read with one
# region per line. Otherwise the value is treated as a comma-separated
# list. Leading and trailing whitespace (including a trailing carriage
# return) is removed from lines read from a file, and empty entries are
# dropped in both cases, so that blank lines or doubled commas do not
# turn into empty regions.
#
# Returns the list of regions; the list is empty when regions_list is
# not present in $opts. Calls error() if the file cannot be opened.
sub read_region_list
{
    my ($opts) = @_;

    my @regions = ();
    if ( exists($$opts{regions_list}) )
    {
        if ( -e $$opts{regions_list} )
        {
            open(my $rgs,'<',$$opts{regions_list}) or error("$$opts{regions_list}: $!");
            while (my $line=<$rgs>)
            {
                chomp($line);
                $line =~ s/^\s+//;
                $line =~ s/\s+$//;
                if ( $line eq '' ) { next; }
                push @regions, $line;
            }
            close($rgs);
        }
        else
        {
            @regions = grep { $_ ne '' } split(/,/,$$opts{regions_list});
        }
    }
    return (@regions);
}
|
|
106
|
+
|
|
107
|
+
# Warn once when a pre-4.1 VCF header declares PL, GL, AC or AF with a
# Number other than '.'.
#
# Arguments:
#   $vcf .. the VCF object; its keys version and header are read
#
# Nothing is done when the version is 4.1 or higher, or when the
# environment variable DONT_FIX_VCF40_AG_TAGS exists (whatever its value).
# Otherwise, if any of the tags is found, the environment variable is set
# to 1 - so the warning is not printed again - and the warning goes to STDERR.
sub check_AGtags_definition
{
    my ($vcf) = @_;

    return if $$vcf{version} >= 4.1;

    # Whatever the variable is set to, the user has taken over the
    # responsibility for the merging strategy.
    return if exists($ENV{DONT_FIX_VCF40_AG_TAGS});

    my @found;
    for my $tag (qw(PL GL AC AF))
    {
        next unless exists($$vcf{header}{INFO}{$tag});
        next if $$vcf{header}{INFO}{$tag}{Number} eq '.';
        push @found, $tag;
    }
    return unless @found;

    $ENV{DONT_FIX_VCF40_AG_TAGS} = 1;
    my $tags = join(',',@found);
    print STDERR
        "Warning: The $tags tag(s) will not be merged correctly for multiallelic sites.\n",
        " To be handled correctly, please redefine with Number=. or set the environment\n",
        " variable DONT_FIX_VCF40_AG_TAGS=0.\n";
}
|
|
130
|
+
|
|
131
|
+
# Read the headers of all input files and prepare what the merge needs.
#
# Arguments:
#   $opts    .. options hash; reads files, has_col_names and regions_list
#   $vcf_out .. the output VCF object; its columns are read only when
#               $$opts{has_col_names} is set
#
# Results are stored in $opts:
#   vcfs    .. one Vcf object per input file, each carrying
#              qual_weight (its share of the sample columns, used to
#              weight QUAL) and __col_names (the output names of its
#              sample columns)
#   cols    .. output names of all sample columns, in input order
#   regions .. regions to process: the user's list if one was given,
#              otherwise the chromosomes reported by the input files in
#              order of first appearance
#
# Calls error() when a file has no column header or when the number of
# columns in the user-supplied header does not match the input files.
sub init_cols
{
    my ($opts,$vcf_out) = @_;

    my $prefix;
    my @regions = read_region_list($opts);
    my @vcfs;
    my @cols;
    my %has_chrom;
    my %col_names;
    my $icol = 9;           # index of the first sample column
    my $ncols_total = 0;

    # common_prefix returns a pattern fragment, it is used in s{}{} below.
    if ( !$$opts{has_col_names} ) { $prefix = common_prefix($$opts{files}); }

    # Go through all files and read header, obtain list of chromosomes. The file names will be used for columns, unless
    # they were read from the header.
    for my $file (@{$$opts{files}})
    {
        my $vcf = Vcf->new(file=>$file);
        $vcf->parse_header();

        # This must come before the first dereference of the columns,
        # otherwise the dereference fails first and this message is never seen.
        if ( !exists($$vcf{columns}) ) { error("No header present? $file\n"); }

        check_AGtags_definition($vcf);
        $vcf->close();
        push @vcfs, $vcf;

        # Precompute the weighting factor for the QUAL column. Files
        # without sample columns count as one column.
        my $ncols = scalar @{$$vcf{columns}} - 9;
        if ( $ncols<=0 ) { $ncols = 1; }
        $$vcf{qual_weight} = 1.0*$ncols;
        $ncols_total += $ncols;

        # Update the list of known chromosomes
        if ( !exists($$opts{regions_list}) )
        {
            my $chrms = $vcf->get_chromosomes();
            for my $chr (@$chrms)
            {
                if ( exists($has_chrom{$chr}) ) { next; }
                $has_chrom{$chr} = 1;
                push @regions, $chr;
            }
        }

        my $col_prefix = '';
        if ( !$$opts{has_col_names} )
        {
            # Make the column names nice - strip common prefix and the suffix .vcf.gz
            $col_prefix = $file;
            $col_prefix =~ s{^/*$prefix/*}{};
            $col_prefix =~ s/\.gz$//i;
            $col_prefix =~ s/\.vcf$//i;
            $col_prefix .= '_';
        }

        # Create good names for the columns in the merged vcf file
        my @vcf_cols = @{$$vcf{columns}};
        $$vcf{__col_names} = [];
        for my $col (@vcf_cols[9..$#vcf_cols])
        {
            my $col_name = $col;
            if ( $$opts{has_col_names} )
            {
                # The names come from the user-supplied header, in order.
                if ( $icol >= @{$$vcf_out{columns}} ) { error("Fewer columns in the header than in the VCF files total.\n"); }
                $col_name = $$vcf_out{columns}[$icol];
                $icol++;

                if ( exists($col_names{$col_name}) ) { error("The column names not unique in the header: $col_name\n"); }
            }
            else
            {
                # Keep the original name unless it clashes, then prepend the file-derived prefix.
                if ( exists($col_names{$col_name}) ) { $col_name = $col_prefix.$col; }
                if ( exists($col_names{$col_name}) ) { warn("FIXME: the column name [$col_name] not unique.\n"); }
            }
            warn("Using column name '$col_name' for $file:$col\n");
            $col_names{$col_name} = 1;

            push @cols, $col_name;
            push @{$$vcf{__col_names}}, $col_name;
        }
    }

    if ( $$opts{has_col_names} && $icol!=@{$$vcf_out{columns}} ) { error("More columns in the header than in the VCF files total.\n"); }

    # QUAL weighting: turn the column counts into fractions of the total
    for my $vcf (@vcfs)
    {
        $$vcf{qual_weight} /= $ncols_total;
    }

    $$opts{vcfs} = \@vcfs;
    $$opts{cols} = \@cols;
    $$opts{regions} = \@regions;
}
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# Merge the input VCF files and print the result to STDOUT.
#
# Arguments:
#   $opts .. options hash from parse_params(); reads vcf_header and args,
#            and the keys regions, cols and vcfs filled in by init_cols()
#
# The output header is printed first. Then, one region at a time, all
# input files are read side by side; for every position present in at
# least one file a single merged line is printed. Files without a record
# at the position contribute the default genotype for their samples.
#
# The ALT (for files without samples), FILTER and FORMAT lists are built
# from hash keys; the keys are sorted so that the output is the same on
# every run.
sub merge_vcf_files
{
    my ($opts) = @_;

    # Create output VCF
    my $vcf_out;
    if ( $$opts{vcf_header} )
    {
        $vcf_out = Vcf->new(file=>$$opts{vcf_header});
        $vcf_out->parse_header();
        if ( $$vcf_out{columns} && @{$$vcf_out{columns}} ) { $$opts{has_col_names}=1; }
    }
    else
    {
        $vcf_out = Vcf->new();
    }

    init_cols($opts,$vcf_out);
    my @regions = @{$$opts{regions}};
    my @cols = @{$$opts{cols}};
    my @vcfs = @{$$opts{vcfs}};

    # Get the header of the output VCF ready
    $vcf_out->add_columns(@cols);
    if ( !$$vcf_out{has_header} )
    {
        for my $vcf (@vcfs)
        {
            # To get the missing fields filled by the default values
            for my $hline (@{$$vcf{header_lines}})
            {
                if ( $$hline{key} eq 'fileformat' ) { next; }
                $vcf_out->add_header_line($hline,silent=>1);
            }
        }
    }

    # List source files
    my $source;
    for (my $i=0; $i<@vcfs; $i++)
    {
        if ( $i ) { $source .= ','; }
        $source .= "$i:$vcfs[$i]{file}";
    }
    $vcf_out->add_header_line({key=>'source',value=>join(' ',@{$$opts{args}})},append=>'timestamp');
    $vcf_out->add_header_line({key=>'sourceFiles',value=>$source},append=>'timestamp');
    $vcf_out->add_header_line({key=>'INFO',ID=>'SF',Number=>-1,Type=>'String',Description=>'Source File (index to sourceFiles, f when filtered)'});

    my $have_samples = @{$$vcf_out{columns}}>9 ? 1 : 0;

    $vcf_out->recalc_ac_an($have_samples ? 2 : 0);
    $vcf_out->add_header_line({key=>'INFO',ID=>'AC',Number=>-1,Type=>'Integer',Description=>'Allele count in genotypes'});
    $vcf_out->add_header_line({key=>'INFO',ID=>'AN',Number=>1,Type=>'Integer',Description=>'Total number of alleles in called genotypes'});
    print $vcf_out->format_header();

    # Go through all VCF files simultaneously and output each line, one region at a time.
    for my $region (@regions)
    {
        # Open files and read the first record of each
        for my $vcf (@vcfs)
        {
            delete($$vcf{last_line});
            $vcf->open(region=>$region,parse_header=>1);
            advance_position($vcf,$opts);
        }

        while (1)
        {
            # The lowest position any of the files is waiting at; undef when all are exhausted
            my $pos = get_min_position(\@vcfs);
            if ( !defined $pos ) { last; }

            my %out;
            $out{POS} = $pos;
            $out{ID} = '.';
            $out{ALT} = [];
            $out{FORMAT} = [];
            my %format;
            my %info;
            my @src_files;
            my %filters;
            my (@quals,@qual_weights,$qual_weights_sum,%ac,$an);

            my %ref_alt_map = ();
            # Find out the REFs and ALTs: in VCFv4.0, the REFs can differ and ALTs must be converted
            for my $vcf (@vcfs)
            {
                my $line = $$vcf{last_line};
                if ( !$line or $pos ne $$line{POS} ) { next; }
                if ( !exists($out{CHROM}) ) { $out{CHROM} = $$line{CHROM}; }
                my $ref = $$line{REF};
                for my $alt (@{$$line{ALT}}) { $ref_alt_map{$ref}{$alt}=$alt; }
            }
            # Do the REF,ALT conversion only when necessary
            my $new_ref;
            if ( scalar keys %ref_alt_map > 1 )
            {
                $new_ref = $vcf_out->fill_ref_alt_mapping(\%ref_alt_map);
            }
            if ( !$have_samples )
            {
                # Do not lose information from the ALT column when samples are not present
                my %alts;
                for my $vcf (@vcfs)
                {
                    my $line = $$vcf{last_line};
                    if ( !$line or $pos ne $$line{POS} ) { next; }
                    my $ref = $$line{REF};
                    for my $alt (@{$$line{ALT}}) { $alts{$ref_alt_map{$ref}{$alt}}=1; }
                }
                # Sorted, so that ALT and the AC derived from it below come out in a stable order
                $out{ALT} = [ sort keys %alts ];
            }
            for (my $ivcf=0; $ivcf<@vcfs; $ivcf++)
            {
                my $vcf = $vcfs[$ivcf];
                my $line = $$vcf{last_line};

                # If this file does not have a record for this position, then for all its columns output undef gtype
                if ( !$line or $pos ne $$line{POS} )
                {
                    for (my $i=0; $i<@{$$vcf{__col_names}}; $i++)
                    {
                        my $name = $$vcf{__col_names}->[$i];
                        $out{gtypes}{$name}{GT} = $$vcf_out{defaults}{GT};
                    }
                    next;
                }

                # Check if the site has been filtered
                if ( scalar @{$$line{FILTER}}>1 or ($$line{FILTER}[0] ne $$vcf{filter_passed} && $$line{FILTER}[0] ne $$vcf{defaults}{default}) )
                {
                    push @src_files,$ivcf.'f';
                }
                else
                {
                    push @src_files,$ivcf;
                }

                # Collect information for the FILTER field
                for my $flt (@{$$line{FILTER}})
                {
                    if ( $flt eq $$vcf{filter_passed} )
                    {
                        $filters{$$vcf_out{filter_passed}} = 1;
                    }
                    elsif ( $flt ne $$vcf{defaults}{default} )
                    {
                        $filters{$flt} = 1;
                    }
                }

                # Collect information for the QUAL field
                if ( $$line{QUAL} ne $$vcf{defaults}{QUAL} && $$line{QUAL} ne $$vcf{defaults}{default} && $$line{QUAL}>0 )
                {
                    push @quals,$$line{QUAL};
                    push @qual_weights,$$vcf{qual_weight};
                    $qual_weights_sum += $$vcf{qual_weight};
                }

                # The first ID encountered wins
                if ( $$line{ID} ne '.' && $out{ID} eq '.' ) { $out{ID}=$$line{ID}; }

                # Remember the FORMAT fields
                for my $field (@{$$line{FORMAT}}) { $format{$field} = 1; }

                # VCF without genotypes: calculate AC,AN if present
                if ( !$have_samples )
                {
                    if ( exists($$line{INFO}{AN}) ) { $an += $$line{INFO}{AN}; }
                    if ( exists($$line{INFO}{AC}) )
                    {
                        my (@acs) = split(/,/,$$line{INFO}{AC});
                        for (my $i=0; $i<@acs; $i++)
                        {
                            my $alt = $ref_alt_map{$$line{REF}}{$$line{ALT}[$i]};
                            $ac{$alt} += $acs[$i];
                        }
                    }
                }

                # Join the INFO field
                for my $inf (keys %{$$line{INFO}})
                {
                    # When conflicting INFO fields are present, use the first one
                    if ( exists($info{$inf}) ) { next; }
                    $info{$inf} = $$line{INFO}{$inf};
                }

                my $ref = $$line{REF};

                # The ALT column may change after the merge, take care of ALT dependent tags such as GL.
                if ( $have_samples )
                {
                    if ( defined $new_ref )
                    {
                        $vcf->parse_AGtags($line,\%ref_alt_map,$$line{REF});
                    }
                    else
                    {
                        $vcf->parse_AGtags($line);
                    }
                }

                # Now fill in the genotype information for each column
                for (my $i=0; $i<@{$$vcf{__col_names}}; $i++)
                {
                    my $ori_name = $$vcf{columns}->[$i+9];
                    my $out_name = $$vcf{__col_names}->[$i];

                    $out{gtypes}{$out_name} = $$line{gtypes}{$ori_name};

                    # This is to convert 0/1 to G/C
                    my ($alleles,$seps,$is_phased,$is_empty) = $vcf->parse_haplotype($line,$ori_name);
                    if ( defined $new_ref )
                    {
                        # Translate the alleles to the merged REF; unknown ones become missing
                        my @als;
                        for my $al (@$alleles)
                        {
                            push @als, exists($ref_alt_map{$ref}{$al}) ? $ref_alt_map{$ref}{$al} : '.';
                        }
                        $out{gtypes}{$out_name}{GT} = $vcf->format_haplotype(\@als,$seps);
                    }
                    else
                    {
                        $out{gtypes}{$out_name}{GT} = $vcf->format_haplotype($alleles,$seps);
                    }
                }
                $out{REF} = defined $new_ref ? $new_ref : $ref;

                # This file's record has been consumed, read its next one
                advance_position($vcf,$opts);
            }

            $out{INFO} = { %info };
            $out{INFO}{SF} = join(',',@src_files);

            # Output the QUAL information: the average weighted by the number of sample columns
            my $qual;
            for (my $i=0; $i<@quals; $i++)
            {
                $qual += $quals[$i] * $qual_weights[$i] * (1.0 / $qual_weights_sum);
            }
            $out{QUAL} = defined $qual ? sprintf("%.2f",$qual) : $$vcf_out{defaults}{QUAL};

            # Output the FILTER information: remove PASS or missing value if some other information
            # is present.
            delete($filters{$$vcf_out{defaults}{default}});
            if ( exists($filters{$$vcf_out{filter_passed}}) && scalar keys %filters > 1 )
            {
                delete($filters{$$vcf_out{filter_passed}});
            }
            $out{FILTER} = [ sort keys %filters ];
            if ( !@{$out{FILTER}} ) { push @{$out{FILTER}},$$vcf_out{defaults}{default}; }

            # The GT field must come as first
            delete($format{GT});
            $out{FORMAT} = ['GT'];
            for my $key (sort keys %format) { push @{$out{FORMAT}},$key; }

            if ( $have_samples )
            {
                $vcf_out->format_genotype_strings(\%out);
            }
            else
            {
                if ( defined $an ) { $out{INFO}{AN}=$an; }
                if ( scalar keys %ac )
                {
                    my @acs;
                    for my $alt (@{$out{ALT}})
                    {
                        # Some of the files may not have AC, the AC count can be undefined in such a case.
                        push @acs, exists($ac{$alt}) ? $ac{$alt} : 0;
                    }
                    $out{INFO}{AC} = join(',',@acs);
                }
            }
            print $vcf_out->format_line(\%out);
        }
    }
}
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
# Read the next record of a VCF file into $$vcf{last_line}.
#
# Arguments:
#   $vcf  .. the VCF object; must provide next_data_hash()
#   $opts .. options hash (may be omitted); reads silent_dups and rm_dups
#
# Once the file runs out of records, the false value returned by
# next_data_hash() is stored in last_line and all later calls return
# immediately. A record at the same position as the previous one is
# reported on STDERR unless silent_dups is set, and is skipped when
# rm_dups is set. A record at a lower position than the previous one is
# a fatal error.
sub advance_position
{
    my ($vcf,$opts) = @_;

    # The key exists but holds a false value: the end of the file was reached before
    return if exists($$vcf{last_line}) && !$$vcf{last_line};

    my $line;
    until ( $line )
    {
        $line = $vcf->next_data_hash();
        unless ( $line )
        {
            $$vcf{last_line} = $line;
            return;
        }

        # Nothing to compare with for the first record of a region
        last unless $$vcf{last_line};

        if ( $$vcf{last_line}{POS} eq $$line{POS} )
        {
            unless ( $$opts{silent_dups} )
            {
                print STDERR "The position appeared twice: $$vcf{file} .. $$line{CHROM}:$$line{POS}\n";
            }

            # The loop exists only for this case: dropping the duplicate makes it read on
            undef($line) if $$opts{rm_dups};
        }
        elsif ( $$vcf{last_line}{POS} > $$line{POS})
        {
            error("Wrong order: $$vcf{file} .. $$line{CHROM}:$$line{POS} comes after $$vcf{last_line}{CHROM}:$$vcf{last_line}{POS}\n");
        }
    }

    $$vcf{last_line} = $line;
    return;
}
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
# Find the lowest position the input files are currently waiting at.
#
# Arguments:
#   $vcfs .. array reference of VCF objects; only $$vcf{last_line}{POS}
#            of each is read
#
# Files without a current record (false last_line) are ignored. Positions
# are compared numerically and on a tie the earlier file is kept.
# Returns the POS value, or undef when no file has a record.
sub get_min_position
{
    my ($vcfs) = @_;

    my $min;
    for my $vcf (@$vcfs)
    {
        my $line = $$vcf{last_line};
        next unless $line;

        # Take the very first candidate, afterwards only strictly lower positions
        if ( !defined($min) || $min>$$line{POS} ) { $min = $$line{POS}; }
    }
    return $min;
}
|
|
576
|
+
|
|
577
|
+
|