bio-vcf 0.8.0 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/README.md +75 -8
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/bio-vcf +24 -5
- data/bio-vcf.gemspec +9 -4
- data/features/multisample.feature +13 -0
- data/lib/bio-vcf.rb +1 -0
- data/lib/bio-vcf/bedfilter.rb +43 -0
- data/lib/bio-vcf/template.rb +75 -0
- data/lib/bio-vcf/vcfgenotypefield.rb +7 -2
- data/lib/bio-vcf/vcfheader.rb +9 -1
- data/lib/bio-vcf/vcfsample.rb +1 -1
- data/template/vcf2json.erb +14 -2
- data/template/vcf2rdf_header.erb +24 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +1 -0
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90a933c33c683c1f0886a202fa5a9ee5ed2ad8ff
|
4
|
+
data.tar.gz: 1f769a89fcb3e3b44e22864ddf729ea3ac040260
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 308d93ca1bcb142fa9cd4be63d929edb0ad92b7ac0da0d4f2d51f4b363de6ef0b87ac3d2688af8c36257317e203f69355fdc3348c6a330adb7d997af7ab6714d
|
7
|
+
data.tar.gz: d7d328a13d90b209a6068f9d3f09d56e8a00262ccf7fba9d9d67ffe4935993b2ccbb2ddc2f9a4831dd8928258a9c5d468f050d9edfbb95737616f4bfaf184bb0
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -16,7 +16,7 @@ RDF and JSON. Why would you use bio-vcf over other parsers?
|
|
16
16
|
7. Bio-vcf allows for genotype processing
|
17
17
|
8. Bio-vcf has support for set analysis
|
18
18
|
9. Bio-vcf has sane error handling
|
19
|
-
10. Bio-vcf can output tabular data, HTML, LaTeX, RDF, JSON and JSON-LD using templates
|
19
|
+
10. Bio-vcf can output tabular data, HTML, LaTeX, RDF, JSON, JSON-LD and even other VCFs using (ERB) templates
|
20
20
|
|
21
21
|
Bio-vcf has better performance than other tools
|
22
22
|
because of lazy parsing, multi-threading, and useful combinations of
|
@@ -52,7 +52,7 @@ a 16 core machine takes
|
|
52
52
|
sys 0m5.039s
|
53
53
|
```
|
54
54
|
|
55
|
-
which shows
|
55
|
+
which shows decent core utilisation (10x). We are running
|
56
56
|
gzip compressed VCF files of 30+ Gb with similar performance gains.
|
57
57
|
|
58
58
|
Use zcat to
|
@@ -633,6 +633,12 @@ To remove/select 3 samples:
|
|
633
633
|
bio-vcf --samples 0,1,3 < mytest.vcf
|
634
634
|
```
|
635
635
|
|
636
|
+
Filter on a BED file and annotate the gene name in the resulting VCF
|
637
|
+
|
638
|
+
```sh
|
639
|
+
bio-vcf -v --bed test/data/input/test.bed --rewrite 'rec.info["gene"]=bed[3]' < test/data/input/somaticsniper.vcf
|
640
|
+
```
|
641
|
+
|
636
642
|
## RDF output
|
637
643
|
|
638
644
|
You can use --rdf for turtle RDF output from simple one-liners, note the use of --id and
|
@@ -691,10 +697,11 @@ template could be
|
|
691
697
|
};
|
692
698
|
```
|
693
699
|
|
694
|
-
To get JSON, run with something like
|
700
|
+
To get JSON, run with something like (combining
|
701
|
+
with a filter)
|
695
702
|
|
696
703
|
```sh
|
697
|
-
bio-vcf --template template/vcf2json.erb --filter 'r.info.
|
704
|
+
bio-vcf --template template/vcf2json.erb --filter 'r.info.sao==1' < dbsnp.vcf
|
698
705
|
```
|
699
706
|
|
700
707
|
which renders
|
@@ -713,7 +720,7 @@ which renders
|
|
713
720
|
Likewise for RDF output:
|
714
721
|
|
715
722
|
```sh
|
716
|
-
bio-vcf --template template/vcf2rdf.erb --filter 'r.info.
|
723
|
+
bio-vcf --template template/vcf2rdf.erb --filter 'r.info.sao==1' < dbsnp.vcf
|
717
724
|
```
|
718
725
|
|
719
726
|
renders the ERB template
|
@@ -728,7 +735,7 @@ renders the ERB template
|
|
728
735
|
seq:pos <%= rec.pos %> ,
|
729
736
|
seq:ref "<%= rec.ref %>" ,
|
730
737
|
seq:alt "<%= rec.alt[0] %>" ,
|
731
|
-
seq:maf <%= rec.info.maf[0] %> ,
|
738
|
+
seq:maf <%= (rec.info.maf[0]*100).round %> ,
|
732
739
|
seq:dp <%= rec.info.dp %> ,
|
733
740
|
db:vcf true .
|
734
741
|
```
|
@@ -742,12 +749,72 @@ into
|
|
742
749
|
seq:pos 33703698 ,
|
743
750
|
seq:ref "C" ,
|
744
751
|
seq:alt "A" ,
|
745
|
-
seq:maf
|
752
|
+
seq:maf 16 ,
|
746
753
|
seq:dp 92 ,
|
747
754
|
db:vcf true .
|
748
755
|
```
|
749
756
|
|
750
|
-
Be creative! You can write templates for csv, HTML, XML, LaTeX, RDF, JSON, YAML, JSON-LD, etc. etc.!
|
757
|
+
Note the calculated field value for maf. Be creative! You can write templates for csv, HTML, XML, LaTeX, RDF, JSON, YAML, JSON-LD, etc. etc.!
|
758
|
+
|
759
|
+
### Metadata
|
760
|
+
|
761
|
+
Templates can also print data as a header of the JSON/YAML/RDF output. For this
|
762
|
+
use the '=' prefix with HEADER, BODY, FOOTER keywords in the template. A small example
|
763
|
+
can be
|
764
|
+
|
765
|
+
```Javascript
|
766
|
+
=HEADER
|
767
|
+
<% require 'json' %>
|
768
|
+
[
|
769
|
+
{ "HEADER": {
|
770
|
+
"options": <%= options.to_h.to_json %>,
|
771
|
+
"files": <%= ARGV %>,
|
772
|
+
"version": "<%= BIOVCF_VERSION %>"
|
773
|
+
},
|
774
|
+
|
775
|
+
=BODY
|
776
|
+
|
777
|
+
{
|
778
|
+
"seq:chr": "<%= rec.chrom %>" ,
|
779
|
+
"seq:pos": <%= rec.pos %> ,
|
780
|
+
"seq:ref": "<%= rec.ref %>" ,
|
781
|
+
"seq:alt": "<%= rec.alt[0] %>" ,
|
782
|
+
"dp": <%= rec.info.dp %> ,
|
783
|
+
},
|
784
|
+
=FOOTER
|
785
|
+
]
|
786
|
+
```
|
787
|
+
|
788
|
+
may generate something like
|
789
|
+
|
790
|
+
```Javascript
|
791
|
+
[
|
792
|
+
{ "HEADER": {
|
793
|
+
"options": {"show_help":false,"source":"https://github.com/CuppenResearch/bioruby-vcf","version":"0.8.1-pre3 (Pjotr Prins)","date":"2014-11-26 12:51:36 +0000","thread_lines":40000,"template":"template/vcf2json.erb","skip_header":true},
|
794
|
+
"files": [],
|
795
|
+
"version": "0.8.1-pre3"
|
796
|
+
},
|
797
|
+
{
|
798
|
+
"seq:chr": "1" ,
|
799
|
+
"seq:pos": 883516 ,
|
800
|
+
"seq:ref": "G" ,
|
801
|
+
"seq:alt": "A" ,
|
802
|
+
"dp": ,
|
803
|
+
},
|
804
|
+
{
|
805
|
+
"seq:chr": "1" ,
|
806
|
+
"seq:pos": 891344 ,
|
807
|
+
"seq:ref": "G" ,
|
808
|
+
"seq:alt": "A" ,
|
809
|
+
"dp": ,
|
810
|
+
},
|
811
|
+
]
|
812
|
+
```
|
813
|
+
|
814
|
+
Note that the template is not smart enough to remove the final comma
|
815
|
+
from the last BODY element. To make the output valid JSON, that comma needs to be
|
816
|
+
removed. A future version may add a parameter to the BODY element or a
|
817
|
+
global rewrite function for this purpose. YAML and RDF have no such issue.
|
751
818
|
|
752
819
|
## Statistics
|
753
820
|
|
data/Rakefile
CHANGED
@@ -21,6 +21,7 @@ Jeweler::Tasks.new do |gem|
|
|
21
21
|
gem.description = %Q{Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting}
|
22
22
|
gem.email = "pjotr.public01@thebird.nl"
|
23
23
|
gem.authors = ["Pjotr Prins"]
|
24
|
+
gem.required_ruby_version = '>=2.0.0'
|
24
25
|
# dependencies defined in Gemfile
|
25
26
|
end
|
26
27
|
Jeweler::RubygemsDotOrgTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.
|
1
|
+
0.8.1
|
data/bin/bio-vcf
CHANGED
@@ -58,6 +58,10 @@ opts = OptionParser.new do |o|
|
|
58
58
|
options[:efilter_samples] = l
|
59
59
|
end
|
60
60
|
|
61
|
+
o.on("--bed bedfile", String, "Filter on BED elements") do |bed|
|
62
|
+
options[:bed] = bed
|
63
|
+
end
|
64
|
+
|
61
65
|
o.on('-e cmd', '--eval cmd',String, 'Evaluate command on each record') do |cmd|
|
62
66
|
options[:eval] = cmd
|
63
67
|
end
|
@@ -145,7 +149,9 @@ end
|
|
145
149
|
|
146
150
|
opts.parse!(ARGV)
|
147
151
|
|
148
|
-
|
152
|
+
# Expose the version as a constant (the templates reference BIOVCF_VERSION).
BIOVCF_VERSION=version
# Always define the banner constant and only make the *printing* conditional.
# A constant assigned under a trailing `if` is left undefined when the
# condition is false, so the unconditional print raised NameError whenever
# options[:quiet] was set.
BIOVCF_BANNER = "vcf #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2014\n"
$stderr.print BIOVCF_BANNER if !options[:quiet]
|
149
155
|
|
150
156
|
if options[:show_help]
|
151
157
|
print opts
|
@@ -161,9 +167,11 @@ $stderr.print "Options: ",options,"\n" if !options[:quiet]
|
|
161
167
|
|
162
168
|
if options[:template]
|
163
169
|
include BioVcf::RDF
|
170
|
+
require 'bio-vcf/template'
|
164
171
|
fn = options[:template]
|
165
172
|
raise "No template #{fn}!" if not File.exist?(fn)
|
166
|
-
template = ERB.new(File.read(fn))
|
173
|
+
# template = ERB.new(File.read(fn))
|
174
|
+
template = Bio::Template.new(fn)
|
167
175
|
end
|
168
176
|
|
169
177
|
if options[:num_threads] != 1
|
@@ -227,7 +235,7 @@ def parse_header line, samples, options
|
|
227
235
|
end
|
228
236
|
|
229
237
|
# Parse a VCF line and return the result as a string
|
230
|
-
def parse_line line,header,options,samples,template,stats=nil
|
238
|
+
def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
231
239
|
fields = VcfLine.parse(line)
|
232
240
|
rec = VcfRecord.new(fields,header)
|
233
241
|
r = rec # alias
|
@@ -248,6 +256,11 @@ def parse_line line,header,options,samples,template,stats=nil
|
|
248
256
|
|
249
257
|
# --------------------------
|
250
258
|
# Filtering and set analysis
|
259
|
+
if bedfilter
|
260
|
+
bed = bedfilter.contains(rec)
|
261
|
+
return if not bed
|
262
|
+
end
|
263
|
+
|
251
264
|
return if filter and not rec.filter(filter,ignore_missing_data: ignore_missing,quiet: quiet)
|
252
265
|
|
253
266
|
if sfilter
|
@@ -315,7 +328,7 @@ def parse_line line,header,options,samples,template,stats=nil
|
|
315
328
|
elsif options[:template]
|
316
329
|
# Ruby ERB template
|
317
330
|
begin
|
318
|
-
template.
|
331
|
+
template.body(binding)
|
319
332
|
rescue Exception => e
|
320
333
|
$stderr.print e,": ",fields,"\n"
|
321
334
|
$stderr.print e.backtrace.inspect if options[:verbose]
|
@@ -343,11 +356,15 @@ chunks = []
|
|
343
356
|
lines = []
|
344
357
|
line_number=0
|
345
358
|
|
359
|
+
if options[:bed]
|
360
|
+
bedfilter = BedFilter.new(options[:bed])
|
361
|
+
end
|
362
|
+
|
346
363
|
begin
|
347
364
|
process = lambda { | lines |
|
348
365
|
res = []
|
349
366
|
lines.each do | line |
|
350
|
-
res << parse_line(line,header,options,samples,template,stats)
|
367
|
+
res << parse_line(line,header,options,bedfilter,samples,template,stats)
|
351
368
|
end
|
352
369
|
res
|
353
370
|
}
|
@@ -357,6 +374,7 @@ begin
|
|
357
374
|
end
|
358
375
|
} # end output
|
359
376
|
|
377
|
+
print template.header(binding) if template
|
360
378
|
# ---- Main loop
|
361
379
|
STDIN.each_line do | line |
|
362
380
|
line_number += 1
|
@@ -410,6 +428,7 @@ begin
|
|
410
428
|
process.call(chunk)
|
411
429
|
}
|
412
430
|
end
|
431
|
+
print template.footer(binding) if template
|
413
432
|
stats.print if stats
|
414
433
|
|
415
434
|
rescue Exception => e
|
data/bio-vcf.gemspec
CHANGED
@@ -2,14 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: bio-vcf 0.8.1 ruby lib
|
5
6
|
|
6
7
|
Gem::Specification.new do |s|
|
7
8
|
s.name = "bio-vcf"
|
8
|
-
s.version = "0.8.
|
9
|
+
s.version = "0.8.1"
|
9
10
|
|
10
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
11
13
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2014-
|
14
|
+
s.date = "2014-11-26"
|
13
15
|
s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting"
|
14
16
|
s.email = "pjotr.public01@thebird.nl"
|
15
17
|
s.executables = ["bio-vcf"]
|
@@ -40,6 +42,8 @@ Gem::Specification.new do |s|
|
|
40
42
|
"features/step_definitions/somaticsniper.rb",
|
41
43
|
"features/support/env.rb",
|
42
44
|
"lib/bio-vcf.rb",
|
45
|
+
"lib/bio-vcf/bedfilter.rb",
|
46
|
+
"lib/bio-vcf/template.rb",
|
43
47
|
"lib/bio-vcf/utils.rb",
|
44
48
|
"lib/bio-vcf/variant.rb",
|
45
49
|
"lib/bio-vcf/vcf.rb",
|
@@ -53,6 +57,7 @@ Gem::Specification.new do |s|
|
|
53
57
|
"template/gatk_vcf2rdf.erb",
|
54
58
|
"template/vcf2json.erb",
|
55
59
|
"template/vcf2rdf.erb",
|
60
|
+
"template/vcf2rdf_header.erb",
|
56
61
|
"test/data/input/dbsnp.vcf",
|
57
62
|
"test/data/input/multisample.vcf",
|
58
63
|
"test/data/input/somaticsniper.vcf",
|
@@ -70,8 +75,8 @@ Gem::Specification.new do |s|
|
|
70
75
|
]
|
71
76
|
s.homepage = "http://github.com/pjotrp/bioruby-vcf"
|
72
77
|
s.licenses = ["MIT"]
|
73
|
-
s.
|
74
|
-
s.rubygems_version = "2.
|
78
|
+
s.required_ruby_version = Gem::Requirement.new(">= 2.0.0")
|
79
|
+
s.rubygems_version = "2.2.2"
|
75
80
|
s.summary = "Fast multi-threaded VCF parser"
|
76
81
|
|
77
82
|
if s.respond_to? :specification_version then
|
@@ -63,3 +63,16 @@ Feature: Multi-sample VCF
|
|
63
63
|
And I expect rec.sample.s3t2? to be true
|
64
64
|
And I expect rec.missing_samples? to be true
|
65
65
|
|
66
|
+
# Phased genotype
|
67
|
+
Given multisample vcf line
|
68
|
+
"""
|
69
|
+
1 10723 . C G 73.85 . AC=4;AF=0.667;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL 0|1 ./. 1/1:2,2:4:6:66,6,0 1/1:4,1:5:3:36,3,0 ./. ./. 0/0:6,0:6:3:0,3,33
|
70
|
+
"""
|
71
|
+
When I parse the record
|
72
|
+
Then I expect rec.pos to contain 10723
|
73
|
+
Then I expect rec.valid? to be true
|
74
|
+
And I expect r.original? to be true
|
75
|
+
And I expect r.original.gts? to be true
|
76
|
+
And I expect r.original.gts to be ["C","G"]
|
77
|
+
And I expect r.original.gts[0] to be "C"
|
78
|
+
And I expect r.original.gts[1] to be "G"
|
data/lib/bio-vcf.rb
CHANGED
@@ -0,0 +1,43 @@
|
|
1
|
+
module BioVcf

  # Tests VCF records against the intervals listed in a BED file.
  #
  # The BED file is read once at construction time. For every chromosome the
  # intervals are kept sorted by stop coordinate so that a lookup can binary
  # search for the first interval whose stop is not before the query position.
  class BedFilter
    # bedfilen:: path of a tab-delimited BED file; the first four columns
    #            (chromosome, start, stop, name) are used.
    #
    # Blank lines and '#', 'track' and 'browser' lines are skipped. A data
    # line with fewer than three columns raises a RuntimeError naming the
    # offending line.
    def initialize bedfilen
      # Only core Ruby (Range#bsearch, available from Ruby 2.0) is needed for
      # the search, so no external binary search library is loaded here.
      per_chr = {}
      lineno = 0
      # File.foreach closes the file when iteration finishes
      File.foreach(bedfilen) do | line |
        lineno += 1
        line = line.strip
        next if line.empty? or line =~ /\A(#|track\b|browser\b)/
        (chr,start,stop,gene) = line.split(/\t/)[0..3]
        raise "Malformed BED record at #{bedfilen}:#{lineno}: #{line}" if stop.nil?
        (per_chr[chr] ||= []).push([[chr,start.to_i,stop.to_i,gene],lineno])
      end

      @intervals = {}  # chrom => [[chr,start,stop,gene], ...] sorted by stop
      @min_start = {}  # chrom => smallest start among intervals[i..-1]
      per_chr.each do | chr,list |
        # Sort on stop; for equal stops the record that appears later in the
        # file comes first, so it is the first candidate during a lookup.
        sorted = list.sort_by { | rinfo,n | [rinfo[2],-n] }.map { | rinfo,_n | rinfo }
        mins = Array.new(sorted.size)
        lowest = nil
        (sorted.size-1).downto(0) do | i |
          start = sorted[i][1]
          lowest = start if lowest.nil? or start < lowest
          mins[i] = lowest
        end
        @intervals[chr] = sorted
        @min_start[chr] = mins
      end
    end

    # Returns the BED record [chrom, start, stop, name] of an interval that
    # contains rec (start <= rec.pos <= stop on rec.chrom), or nil when no
    # interval does. rec must respond to #chrom and #pos.
    #
    # NOTE(review): the comparison takes start and stop exactly as written in
    # the BED file. BED starts are normally 0-based while VCF positions are
    # 1-based, so the start boundary may be one base too permissive - confirm
    # the intended convention.
    def contains(rec)
      list = @intervals[rec.chrom]
      return nil if list.nil?
      pos = rec.pos
      # index of the first interval that does not end before pos
      first = (0...list.size).bsearch { | i | list[i][2] >= pos }
      return nil if first.nil?
      # no remaining interval starts at or before pos: certain miss
      return nil if @min_start[rec.chrom][first] > pos
      # Every interval from 'first' onwards ends at or after pos; the first
      # one that also starts at or before pos contains it. Scanning on (rather
      # than testing a single candidate) is what finds nested and overlapping
      # intervals.
      first.upto(list.size-1) do | i |
        return list[i] if list[i][1] <= pos
      end
      nil
    end
  end

end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
module Bio

  # An ERB template file that may be divided into HEADER, BODY and FOOTER
  # sections, each rendered separately.
  #
  # A line matching =HEADER is dropped, a line matching =BODY starts the body
  # section and, once in the body, a line matching =FOOTER starts the footer
  # section. A template that never starts a body section is a body-only
  # template: all of its lines form the body.
  class Template

    # fn:: path of the template file. Raises a RuntimeError when it does not
    #      exist.
    def initialize fn
      raise "Can not find template #{fn}!" if not File.exist?(fn)
      parse(File.read(fn))
    end

    # Splits buf (the template text) into its sections and compiles every
    # non-empty section into an ERB object.
    def parse buf
      sections = { :header => [], :body => [], :footer => [] }
      where = :header
      buf.split("\n").each do | line |
        case where
        when :header
          next if line =~ /=HEADER/
          if line =~ /=BODY/
            where = :body
            next
          end
        when :body
          if line =~ /=FOOTER/
            where = :footer
            next
          end
        end
        sections[where] << line
      end
      if where == :header
        # No =BODY marker was seen: the whole template is the body. Testing
        # the parser state (rather than whether the body is empty) keeps a
        # template with a deliberately empty BODY section from having its
        # header rendered for every record.
        sections[:body] = sections[:header]
        sections[:header] = []
      end
      @erb_header = compile(sections[:header])
      @erb_body = compile(sections[:body])
      @erb_footer = compile(sections[:footer])
    end

    # Renders the body section; kept so that a Template can be used wherever
    # a plain ERB object was used before (ERB#result).
    def result env
      body(env)
    end

    # Renders the header section with binding env ("" when there is none).
    def header env
      render(@erb_header,env)
    end

    # Renders the body section with binding env ("" when there is none).
    def body env
      render(@erb_body,env)
    end

    # Renders the footer section with binding env ("" when there is none).
    def footer env
      render(@erb_footer,env)
    end

    private

    # Returns an ERB object for the given lines, or nil for an empty section.
    # (Array#size is truthy even when 0, so emptiness is tested explicitly.)
    def compile lines
      lines.empty? ? nil : ERB.new(lines.join("\n"))
    end

    # Evaluates erb in env, treating a missing section as empty output.
    def render erb,env
      erb ? erb.result(env) : ""
    end
  end
end
|
@@ -156,7 +156,7 @@ module BioVcf
|
|
156
156
|
end
|
157
157
|
|
158
158
|
def gti
|
159
|
-
gt.split(
|
159
|
+
gt.split(/[\/\|]/).map { |g| g.to_i }
|
160
160
|
end
|
161
161
|
|
162
162
|
def gts?
|
@@ -218,7 +218,12 @@ module BioVcf
|
|
218
218
|
end
|
219
219
|
|
220
220
|
def [] name
|
221
|
-
|
221
|
+
begin
|
222
|
+
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
223
|
+
rescue TypeError
|
224
|
+
$stderr.print "Unknown field name <#{name}> in record, did you mean r.info.#{name}?\n"
|
225
|
+
raise
|
226
|
+
end
|
222
227
|
end
|
223
228
|
|
224
229
|
def method_missing(m, *args, &block)
|
data/lib/bio-vcf/vcfheader.rb
CHANGED
@@ -60,13 +60,21 @@ module BioVcf
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def samples
|
63
|
-
@samples ||= column_names
|
63
|
+
@samples ||= if column_names.size > 8
|
64
|
+
column_names[9..-1]
|
65
|
+
else
|
66
|
+
[]
|
67
|
+
end
|
64
68
|
end
|
65
69
|
|
66
70
|
def samples_index_array
|
67
71
|
@all_samples_index ||= column_names[9..-1].fill{|i| i}
|
68
72
|
end
|
69
73
|
|
74
|
+
def num_samples
|
75
|
+
@num_samples ||= ( samples == nil ? 0 : samples.size )
|
76
|
+
end
|
77
|
+
|
70
78
|
def sample_index
|
71
79
|
return @sample_index if @sample_index
|
72
80
|
index = {}
|
data/lib/bio-vcf/vcfsample.rb
CHANGED
data/template/vcf2json.erb
CHANGED
@@ -1,8 +1,20 @@
|
|
1
|
+
=HEADER
|
2
|
+
<% require 'json' %>
|
3
|
+
[
|
4
|
+
{ "HEADER": {
|
5
|
+
"options": <%= options.to_h.to_json %>,
|
6
|
+
"files": <%= ARGV %>,
|
7
|
+
"version": "<%= BIOVCF_VERSION %>"
|
8
|
+
},
|
9
|
+
|
10
|
+
=BODY
|
11
|
+
|
1
12
|
{
|
2
13
|
"seq:chr": "<%= rec.chrom %>" ,
|
3
14
|
"seq:pos": <%= rec.pos %> ,
|
4
15
|
"seq:ref": "<%= rec.ref %>" ,
|
5
16
|
"seq:alt": "<%= rec.alt[0] %>" ,
|
6
|
-
"seq:maf": <%= rec.info.maf[0] %> ,
|
7
17
|
"dp": <%= rec.info.dp %> ,
|
8
|
-
}
|
18
|
+
},
|
19
|
+
=FOOTER
|
20
|
+
]
|
@@ -0,0 +1,24 @@
|
|
1
|
+
=HEADER
|
2
|
+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
3
|
+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
|
4
|
+
@prefix dc: <http://purl.org/dc/elements/1.1/> .
|
5
|
+
@prefix hgnc: <http://identifiers.org/hgnc.symbol/> .
|
6
|
+
@prefix doi: <http://dx.doi.org/> .
|
7
|
+
@prefix seq: <http://biobeat.org/rdf/seq#> .
|
8
|
+
@prefix db: <http://biobeat.org/rdf/db#> .
|
9
|
+
@prefix : <http://biobeat.org/rdf/dbsnp#> .
|
10
|
+
|
11
|
+
=BODY
|
12
|
+
<%
|
13
|
+
id = Turtle::mangle_identifier(['ch'+rec.chrom,rec.pos,rec.alt.join('')].join('_'))
|
14
|
+
%>
|
15
|
+
:<%= id %>
|
16
|
+
:query_id "<%= id %>";
|
17
|
+
seq:chr "<%= rec.chrom %>" ;
|
18
|
+
seq:pos <%= rec.pos %> ;
|
19
|
+
seq:ref "<%= rec.ref %>" ;
|
20
|
+
seq:alt "<%= rec.alt[0] %>" ;
|
21
|
+
seq:dp <%= rec.info.dp %> ;
|
22
|
+
db:vcf true .
|
23
|
+
|
24
|
+
=FOOTER
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-vcf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -98,6 +98,8 @@ files:
|
|
98
98
|
- features/step_definitions/somaticsniper.rb
|
99
99
|
- features/support/env.rb
|
100
100
|
- lib/bio-vcf.rb
|
101
|
+
- lib/bio-vcf/bedfilter.rb
|
102
|
+
- lib/bio-vcf/template.rb
|
101
103
|
- lib/bio-vcf/utils.rb
|
102
104
|
- lib/bio-vcf/variant.rb
|
103
105
|
- lib/bio-vcf/vcf.rb
|
@@ -111,6 +113,7 @@ files:
|
|
111
113
|
- template/gatk_vcf2rdf.erb
|
112
114
|
- template/vcf2json.erb
|
113
115
|
- template/vcf2rdf.erb
|
116
|
+
- template/vcf2rdf_header.erb
|
114
117
|
- test/data/input/dbsnp.vcf
|
115
118
|
- test/data/input/multisample.vcf
|
116
119
|
- test/data/input/somaticsniper.vcf
|
@@ -137,7 +140,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
137
140
|
requirements:
|
138
141
|
- - ">="
|
139
142
|
- !ruby/object:Gem::Version
|
140
|
-
version:
|
143
|
+
version: 2.0.0
|
141
144
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
145
|
requirements:
|
143
146
|
- - ">="
|
@@ -145,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
148
|
version: '0'
|
146
149
|
requirements: []
|
147
150
|
rubyforge_project:
|
148
|
-
rubygems_version: 2.
|
151
|
+
rubygems_version: 2.2.2
|
149
152
|
signing_key:
|
150
153
|
specification_version: 4
|
151
154
|
summary: Fast multi-threaded VCF parser
|