bio-vcf 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/README.md +75 -8
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/bio-vcf +24 -5
- data/bio-vcf.gemspec +9 -4
- data/features/multisample.feature +13 -0
- data/lib/bio-vcf.rb +1 -0
- data/lib/bio-vcf/bedfilter.rb +43 -0
- data/lib/bio-vcf/template.rb +75 -0
- data/lib/bio-vcf/vcfgenotypefield.rb +7 -2
- data/lib/bio-vcf/vcfheader.rb +9 -1
- data/lib/bio-vcf/vcfsample.rb +1 -1
- data/template/vcf2json.erb +14 -2
- data/template/vcf2rdf_header.erb +24 -0
- data/test/data/regression/thread4_4_failed_filter-stderr.ref +1 -0
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90a933c33c683c1f0886a202fa5a9ee5ed2ad8ff
|
4
|
+
data.tar.gz: 1f769a89fcb3e3b44e22864ddf729ea3ac040260
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 308d93ca1bcb142fa9cd4be63d929edb0ad92b7ac0da0d4f2d51f4b363de6ef0b87ac3d2688af8c36257317e203f69355fdc3348c6a330adb7d997af7ab6714d
|
7
|
+
data.tar.gz: d7d328a13d90b209a6068f9d3f09d56e8a00262ccf7fba9d9d67ffe4935993b2ccbb2ddc2f9a4831dd8928258a9c5d468f050d9edfbb95737616f4bfaf184bb0
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -16,7 +16,7 @@ RDF and JSON. Why would you use bio-vcf over other parsers?
|
|
16
16
|
7. Bio-vcf allows for genotype processing
|
17
17
|
8. Bio-vcf has support for set analysis
|
18
18
|
9. Bio-vcf has sane error handling
|
19
|
-
10. Bio-vcf can output tabular data, HTML, LaTeX, RDF, JSON and JSON-LD using templates
|
19
|
+
10. Bio-vcf can output tabular data, HTML, LaTeX, RDF, JSON, JSON-LD and even other VCFs using (ERB) templates
|
20
20
|
|
21
21
|
Bio-vcf has better performance than other tools
|
22
22
|
because of lazy parsing, multi-threading, and useful combinations of
|
@@ -52,7 +52,7 @@ a 16 core machine takes
|
|
52
52
|
sys 0m5.039s
|
53
53
|
```
|
54
54
|
|
55
|
-
which shows
|
55
|
+
which shows decent core utilisation (10x). We are running
|
56
56
|
gzip compressed VCF files of 30+ Gb with similar performance gains.
|
57
57
|
|
58
58
|
Use zcat to
|
@@ -633,6 +633,12 @@ To remove/select 3 samples:
|
|
633
633
|
bio-vcf --samples 0,1,3 < mytest.vcf
|
634
634
|
```
|
635
635
|
|
636
|
+
Filter on a BED file and annotate the gene name in the resulting VCF:
|
637
|
+
|
638
|
+
```sh
|
639
|
+
bio-vcf -v --bed test/data/input/test.bed --rewrite 'rec.info["gene"]=bed[3]' < test/data/input/somaticsniper.vcf
|
640
|
+
```
|
641
|
+
|
636
642
|
## RDF output
|
637
643
|
|
638
644
|
You can use --rdf for turtle RDF output from simple one-liners, note the use of --id and
|
@@ -691,10 +697,11 @@ template could be
|
|
691
697
|
};
|
692
698
|
```
|
693
699
|
|
694
|
-
To get JSON, run with something like
|
700
|
+
To get JSON, run with something like (combining
|
701
|
+
with a filter)
|
695
702
|
|
696
703
|
```sh
|
697
|
-
bio-vcf --template template/vcf2json.erb --filter 'r.info.
|
704
|
+
bio-vcf --template template/vcf2json.erb --filter 'r.info.sao==1' < dbsnp.vcf
|
698
705
|
```
|
699
706
|
|
700
707
|
which renders
|
@@ -713,7 +720,7 @@ which renders
|
|
713
720
|
Likewise for RDF output:
|
714
721
|
|
715
722
|
```sh
|
716
|
-
bio-vcf --template template/vcf2rdf.erb --filter 'r.info.
|
723
|
+
bio-vcf --template template/vcf2rdf.erb --filter 'r.info.sao==1' < dbsnp.vcf
|
717
724
|
```
|
718
725
|
|
719
726
|
renders the ERB template
|
@@ -728,7 +735,7 @@ renders the ERB template
|
|
728
735
|
seq:pos <%= rec.pos %> ,
|
729
736
|
seq:ref "<%= rec.ref %>" ,
|
730
737
|
seq:alt "<%= rec.alt[0] %>" ,
|
731
|
-
seq:maf <%= rec.info.maf[0] %> ,
|
738
|
+
seq:maf <%= (rec.info.maf[0]*100).round %> ,
|
732
739
|
seq:dp <%= rec.info.dp %> ,
|
733
740
|
db:vcf true .
|
734
741
|
```
|
@@ -742,12 +749,72 @@ into
|
|
742
749
|
seq:pos 33703698 ,
|
743
750
|
seq:ref "C" ,
|
744
751
|
seq:alt "A" ,
|
745
|
-
seq:maf
|
752
|
+
seq:maf 16 ,
|
746
753
|
seq:dp 92 ,
|
747
754
|
db:vcf true .
|
748
755
|
```
|
749
756
|
|
750
|
-
Be creative! You can write templates for csv, HTML, XML, LaTeX, RDF, JSON, YAML, JSON-LD, etc. etc.!
|
757
|
+
Note the calculated field value for maf. Be creative! You can write templates for csv, HTML, XML, LaTeX, RDF, JSON, YAML, JSON-LD, etc. etc.!
|
758
|
+
|
759
|
+
### Metadata
|
760
|
+
|
761
|
+
Templates can also print data as a header of the JSON/YAML/RDF output. For this
|
762
|
+
use the '=' prefix with the HEADER, BODY and FOOTER keywords in the template. A small example
|
763
|
+
can be
|
764
|
+
|
765
|
+
```Javascript
|
766
|
+
=HEADER
|
767
|
+
<% require 'json' %>
|
768
|
+
[
|
769
|
+
{ "HEADER": {
|
770
|
+
"options": <%= options.to_h.to_json %>,
|
771
|
+
"files": <%= ARGV %>,
|
772
|
+
"version": "<%= BIOVCF_VERSION %>"
|
773
|
+
},
|
774
|
+
|
775
|
+
=BODY
|
776
|
+
|
777
|
+
{
|
778
|
+
"seq:chr": "<%= rec.chrom %>" ,
|
779
|
+
"seq:pos": <%= rec.pos %> ,
|
780
|
+
"seq:ref": "<%= rec.ref %>" ,
|
781
|
+
"seq:alt": "<%= rec.alt[0] %>" ,
|
782
|
+
"dp": <%= rec.info.dp %> ,
|
783
|
+
},
|
784
|
+
=FOOTER
|
785
|
+
]
|
786
|
+
```
|
787
|
+
|
788
|
+
may generate something like
|
789
|
+
|
790
|
+
```Javascript
|
791
|
+
[
|
792
|
+
{ "HEADER": {
|
793
|
+
"options": {"show_help":false,"source":"https://github.com/CuppenResearch/bioruby-vcf","version":"0.8.1-pre3 (Pjotr Prins)","date":"2014-11-26 12:51:36 +0000","thread_lines":40000,"template":"template/vcf2json.erb","skip_header":true},
|
794
|
+
"files": [],
|
795
|
+
"version": "0.8.1-pre3"
|
796
|
+
},
|
797
|
+
{
|
798
|
+
"seq:chr": "1" ,
|
799
|
+
"seq:pos": 883516 ,
|
800
|
+
"seq:ref": "G" ,
|
801
|
+
"seq:alt": "A" ,
|
802
|
+
"dp": ,
|
803
|
+
},
|
804
|
+
{
|
805
|
+
"seq:chr": "1" ,
|
806
|
+
"seq:pos": 891344 ,
|
807
|
+
"seq:ref": "G" ,
|
808
|
+
"seq:alt": "A" ,
|
809
|
+
"dp": ,
|
810
|
+
},
|
811
|
+
]
|
812
|
+
```
|
813
|
+
|
814
|
+
Note that the template is not smart enough to remove the final comma
|
815
|
+
from the last BODY element. To make it valid JSON that needs to be
|
816
|
+
removed. A future version may add a parameter to the BODY element or a
|
817
|
+
global rewrite function for this purpose. YAML and RDF have no such issue.
|
751
818
|
|
752
819
|
## Statistics
|
753
820
|
|
data/Rakefile
CHANGED
@@ -21,6 +21,7 @@ Jeweler::Tasks.new do |gem|
|
|
21
21
|
gem.description = %Q{Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting}
|
22
22
|
gem.email = "pjotr.public01@thebird.nl"
|
23
23
|
gem.authors = ["Pjotr Prins"]
|
24
|
+
gem.required_ruby_version = '>=2.0.0'
|
24
25
|
# dependencies defined in Gemfile
|
25
26
|
end
|
26
27
|
Jeweler::RubygemsDotOrgTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.
|
1
|
+
0.8.1
|
data/bin/bio-vcf
CHANGED
@@ -58,6 +58,10 @@ opts = OptionParser.new do |o|
|
|
58
58
|
options[:efilter_samples] = l
|
59
59
|
end
|
60
60
|
|
61
|
+
o.on("--bed bedfile", String, "Filter on BED elements") do |bed|
|
62
|
+
options[:bed] = bed
|
63
|
+
end
|
64
|
+
|
61
65
|
o.on('-e cmd', '--eval cmd',String, 'Evaluate command on each record') do |cmd|
|
62
66
|
options[:eval] = cmd
|
63
67
|
end
|
@@ -145,7 +149,9 @@ end
|
|
145
149
|
|
146
150
|
opts.parse!(ARGV)
|
147
151
|
|
148
|
-
|
152
|
+
# Expose the version as a constant so ERB templates can reference it.
BIOVCF_VERSION=version
# Always define the banner constant; only the printing is optional.
# Assigning it under `if !options[:quiet]` would leave the constant
# undefined in quiet mode and make the print below raise NameError.
BIOVCF_BANNER = "vcf #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2014\n"
$stderr.print BIOVCF_BANNER unless options[:quiet]
|
149
155
|
|
150
156
|
if options[:show_help]
|
151
157
|
print opts
|
@@ -161,9 +167,11 @@ $stderr.print "Options: ",options,"\n" if !options[:quiet]
|
|
161
167
|
|
162
168
|
if options[:template]
|
163
169
|
include BioVcf::RDF
|
170
|
+
require 'bio-vcf/template'
|
164
171
|
fn = options[:template]
|
165
172
|
raise "No template #{fn}!" if not File.exist?(fn)
|
166
|
-
template = ERB.new(File.read(fn))
|
173
|
+
# template = ERB.new(File.read(fn))
|
174
|
+
template = Bio::Template.new(fn)
|
167
175
|
end
|
168
176
|
|
169
177
|
if options[:num_threads] != 1
|
@@ -227,7 +235,7 @@ def parse_header line, samples, options
|
|
227
235
|
end
|
228
236
|
|
229
237
|
# Parse a VCF line and return the result as a string
|
230
|
-
def parse_line line,header,options,samples,template,stats=nil
|
238
|
+
def parse_line line,header,options,bedfilter,samples,template,stats=nil
|
231
239
|
fields = VcfLine.parse(line)
|
232
240
|
rec = VcfRecord.new(fields,header)
|
233
241
|
r = rec # alias
|
@@ -248,6 +256,11 @@ def parse_line line,header,options,samples,template,stats=nil
|
|
248
256
|
|
249
257
|
# --------------------------
|
250
258
|
# Filtering and set analysis
|
259
|
+
if bedfilter
|
260
|
+
bed = bedfilter.contains(rec)
|
261
|
+
return if not bed
|
262
|
+
end
|
263
|
+
|
251
264
|
return if filter and not rec.filter(filter,ignore_missing_data: ignore_missing,quiet: quiet)
|
252
265
|
|
253
266
|
if sfilter
|
@@ -315,7 +328,7 @@ def parse_line line,header,options,samples,template,stats=nil
|
|
315
328
|
elsif options[:template]
|
316
329
|
# Ruby ERB template
|
317
330
|
begin
|
318
|
-
template.
|
331
|
+
template.body(binding)
|
319
332
|
rescue Exception => e
|
320
333
|
$stderr.print e,": ",fields,"\n"
|
321
334
|
$stderr.print e.backtrace.inspect if options[:verbose]
|
@@ -343,11 +356,15 @@ chunks = []
|
|
343
356
|
lines = []
|
344
357
|
line_number=0
|
345
358
|
|
359
|
+
if options[:bed]
|
360
|
+
bedfilter = BedFilter.new(options[:bed])
|
361
|
+
end
|
362
|
+
|
346
363
|
begin
|
347
364
|
process = lambda { | lines |
|
348
365
|
res = []
|
349
366
|
lines.each do | line |
|
350
|
-
res << parse_line(line,header,options,samples,template,stats)
|
367
|
+
res << parse_line(line,header,options,bedfilter,samples,template,stats)
|
351
368
|
end
|
352
369
|
res
|
353
370
|
}
|
@@ -357,6 +374,7 @@ begin
|
|
357
374
|
end
|
358
375
|
} # end output
|
359
376
|
|
377
|
+
print template.header(binding) if template
|
360
378
|
# ---- Main loop
|
361
379
|
STDIN.each_line do | line |
|
362
380
|
line_number += 1
|
@@ -410,6 +428,7 @@ begin
|
|
410
428
|
process.call(chunk)
|
411
429
|
}
|
412
430
|
end
|
431
|
+
print template.footer(binding) if template
|
413
432
|
stats.print if stats
|
414
433
|
|
415
434
|
rescue Exception => e
|
data/bio-vcf.gemspec
CHANGED
@@ -2,14 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: bio-vcf 0.8.1 ruby lib
|
5
6
|
|
6
7
|
Gem::Specification.new do |s|
|
7
8
|
s.name = "bio-vcf"
|
8
|
-
s.version = "0.8.
|
9
|
+
s.version = "0.8.1"
|
9
10
|
|
10
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
11
13
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "2014-
|
14
|
+
s.date = "2014-11-26"
|
13
15
|
s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting"
|
14
16
|
s.email = "pjotr.public01@thebird.nl"
|
15
17
|
s.executables = ["bio-vcf"]
|
@@ -40,6 +42,8 @@ Gem::Specification.new do |s|
|
|
40
42
|
"features/step_definitions/somaticsniper.rb",
|
41
43
|
"features/support/env.rb",
|
42
44
|
"lib/bio-vcf.rb",
|
45
|
+
"lib/bio-vcf/bedfilter.rb",
|
46
|
+
"lib/bio-vcf/template.rb",
|
43
47
|
"lib/bio-vcf/utils.rb",
|
44
48
|
"lib/bio-vcf/variant.rb",
|
45
49
|
"lib/bio-vcf/vcf.rb",
|
@@ -53,6 +57,7 @@ Gem::Specification.new do |s|
|
|
53
57
|
"template/gatk_vcf2rdf.erb",
|
54
58
|
"template/vcf2json.erb",
|
55
59
|
"template/vcf2rdf.erb",
|
60
|
+
"template/vcf2rdf_header.erb",
|
56
61
|
"test/data/input/dbsnp.vcf",
|
57
62
|
"test/data/input/multisample.vcf",
|
58
63
|
"test/data/input/somaticsniper.vcf",
|
@@ -70,8 +75,8 @@ Gem::Specification.new do |s|
|
|
70
75
|
]
|
71
76
|
s.homepage = "http://github.com/pjotrp/bioruby-vcf"
|
72
77
|
s.licenses = ["MIT"]
|
73
|
-
s.
|
74
|
-
s.rubygems_version = "2.
|
78
|
+
s.required_ruby_version = Gem::Requirement.new(">= 2.0.0")
|
79
|
+
s.rubygems_version = "2.2.2"
|
75
80
|
s.summary = "Fast multi-threaded VCF parser"
|
76
81
|
|
77
82
|
if s.respond_to? :specification_version then
|
@@ -63,3 +63,16 @@ Feature: Multi-sample VCF
|
|
63
63
|
And I expect rec.sample.s3t2? to be true
|
64
64
|
And I expect rec.missing_samples? to be true
|
65
65
|
|
66
|
+
# Phased genotype
|
67
|
+
Given multisample vcf line
|
68
|
+
"""
|
69
|
+
1 10723 . C G 73.85 . AC=4;AF=0.667;AN=6;BaseQRankSum=1.300;DP=18;Dels=0.00;FS=3.680;HaplotypeScore=0.0000;MLEAC=4;MLEAF=0.667;MQ=20.49;MQ0=11;MQRankSum=1.754;QD=8.21;ReadPosRankSum=0.000 GT:AD:DP:GQ:PL 0|1 ./. 1/1:2,2:4:6:66,6,0 1/1:4,1:5:3:36,3,0 ./. ./. 0/0:6,0:6:3:0,3,33
|
70
|
+
"""
|
71
|
+
When I parse the record
|
72
|
+
Then I expect rec.pos to contain 10723
|
73
|
+
Then I expect rec.valid? to be true
|
74
|
+
And I expect r.original? to be true
|
75
|
+
And I expect r.original.gts? to be true
|
76
|
+
And I expect r.original.gts to be ["C","G"]
|
77
|
+
And I expect r.original.gts[0] to be "C"
|
78
|
+
And I expect r.original.gts[1] to be "G"
|
data/lib/bio-vcf.rb
CHANGED
@@ -0,0 +1,43 @@
|
|
1
|
+
module BioVcf

  # Region filter backed by a BED file.
  #
  # The first four tab-separated columns of every BED line are read as
  # chromosome, start, stop and (optionally) a name such as a gene symbol.
  # Records are then tested against these intervals with #contains.
  class BedFilter
    # Load the intervals from +bedfilen+ (path of a tab-delimited BED file).
    #
    # Blank lines, lines starting with '#' and lines with fewer than three
    # tab-separated columns (e.g. "track"/"browser" lines) are skipped.
    def initialize bedfilen
      regions = {}
      # File.foreach closes the file handle when done
      File.foreach(bedfilen) do | line |
        line = line.strip
        next if line.empty? or line.start_with?('#')
        (chr,start,stop,gene) = line.split(/\t/)[0..3]
        next if start.nil? or stop.nil?
        (regions[chr] ||= []) << [chr,start.to_i,stop.to_i,gene]
      end

      # Per chromosome: intervals sorted by stop (then start), plus for each
      # index the smallest start found at that index or later. The latter
      # lets #contains stop scanning as soon as no later interval can match.
      @chrs = {}
      @min_start = {}
      regions.each do | chr,list |
        sorted = list.sort_by { | r | [r[2],r[1]] }
        mins = Array.new(sorted.size)
        lowest = nil
        (sorted.size-1).downto(0) do | i |
          start = sorted[i][1]
          lowest = start if lowest.nil? or start < lowest
          mins[i] = lowest
        end
        @chrs[chr] = sorted
        @min_start[chr] = mins
      end
    end

    # Return the BED entry [chr,start,stop,name] that contains the position
    # of +rec+ (an object responding to #chrom and #pos), or nil when no
    # interval on that chromosome matches. Overlapping and nested intervals
    # are handled; the matching interval with the smallest stop is returned.
    #
    # NOTE(review): the test is inclusive on both ends (start <= pos <= stop),
    # as in the previous implementation. BED is conventionally 0-based
    # half-open while VCF POS is 1-based - confirm this is intended.
    def contains(rec)
      list = @chrs[rec.chrom]
      return nil if list.nil?
      pos = rec.pos
      # Core Range#bsearch (Ruby >= 2.0): first index whose stop >= pos
      i = (0...list.size).bsearch { | idx | list[idx][2] >= pos }
      return nil if i.nil?
      mins = @min_start[rec.chrom]
      # Every interval from i onwards has stop >= pos, so only the start
      # needs checking. mins[i] > pos means nothing further can match.
      while i < list.size and pos >= mins[i]
        rinfo = list[i]
        return rinfo if pos >= rinfo[1]
        i += 1
      end
      nil
    end
  end

end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
module Bio

  # ERB template that may be split into three sections with marker lines
  # containing =HEADER, =BODY and =FOOTER. A template without a =BODY
  # marker (or with an empty body section) is treated as body only.
  class Template

    # Read and parse the template file +fn+. Raises a RuntimeError when
    # the file does not exist.
    def initialize fn
      raise "Can not find template #{fn}!" if not File.exist?(fn)
      parse(File.read(fn))
    end

    # Split +buf+ into header, body and footer lines and compile each
    # non-empty section into its own ERB object.
    def parse buf
      header = []
      body   = []
      footer = []
      where = :header
      buf.split("\n").each do | line |
        case where
        when :header
          next if line =~ /=HEADER/
          if line =~ /=BODY/
            body = []
            where = :body
            next
          end
          header << line
        when :body
          if line =~ /=FOOTER/
            footer = []
            where = :footer
            next
          end
          body << line
        else
          footer << line
        end
      end
      # No body section found: the whole template is the body
      if body == []
        body = header
        header = []
      end
      # Note: Array#size is always truthy in Ruby (0 is truthy), so test
      # for emptiness explicitly; empty sections get no ERB object.
      @erb_header = header.empty? ? nil : ERB.new(header.join("\n"))
      @erb_body   = body.empty?   ? nil : ERB.new(body.join("\n"))
      @erb_footer = footer.empty? ? nil : ERB.new(footer.join("\n"))
    end

    # Render the body section with binding +env+. Kept so callers written
    # against a plain ERB object (template.result(binding)) keep working.
    def result env
      body(env)
    end

    # Render the header section with binding +env+ ("" when there is none)
    def header env
      if @erb_header
        @erb_header.result(env)
      else
        ""
      end
    end

    # Render the body section with binding +env+ ("" when there is none)
    def body env
      if @erb_body
        @erb_body.result(env)
      else
        ""
      end
    end

    # Render the footer section with binding +env+ ("" when there is none)
    def footer env
      if @erb_footer
        @erb_footer.result(env)
      else
        ""
      end
    end
  end
end
|
@@ -156,7 +156,7 @@ module BioVcf
|
|
156
156
|
end
|
157
157
|
|
158
158
|
def gti
|
159
|
-
gt.split(
|
159
|
+
gt.split(/[\/\|]/).map { |g| g.to_i }
|
160
160
|
end
|
161
161
|
|
162
162
|
def gts?
|
@@ -218,7 +218,12 @@ module BioVcf
|
|
218
218
|
end
|
219
219
|
|
220
220
|
def [] name
|
221
|
-
|
221
|
+
begin
|
222
|
+
@samples[name] ||= VcfGenotypeField.new(@fields[@sample_index[name]],@format,@header,@ref,@alt)
|
223
|
+
rescue TypeError
|
224
|
+
$stderr.print "Unknown field name <#{name}> in record, did you mean r.info.#{name}?\n"
|
225
|
+
raise
|
226
|
+
end
|
222
227
|
end
|
223
228
|
|
224
229
|
def method_missing(m, *args, &block)
|
data/lib/bio-vcf/vcfheader.rb
CHANGED
@@ -60,13 +60,21 @@ module BioVcf
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def samples
|
63
|
-
@samples ||= column_names
|
63
|
+
@samples ||= if column_names.size > 8
|
64
|
+
column_names[9..-1]
|
65
|
+
else
|
66
|
+
[]
|
67
|
+
end
|
64
68
|
end
|
65
69
|
|
66
70
|
def samples_index_array
|
67
71
|
@all_samples_index ||= column_names[9..-1].fill{|i| i}
|
68
72
|
end
|
69
73
|
|
74
|
+
# Number of entries returned by #samples (0 when #samples is nil).
# The count is memoized in @num_samples after the first call.
def num_samples
return @num_samples if @num_samples
names = samples
@num_samples = names.nil? ? 0 : names.size
end
|
77
|
+
|
70
78
|
def sample_index
|
71
79
|
return @sample_index if @sample_index
|
72
80
|
index = {}
|
data/lib/bio-vcf/vcfsample.rb
CHANGED
data/template/vcf2json.erb
CHANGED
@@ -1,8 +1,20 @@
|
|
1
|
+
=HEADER
|
2
|
+
<% require 'json' %>
|
3
|
+
[
|
4
|
+
{ "HEADER": {
|
5
|
+
"options": <%= options.to_h.to_json %>,
|
6
|
+
"files": <%= ARGV %>,
|
7
|
+
"version": "<%= BIOVCF_VERSION %>"
|
8
|
+
},
|
9
|
+
|
10
|
+
=BODY
|
11
|
+
|
1
12
|
{
|
2
13
|
"seq:chr": "<%= rec.chrom %>" ,
|
3
14
|
"seq:pos": <%= rec.pos %> ,
|
4
15
|
"seq:ref": "<%= rec.ref %>" ,
|
5
16
|
"seq:alt": "<%= rec.alt[0] %>" ,
|
6
|
-
"seq:maf": <%= rec.info.maf[0] %> ,
|
7
17
|
"dp": <%= rec.info.dp %> ,
|
8
|
-
}
|
18
|
+
},
|
19
|
+
=FOOTER
|
20
|
+
]
|
@@ -0,0 +1,24 @@
|
|
1
|
+
=HEADER
|
2
|
+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
3
|
+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
|
4
|
+
@prefix dc: <http://purl.org/dc/elements/1.1/> .
|
5
|
+
@prefix hgnc: <http://identifiers.org/hgnc.symbol/> .
|
6
|
+
@prefix doi: <http://dx.doi.org/> .
|
7
|
+
@prefix seq: <http://biobeat.org/rdf/seq#> .
|
8
|
+
@prefix db: <http://biobeat.org/rdf/db#> .
|
9
|
+
@prefix : <http://biobeat.org/rdf/dbsnp#> .
|
10
|
+
|
11
|
+
=BODY
|
12
|
+
<%
|
13
|
+
id = Turtle::mangle_identifier(['ch'+rec.chrom,rec.pos,rec.alt.join('')].join('_'))
|
14
|
+
%>
|
15
|
+
:<%= id %>
|
16
|
+
:query_id "<%= id %>";
|
17
|
+
seq:chr "<%= rec.chrom %>" ;
|
18
|
+
seq:pos <%= rec.pos %> ;
|
19
|
+
seq:ref "<%= rec.ref %>" ;
|
20
|
+
seq:alt "<%= rec.alt[0] %>" ;
|
21
|
+
seq:dp <%= rec.info.dp %> ;
|
22
|
+
db:vcf true .
|
23
|
+
|
24
|
+
=FOOTER
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-vcf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -98,6 +98,8 @@ files:
|
|
98
98
|
- features/step_definitions/somaticsniper.rb
|
99
99
|
- features/support/env.rb
|
100
100
|
- lib/bio-vcf.rb
|
101
|
+
- lib/bio-vcf/bedfilter.rb
|
102
|
+
- lib/bio-vcf/template.rb
|
101
103
|
- lib/bio-vcf/utils.rb
|
102
104
|
- lib/bio-vcf/variant.rb
|
103
105
|
- lib/bio-vcf/vcf.rb
|
@@ -111,6 +113,7 @@ files:
|
|
111
113
|
- template/gatk_vcf2rdf.erb
|
112
114
|
- template/vcf2json.erb
|
113
115
|
- template/vcf2rdf.erb
|
116
|
+
- template/vcf2rdf_header.erb
|
114
117
|
- test/data/input/dbsnp.vcf
|
115
118
|
- test/data/input/multisample.vcf
|
116
119
|
- test/data/input/somaticsniper.vcf
|
@@ -137,7 +140,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
137
140
|
requirements:
|
138
141
|
- - ">="
|
139
142
|
- !ruby/object:Gem::Version
|
140
|
-
version:
|
143
|
+
version: 2.0.0
|
141
144
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
145
|
requirements:
|
143
146
|
- - ">="
|
@@ -145,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
148
|
version: '0'
|
146
149
|
requirements: []
|
147
150
|
rubyforge_project:
|
148
|
-
rubygems_version: 2.
|
151
|
+
rubygems_version: 2.2.2
|
149
152
|
signing_key:
|
150
153
|
specification_version: 4
|
151
154
|
summary: Fast multi-threaded VCF parser
|