bio-vcf 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Gemfile.lock +8 -0
- data/README.md +376 -11
- data/VERSION +1 -1
- data/bin/bio-vcf +172 -39
- data/bio-vcf.gemspec +18 -3
- data/features/cli.feature +32 -0
- data/features/multisample.feature +28 -10
- data/features/step_definitions/cli-feature.rb +12 -0
- data/features/step_definitions/multisample.rb +64 -18
- data/features/support/env.rb +5 -0
- data/lib/bio-vcf.rb +2 -0
- data/lib/bio-vcf/utils.rb +23 -0
- data/lib/bio-vcf/vcfgenotypefield.rb +73 -28
- data/lib/bio-vcf/vcfheader.rb +8 -0
- data/lib/bio-vcf/vcfline.rb +1 -0
- data/lib/bio-vcf/vcfrecord.rb +142 -14
- data/lib/bio-vcf/vcfsample.rb +88 -0
- data/test/data/input/dbsnp.vcf +200 -0
- data/test/data/input/multisample.vcf +2 -2
- data/test/data/regression/eval_r.info.dp.ref +150 -0
- data/test/data/regression/r.info.dp.ref +147 -0
- data/test/data/regression/rewrite.info.sample.ref +150 -0
- data/test/data/regression/s.dp.ref +145 -0
- data/test/data/regression/seval_s.dp.ref +36 -0
- data/test/data/regression/sfilter001.ref +145 -0
- data/test/performance/metrics.md +98 -0
- metadata +28 -2
@@ -0,0 +1,98 @@
|
|
1
|
+
Round of testing with:
|
2
|
+
|
3
|
+
ruby -v
|
4
|
+
ruby 2.1.0p0 (2013-12-25 revision 44422) [x86_64-linux]
|
5
|
+
|
6
|
+
wc test/tmp/test.vcf
|
7
|
+
12469 137065 2053314 test/tmp/test.vcf
|
8
|
+
|
9
|
+
time ./bin/bio-vcf -i --filter 'r.info.dp>20' --sfilter 's.dp>10' < test/tmp/test.vcf > /dev/null
|
10
|
+
vcf 0.0.3-pre4 (biogem Ruby 2.1.0) by Pjotr Prins 2014
|
11
|
+
Options: {:show_help=>false, :ignore_missing=>true, :filter=>"r.info.dp>20", :sfilter=>"s.dp>10"}
|
12
|
+
real 0m1.215s
|
13
|
+
user 0m1.208s
|
14
|
+
sys 0m0.004s
|
15
|
+
|
16
|
+
Reload
|
17
|
+
|
18
|
+
time ./bin/bio-vcf -i --filter 'r.info.dp>20' --sfilter 's.dp>10' < test/tmp/test.vcf > /dev/null
|
19
|
+
vcf 0.0.3-pre4 (biogem Ruby 2.1.0) by Pjotr Prins 2014
|
20
|
+
Options: {:show_help=>false, :ignore_missing=>true, :filter=>"r.info.dp>20", :sfilter=>"s.dp>10"}
|
21
|
+
real 0m1.194s
|
22
|
+
user 0m1.172s
|
23
|
+
sys 0m0.016s
|
24
|
+
|
25
|
+
Introduced method caching
|
26
|
+
|
27
|
+
real 0m1.190s
|
28
|
+
user 0m1.180s
|
29
|
+
sys 0m0.004s
|
30
|
+
|
31
|
+
Introduce !!Float test
|
32
|
+
|
33
|
+
real 0m1.187s
|
34
|
+
user 0m1.180s
|
35
|
+
sys 0m0.004s
|
36
|
+
|
37
|
+
Cache sample index
|
38
|
+
|
39
|
+
real 0m1.156s
|
40
|
+
user 0m1.148s
|
41
|
+
sys 0m0.004s
|
42
|
+
|
43
|
+
Run the profiler
|
44
|
+
|
45
|
+
ruby -rprofile ./bin/bio-vcf -i --filter 'r.info.dp>20' --sfilter 's.dp>10' < test/tmp/test.vcf > /dev/null
|
46
|
+
vcf 0.0.3-pre4 (biogem Ruby 2.1.0) by Pjotr Prins 2014
|
47
|
+
Options: {:show_help=>false, :ignore_missing=>true, :filter=>"r.info.dp>20", :sfilter=>"s.dp>10"}
|
48
|
+
% cumulative self self total
|
49
|
+
time seconds seconds calls ms/call ms/call name
|
50
|
+
9.45 2.19 2.19 34968 0.06 0.76 Object#parse_line
|
51
|
+
7.25 3.87 1.68 75031 0.02 0.03 BioVcf::VcfRecordInfo#[]=
|
52
|
+
7.12 5.52 1.65 34968 0.05 0.29 Kernel.eval
|
53
|
+
6.86 7.11 1.59 87481 0.02 0.10 BioVcf::VcfRecordInfo#initialize
|
54
|
+
5.57 8.40 1.29 35994 0.04 0.47 Array#each
|
55
|
+
4.14 9.36 0.96 34253 0.03 0.65 BioVcf::VcfRecord#each_sample
|
56
|
+
3.93 10.27 0.91 93880 0.01 0.03 BioVcf::VcfRecordParser.get_format
|
57
|
+
3.88 11.17 0.90 145920 0.01 0.01 String#split
|
58
|
+
|
59
|
+
Late parsing of info field without split:
|
60
|
+
|
61
|
+
real 0m1.124s
|
62
|
+
user 0m1.120s
|
63
|
+
sys 0m0.008s
|
64
|
+
|
65
|
+
Global sample info caching
|
66
|
+
|
67
|
+
real 0m1.032s
|
68
|
+
user 0m1.020s
|
69
|
+
sys 0m0.008s
|
70
|
+
|
71
|
+
Assign some repeated Hash queries
|
72
|
+
|
73
|
+
real 0m1.028s
|
74
|
+
user 0m1.024s
|
75
|
+
sys 0m0.000s
|
76
|
+
|
77
|
+
Profiler now picking out eval for further optimization
|
78
|
+
|
79
|
+
% cumulative self self total
|
80
|
+
time seconds seconds calls ms/call ms/call name
|
81
|
+
10.45 1.80 1.80 34968 0.05 0.59 Object#parse_line
|
82
|
+
7.89 3.16 1.36 34968 0.04 0.17 Kernel.eval
|
83
|
+
5.69 4.14 0.98 34253 0.03 0.57 BioVcf::VcfRecord#each_sample
|
84
|
+
4.93 4.99 0.85 12497 0.07 1.37 nil#
|
85
|
+
|
86
|
+
Compiling sample eval
|
87
|
+
|
88
|
+
real 0m0.820s
|
89
|
+
user 0m0.812s
|
90
|
+
sys 0m0.004s
|
91
|
+
|
92
|
+
Compiling record eval
|
93
|
+
|
94
|
+
real 0m0.647s
|
95
|
+
user 0m0.644s
|
96
|
+
sys 0m0.000s
|
97
|
+
|
98
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-vcf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: regressiontest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
description: Smart parser for VCF format
|
56
70
|
email: pjotr.public01@thebird.nl
|
57
71
|
executables:
|
@@ -70,15 +84,18 @@ files:
|
|
70
84
|
- VERSION
|
71
85
|
- bin/bio-vcf
|
72
86
|
- bio-vcf.gemspec
|
87
|
+
- features/cli.feature
|
73
88
|
- features/diff_count.feature
|
74
89
|
- features/multisample.feature
|
75
90
|
- features/somaticsniper.feature
|
76
91
|
- features/step_definitions/bio-vcf_steps.rb
|
92
|
+
- features/step_definitions/cli-feature.rb
|
77
93
|
- features/step_definitions/diff_count.rb
|
78
94
|
- features/step_definitions/multisample.rb
|
79
95
|
- features/step_definitions/somaticsniper.rb
|
80
96
|
- features/support/env.rb
|
81
97
|
- lib/bio-vcf.rb
|
98
|
+
- lib/bio-vcf/utils.rb
|
82
99
|
- lib/bio-vcf/variant.rb
|
83
100
|
- lib/bio-vcf/vcf.rb
|
84
101
|
- lib/bio-vcf/vcfgenotypefield.rb
|
@@ -86,8 +103,17 @@ files:
|
|
86
103
|
- lib/bio-vcf/vcfline.rb
|
87
104
|
- lib/bio-vcf/vcfrdf.rb
|
88
105
|
- lib/bio-vcf/vcfrecord.rb
|
106
|
+
- lib/bio-vcf/vcfsample.rb
|
107
|
+
- test/data/input/dbsnp.vcf
|
89
108
|
- test/data/input/multisample.vcf
|
90
109
|
- test/data/input/somaticsniper.vcf
|
110
|
+
- test/data/regression/eval_r.info.dp.ref
|
111
|
+
- test/data/regression/r.info.dp.ref
|
112
|
+
- test/data/regression/rewrite.info.sample.ref
|
113
|
+
- test/data/regression/s.dp.ref
|
114
|
+
- test/data/regression/seval_s.dp.ref
|
115
|
+
- test/data/regression/sfilter001.ref
|
116
|
+
- test/performance/metrics.md
|
91
117
|
homepage: http://github.com/pjotrp/bioruby-vcf
|
92
118
|
licenses:
|
93
119
|
- MIT
|