bio-samtools 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +5 -1
- data/README.rdoc +21 -14
- data/Rakefile +2 -13
- data/VERSION +1 -1
- data/bio-samtools.gemspec +13 -4
- data/ext/Rakefile +48 -0
- data/lib/bio/.DS_Store +0 -0
- data/lib/bio/db/sam.rb +112 -67
- data/lib/bio/db/sam/bam.rb +2 -2
- data/lib/bio/db/sam/external/libbam.1.dylib +0 -0
- data/lib/bio/db/sam/external/libbam.a +0 -0
- data/lib/bio/db/sam/sam.rb +1 -0
- data/lib/bio/db/sam/vcf.rb +78 -0
- data/test/helper.rb +1 -1
- data/test/test_basic.rb +342 -0
- data/test/test_bio-samtools.rb +0 -6
- data/test/test_pileup.rb +68 -0
- data/test/test_vcf.rb +68 -0
- metadata +37 -20
- data/test/samples/small/test_chr.fasta.fai +0 -1
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -9,9 +9,12 @@ GEM
|
|
9
9
|
bundler (~> 1.0.0)
|
10
10
|
git (>= 1.2.5)
|
11
11
|
rake
|
12
|
-
|
12
|
+
json (1.6.1)
|
13
|
+
rake (0.9.2.2)
|
13
14
|
rcov (0.9.9)
|
14
15
|
rcov (0.9.9-java)
|
16
|
+
rdoc (3.11)
|
17
|
+
json (~> 1.4)
|
15
18
|
shoulda (2.11.3)
|
16
19
|
|
17
20
|
PLATFORMS
|
@@ -24,4 +27,5 @@ DEPENDENCIES
|
|
24
27
|
ffi
|
25
28
|
jeweler
|
26
29
|
rcov
|
30
|
+
rdoc
|
27
31
|
shoulda
|
data/README.rdoc
CHANGED
@@ -16,37 +16,44 @@ the gee_fu genome browser (http://github.com/danmaclean/gee_fu).
|
|
16
16
|
'gem install bio-samtools'
|
17
17
|
|
18
18
|
== Usage
|
19
|
-
|
20
|
-
|
21
|
-
rake pileup_test
|
19
|
+
See doc/tutorial.html or doc/tutorial.pdf for a walkthrough tutorial. The documentation in doc/index.html provides information on individual objects
|
20
|
+
and methods.
|
22
21
|
|
23
|
-
|
22
|
+
The easiest way to see bio-samtools in action is to try the tests:
|
23
|
+
rake test
|
24
|
+
is all you should need to get the tests to run.
|
24
25
|
|
26
|
+
However, on some systems (at least the one belonging to one of our developers on Mac OS X Lion, with the Apple-provided Ruby) you can get
|
27
|
+
a LoadError: no such file to load 'ffi', despite it being installed. This is something to do with RubyGems and rake test that we
|
28
|
+
haven't got to the bottom of yet. The workaround is to run the tests manually.
|
29
|
+
1) change into the directory bio-samtools installed into (you can find this with the command 'gem which bio-samtools')
|
30
|
+
2) run the tests as if they were any other ruby script eg ruby test/test_pileup.rb
|
25
31
|
|
26
32
|
== Dependencies:
|
27
33
|
-FFI (http://github.com/ffi/ffi)
|
28
|
-
|
29
|
-
This can be obtained from samtools. (http://samtools.sourceforge.net/ )
|
34
|
+
|
30
35
|
|
31
36
|
== FAQ.
|
32
37
|
I'm getting a segmentation fault, what did I do wrong?
|
33
|
-
There are two known segmentation faults at the moment
|
38
|
+
-There are two known segmentation faults at the moment
|
34
39
|
-When you try to load a text file as binary file
|
35
40
|
-When you try to load a binary file as a text file
|
36
41
|
|
37
|
-
|
38
|
-
|
39
|
-
|
42
|
+
I keep seeing this "Invalid gemspec in [some ruby gem path...]", what is wrong?
|
43
|
+
- This appears to be a bug in RubyGems that doesn't affect the running of the tools. It will keep happening until someone updates RubyGems. If it really bugs you, downgrade RubyGems.
|
44
|
+
|
45
|
+
To whom do I complain?
|
46
|
+
-about bio-samtools? Try
|
47
|
+
Ricardo dot Ramirez-Gonzalez at tgac dot ac dot uk
|
40
48
|
dan.maclean@tsl.ac.uk
|
41
49
|
|
42
50
|
== TODO
|
43
51
|
-Filter to the fetching algorithm (give a condition that has to be satisfied to add the alignment to the list)
|
44
|
-
|
45
|
-
-Operating system independent, DONE ( test needed)
|
52
|
+
|
46
53
|
|
47
54
|
== IMPORTANT NOTE
|
48
|
-
-Libraries are downloaded, compiled and installed inside the gem at install time on the host system
|
49
|
-
|
55
|
+
-Libraries (libbam) are downloaded, compiled and installed inside the gem at install time on the host system
|
56
|
+
|
50
57
|
$ openssl dgst libbam.so.1
|
51
58
|
MD5(libbam.so.1)= c45cfccfb41ffeb2730ee4b227d244c4
|
52
59
|
|
data/Rakefile
CHANGED
@@ -32,17 +32,6 @@ Jeweler::Tasks.new do |gem|
|
|
32
32
|
end
|
33
33
|
Jeweler::RubygemsDotOrgTasks.new
|
34
34
|
|
35
|
-
desc "Basic Functionality Test"
|
36
|
-
task :basic_test do
|
37
|
-
ruby "test/basictest.rb"
|
38
|
-
end
|
39
|
-
|
40
|
-
|
41
|
-
desc "Test the Pileup class"
|
42
|
-
task :pileup_test do
|
43
|
-
ruby "test/pileup.rb"
|
44
|
-
end
|
45
|
-
|
46
35
|
require 'rake/testtask'
|
47
36
|
Rake::TestTask.new(:test) do |test|
|
48
37
|
test.libs << 'lib' << 'test'
|
@@ -59,8 +48,8 @@ end
|
|
59
48
|
|
60
49
|
task :default => :test
|
61
50
|
|
62
|
-
require '
|
63
|
-
|
51
|
+
require 'rdoc/task'
|
52
|
+
RDoc::Task.new do |rdoc|
|
64
53
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
65
54
|
|
66
55
|
rdoc.rdoc_dir = 'rdoc'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.0
|
data/bio-samtools.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bio-samtools}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = [%q{Ricardo Ramirez-Gonzalez}, %q{Dan MacLean}, %q{Raoul J.P. Bonnal}]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-11-09}
|
13
13
|
s.description = %q{Binder of samtools for ruby, on the top of FFI.
|
14
14
|
|
15
15
|
This project was born from the need to add support of BAM files to
|
@@ -94,6 +94,7 @@ Gem::Specification.new do |s|
|
|
94
94
|
"doc/tutorial.html",
|
95
95
|
"doc/tutorial.pdf",
|
96
96
|
"ext/Makefile-bioruby.patch",
|
97
|
+
"ext/Rakefile",
|
97
98
|
"ext/mkrf_conf.rb",
|
98
99
|
"lib/bio-samtools.rb",
|
99
100
|
"lib/bio/.DS_Store",
|
@@ -101,10 +102,13 @@ Gem::Specification.new do |s|
|
|
101
102
|
"lib/bio/db/sam/bam.rb",
|
102
103
|
"lib/bio/db/sam/external/COPYING",
|
103
104
|
"lib/bio/db/sam/external/VERSION",
|
105
|
+
"lib/bio/db/sam/external/libbam.1.dylib",
|
106
|
+
"lib/bio/db/sam/external/libbam.a",
|
104
107
|
"lib/bio/db/sam/faidx.rb",
|
105
108
|
"lib/bio/db/sam/library.rb",
|
106
109
|
"lib/bio/db/sam/pileup.rb",
|
107
110
|
"lib/bio/db/sam/sam.rb",
|
111
|
+
"lib/bio/db/sam/vcf.rb",
|
108
112
|
"test/basictest.rb",
|
109
113
|
"test/coverage.rb",
|
110
114
|
"test/coverage_plot.rb",
|
@@ -123,7 +127,6 @@ Gem::Specification.new do |s|
|
|
123
127
|
"test/samples/small/test_chr.fasta.amb",
|
124
128
|
"test/samples/small/test_chr.fasta.ann",
|
125
129
|
"test/samples/small/test_chr.fasta.bwt",
|
126
|
-
"test/samples/small/test_chr.fasta.fai",
|
127
130
|
"test/samples/small/test_chr.fasta.pac",
|
128
131
|
"test/samples/small/test_chr.fasta.rbwt",
|
129
132
|
"test/samples/small/test_chr.fasta.rpac",
|
@@ -131,7 +134,10 @@ Gem::Specification.new do |s|
|
|
131
134
|
"test/samples/small/test_chr.fasta.sa",
|
132
135
|
"test/samples/small/testu.bam",
|
133
136
|
"test/samples/small/testu.bam.bai",
|
134
|
-
"test/
|
137
|
+
"test/test_basic.rb",
|
138
|
+
"test/test_bio-samtools.rb",
|
139
|
+
"test/test_pileup.rb",
|
140
|
+
"test/test_vcf.rb"
|
135
141
|
]
|
136
142
|
s.homepage = %q{http://github.com/helios/bioruby-samtools}
|
137
143
|
s.licenses = [%q{MIT}]
|
@@ -151,6 +157,7 @@ Gem::Specification.new do |s|
|
|
151
157
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
152
158
|
s.add_development_dependency(%q<bio>, [">= 1.4.2"])
|
153
159
|
s.add_development_dependency(%q<ffi>, [">= 0"])
|
160
|
+
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
154
161
|
else
|
155
162
|
s.add_dependency(%q<ffi>, [">= 0"])
|
156
163
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
@@ -160,6 +167,7 @@ Gem::Specification.new do |s|
|
|
160
167
|
s.add_dependency(%q<rcov>, [">= 0"])
|
161
168
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
162
169
|
s.add_dependency(%q<ffi>, [">= 0"])
|
170
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
163
171
|
end
|
164
172
|
else
|
165
173
|
s.add_dependency(%q<ffi>, [">= 0"])
|
@@ -170,6 +178,7 @@ Gem::Specification.new do |s|
|
|
170
178
|
s.add_dependency(%q<rcov>, [">= 0"])
|
171
179
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
172
180
|
s.add_dependency(%q<ffi>, [">= 0"])
|
181
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
173
182
|
end
|
174
183
|
end
|
175
184
|
|
data/ext/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'rbconfig'
require 'open-uri'
require 'fileutils'
include FileUtils::Verbose
require 'rake/clean'

# Location of the samtools 0.1.18 source tarball that libbam is built from.
URL = "http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2/download"

# Directory the compiled libraries are copied into. It is resolved relative to
# this Rakefile (ext/../lib/bio/db/sam/external) and made absolute up front,
# because the :compile task changes the working directory before copying.
EXTERNAL_LIB_DIR = File.expand_path("../lib/bio/db/sam/external", File.dirname(__FILE__))

# Fetches the samtools tarball into the current directory.
task :download do
  # Kernel#open stopped handling URLs in Ruby 3.0; prefer URI.open where it is
  # public and fall back to the open-uri patched Kernel#open on older Rubies.
  fetch = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
  fetch.call(URL) do |remote|
    File.open("samtools-0.1.18.tar.bz2", 'wb') do |fout|
      fout.write(remote.read)
    end #fout
  end #remote
end

# Unpacks and patches samtools, then builds the static and shared libbam and
# copies both into EXTERNAL_LIB_DIR. Nothing is built on an unrecognised
# host OS; Windows raises NotImplementedError.
task :compile do
  sh "tar xvfj samtools-0.1.18.tar.bz2"
  cd("samtools-0.1.18") do
    sh "patch < ../Makefile-bioruby.patch"
    shared_lib =
      case RbConfig::CONFIG['host_os']
      when /linux/ then "libbam.so.1"
      when /darwin/ then "libbam.1.dylib"
      when /mswin|mingw/
        raise NotImplementedError, "samtools library (libbam) is not available for Windows platform"
      end
    if shared_lib
      # Previously tried on linux (left disabled): CFLAGS='-g -Wall -O2 -fPIC' make -e
      sh "make"
      cp("libbam.a", EXTERNAL_LIB_DIR)
      sh "make #{shared_lib}-local"
      cp(shared_lib, EXTERNAL_LIB_DIR)
    end
  end #cd
end

# Removes the build tree and the downloaded tarball.
task :clean do
  cd("samtools-0.1.18") do
    sh "make clean"
  end
  rm("samtools-0.1.18.tar.bz2")
  rm_rf("samtools-0.1.18")
end

task :default => [:download, :compile, :clean]
|
48
|
+
|
data/lib/bio/.DS_Store
CHANGED
Binary file
|
data/lib/bio/db/sam.rb
CHANGED
@@ -2,6 +2,8 @@ require 'bio/db/sam/library'
|
|
2
2
|
require 'bio/db/sam/bam'
|
3
3
|
require 'bio/db/sam/faidx'
|
4
4
|
require 'bio/db/sam/sam'
|
5
|
+
require 'bio/db/sam/pileup'
|
6
|
+
require 'bio/db/sam/vcf'
|
5
7
|
|
6
8
|
module LibC
|
7
9
|
extend FFI::Library
|
@@ -277,76 +279,76 @@ module Bio
|
|
277
279
|
#call the option as a symbol of the flag, eg -r for region is called :r => "some SAM compatible region"
|
278
280
|
#eg bam.mpileup(:r => "chr1:1000-2000", :q => 50) gets the pileup from reads with mapping quality of at least 50 on chr1 between 1000-2000
|
279
281
|
# Runs samtools mpileup through the native binding and yields one Pileup per
# line of captured output. Must be called with a block.
#
# opts:: Hash of mpileup options keyed either by the short samtools flag as a
#        symbol (eg :r) or by the long names listed in +long_opts+ below.
#        Entries whose value is nil/false are skipped. The caller's hash is
#        not modified.
#
# Raises SAMException when no binary BAM file or no FastA path is set.
def mpileup( opts )

  raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
  raise SAMException.new(), "No FastA provided" unless @fasta_path
  #long option form to short samtools form..
  long_opts = {
    :region => :r,
    :illumina_quals => :six,
    :count_anomalous => :A,
    :no_baq => :B,
    :adjust_mapq => :C,
    :max_per_bam_depth => :d,
    :extended_baq => :E,
    :exclude_reads_file => :G,
    :list_of_positions => :l,
    :mapping_quality_cap => :M,
    :ignore_rg => :R,
    :min_mapping_quality => :q,
    :min_base_quality => :Q
  }

  ##convert any long_opts to short opts
  # Build a new hash instead of adding/deleting keys on +opts+ while iterating
  # it: Ruby >= 1.9 raises when a key is added during iteration, and the
  # caller's hash should not be rewritten as a side effect.
  short_opts = {}
  opts.each_pair do |k,v|
    short_opts[long_opts[k] || k] = v
  end

  ##remove any calls to -g or -u for mpileup, bcf output is not yet supported
  ##and also associated output options
  [:g, :u, :e, :h, :I, :L, :o, :p].each {|x| short_opts.delete(x) }

  strptrs = []
  strptrs << FFI::MemoryPointer.from_string("mpileup")
  short_opts.each do |k,v|
    next unless v ##dont bother unless the values provided are true..
    k = '6' if k == :six
    k = '-' + k.to_s
    strptrs << FFI::MemoryPointer.from_string(k)
    strptrs << FFI::MemoryPointer.from_string(v.to_s) unless ["-R", "-B", "-E", "-6", "-A"].include?(k) #these are just flags so don't pass a value...
  end
  strptrs << FFI::MemoryPointer.from_string('-f')
  strptrs << FFI::MemoryPointer.from_string(@fasta_path)
  strptrs << FFI::MemoryPointer.from_string(@sam)
  strptrs << nil

  # Now load all the pointers into a native memory block
  argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
  strptrs.each_with_index do |p, i|
    argv[i].put_pointer(0, p)
  end

  # Redirect this process's STDOUT into a pipe so the output of the native
  # call can be read back; the original STDOUT is restored in the parent below.
  old_stdout = STDOUT.clone
  read_pipe, write_pipe = IO.pipe()
  STDOUT.reopen(write_pipe)
  #int bam_mpileup(int argc, char *argv[])
  # argc excludes the trailing nil terminator appended above.
  Bio::DB::SAM::Tools.bam_mpileup(strptrs.length - 1,argv)
  # NOTE(review): fork returns nil in the child, so the child skips this branch
  # and returns from the method with STDOUT still pointing at the pipe. This
  # looks like the source of the "mixed in" output mentioned below - confirm
  # before restructuring; statement order here is deliberate-looking.
  if fork
    write_pipe.close
    STDOUT.reopen(old_stdout) #beware .. stdout from other processes eg tests calling this method can get mixed in...
    begin
      while line = read_pipe.readline
        yield Pileup.new(line)
      end
    rescue EOFError
      # readline signals end of the captured output with EOFError.
      read_pipe.close
      Process.wait
    end
  end
end
|
348
|
-
end
|
349
|
-
end
|
350
352
|
|
351
353
|
# utility method that does not use the samtools API, it calls samtools directly as if on the command line and catches the output,
|
352
354
|
# to use this method you must have a version of samtools that supports the pileup command (< 0.1.17)
|
@@ -373,6 +375,49 @@ module Bio
|
|
373
375
|
end
|
374
376
|
pipe.close
|
375
377
|
end
|
378
|
+
|
379
|
+
|
380
|
+
# Runs samtools idxstats through the native binding and parses its output.
#
# Returns a Hash keyed by reference sequence name; each value is a Hash with
# integer :length, :mapped_reads and :unmapped_reads entries.
#
# Raises SAMException when no binary BAM file is set, and also when no FastA
# path is set.
def index_stats
  raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
  raise SAMException.new(), "No FastA provided" unless @fasta_path
  # argv-style list: command name, BAM path, then a nil terminator.
  strptrs = []
  strptrs << FFI::MemoryPointer.from_string("idxstats")
  strptrs << FFI::MemoryPointer.from_string(@sam)
  strptrs << nil

  # Now load all the pointers into a native memory block
  argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
  strptrs.each_with_index do |p, i|
    argv[i].put_pointer(0, p)
  end

  # Accumulator for the parsed rows (a local that shadows the method name).
  index_stats = {}

  # Redirect this process's STDOUT into a pipe so the native call's output can
  # be read back; the original STDOUT is restored in the parent branch below.
  old_stdout = STDOUT.clone
  read_pipe, write_pipe = IO.pipe()
  STDOUT.reopen(write_pipe)

  #int bam_idxstats(int argc, char *argv[])
  # argc excludes the trailing nil terminator.
  Bio::DB::SAM::Tools.bam_idxstats(strptrs.length - 1,argv)
  # NOTE(review): fork returns nil in the child, so the child skips this branch
  # and returns the still-empty hash with STDOUT pointing at the pipe - confirm
  # this is intended before changing the order of these statements.
  if fork
    write_pipe.close
    STDOUT.reopen(old_stdout) #beware .. stdout from other processes eg tests calling this method can get mixed in...
    begin

      while line = read_pipe.readline #TAB delimited with each line consisting of reference sequence name, sequence length, # mapped reads and # unmapped reads.
        info = line.split(/\t/)
        # Skip anything that is not a 4-column idxstats row.
        next unless info.length == 4
        index_stats[ info[0] ] = {:length => info[1].to_i, :mapped_reads => info[2].to_i, :unmapped_reads => info[3].to_i }
      end
    rescue EOFError
      # readline signals end of the captured output with EOFError.
      read_pipe.close
      Process.wait
    end
  end #fork
  index_stats
end
|
419
|
+
|
420
|
+
|
376
421
|
|
377
422
|
end
|
378
423
|
|
data/lib/bio/db/sam/bam.rb
CHANGED
Binary file
|
Binary file
|
data/lib/bio/db/sam/sam.rb
CHANGED
@@ -79,6 +79,7 @@ module Bio
|
|
79
79
|
attach_function :sampileup, [ :pointer, :int, :bam_pileup_f, :pointer ], :int
|
80
80
|
attach_function :samfaipath, [ :string ], :string
|
81
81
|
attach_function :bam_mpileup, [ :int, :pointer ], :int
|
82
|
+
attach_function :bam_idxstats, [:int, :pointer ], :int
|
82
83
|
end
|
83
84
|
end
|
84
85
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# :title: Vcf
|
2
|
+
# = Vcf
|
3
|
+
# A class representing information in Variant Call Format
|
4
|
+
# Forked from vcfruby at https://github.com/jesserod/vcfruby
|
5
|
+
# Modified and tests written by Dan MacLean (dan.maclean@tsl.ac.uk)
|
6
|
+
# VCF is described at http://www.1000genomes.org/node/101
|
7
|
+
|
8
|
+
# One record (data line) of a Variant Call Format file.
#
# chrom, ref, alt, format are kept as strings; pos is an Integer, qual a
# Float; id and filter are nil when the column is '.'; info is a Hash of
# key => value strings (flags map to nil) or nil when the column is '.';
# samples is a Hash of sample name => { format key => value string }.
class Vcf
  public
  attr_accessor :chrom, :pos, :id, :ref, :alt, :qual, :filter, :info, :format, :samples

  #create the vcf object, use the ordered list of sample names to label samples if provided ['A', 'B', 'C'], otherwise uses, 1,2,3 etc
  #
  # A nil line, or a header/meta line starting with '#', leaves the record
  # empty (info and samples are empty hashes, all other fields nil).
  def initialize(line=nil, sample_names=nil)
    @info = {}
    @samples = {}
    # Guard first: the line may be absent, and header lines are not records.
    return if line.nil? || line[0,1] == '#'
    parse_line(line, sample_names)
  end

  # Returns x converted to a base-10 Integer when it is a string that parses
  # as one; otherwise returns x unchanged.
  def int_or_raw(x)
    Integer(x, 10)
  rescue ArgumentError, TypeError
    x
  end

  #gets the info in the Vcf line and parses it into the record's fields
  #
  # Returns false (and changes nothing) for lines starting with '#', true
  # otherwise. Raises RuntimeError when the line has fewer than 8 columns,
  # has a FORMAT column but no sample columns, when sample_names does not
  # match the number of sample columns, or when a sample has more values than
  # FORMAT has keys.
  def parse_line(line, sample_names=nil)
    return false if line[0,1] == '#'

    # limit -1 keeps trailing empty columns
    f = line.chomp.split("\t", -1)
    raise "VCF lines must have at least 8 fields" if f.size < 8
    @chrom = f[0]
    @pos = f[1].to_i
    @id = '.' == f[2] ? nil : f[2]
    @ref = f[3]
    @alt = f[4]
    @qual = f[5].to_f
    @filter = '.' == f[6] ? nil : f[6]

    @info = '.' == f[7] ? nil : {}
    if @info
      f[7].split(";").each do |x|
        keyval = x.split("=", -1)
        if keyval.size == 2 # If it's key=value
          @info[keyval[0]] = keyval[1]
        else # Otherwise, it's just a flag
          @info[x] = nil
        end
      end
    end

    @samples = {}
    return true if f.size == 8 # Has just upto info
    raise "Can't have format with no samples" if f.size == 9

    @format = f[8]

    sample_keys = @format.split(":")

    num_samples = f.size - 9 # How many fields are past the format

    if sample_names == nil # Make the sample names just ["1", "2", ... , "num_samples"]
      sample_names = (1..num_samples).to_a.map{|i| i.to_s}
    elsif sample_names.size != num_samples
      raise "Unexpected number of samples (#{num_samples}) based on the provided sample names (#{sample_names.inspect})"
    end

    sample_names.each_with_index do |sample_name, sample_index|
      i = sample_index + 9 # index into columns (f)
      sample_values = f[i].split(":")
      raise "Expected number of sample values to be <= number of sample keys in FORMAT column Format=#{@format} but sample=#{f[i]}" if sample_values.size > sample_keys.size
      @samples[sample_name] = {}
      # Keys with no corresponding value in this sample get an empty string.
      sample_keys.each_with_index {|key, value_index| @samples[sample_name][key] = sample_values[value_index] || ""}
    end

    true
  end

end
|
data/test/helper.rb
CHANGED
data/test/test_basic.rb
ADDED
@@ -0,0 +1,342 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require 'rubygems'
|
4
|
+
require 'ffi'
|
5
|
+
require "test/unit"
|
6
|
+
require "bio/db/sam"
|
7
|
+
require "bio/db/sam/sam"
|
8
|
+
|
9
|
+
|
10
|
+
class TestBioDbSam < Test::Unit::TestCase
|
11
|
+
|
12
|
+
#Set up the paths
|
13
|
+
# Points the fixtures at the small sample data set: a text SAM (.tam) file, a
# BAM file and the FastA reference, all under test/samples/small.
def setup
  @test_folder = "test/samples/small"
  @testTAMFile = "#{@test_folder}/test.tam"
  @testBAMFile = "#{@test_folder}/testu.bam"
  @testReference = "#{@test_folder}/test_chr.fasta"
end
|
20
|
+
|
21
|
+
#Removing the index files
|
22
|
+
# Best-effort removal of index files generated while the tests ran; any error
# (typically the file not existing) is deliberately ignored.
#
# NOTE(review): for the BAM file the path deleted ends in ".fai" while the
# message printed says ".bai" - one of the two is presumably a slip. Confirm
# which is intended before changing it; a ".bam.bai" fixture ships with the
# gem, so deleting it here may not be wanted.
def teardown
  [[@testReference, ".fai "], [@testBAMFile, ".bai "]].each do |path, label|
    begin
      File.delete(path + ".fai")
      p "deleted: " + path + label
    rescue
    end
  end
end
|
34
|
+
=begin
|
35
|
+
def default_test
|
36
|
+
puts $LOAD_PATH
|
37
|
+
assert(true, "Unit test test")
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_openSAMFile
|
41
|
+
bamfile = Bio::DB::SAM::Tools.samopen(@testTAMFile,"r",nil)
|
42
|
+
Bio::DB::SAM::Tools.samclose(bamfile)
|
43
|
+
assert(true, "file open and closed")
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_new_class_empty
|
47
|
+
begin
|
48
|
+
bam = Bio::DB::Sam.new({})
|
49
|
+
assert(false, "Should fail while opening without parameters")
|
50
|
+
rescue Bio::DB::SAMException => e
|
51
|
+
puts e.message
|
52
|
+
assert(true, e.message)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_new_class_empty_invalid_path
|
57
|
+
begin
|
58
|
+
sam = Bio::DB::Sam.new({:bam=>"INVALID"})
|
59
|
+
sam.open
|
60
|
+
sam.close
|
61
|
+
assert(false, "Should fail with an invalid path")
|
62
|
+
rescue Bio::DB::SAMException => e
|
63
|
+
puts e.message
|
64
|
+
assert(true, e.message)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_class_text_read_no_faidx
|
69
|
+
sam = Bio::DB::Sam.new({:tam=>@testTAMFile})
|
70
|
+
sam.open
|
71
|
+
sam.close
|
72
|
+
assert(true, "file open and closed with the class")
|
73
|
+
end
|
74
|
+
|
75
|
+
def test_class_text_read_no_close
|
76
|
+
|
77
|
+
fam = Bio::DB::Sam.new({:tam=>@testTAMFile})
|
78
|
+
fam.open
|
79
|
+
fam = nil
|
80
|
+
ObjectSpace.garbage_collect
|
81
|
+
|
82
|
+
assert(true, "file openend but not closed")
|
83
|
+
end
|
84
|
+
|
85
|
+
def test_class_binary_read_no_close
|
86
|
+
|
87
|
+
Bio::DB::Sam.new({:bam=>@testBAMFile}).open
|
88
|
+
ObjectSpace.garbage_collect
|
89
|
+
assert(true, "BINARY file openend but not closed")
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_read_coverage
|
93
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile, :fasta=>@testReference})
|
94
|
+
sam.open
|
95
|
+
File.open( @test_folder +"/ids2.txt", "r") do |file|
|
96
|
+
puts "file opened"
|
97
|
+
file.each_line{|line|
|
98
|
+
fetching = line.split(' ')[0]
|
99
|
+
puts "fetching: " + fetching
|
100
|
+
sam.load_reference
|
101
|
+
seq = sam.fetch_reference(fetching, 0, 16000)
|
102
|
+
# puts seq
|
103
|
+
# puts seq.length
|
104
|
+
als = sam.fetch(fetching, 0, seq.length)
|
105
|
+
# p als
|
106
|
+
if als.length() > 0 then
|
107
|
+
p fetching
|
108
|
+
p als
|
109
|
+
end
|
110
|
+
}
|
111
|
+
|
112
|
+
end
|
113
|
+
sam.close
|
114
|
+
assert(true, "Finish")
|
115
|
+
end
|
116
|
+
# def test_read_TAM_as_BAM
|
117
|
+
# begin
|
118
|
+
# sam = Bio::DB::Sam.new({:bam=>@testTAMFile})
|
119
|
+
# sam.open
|
120
|
+
# sam.close
|
121
|
+
# assert(false, "Should raise an exception for reading a BAM as TAM")
|
122
|
+
# rescue Bio::DB::SAMException => e
|
123
|
+
# assert(true, "Properly handled")
|
124
|
+
# end
|
125
|
+
# end
|
126
|
+
|
127
|
+
# def test_read_BAM_as_TAM
|
128
|
+
# begin
|
129
|
+
# sam = Bio::DB::Sam.new({:tam=>@testBAMFile})
|
130
|
+
# sam.open
|
131
|
+
# sam.close
|
132
|
+
# assert(false, "Should raise an exception for reading a BAM as TAM")
|
133
|
+
# rescue Bio::DB::SAMException => e
|
134
|
+
# assert(true, "Properly handled")
|
135
|
+
# end
|
136
|
+
# end
|
137
|
+
|
138
|
+
def test_bam_load_index
|
139
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
140
|
+
sam.open
|
141
|
+
index = sam.load_index
|
142
|
+
sam.close
|
143
|
+
assert(true, "BAM index loaded")
|
144
|
+
# attach_function :bam_index_build, [ :string ], :int
|
145
|
+
# attach_function :bam_index_load, [ :string ], :pointer
|
146
|
+
# attach_function :bam_index_destroy, [ :pointer ], :void
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_tam_load_index
|
150
|
+
begin
|
151
|
+
sam = Bio::DB::Sam.new({:tam=>@testTAMFile})
|
152
|
+
sam.open
|
153
|
+
sam.load_index
|
154
|
+
sam.close
|
155
|
+
assert(false, "TAM index loaded")
|
156
|
+
rescue Bio::DB::SAMException => e
|
157
|
+
assert(true, "Unable to load an index for a TAM file")
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
def test_read_segment
|
162
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
163
|
+
sam.open
|
164
|
+
als = sam.fetch("chr_1", 0, 500)
|
165
|
+
p als
|
166
|
+
sam.close
|
167
|
+
assert(true, "Seems it ran the query")
|
168
|
+
#node_7263 238 60 has 550+, query from 0 to 500, something shall come....
|
169
|
+
end
|
170
|
+
|
171
|
+
def test_read_invalid_reference
|
172
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
173
|
+
sam.open
|
174
|
+
begin
|
175
|
+
als = sam.fetch("Chr1", 0, 500)
|
176
|
+
p als
|
177
|
+
sam.close
|
178
|
+
assert(false, "Seems it ran the query")
|
179
|
+
rescue Bio::DB::SAMException => e
|
180
|
+
p e
|
181
|
+
assert(true, "Exception generated and catched")
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
def test_read_invalid_reference_start_coordinate
|
186
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
187
|
+
sam.open
|
188
|
+
begin
|
189
|
+
als = sam.fetch("chr", -1, 500)
|
190
|
+
p als
|
191
|
+
sam.close
|
192
|
+
assert(false, "Seems it ran the query")
|
193
|
+
rescue Bio::DB::SAMException => e
|
194
|
+
p e
|
195
|
+
assert(true, "Exception generated and catched")
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
def test_read_invalid_reference_end_coordinate
|
200
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
201
|
+
sam.open
|
202
|
+
begin
|
203
|
+
als = sam.fetch("chr", 0, 50000)
|
204
|
+
p als
|
205
|
+
sam.close
|
206
|
+
assert(false, "Seems it ran the query")
|
207
|
+
rescue Bio::DB::SAMException => e
|
208
|
+
p e
|
209
|
+
assert(true, "Exception generated and catched")
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
def test_read_invalid_reference_swaped_coordinates
|
214
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
215
|
+
sam.open
|
216
|
+
begin
|
217
|
+
als = sam.fetch("chr", 500, 0)
|
218
|
+
p als
|
219
|
+
sam.close
|
220
|
+
assert(false, "Seems it ran the query")
|
221
|
+
rescue Bio::DB::SAMException => e
|
222
|
+
p e
|
223
|
+
assert(true, "Exception generated and catched")
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
def test_fasta_load_index
|
228
|
+
sam = Bio::DB::Sam.new({:fasta=>@testReference})
|
229
|
+
sam.load_reference
|
230
|
+
seq = sam.fetch_reference("chr_1", 0, 500)
|
231
|
+
p seq
|
232
|
+
sam.close
|
233
|
+
assert(true, "The reference was loaded")
|
234
|
+
end
|
235
|
+
|
236
|
+
def test_fasta_load_index
|
237
|
+
sam = Bio::DB::Sam.new({:fasta=>@testReference})
|
238
|
+
sam.load_reference
|
239
|
+
begin
|
240
|
+
seq = sam.fetch_reference("chr1", 0, 500)
|
241
|
+
p "Error seq:"+ seq
|
242
|
+
sam.close
|
243
|
+
assert(false, "The reference was loaded")
|
244
|
+
rescue Bio::DB::SAMException => e
|
245
|
+
p e
|
246
|
+
assert(true, "The references was not loaded")
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
250
|
+
def test_load_feature
|
251
|
+
|
252
|
+
fs = Feature.find_by_bam("chr_1", 0, 500,@testBAMFile)
|
253
|
+
|
254
|
+
p fs
|
255
|
+
assert(true, "Loaded as features")
|
256
|
+
end
|
257
|
+
|
258
|
+
def test_avg_coverage
|
259
|
+
sam = Bio::DB::Sam.new({:fasta=>@testReference, :bam=>@testBAMFile })
|
260
|
+
sam.open
|
261
|
+
cov = sam.average_coverage("chr_1", 60, 30)
|
262
|
+
p "Coverage: " + cov.to_s
|
263
|
+
sam.close
|
264
|
+
assert(true, "Average coverage ran")
|
265
|
+
assert(3 == cov, "The coverage is 3")
|
266
|
+
end
|
267
|
+
|
268
|
+
|
269
|
+
def test_chromosome_coverage
|
270
|
+
sam = Bio::DB::Sam.new({:fasta=>@testReference, :bam=>@testBAMFile })
|
271
|
+
sam.open
|
272
|
+
covs = sam.chromosome_coverage("chr_1", 0, 60)
|
273
|
+
p "Coverage: "
|
274
|
+
p covs
|
275
|
+
puts "POS\tCOV"
|
276
|
+
covs.each_with_index{ |cov, i| puts "#{i}\t#{cov}" }
|
277
|
+
sam.close
|
278
|
+
assert(true, "Average coverage ran")
|
279
|
+
#assert(3 == cov, "The coverage is 3")
|
280
|
+
end
|
281
|
+
=end
|
282
|
+
#test whether the call to mpileup works and returns 10 objects of class pileup
|
283
|
+
# mpileup over chr_1:100-109 should yield ten records, each a Pileup.
def test_pileup
  sam = Bio::DB::Sam.new(:fasta=>@testReference, :bam=>@testBAMFile )
  piles = []
  sam.mpileup(:region => "chr_1:100-109") do |pile|
    # Other output written to stdout during the test run can end up in the
    # captured stream, so keep only records for the expected reference.
    piles << pile if pile.ref_name == 'chr_1'
  end
  assert_equal(10,piles.length)
  piles.each { |entry| assert_kind_of(Pileup, entry) }
end
|
295
|
+
|
296
|
+
|
297
|
+
#test whether the call to mpileup returns a vcf object if :g => true is used on the command-line
|
298
|
+
# def test_vcf
|
299
|
+
|
300
|
+
# sam = Bio::DB::Sam.new(:fasta=>@testReference, :bam=>@testBAMFile )
|
301
|
+
# sam.mpileup(:region => "chr_1:100-109", :u => true ) do |p|
|
302
|
+
# $stderr.puts "p is #{p}"
|
303
|
+
# assert_kind_of(String,p)
|
304
|
+
# end
|
305
|
+
# end
|
306
|
+
# index_stats on the sample BAM should report chr_1 plus the '*' row, each
# with its length and mapped/unmapped read counts.
def test_indexstats
  expected = {
    "chr_1" => {:length=>69930, :mapped_reads=>0, :unmapped_reads=>0},
    "*" => {:length=>0, :mapped_reads=>0, :unmapped_reads=>0}
  }
  sam = Bio::DB::Sam.new(:bam => @testBAMFile, :fasta => @testReference)
  assert_equal(expected, sam.index_stats)
end
|
313
|
+
|
314
|
+
end
|
315
|
+
|
316
|
+
# Minimal value object used by the tests to hold one alignment as a feature.
class Feature
  attr_reader :start, :end, :strand, :sequence, :quality

  # a:: Hash with optional :start, :end, :strand, :sequence and :quality
  #     entries; missing entries leave the attribute nil.
  def initialize(a={})
    @start = a[:start]
    @end = a[:end] # was read from the misspelt key :enf, so always nil
    @strand = a[:strand]
    @sequence = a[:sequence]
    @quality = a[:quality]
  end

  # Opens the BAM file at bam_file_path, collects one Feature per alignment
  # handed to the callback by fetch_with_function for reference/start/stop,
  # and returns them as an Array. The file is closed even if fetching raises.
  def self.find_by_bam(reference,start,stop,bam_file_path)
    sam = Bio::DB::Sam.new({:bam=>bam_file_path})
    features = []
    sam.open
    begin
      fetchAlignment = Proc.new do |a|
        strand = a.query_strand ? '+' : '-'
        features << Feature.new({:start=>a.pos,:end=>a.calend,:strand=>strand,:sequence=>a.seq,:quality=>a.qual})
      end
      sam.fetch_with_function(reference, start, stop, fetchAlignment)
    ensure
      sam.close
    end
    features
  end
end
|
data/test/test_bio-samtools.rb
CHANGED
data/test/test_pileup.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
|
4
|
+
require "bio/db/sam/pileup"
|
5
|
+
require "test/unit"
|
6
|
+
|
7
|
+
# Unit tests for Pileup built directly from pileup text lines (no BAM needed).
class TestPileup < Test::Unit::TestCase

  # Fixtures: one six-column line, one ten-column line, and three six-column
  # lines whose read-base strings contain non-reference bases.
  def setup
    @six_col = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.,.,...,,,.,..^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
    @ten_col = Pileup.new("seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<")
    @snp     = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.gGgGgGgGgGgGg^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
    @snp_2   = Pileup.new("seq1\t272\tT\t24\t......aaaaaaggggggcccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
    @snp_3   = Pileup.new("seq1\t272\tT\t24\t......aaaaaaaagggggccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
  end

  # Every field of the six-column line is exposed through its reader.
  def test_new_from_6_column
    [
      ["seq1", :ref_name],
      [272, :pos],
      ["T", :ref_base],
      [24, :coverage],
      [",.$.....,,.,.,...,,,.,..^+.", :read_bases],
      ["<<<+;<<<<<<<<<<<=<;<;7<&", :read_quals]
    ].each do |wanted, reader|
      assert_equal(wanted, @six_col.send(reader))
    end
  end

  # Every field of the ten-column line is exposed through its reader.
  def test_new_from_10_column
    [
      ["seq2", :ref_name],
      [151, :pos],
      ["G", :ref_base],
      ["G", :consensus],
      [36, :consensus_quality],
      [0, :snp_quality],
      [99, :rms_mapq],
      [12, :coverage],
      ["...........A", :read_bases],
      [":9<;;7=<<<<<", :read_quals]
    ].each do |wanted, reader|
      assert_equal(wanted, @ten_col.send(reader))
    end
  end

  # non_refs returns a per-base tally keyed by :A, :C, :T and :G.
  def test_non_refs
    one_a = {:A => 1, :C => 0, :T => 0, :G => 0}
    none  = {:A => 0, :C => 0, :T => 0, :G => 0}
    assert_equal(one_a, @ten_col.non_refs)
    assert_equal(none, @six_col.non_refs)
  end

  # Expected consensus string for each SNP fixture.
  def test_consensus
    [["G", @snp], ["ACGT", @snp_2], ["A", @snp_3]].each do |wanted, pile|
      assert_equal(wanted, pile.consensus)
    end
  end

  # Expected non-reference count for each SNP fixture.
  def test_non_ref_count
    [[13, @snp], [18, @snp_2], [18, @snp_3]].each do |wanted, pile|
      assert_equal(wanted, pile.non_ref_count)
    end
  end

  # Expected reference count for each SNP fixture.
  def test_ref_count
    [[11, @snp], [6, @snp_2], [6, @snp_3]].each do |wanted, pile|
      assert_equal(wanted, pile.ref_count)
    end
  end

  # Reference and non-reference counts must add up to the coverage column.
  def test_ref_plus_non_ref_equal_to_coverage
    [@snp, @snp_2, @snp_3].each do |pile|
      assert_equal(pile.coverage, pile.ref_count + pile.non_ref_count)
    end
  end

end
|
data/test/test_vcf.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bio/db/sam/vcf'
|
5
|
+
require "test/unit"
|
6
|
+
|
7
|
+
|
8
|
+
class TestVcf < Test::Unit::TestCase
|
9
|
+
|
10
|
+
def setup
|
11
|
+
@vcf1 = Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3",["a","b","c"]) #from a 3.3 vcf file
|
12
|
+
@vcf2 = Vcf.new("20 14370 rs6054257 G A 29 0 NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:-1,-1") #from a 3.3 vcf file
|
13
|
+
@vcf3 = Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3") #from a 4.0 vcf file
|
14
|
+
@vcf4 = Vcf.new("20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,") #from a 4.0 vcf file
|
15
|
+
end
|
16
|
+
|
17
|
+
|
18
|
+
def test_parse
  # Checks each [reader, expected value] pair against one parsed record,
  # in the order given.
  verify = lambda do |record, expectations|
    expectations.each { |reader, wanted| assert_equal(wanted, record.send(reader)) }
  end

  # @vcf1 was built with explicit sample names, so samples are keyed a/b/c.
  verify.call(@vcf1, [
    [:chrom, "19"],
    [:pos, 111],
    [:id, nil],
    [:ref, "A"],
    [:alt, "C"],
    [:qual, 9.6],
    [:filter, nil],
    [:info, nil],
    [:samples, {"a"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                "b"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                "c"=>{"GT"=>"0/1", "HQ"=>"3,3"}}]
  ])

  # @vcf2 was built without sample names; samples are keyed "1", "2", "3".
  verify.call(@vcf2, [
    [:chrom, "20"],
    [:pos, 14370],
    [:id, 'rs6054257'],
    [:ref, "G"],
    [:alt, "A"],
    [:qual, 29],
    [:filter, "0"],
    [:info, {"DP"=>"14", "AF"=>"0.5", "NS"=>"3", "DB"=>nil, "H2"=>nil}],
    [:samples, {"1"=>{"DP"=>"1", "GT"=>"0|0", "HQ"=>"51,51", "GQ"=>"48"},
                "2"=>{"DP"=>"8", "GT"=>"1|0", "HQ"=>"51,51", "GQ"=>"48"},
                "3"=>{"DP"=>"5", "GT"=>"1/1", "HQ"=>"-1,-1", "GQ"=>"43"}}]
  ])

  # @vcf3 expects the same field values as @vcf1, but with samples keyed
  # "1", "2", "3" because no sample names were passed.
  verify.call(@vcf3, [
    [:chrom, "19"],
    [:pos, 111],
    [:id, nil],
    [:ref, "A"],
    [:alt, "C"],
    [:qual, 9.6],
    [:filter, nil],
    [:info, nil],
    [:samples, {"1"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                "2"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                "3"=>{"GT"=>"0/1", "HQ"=>"3,3"}}]
  ])

  # @vcf4 differs from @vcf2 in the filter ("PASS") and the last sample's HQ.
  verify.call(@vcf4, [
    [:chrom, "20"],
    [:pos, 14370],
    [:id, 'rs6054257'],
    [:ref, "G"],
    [:alt, "A"],
    [:qual, 29],
    [:filter, "PASS"],
    [:info, {"DP"=>"14", "AF"=>"0.5", "NS"=>"3", "DB"=>nil, "H2"=>nil}],
    [:samples, {"1"=>{"DP"=>"1", "GT"=>"0|0", "HQ"=>"51,51", "GQ"=>"48"},
                "2"=>{"DP"=>"8", "GT"=>"1|0", "HQ"=>"51,51", "GQ"=>"48"},
                "3"=>{"DP"=>"5", "GT"=>"1/1", "HQ"=>".,", "GQ"=>"43"}}]
  ])
end
|
67
|
+
|
68
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-samtools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,11 +11,11 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2011-
|
14
|
+
date: 2011-11-09 00:00:00.000000000Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: ffi
|
18
|
-
requirement: &
|
18
|
+
requirement: &2151840320 !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,10 +23,10 @@ dependencies:
|
|
23
23
|
version: '0'
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
|
-
version_requirements: *
|
26
|
+
version_requirements: *2151840320
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bio
|
29
|
-
requirement: &
|
29
|
+
requirement: &2151830060 !ruby/object:Gem::Requirement
|
30
30
|
none: false
|
31
31
|
requirements:
|
32
32
|
- - ! '>='
|
@@ -34,10 +34,10 @@ dependencies:
|
|
34
34
|
version: 1.4.2
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
|
-
version_requirements: *
|
37
|
+
version_requirements: *2151830060
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
39
|
name: shoulda
|
40
|
-
requirement: &
|
40
|
+
requirement: &2151828360 !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
43
|
- - ! '>='
|
@@ -45,10 +45,10 @@ dependencies:
|
|
45
45
|
version: '0'
|
46
46
|
type: :development
|
47
47
|
prerelease: false
|
48
|
-
version_requirements: *
|
48
|
+
version_requirements: *2151828360
|
49
49
|
- !ruby/object:Gem::Dependency
|
50
50
|
name: bundler
|
51
|
-
requirement: &
|
51
|
+
requirement: &2151826260 !ruby/object:Gem::Requirement
|
52
52
|
none: false
|
53
53
|
requirements:
|
54
54
|
- - ~>
|
@@ -56,10 +56,10 @@ dependencies:
|
|
56
56
|
version: 1.0.0
|
57
57
|
type: :development
|
58
58
|
prerelease: false
|
59
|
-
version_requirements: *
|
59
|
+
version_requirements: *2151826260
|
60
60
|
- !ruby/object:Gem::Dependency
|
61
61
|
name: jeweler
|
62
|
-
requirement: &
|
62
|
+
requirement: &2151823300 !ruby/object:Gem::Requirement
|
63
63
|
none: false
|
64
64
|
requirements:
|
65
65
|
- - ! '>='
|
@@ -67,10 +67,10 @@ dependencies:
|
|
67
67
|
version: '0'
|
68
68
|
type: :development
|
69
69
|
prerelease: false
|
70
|
-
version_requirements: *
|
70
|
+
version_requirements: *2151823300
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
72
|
name: rcov
|
73
|
-
requirement: &
|
73
|
+
requirement: &2151821740 !ruby/object:Gem::Requirement
|
74
74
|
none: false
|
75
75
|
requirements:
|
76
76
|
- - ! '>='
|
@@ -78,10 +78,10 @@ dependencies:
|
|
78
78
|
version: '0'
|
79
79
|
type: :development
|
80
80
|
prerelease: false
|
81
|
-
version_requirements: *
|
81
|
+
version_requirements: *2151821740
|
82
82
|
- !ruby/object:Gem::Dependency
|
83
83
|
name: bio
|
84
|
-
requirement: &
|
84
|
+
requirement: &2151820520 !ruby/object:Gem::Requirement
|
85
85
|
none: false
|
86
86
|
requirements:
|
87
87
|
- - ! '>='
|
@@ -89,10 +89,10 @@ dependencies:
|
|
89
89
|
version: 1.4.2
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
|
-
version_requirements: *
|
92
|
+
version_requirements: *2151820520
|
93
93
|
- !ruby/object:Gem::Dependency
|
94
94
|
name: ffi
|
95
|
-
requirement: &
|
95
|
+
requirement: &2151819540 !ruby/object:Gem::Requirement
|
96
96
|
none: false
|
97
97
|
requirements:
|
98
98
|
- - ! '>='
|
@@ -100,7 +100,18 @@ dependencies:
|
|
100
100
|
version: '0'
|
101
101
|
type: :development
|
102
102
|
prerelease: false
|
103
|
-
version_requirements: *
|
103
|
+
version_requirements: *2151819540
|
104
|
+
- !ruby/object:Gem::Dependency
|
105
|
+
name: rdoc
|
106
|
+
requirement: &2151818000 !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ! '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
type: :development
|
113
|
+
prerelease: false
|
114
|
+
version_requirements: *2151818000
|
104
115
|
description: ! "Binder of samtools for ruby, on the top of FFI. \n\n This project
|
105
116
|
was born from the need to add support of BAM files to \n the gee_fu genome browser
|
106
117
|
(http://github.com/danmaclean/gee_fu)."
|
@@ -185,6 +196,7 @@ files:
|
|
185
196
|
- doc/tutorial.html
|
186
197
|
- doc/tutorial.pdf
|
187
198
|
- ext/Makefile-bioruby.patch
|
199
|
+
- ext/Rakefile
|
188
200
|
- ext/mkrf_conf.rb
|
189
201
|
- lib/bio-samtools.rb
|
190
202
|
- lib/bio/.DS_Store
|
@@ -192,10 +204,13 @@ files:
|
|
192
204
|
- lib/bio/db/sam/bam.rb
|
193
205
|
- lib/bio/db/sam/external/COPYING
|
194
206
|
- lib/bio/db/sam/external/VERSION
|
207
|
+
- lib/bio/db/sam/external/libbam.1.dylib
|
208
|
+
- lib/bio/db/sam/external/libbam.a
|
195
209
|
- lib/bio/db/sam/faidx.rb
|
196
210
|
- lib/bio/db/sam/library.rb
|
197
211
|
- lib/bio/db/sam/pileup.rb
|
198
212
|
- lib/bio/db/sam/sam.rb
|
213
|
+
- lib/bio/db/sam/vcf.rb
|
199
214
|
- test/basictest.rb
|
200
215
|
- test/coverage.rb
|
201
216
|
- test/coverage_plot.rb
|
@@ -214,7 +229,6 @@ files:
|
|
214
229
|
- test/samples/small/test_chr.fasta.amb
|
215
230
|
- test/samples/small/test_chr.fasta.ann
|
216
231
|
- test/samples/small/test_chr.fasta.bwt
|
217
|
-
- test/samples/small/test_chr.fasta.fai
|
218
232
|
- test/samples/small/test_chr.fasta.pac
|
219
233
|
- test/samples/small/test_chr.fasta.rbwt
|
220
234
|
- test/samples/small/test_chr.fasta.rpac
|
@@ -222,7 +236,10 @@ files:
|
|
222
236
|
- test/samples/small/test_chr.fasta.sa
|
223
237
|
- test/samples/small/testu.bam
|
224
238
|
- test/samples/small/testu.bam.bai
|
239
|
+
- test/test_basic.rb
|
225
240
|
- test/test_bio-samtools.rb
|
241
|
+
- test/test_pileup.rb
|
242
|
+
- test/test_vcf.rb
|
226
243
|
homepage: http://github.com/helios/bioruby-samtools
|
227
244
|
licenses:
|
228
245
|
- MIT
|
@@ -238,7 +255,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
238
255
|
version: '0'
|
239
256
|
segments:
|
240
257
|
- 0
|
241
|
-
hash:
|
258
|
+
hash: -1183520990124056077
|
242
259
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
243
260
|
none: false
|
244
261
|
requirements:
|
@@ -1 +0,0 @@
|
|
1
|
-
chr_1 69930 7 70 71
|