bio-samtools-wrapper 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.travis.yml +27 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +702 -0
- data/README.md +501 -0
- data/Rakefile +73 -0
- data/VERSION +1 -0
- data/bin/bam_consensus.rb +85 -0
- data/bio-samtools-wrapper.gemspec +181 -0
- data/doc/Bio/DB/Alignment.html +552 -0
- data/doc/Bio/DB/Pileup.html +711 -0
- data/doc/Bio/DB/SAM/Library.html +167 -0
- data/doc/Bio/DB/SAM/Tools.html +109 -0
- data/doc/Bio/DB/SAM.html +1853 -0
- data/doc/Bio/DB/Tag.html +208 -0
- data/doc/Bio/DB/Vcf.html +431 -0
- data/doc/Bio/DB.html +105 -0
- data/doc/Bio.html +175 -0
- data/doc/LICENSE_txt.html +846 -0
- data/doc/created.rid +9 -0
- data/doc/fonts/Lato-Light.ttf +0 -0
- data/doc/fonts/Lato-LightItalic.ttf +0 -0
- data/doc/fonts/Lato-Regular.ttf +0 -0
- data/doc/fonts/Lato-RegularItalic.ttf +0 -0
- data/doc/fonts/SourceCodePro-Bold.ttf +0 -0
- data/doc/fonts/SourceCodePro-Regular.ttf +0 -0
- data/doc/fonts.css +167 -0
- data/doc/images/add.png +0 -0
- data/doc/images/arrow_up.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +106 -0
- data/doc/js/darkfish.js +140 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +109 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/rdoc.css +580 -0
- data/doc/table_of_contents.html +305 -0
- data/ext/Makefile-bioruby.patch +12 -0
- data/ext/Makefile-suse.patch +11 -0
- data/ext/mkrf_conf.rb +118 -0
- data/lib/bio/BIOExtensions.rb +89 -0
- data/lib/bio/db/alignment.rb +64 -0
- data/lib/bio/db/fastadb.rb +320 -0
- data/lib/bio/db/pileup.rb +273 -0
- data/lib/bio/db/sam/external/COPYING +21 -0
- data/lib/bio/db/sam/external/VERSION +1 -0
- data/lib/bio/db/sam/library.rb +32 -0
- data/lib/bio/db/sam.rb +778 -0
- data/lib/bio/db/vcf.rb +105 -0
- data/lib/bio-samtools-wrapper.rb +9 -0
- data/test/.gitignore +1 -0
- data/test/helper.rb +18 -0
- data/test/sample.vcf +24 -0
- data/test/samples/.gitignore +1 -0
- data/test/samples/LCI/NC_001988.ffn +2 -0
- data/test/samples/LCI/test.bam +0 -0
- data/test/samples/LCI/test.bam.bai +0 -0
- data/test/samples/small/dupes.bam +0 -0
- data/test/samples/small/dupes.sam +274 -0
- data/test/samples/small/ids2.txt +1 -0
- data/test/samples/small/map_for_reheader.sam +8 -0
- data/test/samples/small/map_to_merge1.bam +0 -0
- data/test/samples/small/map_to_merge1.bam.bai +0 -0
- data/test/samples/small/map_to_merge1.sam +8 -0
- data/test/samples/small/map_to_merge2.bam +0 -0
- data/test/samples/small/map_to_merge2.bam.bai +0 -0
- data/test/samples/small/map_to_merge2.sam +8 -0
- data/test/samples/small/no_md.sam +8 -0
- data/test/samples/small/sorted.bam +0 -0
- data/test/samples/small/sorted.bam.bai +0 -0
- data/test/samples/small/test.sai +0 -0
- data/test/samples/small/test.tam +10 -0
- data/test/samples/small/test_chr.fasta +1000 -0
- data/test/samples/small/test_chr.fasta.1.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.2.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.3.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.4.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.amb +2 -0
- data/test/samples/small/test_chr.fasta.ann +3 -0
- data/test/samples/small/test_chr.fasta.bwt +0 -0
- data/test/samples/small/test_chr.fasta.pac +0 -0
- data/test/samples/small/test_chr.fasta.rbwt +0 -0
- data/test/samples/small/test_chr.fasta.rev.1.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.rev.2.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.rpac +0 -0
- data/test/samples/small/test_chr.fasta.rsa +0 -0
- data/test/samples/small/test_chr.fasta.sa +0 -0
- data/test/samples/small/test_cov.svg +273 -0
- data/test/samples/small/test_fastadb.fasta +34 -0
- data/test/samples/small/testu.bam +0 -0
- data/test/samples/small/testu.bed +2 -0
- data/test/test_bio-samtools-wrapper.rb +1 -0
- data/test/test_fastadb.rb +89 -0
- data/test/test_pileup.rb +90 -0
- data/test/test_sam.rb +421 -0
- data/test/test_vcf.rb +79 -0
- data/tutorial/tutorial.html +474 -0
- data/tutorial/tutorial.md +424 -0
- data/tutorial/tutorial.pdf +0 -0
- metadata +254 -0
data/test/test_pileup.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bio/db/pileup'
|
5
|
+
require "test/unit"
|
6
|
+
gem 'test-unit'
|
7
|
+
|
8
|
+
|
9
|
+
# Unit tests for Bio::DB::Pileup, driven by four hand-written pileup lines.
class TestPileup < Test::Unit::TestCase

  # Parses the pileup fixture lines shared by the tests below.
  def setup
    @pu = Bio::DB::Pileup.new("seq1 279 C 23 A..T,,.,.,...,,,.,..... ;75&<<<<<<<<<=<<<9<<:<<")
    # a snp...
    @pu2 = Bio::DB::Pileup.new("seq1 279 C 23 ATTT,,.,.TTTT,,,.,TTTTT ;75&<<<<<<<<<=<<<9<<:<<")
    # an indel..
    @pu3 = Bio::DB::Pileup.new("seq2 156 * +AG/+AG 71 252 99 11 +AG * 3 8 0")
    # two heterozygous alt snps
    @pu4 = Bio::DB::Pileup.new("seq1 279 C 24 AAAAAAAAATTTTTTTTTAATTAA ;75&<<<<<<<<<=<<<9<<:<<<")
  end

  # @pu carries two bases that differ from the reference.
  def test_non_ref_count
    assert_equal(2, @pu.non_ref_count)
  end

  # ...and 21 bases that agree with it.
  def test_ref_count
    assert_equal(21, @pu.ref_count)
  end

  # Consensus is the reference base for @pu and the SNP base for @pu2.
  def test_consensus
    assert_equal('C', @pu.consensus)
    assert_equal('T', @pu2.consensus)
  end

  # Per-base tally of the non-reference calls in @pu.
  def test_non_refs
    { :T => 1, :A => 1, :G => 0, :C => 0 }.each do |base, expected|
      assert_equal(expected, @pu.non_refs[base])
    end
  end

  # The VCF line produced from a pileup must keep the chromosome name.
  def test_to_vcf
    record = Bio::DB::Vcf.new(@pu.to_vcf)
    assert_equal('seq1', record.chrom)
  end

  # indel_gt is invoked through #send; element 0 is the allele, element 1 the genotype.
  def test_indel_gt
    call = @pu3.send(:indel_gt)
    assert_equal('IAG', call[0])
    assert_equal('1/1', call[1])
  end

  # Same check as test_indel_gt, for the SNP fixture.
  def test_snp_gt
    call = @pu2.send(:snp_gt)
    assert_equal('T,', call[0])
    assert_equal('1/1', call[1])
  end

  # genotype_list must give the SNP and indel results asserted in the two tests above.
  def test_genotype_list
    snp_call = @pu2.genotype_list
    indel_call = @pu3.genotype_list
    assert_equal('T,', snp_call[0])
    assert_equal('1/1', snp_call[1])
    assert_equal('IAG', indel_call[0])
    assert_equal('1/1', indel_call[1])
  end

  # IUPAC code R may only expand to A and/or G.
  def test_iupac_to_base
    Bio::DB::Pileup.iupac_to_base('R').each do |base|
      assert_send([['A', 'G'], :member?, base])
    end
  end

  def test_parse_indel
    assert_equal('IAG/+AG', @pu3.parse_indel(@pu3.consensus))
  end

  # Counts the tab separators in the string form (number of columns - 1).
  def test_to_s
    assert_equal(12, @pu3.to_s.count("\t"))
    assert_equal(5, @pu.to_s.count("\t"))
  end

  def test_consensus_iuap
    assert_equal('w', @pu4.consensus_iuap(0.1))
  end

end
|
data/test/test_sam.rb
ADDED
@@ -0,0 +1,421 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bio/db/sam'
|
5
|
+
require "test/unit"
|
6
|
+
#gem 'ruby-prof'
|
7
|
+
gem 'test-unit'
|
8
|
+
#require "ruby-prof"
|
9
|
+
|
10
|
+
|
11
|
+
class TestBioDbSam < Test::Unit::TestCase
|
12
|
+
# include RubyProf::Test
|
13
|
+
|
14
|
+
class << self
|
15
|
+
|
16
|
+
# Class-level cleanup: deletes the files that the tests in this class write
# into test/samples/small. A file that does not exist (for example because the
# test that produces it failed or was not run) is skipped, so one missing file
# no longer raises Errno::ENOENT and leaves the remaining files behind.
def shutdown
  generated = %w[
    different_index.bam.bai
    dupes_rmdup.bam
    mates_fixed.bam
    reheader.bam
    test_chr.fasta.fai
    test_sorted.bam
    maps_merged.bam
    maps_cated.bam
    testu.out
  ]
  generated.each do |name|
    path = "test/samples/small/#{name}"
    File.delete(path) if File.exist?(path)
  end
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
# Defines the fixture paths used throughout the tests and builds the
# Bio::DB::Sam instance (@sam) from testu.bam and test_chr.fasta.
def setup
  @test_folder   = "test/samples/small"
  @testTAMFile   = "#{@test_folder}/test.tam"
  @testBAMFile   = "#{@test_folder}/testu.bam"
  @testLCI       = "test/samples/LCI/test.bam"
  @testLCIref    = "test/samples/LCI/NC_001988.ffn"
  @testReference = "#{@test_folder}/test_chr.fasta"
  @bed_file      = "#{@test_folder}/testu.bed"
  @sam = Bio::DB::Sam.new(:fasta => @testReference, :bam => @testBAMFile)
end
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
# The constructor must return a Bio::DB::Sam for the fixture files, raise
# IOError when the BAM path carries a bogus suffix, and raise ArgumentError
# when called without any options.
def test_new
  assert_kind_of(Bio::DB::Sam, @sam)

  bogus_bam = @testBAMFile + "ads"
  assert_raise(IOError) { Bio::DB::Sam.new(:fasta => @testReference, :bam => bogus_bam) }
  assert_raise(ArgumentError) { Bio::DB::Sam.new }
end
|
61
|
+
|
62
|
+
# Indexes the test BAM twice - once with the default index name and once with
# an explicit :out_index - and checks that each index file can be opened and
# is not empty.
def test_index
  test_bai_file = @testBAMFile + ".bai"
  # start from a clean slate: remove an index left over from an earlier run
  if File.exist?(test_bai_file)
    puts "bam index exists....deleting..."
    File.delete(test_bai_file)
  end

  # no index file yet
  assert_equal(false, @sam.indexed?)
  # index the bam file
  @sam.index
  assert_equal(true, @sam.indexed?)
  # make sure the .bai file exists; the block form of File.open closes the
  # handle again instead of leaking it
  assert_nothing_thrown do
    File.open(test_bai_file, "r") { }
  end
  assert(File.size(test_bai_file) > 0, "From test_index: .bai file is empty")

  # as above, but give the output a different name
  test_bai_file = @test_folder + "/different_index.bam.bai"
  @sam.index(:out_index => test_bai_file)
  assert_nothing_thrown do
    File.open(test_bai_file, "r") { }
  end
  assert(File.size(test_bai_file) > 0, "From test_index: .bai file is empty")
end
|
88
|
+
|
89
|
+
# Every object yielded by #view must be a Bio::DB::Alignment whose reference
# name is 'chr_1'.
def test_view
  @sam.view do |alignment|
    # expected value goes first so failure messages label both sides correctly
    assert_equal(Bio::DB::Alignment, alignment.class)
    assert_equal("chr_1", alignment.rname)
  end
end
|
97
|
+
|
98
|
+
# Checks the type, reference name and number of alignments yielded by #fetch,
# first on the small test BAM and then on the LCI sample.
def test_fetch
  fetched = 0
  @sam.index
  @sam.fetch("chr_1", 10, 1000) do |alignment|
    # test that all the objects are Bio::DB::Alignment objects on chr_1
    assert_equal(Bio::DB::Alignment, alignment.class)
    assert_equal("chr_1", alignment.rname)
    fetched += 1
  end
  assert(fetched > 0)
  assert_equal(9, fetched)

  bam = Bio::DB::Sam.new(:bam => @testLCI, :fasta => @testLCIref)
  bam.open

  count = 0
  bam.fetch("NC_001988.2", 0, 200) { |_alignment| count += 1 }
  assert_equal(36, count)

  # start == end: a single-position query
  count = 0
  bam.fetch("NC_001988.2", 75, 75) { |_alignment| count += 1 }
  assert_equal(7, count)
end
|
126
|
+
|
127
|
+
# Passes a Proc to #fetch_with_function and counts how many alignments it
# receives for several regions of chr_1.
def test_fetch_with_function
  count = 0
  block = Proc.new do |a|
    assert_equal(Bio::DB::Alignment, a.class)
    count += 1
  end

  @sam.fetch_with_function("chr_1", 10, 1000, &block)
  assert_equal(9, count)

  count = 0
  @sam.fetch_with_function("chr_1", 82, 140, &block)
  assert_equal(4, count)

  # count is NOT reset here, so 8 is the running total of this call and the
  # previous one
  @sam.fetch_with_function("chr_1", 0, 140, &block)
  assert_equal(8, count)

  count2 = 0
  @sam.fetch("chr_1", 0, 200) { |_alignment| count2 += 1 }
  assert_equal(6, count2)
end
|
149
|
+
|
150
|
+
# Every value returned by chromosome_coverage for chr_1 (arguments 10, 1000)
# must be one of 1.0, 2.0 or 3.0.
def test_chromosome_coverage
  coverage = @sam.chromosome_coverage("chr_1", 10, 1000)
  coverage.each { |value| assert_send([[1.0, 2.0, 3.0], :member?, value]) }
end
|
157
|
+
|
158
|
+
# Checks the average coverage reported for chr_1 with arguments (33, 19).
# The BAM index is created on demand and always removed afterwards, even when
# the assertion fails, so no index is left behind for other tests.
def test_average_coverage
  # there should be 10 positions with cov of 1.0 and 10 with cov of 2.0, so average of 1.5
  test_bai_file = @testBAMFile + ".bai"
  @sam.index unless File.exist?(test_bai_file)
  avcov = @sam.average_coverage("chr_1", 33, 19)
  assert_equal(1.5, avcov)
ensure
  File.delete(test_bai_file) if test_bai_file && File.exist?(test_bai_file)
end
|
168
|
+
|
169
|
+
# Runs #faidx and checks that the resulting .fai file next to the reference
# can be opened and is not empty.
def test_faidx
  @sam.faidx
  test_fai_file = @testReference + ".fai"
  # test that the .fai file exists; the block form of File.open closes the
  # handle again instead of leaking it
  assert_nothing_thrown do
    File.open(test_fai_file, "r") { }
  end
  # test that the file is not empty
  assert(File.size(test_fai_file) > 0, "From test_faidx: .fai file is empty")
end
|
179
|
+
|
180
|
+
# index_stats must contain exactly the entries 'chr_1' and '*', with the
# lengths and read counts asserted below.
def test_index_stats
  # fetch once and reuse, rather than recomputing the stats for every assertion
  stats = @sam.index_stats
  stats.each_pair do |seq, _stat|
    assert_send([['chr_1', '*'], :member?, seq])
  end
  assert_equal(69930, stats['chr_1'][:length])
  assert_equal(9, stats['chr_1'][:mapped_reads])
  assert_equal(0, stats['chr_1'][:unmapped_reads])
  assert_equal(0, stats['*'][:length])
  assert_equal(0, stats['*'][:mapped_reads])
  assert_equal(0, stats['*'][:unmapped_reads])
  assert_equal(2, stats.size)
end
|
194
|
+
|
195
|
+
# fetch_reference for chr_1, positions 1 to 70, with :as_bio => false must
# return the expected plain string.
def test_fetch_reference
  # this is the first 70 nucleotides of the test sequence
  seq_expected = "CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA"
  # fetch the first 70 nucleotides
  seq_fetched = @sam.fetch_reference("chr_1", 1, 70, :as_bio => false)
  # test they're the same
  assert_equal(seq_expected, seq_fetched)
end
|
203
|
+
|
204
|
+
# Sorts the test BAM to <test_folder>/test_sorted.bam, reopens the result and
# checks that the start position of every read is greater than that of the
# read before it.
def test_sort
  prefix = @test_folder + "/test_sorted"
  @sam.sort(:prefix => prefix)
  @sortsam = Bio::DB::Sam.new(:fasta => @testReference, :bam => prefix + ".bam")

  previous = 0
  @sortsam.view do |alignment|
    assert(alignment.pos > previous, "Not sorted by position")
    previous = alignment.pos
  end
end
|
220
|
+
|
221
|
+
# Replaces the BAM header with the one from map_for_reheader.sam and checks
# that every alignment in the rewritten file reports 'chr_2' as its reference.
def test_reheader
  sam_header = @test_folder + "/map_for_reheader.sam"
  outfile = @test_folder + "/reheader.bam"

  @sam.reheader(sam_header, :out => outfile)
  reheader_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => outfile)
  # check that the reference is 'chr_2'
  reheader_bam.view do |alignment|
    assert_equal("chr_2", alignment.rname)
  end
end
|
232
|
+
|
233
|
+
# Runs calmd (with :S => true) on no_md.sam and checks that every alignment
# handed to the block carries an 'MD' tag.
def test_calmd
  md = Bio::DB::Sam.new(:fasta => @testReference, :bam => @test_folder + "/no_md.sam")
  md.calmd(:S => true) do |alignment|
    assert(alignment.tags.has_key?('MD'), "From test_calmd: couldn't find the MD tag")
  end
end
|
240
|
+
|
241
|
+
# mpileup must yield Bio::DB::Pileup objects with :g => false and
# Bio::DB::Vcf objects with :u => true, all of them on 'chr_1'.
def test_mpileup
  # create an mpileup
  @sam.mpileup(:g => false) do |pileup|
    # test that all the objects are Bio::DB::Pileup objects
    assert_kind_of(Bio::DB::Pileup, pileup)
    # test that the reference name is 'chr_1' for all objects
    assert_equal('chr_1', pileup.ref_name)
  end
  # do the same for Vcf output
  @sam.mpileup(:u => true) do |vcf|
    assert_kind_of(Bio::DB::Vcf, vcf)
    assert_equal('chr_1', vcf.chrom)
  end
end
|
256
|
+
|
257
|
+
# A Region built from an options hash must match one that is built empty and
# then filled in through its setters.
def test_region_new
  from_options = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)

  from_setters = Bio::DB::Fasta::Region.new
  from_setters.entry = "chr_1"
  from_setters.start = 1
  from_setters.end = 334

  assert_equal(from_options.entry, from_setters.entry)
  assert_equal(from_options.start, from_setters.start)
  assert_equal(from_options.end, from_setters.end)
end
|
268
|
+
|
269
|
+
# Runs mpileup_cached on chr_1:1-334 with :min_per => 0.2, then inspects the
# cached region: 5 SNPs between reference and consensus, 220 called positions.
def test_mpileup_reg
  # create an mpileup
  reg = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)

  @sam.mpileup_cached(:r => reg, :g => false, :min_cov => 1, :min_per => 0.2) do |pileup|
    # test that all the objects are Bio::DB::Pileup objects
    assert_kind_of(Bio::DB::Pileup, pileup)
    # test that the reference name is 'chr_1' for all objects
    assert_equal('chr_1', pileup.ref_name)
  end

  # the cached result is looked up by the string form of the region
  region = @sam.cached_regions[reg.to_s]
  snps_tot = Bio::Sequence.snps_between(region.reference, region.consensus)
  assert_equal(5, snps_tot)
  assert_equal(220, region.called)
end
|
292
|
+
|
293
|
+
# Same as test_mpileup_reg but with :min_per => 0.4, which leaves a single SNP
# between reference and consensus; 220 positions are still called.
def test_mpileup_reg_05
  # create an mpileup
  reg = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)
  @sam.mpileup_cached(:r => reg, :g => false, :min_cov => 1, :min_per => 0.4) do |pileup|
    # test that all the objects are Bio::DB::Pileup objects
    assert_kind_of(Bio::DB::Pileup, pileup)
    # test that the reference name is 'chr_1' for all objects
    assert_equal('chr_1', pileup.ref_name)
  end

  # the cached result is looked up by the string form of the region
  region = @sam.cached_regions[reg.to_s]
  snps_tot = Bio::Sequence.snps_between(region.reference, region.consensus)
  assert_equal(1, snps_tot)
  assert_equal(220, region.called)
end
|
312
|
+
|
313
|
+
# The depth of coverage should be 1 at every position of chr_1:25-42.
def test_depth
  @sam.depth(:r => "chr_1:25-42") do |al|
    # index 2 of each yielded record is compared as an integer depth
    assert_equal(1, al[2].to_i)
  end
end
|
319
|
+
|
320
|
+
# Runs fix_mates and checks that the output BAM can be opened and is not empty.
def test_fixmate
  mates_fixed_bam = @test_folder + "/mates_fixed.bam"
  @sam.fix_mates(:out_bam => mates_fixed_bam)
  # the block form of File.open closes the handle again instead of leaking it
  assert_nothing_thrown do
    File.open(mates_fixed_bam, "r") { }
  end
  assert(File.size(mates_fixed_bam) > 0, "From test_fixmate: .bam file is empty")
end
|
328
|
+
|
329
|
+
# flag_stats must report 9 reads at the start of its first line.
def test_flagstats
  # get the stats
  stats = @sam.flag_stats
  # the number of reads mapped will be the first character on the first line.
  # NOTE(review): only element [0][0] is read, so if that is a single
  # character this check cannot handle counts of 10 or more - confirm against
  # the shape of flag_stats' return value.
  no_reads_mapped = stats[0][0].to_i
  # check that it's '9'
  assert_equal(9, no_reads_mapped)
end
|
337
|
+
|
338
|
+
# Merges map_to_merge1.bam and map_to_merge2.bam (passed as Bio::DB::Sam
# objects, with :n => true) and checks that the result holds 10 alignments.
def test_merge
  bam1 = @test_folder + "/map_to_merge1.bam"
  bam2 = @test_folder + "/map_to_merge2.bam"
  bam_to_merge1 = Bio::DB::Sam.new(:fasta => @testReference, :bam => bam1)
  bam_to_merge2 = Bio::DB::Sam.new(:fasta => @testReference, :bam => bam2)
  bam_files = [bam_to_merge1, bam_to_merge2]

  merged_bam_file = @test_folder + "/maps_merged.bam"
  # remove output left over from an earlier run
  File.delete(merged_bam_file) if File.exist?(merged_bam_file)
  @sam.merge(:out => merged_bam_file, :bams => bam_files, :n => true)
  merged_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => merged_bam_file)

  no_reads_mapped = 0
  merged_bam.view do |al|
    assert_kind_of(Bio::DB::Alignment, al)
    no_reads_mapped += 1
  end
  assert_equal(10, no_reads_mapped)
end
|
358
|
+
|
359
|
+
# Combines the two map_to_merge BAM files, passed as plain paths, into
# maps_cated.bam and checks that the result holds 10 alignments.
def test_cat
  # same files used for merge, but we'll cat them instead
  # NOTE(review): despite the test name, the call below is #merge (without
  # :n), not a cat method - confirm whether that is intended.
  bam1 = @test_folder + "/map_to_merge1.bam"
  bam2 = @test_folder + "/map_to_merge2.bam"
  bam_files = [bam1, bam2]

  cat_bam_file = @test_folder + "/maps_cated.bam"
  # remove output left over from an earlier run
  File.delete(cat_bam_file) if File.exist?(cat_bam_file)
  @sam.merge(:out => cat_bam_file, :bams => bam_files)
  cated_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => cat_bam_file)

  no_reads_mapped = 0
  cated_bam.view do |al|
    assert_kind_of(Bio::DB::Alignment, al)
    no_reads_mapped += 1
  end
  # there should be 10 reads in the cat'd maps
  assert_equal(10, no_reads_mapped)
end
|
379
|
+
|
380
|
+
# Runs remove_duplicates (:s => true) on dupes.bam and counts the reads left
# in the output file.
def test_rmdup
  # dupes contains 4 reads mapped once and one read mapped to the same place 268 times.
  dupes = @test_folder + "/dupes.bam"
  unduped = @test_folder + "/dupes_rmdup.bam"
  bam_with_dupes = Bio::DB::Sam.new(:fasta => @testReference, :bam => dupes)
  bam_with_dupes.remove_duplicates(:s => true, :out => unduped)

  unduped_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => unduped)
  # rmdup should remove 267 of the 268 reads mapping to the same place, so producing a bam file with 5 reads
  readcount = 0
  unduped_bam.view { |_alignment| readcount += 1 }
  assert_equal(5, readcount)
end
|
395
|
+
|
396
|
+
# Smoke test: targetcut on sorted.bam must complete inside assert_nothing_thrown.
def test_targetcut
  cut = Bio::DB::Sam.new(:fasta => @testReference, :bam => @test_folder + "/sorted.bam")
  assert_nothing_thrown { cut.targetcut }
end
|
403
|
+
|
404
|
+
# Bio::DB::Sam.docs must return a fixed message when given an unknown program name.
def test_docs
  # force an error (use 'samtool' instead of 'samtools')
  output = Bio::DB::Sam.docs('samtool', 'tview')
  assert_equal("program must be 'samtools' or 'bcftools'", output)
end
|
409
|
+
|
410
|
+
# Runs bedcov for testu.bed and checks that the fourth tab-separated column of
# every output line is 630.
def test_bedcov
  out_file = @test_folder + "/testu.out"
  @sam.bedcov(:bed => @bed_file, :out => out_file)
  # File.foreach closes the file even when an assertion fails part-way through
  File.foreach(out_file) do |line|
    fields = line.split(/\t/)
    # split yields Strings (the last one still carrying the newline), so the
    # column has to be converted before comparing it with the Integer 630
    assert_equal(630, fields[3].to_i)
  end
end
|
420
|
+
|
421
|
+
end
|
data/test/test_vcf.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bio/db/vcf'
|
5
|
+
require "test/unit"
|
6
|
+
gem 'test-unit'
|
7
|
+
|
8
|
+
|
9
|
+
# Unit tests for Bio::DB::Vcf, driven by four hand-written VCF lines.
class TestVcf < Test::Unit::TestCase

  # Parses the VCF fixture lines; only @vcf1 is given explicit sample names.
  def setup
    # from a 3.3 vcf file
    @vcf1 = Bio::DB::Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3", ["a", "b", "c"])
    # from a 3.3 vcf file
    @vcf2 = Bio::DB::Vcf.new("20 14370 rs6054257 G A 29 0 NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:-1,-1")
    # from a 4.0 vcf file
    @vcf3 = Bio::DB::Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3")
    # from a 4.0 vcf file
    @vcf4 = Bio::DB::Vcf.new("20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,")
  end

  # Field-by-field check of all four parsed records. Samples are keyed by the
  # names given to the constructor, or by "1", "2", "3" when none were given.
  def test_parse
    info = { "DP" => "14", "AF" => "0.5", "NS" => "3", "DB" => nil, "H2" => nil }

    assert_equal("19", @vcf1.chrom)
    assert_equal(111, @vcf1.pos)
    assert_nil(@vcf1.id)
    assert_equal("A", @vcf1.ref)
    assert_equal("C", @vcf1.alt)
    assert_equal(9.6, @vcf1.qual)
    assert_nil(@vcf1.filter)
    assert_nil(@vcf1.info)
    assert_equal(
      {
        "a" => { "GT" => "0|0", "HQ" => "10,10" },
        "b" => { "GT" => "0|0", "HQ" => "10,10" },
        "c" => { "GT" => "0/1", "HQ" => "3,3" }
      },
      @vcf1.samples
    )

    assert_equal("20", @vcf2.chrom)
    assert_equal(14370, @vcf2.pos)
    assert_equal('rs6054257', @vcf2.id)
    assert_equal("G", @vcf2.ref)
    assert_equal("A", @vcf2.alt)
    assert_equal(29, @vcf2.qual)
    assert_equal("0", @vcf2.filter)
    assert_equal(info, @vcf2.info)
    assert_equal(
      {
        "1" => { "DP" => "1", "GT" => "0|0", "HQ" => "51,51", "GQ" => "48" },
        "2" => { "DP" => "8", "GT" => "1|0", "HQ" => "51,51", "GQ" => "48" },
        "3" => { "DP" => "5", "GT" => "1/1", "HQ" => "-1,-1", "GQ" => "43" }
      },
      @vcf2.samples
    )

    assert_equal("19", @vcf3.chrom)
    assert_equal(111, @vcf3.pos)
    assert_nil(@vcf3.id)
    assert_equal("A", @vcf3.ref)
    assert_equal("C", @vcf3.alt)
    assert_equal(9.6, @vcf3.qual)
    assert_nil(@vcf3.filter)
    assert_nil(@vcf3.info)
    assert_equal(
      {
        "1" => { "GT" => "0|0", "HQ" => "10,10" },
        "2" => { "GT" => "0|0", "HQ" => "10,10" },
        "3" => { "GT" => "0/1", "HQ" => "3,3" }
      },
      @vcf3.samples
    )

    assert_equal("20", @vcf4.chrom)
    assert_equal(14370, @vcf4.pos)
    assert_equal('rs6054257', @vcf4.id)
    assert_equal("G", @vcf4.ref)
    assert_equal("A", @vcf4.alt)
    assert_equal(29, @vcf4.qual)
    assert_equal("PASS", @vcf4.filter)
    assert_equal(info, @vcf4.info)
    assert_equal(
      {
        "1" => { "DP" => "1", "GT" => "0|0", "HQ" => "51,51", "GQ" => "48" },
        "2" => { "DP" => "8", "GT" => "1|0", "HQ" => "51,51", "GQ" => "48" },
        "3" => { "DP" => "5", "GT" => "1/1", "HQ" => ".,", "GQ" => "43" }
      },
      @vcf4.samples
    )
  end

  # int_or_raw must accept an Integer without raising.
  def test_int_or_raw
    assert_nothing_raised { @vcf1.int_or_raw(1) }
    # NOTE(review): `A` is a bare constant, not the string 'A'. If no constant
    # A is defined, the NameError from evaluating the argument is what
    # satisfies assert_raise here, before int_or_raw is ever called - confirm
    # what this was meant to exercise.
    assert_raise { @vcf1.int_or_raw(A) }
  end

end
|