bio-samtools-wrapper 2.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.travis.yml +27 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +702 -0
- data/README.md +501 -0
- data/Rakefile +73 -0
- data/VERSION +1 -0
- data/bin/bam_consensus.rb +85 -0
- data/bio-samtools-wrapper.gemspec +181 -0
- data/doc/Bio/DB/Alignment.html +552 -0
- data/doc/Bio/DB/Pileup.html +711 -0
- data/doc/Bio/DB/SAM/Library.html +167 -0
- data/doc/Bio/DB/SAM/Tools.html +109 -0
- data/doc/Bio/DB/SAM.html +1853 -0
- data/doc/Bio/DB/Tag.html +208 -0
- data/doc/Bio/DB/Vcf.html +431 -0
- data/doc/Bio/DB.html +105 -0
- data/doc/Bio.html +175 -0
- data/doc/LICENSE_txt.html +846 -0
- data/doc/created.rid +9 -0
- data/doc/fonts/Lato-Light.ttf +0 -0
- data/doc/fonts/Lato-LightItalic.ttf +0 -0
- data/doc/fonts/Lato-Regular.ttf +0 -0
- data/doc/fonts/Lato-RegularItalic.ttf +0 -0
- data/doc/fonts/SourceCodePro-Bold.ttf +0 -0
- data/doc/fonts/SourceCodePro-Regular.ttf +0 -0
- data/doc/fonts.css +167 -0
- data/doc/images/add.png +0 -0
- data/doc/images/arrow_up.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +106 -0
- data/doc/js/darkfish.js +140 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +109 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/rdoc.css +580 -0
- data/doc/table_of_contents.html +305 -0
- data/ext/Makefile-bioruby.patch +12 -0
- data/ext/Makefile-suse.patch +11 -0
- data/ext/mkrf_conf.rb +118 -0
- data/lib/bio/BIOExtensions.rb +89 -0
- data/lib/bio/db/alignment.rb +64 -0
- data/lib/bio/db/fastadb.rb +320 -0
- data/lib/bio/db/pileup.rb +273 -0
- data/lib/bio/db/sam/external/COPYING +21 -0
- data/lib/bio/db/sam/external/VERSION +1 -0
- data/lib/bio/db/sam/library.rb +32 -0
- data/lib/bio/db/sam.rb +778 -0
- data/lib/bio/db/vcf.rb +105 -0
- data/lib/bio-samtools-wrapper.rb +9 -0
- data/test/.gitignore +1 -0
- data/test/helper.rb +18 -0
- data/test/sample.vcf +24 -0
- data/test/samples/.gitignore +1 -0
- data/test/samples/LCI/NC_001988.ffn +2 -0
- data/test/samples/LCI/test.bam +0 -0
- data/test/samples/LCI/test.bam.bai +0 -0
- data/test/samples/small/dupes.bam +0 -0
- data/test/samples/small/dupes.sam +274 -0
- data/test/samples/small/ids2.txt +1 -0
- data/test/samples/small/map_for_reheader.sam +8 -0
- data/test/samples/small/map_to_merge1.bam +0 -0
- data/test/samples/small/map_to_merge1.bam.bai +0 -0
- data/test/samples/small/map_to_merge1.sam +8 -0
- data/test/samples/small/map_to_merge2.bam +0 -0
- data/test/samples/small/map_to_merge2.bam.bai +0 -0
- data/test/samples/small/map_to_merge2.sam +8 -0
- data/test/samples/small/no_md.sam +8 -0
- data/test/samples/small/sorted.bam +0 -0
- data/test/samples/small/sorted.bam.bai +0 -0
- data/test/samples/small/test.sai +0 -0
- data/test/samples/small/test.tam +10 -0
- data/test/samples/small/test_chr.fasta +1000 -0
- data/test/samples/small/test_chr.fasta.1.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.2.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.3.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.4.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.amb +2 -0
- data/test/samples/small/test_chr.fasta.ann +3 -0
- data/test/samples/small/test_chr.fasta.bwt +0 -0
- data/test/samples/small/test_chr.fasta.pac +0 -0
- data/test/samples/small/test_chr.fasta.rbwt +0 -0
- data/test/samples/small/test_chr.fasta.rev.1.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.rev.2.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.rpac +0 -0
- data/test/samples/small/test_chr.fasta.rsa +0 -0
- data/test/samples/small/test_chr.fasta.sa +0 -0
- data/test/samples/small/test_cov.svg +273 -0
- data/test/samples/small/test_fastadb.fasta +34 -0
- data/test/samples/small/testu.bam +0 -0
- data/test/samples/small/testu.bed +2 -0
- data/test/test_bio-samtools-wrapper.rb +1 -0
- data/test/test_fastadb.rb +89 -0
- data/test/test_pileup.rb +90 -0
- data/test/test_sam.rb +421 -0
- data/test/test_vcf.rb +79 -0
- data/tutorial/tutorial.html +474 -0
- data/tutorial/tutorial.md +424 -0
- data/tutorial/tutorial.pdf +0 -0
- metadata +254 -0
data/test/test_pileup.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bio/db/pileup'
|
5
|
+
require "test/unit"
|
6
|
+
gem 'test-unit'
|
7
|
+
|
8
|
+
|
9
|
+
# Unit tests for Bio::DB::Pileup, driven by four hand-written pileup lines.
#
# NOTE(review): test_to_s counts tab characters in the rendered output, so the
# fixture lines below are presumably meant to be tab-delimited -- confirm that
# the field separators in these literals are the ones the parser expects.
class TestPileup < Test::Unit::TestCase

  # Builds the pileup fixtures shared by every test.
  def setup
    # mostly reference calls, with one A and one T mismatch
    @pu = Bio::DB::Pileup.new("seq1 279 C 23 A..T,,.,.,...,,,.,..... ;75&<<<<<<<<<=<<<9<<:<<")
    # a snp...
    @pu2 = Bio::DB::Pileup.new("seq1 279 C 23 ATTT,,.,.TTTT,,,.,TTTTT ;75&<<<<<<<<<=<<<9<<:<<")
    # an indel..
    @pu3 = Bio::DB::Pileup.new("seq2 156 * +AG/+AG 71 252 99 11 +AG * 3 8 0")
    # two heterozygous alt snps
    @pu4 = Bio::DB::Pileup.new("seq1 279 C 24 AAAAAAAAATTTTTTTTTAATTAA ;75&<<<<<<<<<=<<<9<<:<<<")
  end

  # Two of the reads in @pu disagree with the reference.
  def test_non_ref_count
    assert_equal(2, @pu.non_ref_count)
  end

  # The remaining reads in @pu agree with the reference.
  def test_ref_count
    assert_equal(21, @pu.ref_count)
  end

  # Consensus is checked for the mostly-reference line and for the SNP line.
  def test_consensus
    { @pu => 'C', @pu2 => 'T' }.each do |pileup, expected|
      assert_equal(expected, pileup.consensus)
    end
  end

  # Per-base tally of the non-reference calls in @pu.
  def test_non_refs
    expected_counts = { :T => 1, :A => 1, :G => 0, :C => 0 }
    expected_counts.each do |base, count|
      assert_equal(count, @pu.non_refs[base])
    end
  end

  # The VCF rendering must parse back and keep the chromosome name.
  def test_to_vcf
    vcf = Bio::DB::Vcf.new(@pu.to_vcf)
    assert_equal('seq1', vcf.chrom)
  end

  # indel_gt is reached through send, as in the original test.
  def test_indel_gt
    allele, genotype = @pu3.send(:indel_gt).values_at(0, 1)
    assert_equal('IAG', allele)
    assert_equal('1/1', genotype)
  end

  # snp_gt is reached through send, as in the original test.
  def test_snp_gt
    allele, genotype = @pu2.send(:snp_gt).values_at(0, 1)
    assert_equal('T,', allele)
    assert_equal('1/1', genotype)
  end

  # genotype_list is checked for the SNP line and for the indel line.
  def test_genotype_list
    snp_genotypes = @pu2.genotype_list
    indel_genotypes = @pu3.genotype_list

    assert_equal('T,', snp_genotypes[0])
    assert_equal('1/1', snp_genotypes[1])
    assert_equal('IAG', indel_genotypes[0])
    assert_equal('1/1', indel_genotypes[1])
  end

  # Every base returned for the IUPAC code R must be A or G.
  def test_iupac_to_base
    Bio::DB::Pileup.iupac_to_base('R').each do |base|
      assert_send([['A', 'G'], :member?, base])
    end
  end

  # parse_indel is applied to the consensus of the indel line.
  def test_parse_indel
    parsed = @pu3.parse_indel(@pu3.consensus)
    assert_equal('IAG/+AG', parsed)
  end

  # The number of tabs in the rendered string is (number of columns - 1).
  def test_to_s
    { @pu3 => 12, @pu => 5 }.each do |pileup, tabs|
      assert_equal(tabs, pileup.to_s.count("\t"))
    end
  end

  # Consensus of the A/T mixture in @pu4, called with an argument of 0.1.
  def test_consensus_iuap
    assert_equal('w', @pu4.consensus_iuap(0.1))
  end

end
|
data/test/test_sam.rb
ADDED
@@ -0,0 +1,421 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bio/db/sam'
|
5
|
+
require "test/unit"
|
6
|
+
#gem 'ruby-prof'
|
7
|
+
gem 'test-unit'
|
8
|
+
#require "ruby-prof"
|
9
|
+
|
10
|
+
|
11
|
+
# Integration tests for Bio::DB::Sam.
#
# The tests run against the fixtures under test/samples and write several
# output files next to them; those files are removed again in .shutdown.
# All assertions use the (expected, actual) argument order so that failure
# messages label the two values correctly.
class TestBioDbSam < Test::Unit::TestCase

  # Folder that holds the small fixtures and receives all generated files.
  SMALL_SAMPLES = "test/samples/small"

  # Files written by the tests below, relative to SMALL_SAMPLES.
  GENERATED_FILES = [
    "different_index.bam.bai",
    "dupes_rmdup.bam",
    "mates_fixed.bam",
    "reheader.bam",
    "test_chr.fasta.fai",
    "test_sorted.bam",
    "maps_merged.bam",
    "maps_cated.bam",
    "testu.out"
  ].freeze

  # Runs once after the whole test case and removes the generated files.
  # A file that was never created (for example because its test failed early)
  # is skipped instead of raising and aborting the rest of the cleanup.
  def self.shutdown
    GENERATED_FILES.each do |name|
      path = File.join(SMALL_SAMPLES, name)
      File.delete(path) if File.exist?(path)
    end
  end

  # Sets up fixture paths and a Bio::DB::Sam on the unsorted test BAM.
  def setup
    @test_folder = SMALL_SAMPLES
    @testTAMFile = @test_folder + "/test.tam"
    @testBAMFile = @test_folder + "/testu.bam"
    @testLCI = "test/samples/LCI/test.bam"
    @testLCIref = "test/samples/LCI/NC_001988.ffn"
    @testReference = @test_folder + "/test_chr.fasta"
    @bed_file = @test_folder + "/testu.bed"
    @sam = Bio::DB::Sam.new(
      :fasta => @testReference,
      :bam => @testBAMFile
    )
  end

  # The constructor must reject a missing BAM file and missing arguments.
  def test_new
    assert_kind_of(Bio::DB::Sam, @sam)

    assert_raise(IOError) do
      Bio::DB::Sam.new(
        :fasta => @testReference,
        :bam => @testBAMFile + "ads"
      )
    end
    assert_raise(ArgumentError) do
      Bio::DB::Sam.new()
    end
  end

  # Indexing must create a non-empty .bai, both at the default location and
  # at a caller-supplied one.
  def test_index
    test_bai_file = @testBAMFile + ".bai"
    # start from a clean slate: remove an index left over from an earlier run
    if File.exist?(test_bai_file)
      puts "bam index exists....deleting..."
      File.delete(test_bai_file)
    end

    # no index file yet
    assert_equal(false, @sam.indexed?)
    # index the bam file
    @sam.index()
    assert_equal(true, @sam.indexed?)
    # make sure the .bai file exists and has content
    assert(File.exist?(test_bai_file), "From test_index: .bai file was not created")
    assert(File.size(test_bai_file) > 0, "From test_index: .bai file is empty")

    # as above, but give the output a different name
    test_bai_file = @test_folder + "/different_index.bam.bai"
    @sam.index(:out_index => test_bai_file)
    assert(File.exist?(test_bai_file), "From test_index: .bai file was not created")
    assert(File.size(test_bai_file) > 0, "From test_index: .bai file is empty")
  end

  # view must yield Bio::DB::Alignment objects whose reference is 'chr_1'.
  def test_view
    @sam.view() do |sam|
      assert_equal(Bio::DB::Alignment, sam.class)
      assert_equal("chr_1", sam.rname)
    end
  end

  # fetch must yield the expected number of alignments for several regions.
  def test_fetch
    i = 0
    @sam.index
    @sam.fetch("chr_1", 10, 1000) do |sam|
      # every yielded object is a Bio::DB::Alignment on chr_1
      assert_equal(Bio::DB::Alignment, sam.class)
      assert_equal("chr_1", sam.rname)
      i += 1
    end
    assert(i > 0)
    assert_equal(9, i)

    bam = Bio::DB::Sam.new(:bam => @testLCI, :fasta => @testLCIref)
    bam.open
    count = 0
    bam.fetch("NC_001988.2", 0, 200) { |_alignment| count += 1 }
    assert_equal(36, count)

    count = 0
    bam.fetch("NC_001988.2", 75, 75) { |_alignment| count += 1 }
    assert_equal(7, count)
  end

  # fetch_with_function must call the supplied block once per alignment.
  def test_fetch_with_function
    count = 0
    # Proc.new is kept on purpose: a lambda would enforce arity on the yield.
    block = Proc.new do |a|
      assert_equal(Bio::DB::Alignment, a.class)
      count += 1
    end

    @sam.fetch_with_function("chr_1", 10, 1000, &block)
    assert_equal(9, count)

    count = 0
    @sam.fetch_with_function("chr_1", 82, 140, &block)
    assert_equal(4, count)

    # count is deliberately not reset here: 4 from the call above + 4 more
    @sam.fetch_with_function("chr_1", 0, 140, &block)
    assert_equal(8, count)

    count2 = 0
    @sam.fetch("chr_1", 0, 200) { |_alignment| count2 += 1 }
    assert_equal(6, count2)
  end

  # Every coverage value in the region must be 1.0, 2.0 or 3.0.
  def test_chromosome_coverage
    cov = @sam.chromosome_coverage("chr_1", 10, 1000)
    cov.each do |pu|
      assert_send([[1.0, 2.0, 3.0], :member?, pu])
    end
  end

  # The average coverage of the requested window must be 1.5.
  def test_average_coverage
    test_bai_file = @testBAMFile + ".bai"
    @sam.index() unless File.exist?(test_bai_file)

    avcov = @sam.average_coverage("chr_1", 33, 19)
    assert_equal(1.5, avcov)
    File.delete(test_bai_file)
  end

  # faidx must create a non-empty .fai next to the reference.
  def test_faidx
    @sam.faidx()
    test_fai_file = @testReference + ".fai"
    assert(File.exist?(test_fai_file), "From test_faidx: .fai file was not created")
    assert(File.size(test_fai_file) > 0, "From test_faidx: .fai file is empty")
  end

  # index_stats must report exactly 'chr_1' and '*' with the expected counts.
  def test_index_stats
    stats = @sam.index_stats
    stats.each_pair do |seq, _stat|
      assert_send([['chr_1', '*'], :member?, seq])
    end
    assert_equal(69930, stats['chr_1'][:length])
    assert_equal(9, stats['chr_1'][:mapped_reads])
    assert_equal(0, stats['chr_1'][:unmapped_reads])
    assert_equal(0, stats['*'][:length])
    assert_equal(0, stats['*'][:mapped_reads])
    assert_equal(0, stats['*'][:unmapped_reads])
    assert_equal(2, stats.size)
  end

  # fetch_reference must return the first 70 nucleotides of the test sequence.
  def test_fetch_reference
    seq_expected = "CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA"
    seq_fetched = @sam.fetch_reference("chr_1", 1, 70, :as_bio => false)
    assert_equal(seq_expected, seq_fetched)
  end

  # After sorting, read start positions must be strictly increasing.
  def test_sort
    sortedsam = @test_folder + "/test_sorted.bam"
    @sam.sort(:prefix => @test_folder + "/test_sorted")
    # create a new Bio::DB::Sam from the sorted bam
    sorted = Bio::DB::Sam.new(
      :fasta => @testReference,
      :bam => sortedsam
    )
    pos = 0
    sorted.view() do |sam|
      assert(sam.pos > pos, "Not sorted by position")
      pos = sam.pos
    end
  end

  # After reheader, every alignment must report 'chr_2' as its reference.
  def test_reheader
    sam_header = @test_folder + "/map_for_reheader.sam"
    outfile = @test_folder + "/reheader.bam"

    @sam.reheader(sam_header, :out => outfile)
    reheader_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => outfile)
    reheader_bam.view() do |sam|
      assert_equal("chr_2", sam.rname)
    end
  end

  # calmd must yield alignments that carry an MD tag.
  def test_calmd
    no_md_sam = @test_folder + "/no_md.sam"
    md = Bio::DB::Sam.new(:fasta => @testReference, :bam => no_md_sam)
    block = Proc.new { |a| assert(a.tags.has_key?('MD'), "From test_calmd: couldn't find the MD tag") }
    md.calmd(:S => true, &block)
  end

  # mpileup must yield Pileup objects, or Vcf objects when :u is set.
  def test_mpileup
    @sam.mpileup(:g => false) do |pileup|
      assert_kind_of(Bio::DB::Pileup, pileup)
      assert_equal('chr_1', pileup.ref_name)
    end
    # do the same for Vcf output
    @sam.mpileup(:u => true) do |pileup|
      assert_kind_of(Bio::DB::Vcf, pileup)
      assert_equal('chr_1', pileup.chrom)
    end
  end

  # A Region built from a hash must equal one built through its setters.
  def test_region_new
    reg1 = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)
    reg2 = Bio::DB::Fasta::Region.new
    reg2.entry = "chr_1"
    reg2.start = 1
    reg2.end = 334

    assert_equal(reg1.entry, reg2.entry)
    assert_equal(reg1.start, reg2.start)
    assert_equal(reg1.end, reg2.end)
  end

  # mpileup_cached with :min_per => 0.2 must cache a region with 5 SNPs.
  def test_mpileup_reg
    reg = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)

    @sam.mpileup_cached(:r => reg, :g => false, :min_cov => 1, :min_per => 0.2) do |pileup|
      assert_kind_of(Bio::DB::Pileup, pileup)
      assert_equal('chr_1', pileup.ref_name)
    end

    region = @sam.cached_regions[reg.to_s]
    snps_tot = Bio::Sequence.snps_between(region.reference, region.consensus)
    assert_equal(5, snps_tot)
    assert_equal(220, region.called)
  end

  # Same region as test_mpileup_reg, but :min_per => 0.4 leaves a single SNP.
  def test_mpileup_reg_05
    reg = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)
    @sam.mpileup_cached(:r => reg, :g => false, :min_cov => 1, :min_per => 0.4) do |pileup|
      assert_kind_of(Bio::DB::Pileup, pileup)
      assert_equal('chr_1', pileup.ref_name)
    end

    region = @sam.cached_regions[reg.to_s]
    snps_tot = Bio::Sequence.snps_between(region.reference, region.consensus)
    assert_equal(1, snps_tot)
    assert_equal(220, region.called)
  end

  # The depth of coverage should be 1 at every position of the region.
  def test_depth
    @sam.depth(:r => "chr_1:25-42") do |al|
      assert_equal(1, al[2].to_i)
    end
  end

  # fix_mates must write a non-empty BAM file.
  def test_fixmate
    mates_fixed_bam = @test_folder + "/mates_fixed.bam"
    @sam.fix_mates(:out_bam => mates_fixed_bam)
    assert(File.exist?(mates_fixed_bam), "From test_fixmate: .bam file was not created")
    assert(File.size(mates_fixed_bam) > 0, "From test_fixmate: .bam file is empty")
  end

  # The first element of the first flag_stats entry, as an integer, must be 9.
  def test_flagstats
    stats = @sam.flag_stats()
    no_reads_mapped = stats[0][0].to_i
    assert_equal(9, no_reads_mapped)
  end

  # Merging the two fixture BAMs must produce 10 alignments.
  def test_merge
    bam1 = @test_folder + "/map_to_merge1.bam"
    bam2 = @test_folder + "/map_to_merge2.bam"
    bam_to_merge1 = Bio::DB::Sam.new(:fasta => @testReference, :bam => bam1)
    bam_to_merge2 = Bio::DB::Sam.new(:fasta => @testReference, :bam => bam2)
    bam_files = [bam_to_merge1, bam_to_merge2]

    merged_bam_file = @test_folder + "/maps_merged.bam"
    File.delete(merged_bam_file) if File.exist?(merged_bam_file)
    @sam.merge(:out => merged_bam_file, :bams => bam_files, :n => true)
    merged_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => merged_bam_file)

    no_reads_mapped = 0
    merged_bam.view() do |al|
      assert_kind_of(Bio::DB::Alignment, al)
      no_reads_mapped += 1
    end
    assert_equal(10, no_reads_mapped)
  end

  # Combining the two fixture BAMs (given as paths) must produce 10 alignments.
  #
  # NOTE(review): the test is named test_cat and its output is maps_cated.bam,
  # yet it calls @sam.merge rather than a cat method -- confirm which call is
  # intended before changing it.
  def test_cat
    bam1 = @test_folder + "/map_to_merge1.bam"
    bam2 = @test_folder + "/map_to_merge2.bam"
    bam_files = [bam1, bam2]

    cat_bam_file = @test_folder + "/maps_cated.bam"
    File.delete(cat_bam_file) if File.exist?(cat_bam_file)
    @sam.merge(:out => cat_bam_file, :bams => bam_files)
    cated_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => cat_bam_file)

    no_reads_mapped = 0
    cated_bam.view() do |al|
      assert_kind_of(Bio::DB::Alignment, al)
      no_reads_mapped += 1
    end
    # there should be 10 reads in the combined maps
    assert_equal(10, no_reads_mapped)
  end

  # dupes.bam contains 4 reads mapped once and one read mapped to the same
  # place 268 times; removing duplicates should leave 5 reads.
  def test_rmdup
    dupes = @test_folder + "/dupes.bam"
    unduped = @test_folder + "/dupes_rmdup.bam"
    bam_with_dupes = Bio::DB::Sam.new(:fasta => @testReference, :bam => dupes)
    bam_with_dupes.remove_duplicates(:s => true, :out => unduped)

    unduped_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => unduped)
    readcount = 0
    unduped_bam.view() { |_alignment| readcount += 1 }
    assert_equal(5, readcount)
  end

  # targetcut must run on the sorted fixture without raising.
  def test_targetcut
    sorted_bam = @test_folder + "/sorted.bam"
    cut = Bio::DB::Sam.new(:fasta => @testReference, :bam => sorted_bam)
    assert_nothing_raised do
      cut.targetcut
    end
  end

  # docs must reject an unknown program name ('samtool' instead of 'samtools').
  def test_docs
    output = Bio::DB::Sam.docs('samtool', 'tview')
    assert_equal("program must be 'samtools' or 'bcftools'", output)
  end

  # Every line written by bedcov must carry 630 in its fourth column.
  def test_bedcov
    out_file = @test_folder + "/testu.out"
    @sam.bedcov(:bed => @bed_file, :out => out_file)
    # File.foreach closes the file even when an assertion fails part-way.
    File.foreach(out_file) do |line|
      fields = line.chomp.split(/\t/)
      # split yields Strings, so convert before comparing with the Integer
      assert_equal(630, fields[3].to_i)
    end
  end

end
|
data/test/test_vcf.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bio/db/vcf'
|
5
|
+
require "test/unit"
|
6
|
+
gem 'test-unit'
|
7
|
+
|
8
|
+
|
9
|
+
# Unit tests for Bio::DB::Vcf, driven by four hand-written VCF record lines.
class TestVcf < Test::Unit::TestCase

  # Builds the four records shared by the tests.
  def setup
    # from a 3.3 vcf file, with explicit sample names
    @vcf1 = Bio::DB::Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3", ["a", "b", "c"])
    # from a 3.3 vcf file
    @vcf2 = Bio::DB::Vcf.new("20 14370 rs6054257 G A 29 0 NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:-1,-1")
    # from a 4.0 vcf file
    @vcf3 = Bio::DB::Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3")
    # from a 4.0 vcf file
    @vcf4 = Bio::DB::Vcf.new("20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,")
  end

  # Checks every parsed field of the four records in turn.
  def test_parse
    # record 1: '.' placeholders come back as nil, samples keyed by given names
    named_samples = {
      "a" => { "GT" => "0|0", "HQ" => "10,10" },
      "b" => { "GT" => "0|0", "HQ" => "10,10" },
      "c" => { "GT" => "0/1", "HQ" => "3,3" }
    }
    assert_equal("19", @vcf1.chrom)
    assert_equal(111, @vcf1.pos)
    assert_nil(@vcf1.id)
    assert_equal("A", @vcf1.ref)
    assert_equal("C", @vcf1.alt)
    assert_equal(9.6, @vcf1.qual)
    assert_nil(@vcf1.filter)
    assert_nil(@vcf1.info)
    assert_equal(named_samples, @vcf1.samples)

    # record 2: populated info column, samples keyed "1".."3"
    info = { "DP" => "14", "AF" => "0.5", "NS" => "3", "DB" => nil, "H2" => nil }
    samples_v33 = {
      "1" => { "DP" => "1", "GT" => "0|0", "HQ" => "51,51", "GQ" => "48" },
      "2" => { "DP" => "8", "GT" => "1|0", "HQ" => "51,51", "GQ" => "48" },
      "3" => { "DP" => "5", "GT" => "1/1", "HQ" => "-1,-1", "GQ" => "43" }
    }
    assert_equal("20", @vcf2.chrom)
    assert_equal(14370, @vcf2.pos)
    assert_equal('rs6054257', @vcf2.id)
    assert_equal("G", @vcf2.ref)
    assert_equal("A", @vcf2.alt)
    assert_equal(29, @vcf2.qual)
    assert_equal("0", @vcf2.filter)
    assert_equal(info, @vcf2.info)
    assert_equal(samples_v33, @vcf2.samples)

    # record 3: same line as record 1, but without sample names
    numbered_samples = {
      "1" => { "GT" => "0|0", "HQ" => "10,10" },
      "2" => { "GT" => "0|0", "HQ" => "10,10" },
      "3" => { "GT" => "0/1", "HQ" => "3,3" }
    }
    assert_equal("19", @vcf3.chrom)
    assert_equal(111, @vcf3.pos)
    assert_nil(@vcf3.id)
    assert_equal("A", @vcf3.ref)
    assert_equal("C", @vcf3.alt)
    assert_equal(9.6, @vcf3.qual)
    assert_nil(@vcf3.filter)
    assert_nil(@vcf3.info)
    assert_equal(numbered_samples, @vcf3.samples)

    # record 4: PASS filter and a '.,' haplotype quality on the last sample
    samples_v40 = {
      "1" => { "DP" => "1", "GT" => "0|0", "HQ" => "51,51", "GQ" => "48" },
      "2" => { "DP" => "8", "GT" => "1|0", "HQ" => "51,51", "GQ" => "48" },
      "3" => { "DP" => "5", "GT" => "1/1", "HQ" => ".,", "GQ" => "43" }
    }
    assert_equal("20", @vcf4.chrom)
    assert_equal(14370, @vcf4.pos)
    assert_equal('rs6054257', @vcf4.id)
    assert_equal("G", @vcf4.ref)
    assert_equal("A", @vcf4.alt)
    assert_equal(29, @vcf4.qual)
    assert_equal("PASS", @vcf4.filter)
    assert_equal(info, @vcf4.info)
    assert_equal(samples_v40, @vcf4.samples)
  end

  # int_or_raw must accept an integer without raising.
  def test_int_or_raw
    assert_nothing_raised { @vcf1.int_or_raw(1) }

    # NOTE(review): A is a bare constant, not the string "A". If no constant
    # named A is defined, the exception caught here is the NameError raised
    # while evaluating the argument, so int_or_raw itself is never called --
    # confirm what this assertion is meant to exercise.
    assert_raise { @vcf1.int_or_raw(A) }
  end

end
|