bio-samtools-wrapper 2.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.travis.yml +27 -0
  4. data/Gemfile +20 -0
  5. data/LICENSE.txt +702 -0
  6. data/README.md +501 -0
  7. data/Rakefile +73 -0
  8. data/VERSION +1 -0
  9. data/bin/bam_consensus.rb +85 -0
  10. data/bio-samtools-wrapper.gemspec +181 -0
  11. data/doc/Bio/DB/Alignment.html +552 -0
  12. data/doc/Bio/DB/Pileup.html +711 -0
  13. data/doc/Bio/DB/SAM/Library.html +167 -0
  14. data/doc/Bio/DB/SAM/Tools.html +109 -0
  15. data/doc/Bio/DB/SAM.html +1853 -0
  16. data/doc/Bio/DB/Tag.html +208 -0
  17. data/doc/Bio/DB/Vcf.html +431 -0
  18. data/doc/Bio/DB.html +105 -0
  19. data/doc/Bio.html +175 -0
  20. data/doc/LICENSE_txt.html +846 -0
  21. data/doc/created.rid +9 -0
  22. data/doc/fonts/Lato-Light.ttf +0 -0
  23. data/doc/fonts/Lato-LightItalic.ttf +0 -0
  24. data/doc/fonts/Lato-Regular.ttf +0 -0
  25. data/doc/fonts/Lato-RegularItalic.ttf +0 -0
  26. data/doc/fonts/SourceCodePro-Bold.ttf +0 -0
  27. data/doc/fonts/SourceCodePro-Regular.ttf +0 -0
  28. data/doc/fonts.css +167 -0
  29. data/doc/images/add.png +0 -0
  30. data/doc/images/arrow_up.png +0 -0
  31. data/doc/images/brick.png +0 -0
  32. data/doc/images/brick_link.png +0 -0
  33. data/doc/images/bug.png +0 -0
  34. data/doc/images/bullet_black.png +0 -0
  35. data/doc/images/bullet_toggle_minus.png +0 -0
  36. data/doc/images/bullet_toggle_plus.png +0 -0
  37. data/doc/images/date.png +0 -0
  38. data/doc/images/delete.png +0 -0
  39. data/doc/images/find.png +0 -0
  40. data/doc/images/loadingAnimation.gif +0 -0
  41. data/doc/images/macFFBgHack.png +0 -0
  42. data/doc/images/package.png +0 -0
  43. data/doc/images/page_green.png +0 -0
  44. data/doc/images/page_white_text.png +0 -0
  45. data/doc/images/page_white_width.png +0 -0
  46. data/doc/images/plugin.png +0 -0
  47. data/doc/images/ruby.png +0 -0
  48. data/doc/images/tag_blue.png +0 -0
  49. data/doc/images/tag_green.png +0 -0
  50. data/doc/images/transparent.png +0 -0
  51. data/doc/images/wrench.png +0 -0
  52. data/doc/images/wrench_orange.png +0 -0
  53. data/doc/images/zoom.png +0 -0
  54. data/doc/index.html +106 -0
  55. data/doc/js/darkfish.js +140 -0
  56. data/doc/js/jquery.js +18 -0
  57. data/doc/js/navigation.js +142 -0
  58. data/doc/js/search.js +109 -0
  59. data/doc/js/search_index.js +1 -0
  60. data/doc/js/searcher.js +228 -0
  61. data/doc/rdoc.css +580 -0
  62. data/doc/table_of_contents.html +305 -0
  63. data/ext/Makefile-bioruby.patch +12 -0
  64. data/ext/Makefile-suse.patch +11 -0
  65. data/ext/mkrf_conf.rb +118 -0
  66. data/lib/bio/BIOExtensions.rb +89 -0
  67. data/lib/bio/db/alignment.rb +64 -0
  68. data/lib/bio/db/fastadb.rb +320 -0
  69. data/lib/bio/db/pileup.rb +273 -0
  70. data/lib/bio/db/sam/external/COPYING +21 -0
  71. data/lib/bio/db/sam/external/VERSION +1 -0
  72. data/lib/bio/db/sam/library.rb +32 -0
  73. data/lib/bio/db/sam.rb +778 -0
  74. data/lib/bio/db/vcf.rb +105 -0
  75. data/lib/bio-samtools-wrapper.rb +9 -0
  76. data/test/.gitignore +1 -0
  77. data/test/helper.rb +18 -0
  78. data/test/sample.vcf +24 -0
  79. data/test/samples/.gitignore +1 -0
  80. data/test/samples/LCI/NC_001988.ffn +2 -0
  81. data/test/samples/LCI/test.bam +0 -0
  82. data/test/samples/LCI/test.bam.bai +0 -0
  83. data/test/samples/small/dupes.bam +0 -0
  84. data/test/samples/small/dupes.sam +274 -0
  85. data/test/samples/small/ids2.txt +1 -0
  86. data/test/samples/small/map_for_reheader.sam +8 -0
  87. data/test/samples/small/map_to_merge1.bam +0 -0
  88. data/test/samples/small/map_to_merge1.bam.bai +0 -0
  89. data/test/samples/small/map_to_merge1.sam +8 -0
  90. data/test/samples/small/map_to_merge2.bam +0 -0
  91. data/test/samples/small/map_to_merge2.bam.bai +0 -0
  92. data/test/samples/small/map_to_merge2.sam +8 -0
  93. data/test/samples/small/no_md.sam +8 -0
  94. data/test/samples/small/sorted.bam +0 -0
  95. data/test/samples/small/sorted.bam.bai +0 -0
  96. data/test/samples/small/test.sai +0 -0
  97. data/test/samples/small/test.tam +10 -0
  98. data/test/samples/small/test_chr.fasta +1000 -0
  99. data/test/samples/small/test_chr.fasta.1.bt2 +0 -0
  100. data/test/samples/small/test_chr.fasta.2.bt2 +0 -0
  101. data/test/samples/small/test_chr.fasta.3.bt2 +0 -0
  102. data/test/samples/small/test_chr.fasta.4.bt2 +0 -0
  103. data/test/samples/small/test_chr.fasta.amb +2 -0
  104. data/test/samples/small/test_chr.fasta.ann +3 -0
  105. data/test/samples/small/test_chr.fasta.bwt +0 -0
  106. data/test/samples/small/test_chr.fasta.pac +0 -0
  107. data/test/samples/small/test_chr.fasta.rbwt +0 -0
  108. data/test/samples/small/test_chr.fasta.rev.1.bt2 +0 -0
  109. data/test/samples/small/test_chr.fasta.rev.2.bt2 +0 -0
  110. data/test/samples/small/test_chr.fasta.rpac +0 -0
  111. data/test/samples/small/test_chr.fasta.rsa +0 -0
  112. data/test/samples/small/test_chr.fasta.sa +0 -0
  113. data/test/samples/small/test_cov.svg +273 -0
  114. data/test/samples/small/test_fastadb.fasta +34 -0
  115. data/test/samples/small/testu.bam +0 -0
  116. data/test/samples/small/testu.bed +2 -0
  117. data/test/test_bio-samtools-wrapper.rb +1 -0
  118. data/test/test_fastadb.rb +89 -0
  119. data/test/test_pileup.rb +90 -0
  120. data/test/test_sam.rb +421 -0
  121. data/test/test_vcf.rb +79 -0
  122. data/tutorial/tutorial.html +474 -0
  123. data/tutorial/tutorial.md +424 -0
  124. data/tutorial/tutorial.pdf +0 -0
  125. metadata +254 -0
@@ -0,0 +1,90 @@
# Unit tests for Bio::DB::Pileup: base counting, consensus calling,
# genotype extraction and conversion to VCF.
$LOAD_PATH << File.expand_path(File.dirname(__FILE__) + '/../lib')
$LOAD_PATH << File.expand_path('.')
require 'rubygems'
# Activate the test-unit gem *before* requiring test/unit; calling `gem`
# after the require cannot influence which implementation gets loaded.
gem 'test-unit'
require 'test/unit'
require 'bio/db/pileup'

class TestPileup < Test::Unit::TestCase
  # Builds the pileup fixtures shared by every test.
  # NOTE(review): the fields below appear space-separated in this copy;
  # pileup lines are normally tab-delimited -- confirm the separators
  # against the original file.
  def setup
    # column that mostly matches the reference
    @pu = Bio::DB::Pileup.new("seq1 279 C 23 A..T,,.,.,...,,,.,..... ;75&<<<<<<<<<=<<<9<<:<<")
    # a snp...
    @pu2 = Bio::DB::Pileup.new("seq1 279 C 23 ATTT,,.,.TTTT,,,.,TTTTT ;75&<<<<<<<<<=<<<9<<:<<")
    # an indel..
    @pu3 = Bio::DB::Pileup.new("seq2 156 * +AG/+AG 71 252 99 11 +AG * 3 8 0")
    # two heterozygous alt snps
    @pu4 = Bio::DB::Pileup.new("seq1 279 C 24 AAAAAAAAATTTTTTTTTAATTAA ;75&<<<<<<<<<=<<<9<<:<<<")
  end

  def test_non_ref_count
    assert_equal(2, @pu.non_ref_count)
  end

  def test_ref_count
    assert_equal(21, @pu.ref_count)
  end

  def test_consensus
    assert_equal('C', @pu.consensus)
    assert_equal('T', @pu2.consensus)
  end

  # Per-base tally of the calls that differ from the reference.
  def test_non_refs
    assert_equal(1, @pu.non_refs[:T])
    assert_equal(1, @pu.non_refs[:A])
    assert_equal(0, @pu.non_refs[:G])
    assert_equal(0, @pu.non_refs[:C])
  end

  # The VCF line produced from a pileup must keep the reference name.
  def test_to_vcf
    vcf = Bio::DB::Vcf.new(@pu.to_vcf)
    assert_equal('seq1', vcf.chrom)
  end

  # indel_gt is invoked through `send`, as in the original test.
  def test_indel_gt
    indel = @pu3.send(:indel_gt)
    assert_equal('IAG', indel[0])
    assert_equal('1/1', indel[1])
  end

  # snp_gt is invoked through `send`, as in the original test.
  def test_snp_gt
    snp = @pu2.send(:snp_gt)
    assert_equal('T,', snp[0])
    assert_equal('1/1', snp[1])
  end

  # genotype_list must agree with snp_gt / indel_gt for the same fixtures.
  def test_genotype_list
    gt2 = @pu2.genotype_list
    gt3 = @pu3.genotype_list
    assert_equal('T,', gt2[0])
    assert_equal('1/1', gt2[1])
    assert_equal('IAG', gt3[0])
    assert_equal('1/1', gt3[1])
  end

  # Every base returned for the IUPAC code 'R' must be A or G.
  def test_iupac_to_base
    allowed = ['A', 'G']
    Bio::DB::Pileup.iupac_to_base('R').each do |base|
      assert(allowed.member?(base), "unexpected base #{base.inspect} for IUPAC code R")
    end
  end

  def test_parse_indel
    assert_equal('IAG/+AG', @pu3.parse_indel(@pu3.consensus))
  end

  # to_s must emit one tab between columns, i.e. (number of columns - 1) tabs.
  def test_to_s
    assert_equal(12, @pu3.to_s.count("\t"))
    assert_equal(5, @pu.to_s.count("\t"))
  end

  def test_consensus_iuap
    assert_equal('w', @pu4.consensus_iuap(0.1))
  end
end
data/test/test_sam.rb ADDED
@@ -0,0 +1,421 @@
# Integration tests for Bio::DB::Sam. They read the sample files under
# test/samples and write several artefacts there, which are removed again
# by TestBioDbSam.shutdown.
$LOAD_PATH << File.expand_path(File.dirname(__FILE__) + '/../lib')
$LOAD_PATH << File.expand_path('.')
require 'rubygems'
# Activate the test-unit gem *before* requiring test/unit; calling `gem`
# after the require cannot influence which implementation gets loaded.
gem 'test-unit'
require 'test/unit'
require 'bio/db/sam'
#gem 'ruby-prof'
#require "ruby-prof"

class TestBioDbSam < Test::Unit::TestCase
  # include RubyProf::Test

  # Files written by the tests below.
  GENERATED_FILES = %w[
    test/samples/small/different_index.bam.bai
    test/samples/small/dupes_rmdup.bam
    test/samples/small/mates_fixed.bam
    test/samples/small/reheader.bam
    test/samples/small/test_chr.fasta.fai
    test/samples/small/test_sorted.bam
    test/samples/small/maps_merged.bam
    test/samples/small/maps_cated.bam
    test/samples/small/testu.out
  ].freeze

  # Class-level teardown hook: removes the generated files. A file that
  # was never created (e.g. because its test failed early) is skipped so
  # that one missing artefact does not abort the rest of the clean-up.
  def self.shutdown
    GENERATED_FILES.each do |path|
      File.delete(path) if File.exist?(path)
    end
  end

  # Sets up the fixture paths and the Sam object most tests operate on.
  def setup
    @test_folder = "test/samples/small"
    @testTAMFile = @test_folder + "/test.tam"
    @testBAMFile = @test_folder + "/testu.bam"
    @testLCI = "test/samples/LCI/test.bam"
    @testLCIref = "test/samples/LCI/NC_001988.ffn"
    @testReference = @test_folder + "/test_chr.fasta"
    @bed_file = @test_folder + "/testu.bed"
    @sam = Bio::DB::Sam.new(
      :fasta => @testReference,
      :bam => @testBAMFile
    )
  end

  # A missing BAM must raise IOError; missing arguments ArgumentError.
  def test_new
    assert_kind_of(Bio::DB::Sam, @sam)

    assert_raise(IOError) do
      Bio::DB::Sam.new(
        :fasta => @testReference,
        :bam => @testBAMFile + "ads"
      )
    end
    assert_raise(ArgumentError) do
      Bio::DB::Sam.new
    end
  end

  def test_index
    test_bai_file = @testBAMFile + ".bai"
    # start from a clean state: drop any index left by an earlier test
    if File.exist?(test_bai_file)
      puts "bam index exists....deleting..."
      File.delete(test_bai_file)
    end

    # no index file yet
    assert_equal(false, @sam.indexed?)
    # index the bam file
    @sam.index
    assert_equal(true, @sam.indexed?)
    # make sure the .bai file exists and has content
    assert_non_empty_file(test_bai_file, "From test_index: .bai file is empty")
    # as above, but give the output a different name
    test_bai_file = @test_folder + "/different_index.bam.bai"
    @sam.index(:out_index => test_bai_file)
    assert_non_empty_file(test_bai_file, "From test_index: .bai file is empty")
  end

  # Every record yielded by view is an Alignment on reference 'chr_1'.
  def test_view
    @sam.view do |sam|
      assert_equal(Bio::DB::Alignment, sam.class)
      assert_equal("chr_1", sam.rname)
    end
  end

  def test_fetch
    i = 0
    @sam.index
    @sam.fetch("chr_1", 10, 1000) do |sam|
      # all yielded objects are Bio::DB::Alignment objects on 'chr_1'
      assert_equal(Bio::DB::Alignment, sam.class)
      assert_equal("chr_1", sam.rname)
      i += 1
    end
    assert_equal(9, i)

    bam = Bio::DB::Sam.new(:bam => @testLCI, :fasta => @testLCIref)
    bam.open
    count = 0
    bam.fetch("NC_001988.2", 0, 200) do |_x|
      count += 1
    end
    assert_equal(36, count)

    count = 0
    bam.fetch("NC_001988.2", 75, 75) do |_x|
      count += 1
    end
    assert_equal(7, count)
  end

  def test_fetch_with_function
    # the assertion is passed to the method as a block
    count = 0
    block = Proc.new do |a|
      assert_equal(Bio::DB::Alignment, a.class)
      count += 1
    end

    @sam.fetch_with_function("chr_1", 10, 1000, &block)
    assert_equal(9, count)

    count = 0
    @sam.fetch_with_function("chr_1", 82, 140, &block)
    assert_equal(4, count)

    # count is deliberately not reset here: the expected 8 is the 4 from
    # the previous call plus the records yielded by this one
    @sam.fetch_with_function("chr_1", 0, 140, &block)
    assert_equal(8, count)

    count2 = 0
    @sam.fetch("chr_1", 0, 200) { |_x| count2 += 1 }
    assert_equal(6, count2)
  end

  def test_chromosome_coverage
    # the coverage at each position should only be 1.0, 2.0 or 3.0
    allowed = [1.0, 2.0, 3.0]
    cov = @sam.chromosome_coverage("chr_1", 10, 1000)
    cov.each do |depth|
      assert(allowed.member?(depth), "unexpected coverage #{depth.inspect}")
    end
  end

  def test_average_coverage
    # there should be 10 positions with cov of 1.0 and 10 with cov of 2.0, so average of 1.5
    test_bai_file = @testBAMFile + ".bai"
    @sam.index unless File.exist?(test_bai_file)
    avcov = @sam.average_coverage("chr_1", 33, 19)
    assert_equal(1.5, avcov)
  ensure
    # remove the index even when the assertion above fails
    File.delete(test_bai_file) if test_bai_file && File.exist?(test_bai_file)
  end

  def test_faidx
    @sam.faidx
    test_fai_file = @testReference + ".fai"
    # the .fai file must exist and must not be empty
    assert_non_empty_file(test_fai_file, "From test_faidx: .fai file is empty")
  end

  def test_index_stats
    # fetched once and reused for every assertion below
    stats = @sam.index_stats
    known_sequences = ['chr_1', '*']
    stats.each_pair do |seq, _stat|
      assert(known_sequences.member?(seq), "unexpected sequence #{seq.inspect}")
    end
    assert_equal(69930, stats['chr_1'][:length])
    assert_equal(9, stats['chr_1'][:mapped_reads])
    assert_equal(0, stats['chr_1'][:unmapped_reads])
    assert_equal(0, stats['*'][:length])
    assert_equal(0, stats['*'][:mapped_reads])
    assert_equal(0, stats['*'][:unmapped_reads])
    assert_equal(2, stats.size)
  end

  def test_fetch_reference
    # this is the first 70 nucleotides of the test sequence
    seq_expected = "CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA"
    # fetch the first 70 nucleotides
    seq_fetched = @sam.fetch_reference("chr_1", 1, 70, :as_bio => false)
    assert_equal(seq_expected, seq_fetched)
  end

  def test_sort
    # sort the bam file
    sortedsam = @test_folder + "/test_sorted.bam"
    @sam.sort(:prefix => @test_folder + "/test_sorted")
    # create a new Bio::DB::Sam from the sorted bam
    @sortsam = Bio::DB::Sam.new(
      :fasta => @testReference,
      :bam => sortedsam
    )
    # walk the sorted file and check that the start positions are
    # strictly increasing from one read to the next
    pos = 0
    @sortsam.view do |sam|
      assert(sam.pos > pos, "Not sorted by position")
      pos = sam.pos
    end
  end

  def test_reheader
    sam_header = @test_folder + "/map_for_reheader.sam"
    outfile = @test_folder + "/reheader.bam"

    @sam.reheader(sam_header, :out => outfile)
    reheader_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => outfile)
    # check that the reference is now 'chr_2'
    reheader_bam.view do |sam|
      assert_equal("chr_2", sam.rname)
    end
  end

  # After calmd every yielded alignment must carry an MD tag.
  def test_calmd
    no_md_sam = @test_folder + "/no_md.sam"
    md = Bio::DB::Sam.new(:fasta => @testReference, :bam => no_md_sam)
    block = Proc.new { |a| assert(a.tags.has_key?('MD'), "From test_calmd: couldn't find the MD tag") }
    md.calmd(:S => true, &block)
  end

  def test_mpileup
    # @sam.index
    @sam.mpileup(:g => false) do |pileup|
      # all yielded objects are Bio::DB::Pileup objects on 'chr_1'
      assert_kind_of(Bio::DB::Pileup, pileup)
      assert_equal('chr_1', pileup.ref_name)
    end
    # do the same for Vcf output
    @sam.mpileup(:u => true) do |pileup|
      assert_kind_of(Bio::DB::Vcf, pileup)
      assert_equal('chr_1', pileup.chrom)
    end
  end

  # A Region built from a hash equals one populated through its setters.
  def test_region_new
    reg1 = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)
    reg2 = Bio::DB::Fasta::Region.new
    reg2.entry = "chr_1"
    reg2.start = 1
    reg2.end = 334

    assert_equal(reg1.entry, reg2.entry)
    assert_equal(reg1.start, reg2.start)
    assert_equal(reg1.end, reg2.end)
  end

  def test_mpileup_reg
    reg = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)

    @sam.mpileup_cached(:r => reg, :g => false, :min_cov => 1, :min_per => 0.2) do |pileup|
      # all yielded objects are Bio::DB::Pileup objects on 'chr_1'
      assert_kind_of(Bio::DB::Pileup, pileup)
      assert_equal('chr_1', pileup.ref_name)
    end

    region = @sam.cached_regions[reg.to_s]
    #puts "cached_region: #{region.inspect}"
    #puts "AVG COV: #{region.average_coverage}"
    #puts "Reference: #{region.reference}"
    #puts "Consensus: #{region.consensus}"
    #puts "called: #{region.called}"
    #, :snps, :reference, :base_ratios, :consensus, :coverages
    snps_tot = Bio::Sequence.snps_between(region.reference, region.consensus)
    assert_equal(5, snps_tot)
    assert_equal(220, region.called)
  end

  # Same region as test_mpileup_reg but with :min_per => 0.4, for which a
  # single SNP is expected.
  def test_mpileup_reg_05
    reg = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)
    @sam.mpileup_cached(:r => reg, :g => false, :min_cov => 1, :min_per => 0.4) do |pileup|
      # all yielded objects are Bio::DB::Pileup objects on 'chr_1'
      assert_kind_of(Bio::DB::Pileup, pileup)
      assert_equal('chr_1', pileup.ref_name)
    end

    region = @sam.cached_regions[reg.to_s]

    #, :snps, :reference, :base_ratios, :consensus, :coverages
    snps_tot = Bio::Sequence.snps_between(region.reference, region.consensus)
    assert_equal(1, snps_tot)
    assert_equal(220, region.called)
  end

  def test_depth
    # the depth of coverage should be 1 at all given positions
    @sam.depth(:r => "chr_1:25-42") do |al|
      assert_equal(1, al[2].to_i)
    end
  end

  def test_fixmate
    mates_fixed_bam = @test_folder + "/mates_fixed.bam"
    @sam.fix_mates(:out_bam => mates_fixed_bam)
    assert_non_empty_file(mates_fixed_bam, "From test_fixmate: .bam file is empty")
  end

  def test_flagstats
    stats = @sam.flag_stats
    # the number of reads mapped is taken from stats[0][0]; per the
    # original comment this is the first character on the first line, so
    # it only works while the count is a single digit
    no_reads_mapped = stats[0][0].to_i
    assert_equal(9, no_reads_mapped)
  end

  def test_merge
    bam1 = @test_folder + "/map_to_merge1.bam"
    bam2 = @test_folder + "/map_to_merge2.bam"
    bam_to_merge1 = Bio::DB::Sam.new(:fasta => @testReference, :bam => bam1)
    bam_to_merge2 = Bio::DB::Sam.new(:fasta => @testReference, :bam => bam2)
    bam_files = [bam_to_merge1, bam_to_merge2]

    merged_bam_file = @test_folder + "/maps_merged.bam"
    File.delete(merged_bam_file) if File.exist?(merged_bam_file)
    @sam.merge(:out => merged_bam_file, :bams => bam_files, :n => true)
    merged_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => merged_bam_file)

    no_reads_mapped = 0
    merged_bam.view do |al|
      assert_kind_of(Bio::DB::Alignment, al)
      no_reads_mapped += 1
    end
    assert_equal(10, no_reads_mapped)
  end

  def test_cat
    # same files used for merge, passed as plain paths
    bam1 = @test_folder + "/map_to_merge1.bam"
    bam2 = @test_folder + "/map_to_merge2.bam"
    bam_files = [bam1, bam2]

    cat_bam_file = @test_folder + "/maps_cated.bam"
    File.delete(cat_bam_file) if File.exist?(cat_bam_file)
    # NOTE(review): the test is named test_cat but calls #merge, exactly
    # as in the original -- confirm whether a dedicated cat method was
    # meant to be exercised here.
    @sam.merge(:out => cat_bam_file, :bams => bam_files)
    cated_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => cat_bam_file)

    no_reads_mapped = 0
    cated_bam.view do |al|
      assert_kind_of(Bio::DB::Alignment, al)
      no_reads_mapped += 1
    end
    # there should be 10 reads in the combined maps
    assert_equal(10, no_reads_mapped)
  end

  def test_rmdup
    # dupes contains 4 reads mapped once and one read mapped to the same place 268 times.
    dupes = @test_folder + "/dupes.bam"
    unduped = @test_folder + "/dupes_rmdup.bam"
    bam_with_dupes = Bio::DB::Sam.new(:fasta => @testReference, :bam => dupes)
    bam_with_dupes.remove_duplicates(:s => true, :out => unduped)

    unduped_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => unduped)
    # rmdup should remove 267 of the 268 reads mapping to the same place,
    # so the resulting bam file holds 5 reads
    readcount = 0
    unduped_bam.view do |_sam|
      readcount += 1
    end
    assert_equal(5, readcount)
  end

  def test_targetcut
    sorted_bam = @test_folder + "/sorted.bam"
    cut = Bio::DB::Sam.new(:fasta => @testReference, :bam => sorted_bam)
    assert_nothing_thrown do
      cut.targetcut
    end
  end

  def test_docs
    # force an error (use 'samtool' instead of 'samtools')
    output = Bio::DB::Sam.docs('samtool', 'tview')
    assert_equal("program must be 'samtools' or 'bcftools'", output)
  end

  def test_bedcov
    out_file = @test_folder + "/testu.out"
    @sam.bedcov(:bed => @bed_file, :out => out_file)
    # File.foreach closes the file even if an assertion fails mid-loop
    File.foreach(out_file) do |line|
      fields = line.split(/\t/)
      # the field is read as text, so convert it before comparing with
      # the expected integer (a String never equals 630)
      assert_equal(630, fields[3].to_i)
    end
  end

  private

  # Asserts that +path+ exists and is not empty; +empty_message+ is the
  # failure message used when the file has zero size.
  def assert_non_empty_file(path, empty_message)
    assert(File.exist?(path), "Expected #{path} to exist")
    assert(File.size(path) > 0, empty_message)
  end
end
data/test/test_vcf.rb ADDED
@@ -0,0 +1,79 @@
# Unit tests for Bio::DB::Vcf line parsing.
$LOAD_PATH << File.expand_path(File.dirname(__FILE__) + '/../lib')
$LOAD_PATH << File.expand_path('.')
require 'rubygems'
# Activate the test-unit gem *before* requiring test/unit; calling `gem`
# after the require cannot influence which implementation gets loaded.
gem 'test-unit'
require 'test/unit'
require 'bio/db/vcf'

class TestVcf < Test::Unit::TestCase
  # Builds four VCF records; only @vcf1 is given explicit sample names.
  # NOTE(review): the fields below appear space-separated in this copy;
  # VCF lines are normally tab-delimited -- confirm the separators
  # against the original file.
  def setup
    @vcf1 = Bio::DB::Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3", ["a", "b", "c"]) # from a 3.3 vcf file
    @vcf2 = Bio::DB::Vcf.new("20 14370 rs6054257 G A 29 0 NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:-1,-1") # from a 3.3 vcf file
    @vcf3 = Bio::DB::Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3") # from a 4.0 vcf file
    @vcf4 = Bio::DB::Vcf.new("20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,") # from a 4.0 vcf file
  end

  # Checks every parsed field of the four fixtures. '.' placeholders are
  # expected to come back as nil, and samples without explicit names are
  # expected under the keys "1", "2", "3".
  def test_parse
    assert_equal("19", @vcf1.chrom)
    assert_equal(111, @vcf1.pos)
    assert_nil(@vcf1.id)
    assert_equal("A", @vcf1.ref)
    assert_equal("C", @vcf1.alt)
    assert_equal(9.6, @vcf1.qual)
    assert_nil(@vcf1.filter)
    assert_nil(@vcf1.info)
    assert_equal({"a"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                  "b"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                  "c"=>{"GT"=>"0/1", "HQ"=>"3,3"}}, @vcf1.samples)

    assert_equal("20", @vcf2.chrom)
    assert_equal(14370, @vcf2.pos)
    assert_equal('rs6054257', @vcf2.id)
    assert_equal("G", @vcf2.ref)
    assert_equal("A", @vcf2.alt)
    assert_equal(29, @vcf2.qual)
    assert_equal("0", @vcf2.filter)
    assert_equal({"DP"=>"14", "AF"=>"0.5", "NS"=>"3", "DB"=>nil, "H2"=>nil}, @vcf2.info)
    assert_equal({"1"=>{"DP"=>"1", "GT"=>"0|0", "HQ"=>"51,51", "GQ"=>"48"},
                  "2"=>{"DP"=>"8", "GT"=>"1|0", "HQ"=>"51,51", "GQ"=>"48"},
                  "3"=>{"DP"=>"5", "GT"=>"1/1", "HQ"=>"-1,-1", "GQ"=>"43"}}, @vcf2.samples)

    assert_equal("19", @vcf3.chrom)
    assert_equal(111, @vcf3.pos)
    assert_nil(@vcf3.id)
    assert_equal("A", @vcf3.ref)
    assert_equal("C", @vcf3.alt)
    assert_equal(9.6, @vcf3.qual)
    assert_nil(@vcf3.filter)
    assert_nil(@vcf3.info)
    assert_equal({"1"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                  "2"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                  "3"=>{"GT"=>"0/1", "HQ"=>"3,3"}}, @vcf3.samples)

    assert_equal("20", @vcf4.chrom)
    assert_equal(14370, @vcf4.pos)
    assert_equal('rs6054257', @vcf4.id)
    assert_equal("G", @vcf4.ref)
    assert_equal("A", @vcf4.alt)
    assert_equal(29, @vcf4.qual)
    assert_equal("PASS", @vcf4.filter)
    assert_equal({"DP"=>"14", "AF"=>"0.5", "NS"=>"3", "DB"=>nil, "H2"=>nil}, @vcf4.info)
    assert_equal({"1"=>{"DP"=>"1", "GT"=>"0|0", "HQ"=>"51,51", "GQ"=>"48"},
                  "2"=>{"DP"=>"8", "GT"=>"1|0", "HQ"=>"51,51", "GQ"=>"48"},
                  "3"=>{"DP"=>"5", "GT"=>"1/1", "HQ"=>".,", "GQ"=>"43"}}, @vcf4.samples)
  end

  def test_int_or_raw
    assert_nothing_raised do
      @vcf1.int_or_raw(1)
    end
    # NOTE(review): `A` is a bare constant that this file never defines,
    # so the exception caught here is presumably a NameError raised while
    # evaluating the argument, before int_or_raw is ever called. Left
    # unchanged because the intended behaviour of int_or_raw for
    # non-numeric input is not visible from this file -- confirm and
    # replace with a real input (e.g. the string 'A').
    assert_raise do
      @vcf1.int_or_raw(A)
    end
  end
end