bio-samtools-wrapper 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.travis.yml +27 -0
  4. data/Gemfile +20 -0
  5. data/LICENSE.txt +702 -0
  6. data/README.md +501 -0
  7. data/Rakefile +73 -0
  8. data/VERSION +1 -0
  9. data/bin/bam_consensus.rb +85 -0
  10. data/bio-samtools-wrapper.gemspec +181 -0
  11. data/doc/Bio/DB/Alignment.html +552 -0
  12. data/doc/Bio/DB/Pileup.html +711 -0
  13. data/doc/Bio/DB/SAM/Library.html +167 -0
  14. data/doc/Bio/DB/SAM/Tools.html +109 -0
  15. data/doc/Bio/DB/SAM.html +1853 -0
  16. data/doc/Bio/DB/Tag.html +208 -0
  17. data/doc/Bio/DB/Vcf.html +431 -0
  18. data/doc/Bio/DB.html +105 -0
  19. data/doc/Bio.html +175 -0
  20. data/doc/LICENSE_txt.html +846 -0
  21. data/doc/created.rid +9 -0
  22. data/doc/fonts/Lato-Light.ttf +0 -0
  23. data/doc/fonts/Lato-LightItalic.ttf +0 -0
  24. data/doc/fonts/Lato-Regular.ttf +0 -0
  25. data/doc/fonts/Lato-RegularItalic.ttf +0 -0
  26. data/doc/fonts/SourceCodePro-Bold.ttf +0 -0
  27. data/doc/fonts/SourceCodePro-Regular.ttf +0 -0
  28. data/doc/fonts.css +167 -0
  29. data/doc/images/add.png +0 -0
  30. data/doc/images/arrow_up.png +0 -0
  31. data/doc/images/brick.png +0 -0
  32. data/doc/images/brick_link.png +0 -0
  33. data/doc/images/bug.png +0 -0
  34. data/doc/images/bullet_black.png +0 -0
  35. data/doc/images/bullet_toggle_minus.png +0 -0
  36. data/doc/images/bullet_toggle_plus.png +0 -0
  37. data/doc/images/date.png +0 -0
  38. data/doc/images/delete.png +0 -0
  39. data/doc/images/find.png +0 -0
  40. data/doc/images/loadingAnimation.gif +0 -0
  41. data/doc/images/macFFBgHack.png +0 -0
  42. data/doc/images/package.png +0 -0
  43. data/doc/images/page_green.png +0 -0
  44. data/doc/images/page_white_text.png +0 -0
  45. data/doc/images/page_white_width.png +0 -0
  46. data/doc/images/plugin.png +0 -0
  47. data/doc/images/ruby.png +0 -0
  48. data/doc/images/tag_blue.png +0 -0
  49. data/doc/images/tag_green.png +0 -0
  50. data/doc/images/transparent.png +0 -0
  51. data/doc/images/wrench.png +0 -0
  52. data/doc/images/wrench_orange.png +0 -0
  53. data/doc/images/zoom.png +0 -0
  54. data/doc/index.html +106 -0
  55. data/doc/js/darkfish.js +140 -0
  56. data/doc/js/jquery.js +18 -0
  57. data/doc/js/navigation.js +142 -0
  58. data/doc/js/search.js +109 -0
  59. data/doc/js/search_index.js +1 -0
  60. data/doc/js/searcher.js +228 -0
  61. data/doc/rdoc.css +580 -0
  62. data/doc/table_of_contents.html +305 -0
  63. data/ext/Makefile-bioruby.patch +12 -0
  64. data/ext/Makefile-suse.patch +11 -0
  65. data/ext/mkrf_conf.rb +118 -0
  66. data/lib/bio/BIOExtensions.rb +89 -0
  67. data/lib/bio/db/alignment.rb +64 -0
  68. data/lib/bio/db/fastadb.rb +320 -0
  69. data/lib/bio/db/pileup.rb +273 -0
  70. data/lib/bio/db/sam/external/COPYING +21 -0
  71. data/lib/bio/db/sam/external/VERSION +1 -0
  72. data/lib/bio/db/sam/library.rb +32 -0
  73. data/lib/bio/db/sam.rb +778 -0
  74. data/lib/bio/db/vcf.rb +105 -0
  75. data/lib/bio-samtools-wrapper.rb +9 -0
  76. data/test/.gitignore +1 -0
  77. data/test/helper.rb +18 -0
  78. data/test/sample.vcf +24 -0
  79. data/test/samples/.gitignore +1 -0
  80. data/test/samples/LCI/NC_001988.ffn +2 -0
  81. data/test/samples/LCI/test.bam +0 -0
  82. data/test/samples/LCI/test.bam.bai +0 -0
  83. data/test/samples/small/dupes.bam +0 -0
  84. data/test/samples/small/dupes.sam +274 -0
  85. data/test/samples/small/ids2.txt +1 -0
  86. data/test/samples/small/map_for_reheader.sam +8 -0
  87. data/test/samples/small/map_to_merge1.bam +0 -0
  88. data/test/samples/small/map_to_merge1.bam.bai +0 -0
  89. data/test/samples/small/map_to_merge1.sam +8 -0
  90. data/test/samples/small/map_to_merge2.bam +0 -0
  91. data/test/samples/small/map_to_merge2.bam.bai +0 -0
  92. data/test/samples/small/map_to_merge2.sam +8 -0
  93. data/test/samples/small/no_md.sam +8 -0
  94. data/test/samples/small/sorted.bam +0 -0
  95. data/test/samples/small/sorted.bam.bai +0 -0
  96. data/test/samples/small/test.sai +0 -0
  97. data/test/samples/small/test.tam +10 -0
  98. data/test/samples/small/test_chr.fasta +1000 -0
  99. data/test/samples/small/test_chr.fasta.1.bt2 +0 -0
  100. data/test/samples/small/test_chr.fasta.2.bt2 +0 -0
  101. data/test/samples/small/test_chr.fasta.3.bt2 +0 -0
  102. data/test/samples/small/test_chr.fasta.4.bt2 +0 -0
  103. data/test/samples/small/test_chr.fasta.amb +2 -0
  104. data/test/samples/small/test_chr.fasta.ann +3 -0
  105. data/test/samples/small/test_chr.fasta.bwt +0 -0
  106. data/test/samples/small/test_chr.fasta.pac +0 -0
  107. data/test/samples/small/test_chr.fasta.rbwt +0 -0
  108. data/test/samples/small/test_chr.fasta.rev.1.bt2 +0 -0
  109. data/test/samples/small/test_chr.fasta.rev.2.bt2 +0 -0
  110. data/test/samples/small/test_chr.fasta.rpac +0 -0
  111. data/test/samples/small/test_chr.fasta.rsa +0 -0
  112. data/test/samples/small/test_chr.fasta.sa +0 -0
  113. data/test/samples/small/test_cov.svg +273 -0
  114. data/test/samples/small/test_fastadb.fasta +34 -0
  115. data/test/samples/small/testu.bam +0 -0
  116. data/test/samples/small/testu.bed +2 -0
  117. data/test/test_bio-samtools-wrapper.rb +1 -0
  118. data/test/test_fastadb.rb +89 -0
  119. data/test/test_pileup.rb +90 -0
  120. data/test/test_sam.rb +421 -0
  121. data/test/test_vcf.rb +79 -0
  122. data/tutorial/tutorial.html +474 -0
  123. data/tutorial/tutorial.md +424 -0
  124. data/tutorial/tutorial.pdf +0 -0
  125. metadata +254 -0
@@ -0,0 +1,90 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ require 'rubygems'
4
+ require 'bio/db/pileup'
5
+ require "test/unit"
6
+ gem 'test-unit'
7
+
8
+
9
# Unit tests for Bio::DB::Pileup, driven by four hand-written pileup lines.
#
# Pileup records are tab-delimited, so every fixture spells its column
# separators as explicit "\t" escapes. This keeps the fixtures intact when the
# file passes through tools that collapse whitespace, and it is what
# #test_to_s relies on when it counts tabs.
class TestPileup < Test::Unit::TestCase

  # Builds the pileup fixtures shared by all tests.
  def setup
    # 23 reads over a reference 'C': 21 reference calls, one 'A' and one 'T'
    @pu = Bio::DB::Pileup.new("seq1\t279\tC\t23\tA..T,,.,.,...,,,.,.....\t;75&<<<<<<<<<=<<<9<<:<<")
    #a snp...
    @pu2 = Bio::DB::Pileup.new("seq1\t279\tC\t23\tATTT,,.,.TTTT,,,.,TTTTT\t;75&<<<<<<<<<=<<<9<<:<<")
    #an indel..
    @pu3 = Bio::DB::Pileup.new("seq2\t156\t*\t+AG/+AG\t71\t252\t99\t11\t+AG\t*\t3\t8\t0")
    #two heterozygous alt snps
    @pu4 = Bio::DB::Pileup.new("seq1\t279\tC\t24\tAAAAAAAAATTTTTTTTTAATTAA\t;75&<<<<<<<<<=<<<9<<:<<<")
  end

  # Two of the 23 base calls in @pu differ from the reference.
  def test_non_ref_count
    assert_equal(2, @pu.non_ref_count)
  end

  # The remaining 21 base calls in @pu match the reference.
  def test_ref_count
    assert_equal(21, @pu.ref_count)
  end

  # The consensus is the reference base for @pu and the SNP base for @pu2.
  def test_consensus
    assert_equal('C', @pu.consensus)
    assert_equal('T', @pu2.consensus)
  end

  # non_refs reports a per-base count, keyed by symbol, of non-reference calls.
  def test_non_refs
    assert_equal(1, @pu.non_refs[:T])
    assert_equal(1, @pu.non_refs[:A])
    assert_equal(0, @pu.non_refs[:G])
    assert_equal(0, @pu.non_refs[:C])
  end

  # The output of to_vcf must be parseable by Bio::DB::Vcf and keep the
  # reference name.
  # NOTE(review): only 'bio/db/pileup' is required by this file; presumably it
  # loads Bio::DB::Vcf as well -- confirm.
  def test_to_vcf
    vcf = Bio::DB::Vcf.new(@pu.to_vcf)
    assert_equal('seq1', vcf.chrom)
  end

  # indel_gt is invoked through send, so it is presumably not public.
  def test_indel_gt
    indel = @pu3.send(:indel_gt)
    assert_equal('IAG', indel[0])
    assert_equal('1/1', indel[1])
  end

  # snp_gt is invoked through send, so it is presumably not public.
  def test_snp_gt
    snp = @pu2.send(:snp_gt)
    assert_equal('T,', snp[0])
    assert_equal('1/1', snp[1])
  end

  # genotype_list must give the same answers as snp_gt / indel_gt above.
  def test_genotype_list
    gt2 = @pu2.genotype_list
    gt3 = @pu3.genotype_list
    assert_equal('T,', gt2[0])
    assert_equal('1/1', gt2[1])
    assert_equal('IAG', gt3[0])
    assert_equal('1/1', gt3[1])
  end

  # Every base returned for the IUPAC code 'R' must be 'A' or 'G'.
  # NOTE(review): an empty result passes vacuously.
  def test_iupac_to_base
    iupac = Bio::DB::Pileup.iupac_to_base('R')
    iupac.each do |base|
      assert_send([['A', 'G'], :member?, base])
    end
  end

  def test_parse_indel
    assert_equal('IAG/+AG', @pu3.parse_indel(@pu3.consensus))
  end

  def test_to_s
    #check whether there are the correct number of tabs in the string (number of columns -1)
    assert_equal(12, @pu3.to_s.count("\t"))
    assert_equal(5, @pu.to_s.count("\t"))
  end

  # 'consensus_iuap' (sic) is the spelling of the method under test.
  def test_consensus_iuap
    assert_equal('w', @pu4.consensus_iuap(0.1))
  end

end
data/test/test_sam.rb ADDED
@@ -0,0 +1,421 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ require 'rubygems'
4
+ require 'bio/db/sam'
5
+ require "test/unit"
6
+ #gem 'ruby-prof'
7
+ gem 'test-unit'
8
+ #require "ruby-prof"
9
+
10
+
11
# Integration tests for Bio::DB::Sam. They run against the fixture files under
# test/samples, using paths relative to the current working directory, and
# several of them write new files next to those fixtures.
#
# Assertions follow the test-unit convention assert_equal(expected, actual) so
# that failure messages label the two values correctly.
class TestBioDbSam < Test::Unit::TestCase
  # include RubyProf::Test

  class << self

    # test-unit class-level hook, run once after the last test of this case.
    # Removes the files generated by the tests. Files that were never created
    # (for instance because the test producing them failed) are skipped, so
    # one missing file does not abort the rest of the cleanup.
    def shutdown
      generated = [
        "test/samples/small/different_index.bam.bai",
        "test/samples/small/dupes_rmdup.bam",
        "test/samples/small/mates_fixed.bam",
        "test/samples/small/reheader.bam",
        "test/samples/small/test_chr.fasta.fai",
        "test/samples/small/test_sorted.bam",
        "test/samples/small/maps_merged.bam",
        "test/samples/small/maps_cated.bam",
        "test/samples/small/testu.out"
      ]
      generated.each do |path|
        File.delete(path) if File.exist?(path)
      end
    end
  end

  # Sets up the fixture paths and a Bio::DB::Sam over the small test BAM.
  def setup
    @test_folder = "test/samples/small"
    @testTAMFile = @test_folder + "/test.tam"
    @testBAMFile = @test_folder + "/testu.bam"
    @testLCI = "test/samples/LCI/test.bam"
    @testLCIref = "test/samples/LCI/NC_001988.ffn"
    @testReference = @test_folder + "/test_chr.fasta"
    @bed_file = @test_folder + "/testu.bed"
    @sam = Bio::DB::Sam.new(
      :fasta => @testReference,
      :bam => @testBAMFile
    )
  end

  # The constructor must reject a non-existent BAM and a call without options.
  def test_new
    assert_kind_of(Bio::DB::Sam, @sam)

    assert_raise(IOError) do
      Bio::DB::Sam.new(
        :fasta => @testReference,
        :bam => @testBAMFile + "ads"
      )
    end
    assert_raise(ArgumentError) do
      Bio::DB::Sam.new()
    end
  end

  # Indexing must create a non-empty .bai, both at the default location and at
  # a caller-supplied one.
  def test_index
    test_bai_file = @testBAMFile + ".bai"
    #test to see if the index file exists. If so, delete it
    if File.exist?(test_bai_file)
      puts "bam index exists....deleting..."
      File.delete(test_bai_file)
    end

    #no index file yet
    assert_equal(false, @sam.indexed?)
    #index the bam file
    @sam.index()
    assert_equal(true, @sam.indexed?)
    #make sure the .bai file exists and has content
    assert(File.readable?(test_bai_file), "From test_index: .bai file is missing")
    assert(File.size(test_bai_file) > 0, "From test_index: .bai file is empty")

    #as above, but give the output a different name
    test_bai_file = @test_folder + "/different_index.bam.bai"
    @sam.index(:out_index => test_bai_file)
    assert(File.readable?(test_bai_file), "From test_index: .bai file is missing")
    assert(File.size(test_bai_file) > 0, "From test_index: .bai file is empty")
  end

  def test_view
    #how to get Bio::DB::Alignment objects ..
    @sam.view() do |sam|
      #test that all the objects are Bio::DB::Alignment objects and their reference is 'chr_1'
      assert_equal(Bio::DB::Alignment, sam.class)
      assert_equal("chr_1", sam.rname)
    end
  end

  # fetch must yield only alignments from the requested reference, and the
  # expected number of them for each region.
  def test_fetch
    i = 0
    @sam.index
    @sam.fetch("chr_1", 10, 1000) do |sam|
      #test that all the objects are Bio::DB::Alignment objects
      assert_equal(Bio::DB::Alignment, sam.class)
      assert_equal("chr_1", sam.rname)
      i += 1
    end
    assert_equal(9, i)

    bam = Bio::DB::Sam.new(:bam => @testLCI, :fasta => @testLCIref)
    bam.open
    count = 0
    bam.fetch("NC_001988.2", 0, 200) do |_al|
      count += 1
    end
    assert_equal(36, count)

    count = 0
    bam.fetch("NC_001988.2", 75, 75) do |_al|
      count += 1
    end
    assert_equal(7, count)
  end

  def test_fetch_with_function
    #pass the assert to method
    count = 0
    block = Proc.new do |a|
      assert_equal(Bio::DB::Alignment, a.class)
      count += 1
    end

    @sam.fetch_with_function("chr_1", 10, 1000, &block)
    assert_equal(9, count)

    count = 0
    @sam.fetch_with_function("chr_1", 82, 140, &block)
    assert_equal(4, count)

    # count is deliberately not reset here: the 8 is the running total of the
    # previous call (4) plus this one
    @sam.fetch_with_function("chr_1", 0, 140, &block)
    assert_equal(8, count)

    count2 = 0
    @sam.fetch("chr_1", 0, 200) { |_al| count2 += 1 }
    assert_equal(6, count2)
  end

  def test_chromosome_coverage
    #every reported coverage value must be 1.0, 2.0 or 3.0
    cov = @sam.chromosome_coverage("chr_1", 10, 1000)
    cov.each do |depth|
      assert_send([[1.0, 2.0, 3.0], :member?, depth])
    end
  end

  def test_average_coverage
    #there should be 10 positions with cov of 1.0 and 10 with cov of 2.0, so average of 1.5
    test_bai_file = @testBAMFile + ".bai"
    @sam.index() unless File.exist?(test_bai_file)
    avcov = @sam.average_coverage("chr_1", 33, 19)
    assert_equal(1.5, avcov)
  ensure
    # remove the index even when the assertion fails, so later tests start clean
    File.delete(test_bai_file) if test_bai_file && File.exist?(test_bai_file)
  end

  def test_faidx
    @sam.faidx()
    test_fai_file = @testReference + ".fai"
    #test that the .fai file exists
    assert(File.readable?(test_fai_file), "From test_faidx: .fai file is missing")
    #test that the file is not empty
    assert(File.size(test_fai_file) > 0, "From test_faidx: .fai file is empty")
  end

  def test_index_stats
    # query once and reuse the result for every assertion
    stats = @sam.index_stats
    stats.each_key do |seq|
      assert_send([['chr_1', '*'], :member?, seq])
    end
    assert_equal(69930, stats['chr_1'][:length])
    assert_equal(9, stats['chr_1'][:mapped_reads])
    assert_equal(0, stats['chr_1'][:unmapped_reads])
    assert_equal(0, stats['*'][:length])
    assert_equal(0, stats['*'][:mapped_reads])
    assert_equal(0, stats['*'][:unmapped_reads])
    assert_equal(2, stats.size)
  end

  def test_fetch_reference
    #this is the first 70 nucleotides of the test sequence
    seq_expected = "CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA"
    #fetch the first 70 nucleotides
    seq_fetched = @sam.fetch_reference("chr_1", 1, 70, :as_bio => false)
    #test they're the same
    assert_equal(seq_expected, seq_fetched)
  end

  def test_sort
    #sort the bam file
    sortedsam = @test_folder + "/test_sorted.bam"
    @sam.sort(:prefix => @test_folder + "/test_sorted")
    #create a new Bio::DB::Sam from the sorted bam
    @sortsam = Bio::DB::Sam.new(
      :fasta => @testReference,
      :bam => sortedsam
    )
    pos = 0
    #iterate over the sorted file and make sure that it is sorted by checking
    #that the start position of each read is greater than the previous one
    @sortsam.view() do |sam|
      assert(sam.pos > pos, "Not sorted by position")
      pos = sam.pos
    end
  end

  def test_reheader
    sam_header = @test_folder + "/map_for_reheader.sam"
    outfile = @test_folder + "/reheader.bam"

    @sam.reheader(sam_header, :out => outfile)
    reheader_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => outfile)
    #check that the reference is 'chr_2'
    reheader_bam.view() do |sam|
      assert_equal("chr_2", sam.rname)
    end
  end

  # Every alignment yielded by calmd must carry an MD tag.
  def test_calmd
    no_md_sam = @test_folder + "/no_md.sam"
    md = Bio::DB::Sam.new(:fasta => @testReference, :bam => no_md_sam)
    block = Proc.new { |a| assert(a.tags.has_key?('MD'), "From test_calmd: couldn't find the MD tag") }
    md.calmd(:S => true, &block)
  end

  def test_mpileup
    #create an mpileup
    @sam.mpileup(:g => false) do |pileup|
      #test that all the objects are Bio::DB::Pileup objects
      assert_kind_of(Bio::DB::Pileup, pileup)
      #test that the reference name is 'chr_1' for all objects
      assert_equal('chr_1', pileup.ref_name)
    end
    #do the same for Vcf output
    @sam.mpileup(:u => true) do |pileup|
      assert_kind_of(Bio::DB::Vcf, pileup)
      assert_equal('chr_1', pileup.chrom)
    end
  end

  # A Region built from an options hash must equal one filled in via setters.
  def test_region_new
    reg1 = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)
    reg2 = Bio::DB::Fasta::Region.new
    reg2.entry = "chr_1"
    reg2.start = 1
    reg2.end = 334

    assert_equal(reg1.entry, reg2.entry)
    assert_equal(reg1.start, reg2.start)
    assert_equal(reg1.end, reg2.end)
  end

  def test_mpileup_reg
    #create an mpileup
    reg = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)

    @sam.mpileup_cached(:r => reg, :g => false, :min_cov => 1, :min_per => 0.2) do |pileup|
      #test that all the objects are Bio::DB::Pileup objects
      assert_kind_of(Bio::DB::Pileup, pileup)
      #test that the reference name is 'chr_1' for all objects
      assert_equal('chr_1', pileup.ref_name)
    end

    # the cached region is looked up by the string form of the region
    region = @sam.cached_regions[reg.to_s]
    snps_tot = Bio::Sequence.snps_between(region.reference, region.consensus)
    assert_equal(5, snps_tot)
    assert_equal(220, region.called)
  end

  # Same region as test_mpileup_reg but with :min_per raised to 0.4, which is
  # expected to leave a single SNP.
  def test_mpileup_reg_05
    #create an mpileup
    reg = Bio::DB::Fasta::Region.new(:entry => "chr_1", :start => 1, :end => 334)
    @sam.mpileup_cached(:r => reg, :g => false, :min_cov => 1, :min_per => 0.4) do |pileup|
      #test that all the objects are Bio::DB::Pileup objects
      assert_kind_of(Bio::DB::Pileup, pileup)
      #test that the reference name is 'chr_1' for all objects
      assert_equal('chr_1', pileup.ref_name)
    end

    region = @sam.cached_regions[reg.to_s]
    snps_tot = Bio::Sequence.snps_between(region.reference, region.consensus)
    assert_equal(1, snps_tot)
    assert_equal(220, region.called)
  end

  def test_depth
    #the depth of coverage should be '1' at all given positions
    @sam.depth(:r => "chr_1:25-42") do |al|
      assert_equal(1, al[2].to_i)
    end
  end

  def test_fixmate
    mates_fixed_bam = @test_folder + "/mates_fixed.bam"
    @sam.fix_mates(:out_bam => mates_fixed_bam)
    assert(File.readable?(mates_fixed_bam), "From test_fixmate: .bam file is missing")
    assert(File.size(mates_fixed_bam) > 0, "From test_fixmate: .bam file is empty")
  end

  def test_flagstats
    #get the stats
    stats = @sam.flag_stats()
    #the number of reads mapped will be the first character on the first line.
    no_reads_mapped = stats[0][0].to_i
    #check that it's '9'
    assert_equal(9, no_reads_mapped)
  end

  def test_merge
    bam1 = @test_folder + "/map_to_merge1.bam"
    bam2 = @test_folder + "/map_to_merge2.bam"
    bam_to_merge1 = Bio::DB::Sam.new(:fasta => @testReference, :bam => bam1)
    bam_to_merge2 = Bio::DB::Sam.new(:fasta => @testReference, :bam => bam2)
    bam_files = [bam_to_merge1, bam_to_merge2]

    merged_bam_file = @test_folder + "/maps_merged.bam"
    # remove any output left over from a previous run
    File.delete merged_bam_file if File.exist?(merged_bam_file)
    @sam.merge(:out => merged_bam_file, :bams => bam_files, :n => true)
    merged_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => merged_bam_file)

    no_reads_mapped = 0
    merged_bam.view() do |al|
      assert_kind_of(Bio::DB::Alignment, al)
      no_reads_mapped += 1
    end
    assert_equal(10, no_reads_mapped)
  end

  # NOTE(review): despite its name this test calls @sam.merge, not a cat
  # method, so concatenation itself is not exercised here -- confirm intent.
  def test_cat
    #same files used for merge, given here as paths rather than Sam objects
    bam1 = @test_folder + "/map_to_merge1.bam"
    bam2 = @test_folder + "/map_to_merge2.bam"
    bam_files = [bam1, bam2]

    cat_bam_file = @test_folder + "/maps_cated.bam"
    # remove any output left over from a previous run
    File.delete cat_bam_file if File.exist?(cat_bam_file)
    @sam.merge(:out => cat_bam_file, :bams => bam_files)
    cated_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => cat_bam_file)

    no_reads_mapped = 0
    cated_bam.view() do |al|
      assert_kind_of(Bio::DB::Alignment, al)
      no_reads_mapped += 1
    end
    #there should be 10 reads in the cat'd maps
    assert_equal(10, no_reads_mapped)
  end

  def test_rmdup
    #dupes contains 4 reads mapped once and one read mapped to the same place 268 times.
    dupes = @test_folder + "/dupes.bam"
    unduped = @test_folder + "/dupes_rmdup.bam"
    bam_with_dupes = Bio::DB::Sam.new(:fasta => @testReference, :bam => dupes)
    bam_with_dupes.remove_duplicates(:s => true, :out => unduped)

    unduped_bam = Bio::DB::Sam.new(:fasta => @testReference, :bam => unduped)
    #rmdup should remove 267 of the 268 reads mapping to the same place, so producing a bam file with 5 reads
    readcount = 0
    unduped_bam.view() do |_sam|
      readcount += 1
    end
    assert_equal(5, readcount)
  end

  # targetcut only has to complete without raising.
  def test_targetcut
    sorted_bam = @test_folder + "/sorted.bam"
    cut = Bio::DB::Sam.new(:fasta => @testReference, :bam => sorted_bam)
    assert_nothing_raised do
      cut.targetcut
    end
  end

  def test_docs
    #force an error (use 'samtool' instead of 'samtools')
    output = Bio::DB::Sam.docs('samtool', 'tview')
    assert_equal("program must be 'samtools' or 'bcftools'", output)
  end

  # Each line of the bedcov output must report 630 in its fourth column.
  def test_bedcov
    out_file = @test_folder + "/testu.out"
    @sam.bedcov(:bed => @bed_file, :out => out_file)
    # File.foreach closes the file even when an assertion fails part-way
    File.foreach(out_file) do |line|
      columns = line.chomp.split(/\t/)
      # split yields strings, so convert before comparing with the integer
      assert_equal(630, columns[3].to_i)
    end
  end

end
data/test/test_vcf.rb ADDED
@@ -0,0 +1,79 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ require 'rubygems'
4
+ require 'bio/db/vcf'
5
+ require "test/unit"
6
+ gem 'test-unit'
7
+
8
+
9
# Unit tests for Bio::DB::Vcf record parsing.
#
# VCF records are tab-delimited, so the fixture lines spell their column
# separators as explicit "\t" escapes; this keeps them intact when the file
# passes through tools that collapse whitespace.
class TestVcf < Test::Unit::TestCase

  # Builds four records: two taken from VCF 3.3 files and two from VCF 4.0
  # files. Only @vcf1 is given explicit sample names.
  def setup
    @vcf1 = Bio::DB::Vcf.new("19\t111\t.\tA\tC\t9.6\t.\t.\tGT:HQ\t0|0:10,10\t0|0:10,10\t0/1:3,3", ["a", "b", "c"]) #from a 3.3 vcf file
    @vcf2 = Bio::DB::Vcf.new("20\t14370\trs6054257\tG\tA\t29\t0\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:-1,-1") #from a 3.3 vcf file
    @vcf3 = Bio::DB::Vcf.new("19\t111\t.\tA\tC\t9.6\t.\t.\tGT:HQ\t0|0:10,10\t0|0:10,10\t0/1:3,3") #from a 4.0 vcf file
    @vcf4 = Bio::DB::Vcf.new("20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,") #from a 4.0 vcf file
  end

  # Checks every parsed field of the four fixture records. A "." column is
  # expected to parse to nil; records without explicit sample names are
  # expected to key their samples "1", "2", "3".
  def test_parse
    assert_equal("19", @vcf1.chrom)
    assert_equal(111, @vcf1.pos)
    assert_nil(@vcf1.id)
    assert_equal("A", @vcf1.ref)
    assert_equal("C", @vcf1.alt)
    assert_equal(9.6, @vcf1.qual)
    assert_nil(@vcf1.filter)
    assert_nil(@vcf1.info)
    assert_equal({"a"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                  "b"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                  "c"=>{"GT"=>"0/1", "HQ"=>"3,3"}}, @vcf1.samples)

    assert_equal("20", @vcf2.chrom)
    assert_equal(14370, @vcf2.pos)
    assert_equal('rs6054257', @vcf2.id)
    assert_equal("G", @vcf2.ref)
    assert_equal("A", @vcf2.alt)
    assert_equal(29, @vcf2.qual)
    assert_equal("0", @vcf2.filter)
    assert_equal({"DP"=>"14", "AF"=>"0.5", "NS"=>"3", "DB"=>nil, "H2"=>nil}, @vcf2.info)
    assert_equal({"1"=>{"DP"=>"1", "GT"=>"0|0", "HQ"=>"51,51", "GQ"=>"48"},
                  "2"=>{"DP"=>"8", "GT"=>"1|0", "HQ"=>"51,51", "GQ"=>"48"},
                  "3"=>{"DP"=>"5", "GT"=>"1/1", "HQ"=>"-1,-1", "GQ"=>"43"}}, @vcf2.samples)

    assert_equal("19", @vcf3.chrom)
    assert_equal(111, @vcf3.pos)
    assert_nil(@vcf3.id)
    assert_equal("A", @vcf3.ref)
    assert_equal("C", @vcf3.alt)
    assert_equal(9.6, @vcf3.qual)
    assert_nil(@vcf3.filter)
    assert_nil(@vcf3.info)
    assert_equal({"1"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                  "2"=>{"GT"=>"0|0", "HQ"=>"10,10"},
                  "3"=>{"GT"=>"0/1", "HQ"=>"3,3"}}, @vcf3.samples)

    assert_equal("20", @vcf4.chrom)
    assert_equal(14370, @vcf4.pos)
    assert_equal('rs6054257', @vcf4.id)
    assert_equal("G", @vcf4.ref)
    assert_equal("A", @vcf4.alt)
    assert_equal(29, @vcf4.qual)
    assert_equal("PASS", @vcf4.filter)
    assert_equal({"DP"=>"14", "AF"=>"0.5", "NS"=>"3", "DB"=>nil, "H2"=>nil}, @vcf4.info)
    assert_equal({"1"=>{"DP"=>"1", "GT"=>"0|0", "HQ"=>"51,51", "GQ"=>"48"},
                  "2"=>{"DP"=>"8", "GT"=>"1|0", "HQ"=>"51,51", "GQ"=>"48"},
                  "3"=>{"DP"=>"5", "GT"=>"1/1", "HQ"=>".,", "GQ"=>"43"}}, @vcf4.samples)
  end

  def test_int_or_raw
    assert_nothing_raised do
      @vcf1.int_or_raw(1)
    end
    # NOTE(review): `A` is a bare constant that this file never defines, so
    # the block raises NameError while evaluating the argument and int_or_raw
    # is never called. If the intent is to pass a non-numeric value, this
    # should probably be the string 'A' -- but whether int_or_raw raises for
    # such input cannot be told from here; confirm before changing.
    assert_raise do
      @vcf1.int_or_raw(A)
    end
  end

end