bio-pileup_iterator 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # - 1.8.7
8
+ # - jruby-18mode # JRuby in 1.8 mode
9
+ # - rbx-18mode
10
+
11
+ # uncomment this line if your project needs to run something other than `rake`:
12
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,18 @@
1
# Fetch gems over TLS rather than plain HTTP.
source "https://rubygems.org"

# Runtime dependencies: required to use the gem.
gem 'bio', '>=1.4.2'
gem 'bio-samtools', '>=0.5.3'
gem 'bio-logger', '>=1.0.0'

# Development dependencies: everything needed to run rake, tests, features, etc.
# Each gem is listed exactly once. 'bio' is already declared above as a
# runtime dependency, and repeating a gem (as 'rdoc' and 'bio' previously
# were) only makes Bundler warn about duplicate entries.
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "bundler", ">= 1.0.0"
  gem "jeweler", "~> 1.8.3"
end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Ben J. Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,39 @@
1
+ = bio-pileup_iterator
2
+
3
+ Iterate through a samtools pileup file, yielding each position together with the reads that cover it.
4
+
5
+ Note: this software is under active development!
6
+
7
+ == Installation
8
+
9
+ gem install bio-pileup_iterator
10
+
11
+ == Usage
12
+
13
+ == Developers
14
+
15
+ To use the library
16
+
17
+ require 'bio-pileup_iterator'
18
+
19
+ The API doc is online. For more code examples see also the test files in
20
+ the source tree.
21
+
22
+ == Project home page
23
+
24
+ For information on the source tree, documentation, issues and how to contribute, see
25
+
26
+ http://github.com/wwood/bioruby-pileup_iterator
27
+
28
+ == Cite
29
+
30
+ If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
31
+
32
+ == Biogems.info
33
+
34
+ This Biogem is published at http://biogems.info/index.html#bio-pileup_iterator
35
+
36
+ == Copyright
37
+
38
+ Copyright (c) 2012 Ben J. Woodcroft. See LICENSE.txt for further details.
39
+
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
# encoding: utf-8

# Build script: packaging (jeweler), tests (rake/testtask) and API docs (rdoc).

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Missing gems: report the problem and exit with Bundler's own status code.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-pileup_iterator"
  gem.homepage = "http://github.com/wwood/bioruby-pileup_iterator"
  gem.license = "MIT"
  gem.summary = %Q{Iterate through a samtools pileup file}
  gem.description = %Q{Iterate through a samtools pileup file}
  gem.email = "donttrustben near gmail.com"
  gem.authors = ["Ben J. Woodcroft"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

task :default => :test

require 'rdoc/task'
# RDoc::Task is the class defined by 'rdoc/task'; Rake::RDocTask is only a
# backwards-compatibility alias for it.
RDoc::Task.new do |rdoc|
  # Strip so that a trailing newline in VERSION does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-pileup_iterator #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,6 @@
1
+ require 'bio'
2
+ require 'bio-samtools'
3
+ require 'bio-logger'
4
+ Bio::Log::LoggerPlus.new('bio-pileup_iterator')
5
+
6
+ require 'bio/db/pileup_iterator'
@@ -0,0 +1,263 @@
1
+
2
+
3
# Extensions to the pileup column class (reopened here; the class itself is
# defined outside this file).
class Bio::DB::Pileup
  # Bio::DB::PileupIterator::PileupRead objects that occur at this position
  attr_accessor :reads

  # Returns how many '*' characters appear in this column's read_bases
  # string; every other character is ignored.
  def num_deletions
    read_bases.count('*')
  end
end
11
+
12
# Walks a pileup file column by column, tracking which reads cover each
# position and building up what is known about every read as it goes.
class Bio::DB::PileupIterator
  include Enumerable

  # io:: any object whose #each_line yields one pileup line at a time
  #      (the tests in this project pass a plain String).
  def initialize(io)
    @io = io
  end

  # Iterates through the positions of a pileup, yielding an instance of
  # Bio::DB::Pileup for each line, complete with its +reads+ attribute set to
  # an Array of PileupRead objects covering that position.
  #
  # The yielded Array is a fresh copy per column, but the PileupRead objects in
  # it are shared between columns: a read's sequence keeps growing as later
  # columns are parsed.
  #
  # Returns an Enumerator when called without a block.
  #
  # Raises RuntimeError when a read changes strand part-way through or an
  # insertion/deletion cannot be re-parsed at its declared length, and
  # Exception when the read_bases string cannot be parsed at all (or when a
  # plain ',' contradicts the read's known strand). The Exception class is kept
  # for those two cases so that existing callers see the same error classes.
  #
  # Known problems:
  # * Doesn't record start or ends of each read
  # * Doesn't lookahead to determine the sequence of each read (though it does give the preceding bases)
  # * Gives no information with mismatches
  def each
    return enum_for(:each) unless block_given?

    # Reads currently open, in the order they appear in each column's read_bases.
    current_ordered_reads = []
    log = Bio::Log::LoggerPlus['bio-pileup_iterator']

    @io.each_line do |line|
      pileup = Bio::DB::Pileup.new(line.strip)
      current_read_index = 0
      # Indices (into current_ordered_reads) of reads that finish at this column.
      reads_ending = []

      bases = pileup.read_bases
      while bases.length > 0
        # Firstly, what is the current read we are working with? A missing
        # entry means this column introduces a read not seen before.
        current_read = current_ordered_reads[current_read_index]
        if current_read.nil?
          current_read = PileupRead.new
          current_ordered_reads.push current_read
        end
        matches = nil

        # Now, parse what the current read is. The order of these branches
        # matters: the longer, more specific patterns must be tried first.
        if matches = bases.match(/^([ACGTNacgtn\.\,])([\+\-])([0-9]+)([ACGTNacgtn]+)(\${0,1})/)
          # Base followed by an insertion/deletion. Match again using the
          # declared length, so that bases belonging to the next read are not
          # swallowed by the greedy first match.
          num_inserted = matches[3].to_i
          matches = bases.match(/^([ACGTNacgtn\.\,])([\+\-])([0-9]+)([ACGTNacgtn]{#{num_inserted}})(\${0,1})/)
          unless matches
            raise "Unable to parse an insertion/deletion of declared length #{num_inserted}, starting here: #{bases}, from #{pileup.inspect}"
          end

          if matches[1] == '.'
            append_reference_match(current_read, PileupRead::FORWARD_DIRECTION, pileup, bases)
          elsif matches[1] == ','
            append_reference_match(current_read, PileupRead::REVERSE_DIRECTION, pileup, bases)
          else
            # Could sanity check the direction here by detecting case, but eh
            current_read.sequence = "#{current_read.sequence}#{matches[1]}"
          end

          # Only insertions are recorded; deletions are ignored apart from the
          # direction information gathered above.
          if matches[2] == '+'
            current_read.add_insertion pileup.pos, matches[3], matches[4]
          end

          reads_ending.push current_read_index if matches[5].length > 0

        elsif matches = bases.match(/^([\.\,])\$/)
          # Regular match in some direction, and the end of the read.
          direction = matches[1] == '.' ? PileupRead::FORWARD_DIRECTION : PileupRead::REVERSE_DIRECTION
          append_reference_match(current_read, direction, pileup, bases)
          reads_ending.push current_read_index

        elsif matches = bases.match(/^\./)
          # Regular match in the forward direction, continuing onwards.
          append_reference_match(current_read, PileupRead::FORWARD_DIRECTION, pileup, bases)

        elsif matches = bases.match(/^\,/)
          # Regular match in the reverse direction, continuing onwards. This
          # is the one strand conflict that is logged and raised as Exception.
          if current_read.direction && current_read.direction != PileupRead::REVERSE_DIRECTION
            error_msg = direction_conflict_message(current_read, PileupRead::REVERSE_DIRECTION, pileup, bases)
            log.error error_msg
            raise Exception, error_msg
          end
          current_read.direction = PileupRead::REVERSE_DIRECTION
          current_read.sequence = "#{current_read.sequence}#{pileup.ref_base}"

        elsif matches = bases.match(/^\^.([ACGTNacgtn\.\,\*])([\+\-])([0-9]+)([ACGTNacgtn]+)(\${0,1})/)
          # Starting a new read (possibly with a gap), with an accompanying
          # insertion/deletion.
          # NOTE(review): unlike the first branch, the declared indel length
          # is not enforced here, so the indel bases are matched greedily.
          begin_read(current_read, matches[1], pileup)

          if matches[2] == '+'
            current_read.add_insertion pileup.pos, matches[3], matches[4]
          end

          reads_ending.push current_read_index if matches[5].length > 0

        elsif matches = bases.match(/^\^.([ACGTNacgtn\.\,\*])(\${0,1})/)
          # Regular match, starting a new read. The read may end here as
          # well, even though it also started here.
          begin_read(current_read, matches[1], pileup)
          reads_ending.push current_read_index if matches[2].length > 0

        elsif matches = bases.match(/^\*([\+\-])([0-9]+)([ACGTNacgtn=]+)(\${0,1})/)
          # Gap followed by an insertion/deletion.
          # NOTE(review): the declared indel length is not enforced here
          # either; test_double_insertion (`*+1gg` recorded as 'gg') relies on
          # the greedy match.
          current_read.sequence = "#{current_read.sequence}*"
          reads_ending.push current_read_index if matches[4].length > 0

          if matches[1] == '+'
            current_read.add_insertion pileup.pos, matches[2], matches[3]
          end

        elsif matches = bases.match(/(^[ACGTNacgtn\*])(\${0,1})/)
          # Simple mismatch (or gap): record the character as given.
          current_read.sequence = "#{current_read.sequence}#{matches[1]}"
          reads_ending.push current_read_index if matches[2].length > 0
        end

        raise Exception, "Unexpected Pileup format bases, starting here: #{bases}, from #{pileup.inspect}" if matches.nil?

        # Remove the matched part from the base string for next time
        bases = bases[matches.to_s.length..-1]

        current_read_index += 1
      end

      # Yield a copy of the array, otherwise the deletions below would also
      # remove reads from the array the caller has just been given.
      pileup.reads = current_ordered_reads.dup
      yield pileup

      # Remove reads that ended. In reverse order, since removing the last
      # ones first doesn't disturb the indices of the earlier ones.
      reads_ending.reverse_each do |i|
        current_ordered_reads.delete_at i
      end
    end
  end

  private

  # Records a match to the reference on +read+: sets its direction and appends
  # the column's reference base to its sequence. Raises RuntimeError when the
  # read is already known to lie in the other direction.
  def append_reference_match(read, direction, pileup, bases)
    if read.direction && read.direction != direction
      raise direction_conflict_message(read, direction, pileup, bases)
    end
    read.direction = direction
    read.sequence = "#{read.sequence}#{pileup.ref_base}"
  end

  # Applies the first symbol of a newly started read (the character that
  # follows the '^' marker and the one character after it).
  def begin_read(read, symbol, pileup)
    case symbol
    when '.'
      read.direction = PileupRead::FORWARD_DIRECTION
      read.sequence = "#{read.sequence}#{pileup.ref_base}"
    when ','
      read.direction = PileupRead::REVERSE_DIRECTION
      read.sequence = "#{read.sequence}#{pileup.ref_base}"
    when '*'
      # Starting with a gap: leave the direction unknown at this point.
      read.sequence = "#{read.sequence}#{symbol}"
    else
      # A mismatching base: upper case is treated as forward, lower case as reverse.
      read.direction = symbol == symbol.upcase ? PileupRead::FORWARD_DIRECTION : PileupRead::REVERSE_DIRECTION
      read.sequence = "#{read.sequence}#{symbol}"
    end
  end

  # Builds the message used when a read appears in a direction that
  # contradicts the direction already recorded for it.
  def direction_conflict_message(read, expected_direction, pileup, bases)
    "Unexpectedly found a #{read.direction} direction read when expecting a #{expected_direction} direction one. This suggests there is a problem with either the pileup file or this pileup parser. Current pileup column #{pileup.inspect}, read #{read.inspect}, chomped until #{bases}"
  end

  # What is known about a single read as the iterator walks along the pileup.
  class PileupRead
    # Directions relative to reference
    FORWARD_DIRECTION = '+'
    REVERSE_DIRECTION = '-'

    # sequence is always in the direction of the start of the reference to the
    # end - only direction gives direction information. direction is nil until
    # it has been determined.
    attr_accessor :direction, :sequence

    # A hash of recorded insertions. Key of hash is the position in the
    # consensus that it has been added to in the alignment, value is the bases
    # that have been inserted
    attr_reader :insertions

    def initialize
      @sequence = ''
      @insertions = {}
    end

    # Returns the tail of the sequence seen so far: the last two characters
    # once the sequence is at least two characters long.
    # NOTE(review): the intended meaning of this slice is not evident from
    # this file - confirm before relying on it.
    def read
      @sequence[@sequence.length-2..@sequence.length-1]
    end

    # Records +insertion_bases+ as inserted at +position+. A later insertion at
    # the same position overwrites the earlier one. +insertion_length+ is
    # accepted but not used.
    def add_insertion(position, insertion_length, insertion_bases)
      insertions[position] = insertion_bases
    end
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
# Shared test bootstrap: activates the bundle, loads the test libraries and
# puts lib/ and test/ on the load path before requiring the gem itself.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Missing gems: report the problem and exit with Bundler's own status code.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'bio-pileup_iterator'

# Reopened without additions; a place for helpers shared by all test cases.
class Test::Unit::TestCase
end
@@ -0,0 +1,212 @@
1
+ require 'helper'
2
+
3
# Parser tests for Bio::DB::PileupIterator. Each test feeds one or two pileup
# lines (tab-separated: contig, position, reference base, coverage, read
# bases, qualities) to the iterator and checks the reads it reconstructs.
#
# Note that the PileupRead objects are shared between columns, so after the
# whole input has been consumed the reads of the first column also contain
# the bases contributed by later columns.
class TestBioPileupIterator < Test::Unit::TestCase
  def test_pileup_parsing
    piles = pileups_from("contig00001\t199\tA\t4\t.$...$\t>a^>")
    pileup = piles[0]
    reads = pileup.reads

    assert_equal 'A', reads[0].sequence
    assert_equal 4, reads.length
    assert_kind_of Bio::DB::Pileup, pileup
  end

  def test_2_pileup_columns
    piles = pileups_from("contig00001\t199\tA\t4\t.$...$\t>a^>\ncontig00001\t200\tT\t2\t..\taR")
    reads = piles[0].reads
    reads2 = piles[1].reads

    assert_equal 'A', piles[0].ref_base
    assert_equal 'T', piles[1].ref_base
    assert_equal 4, reads.length
    assert_equal 2, reads2.length
    assert_equal 'AT', reads2[0].sequence
  end

  def test_fwd_rev
    piles = pileups_from("contig00001\t199\tA\t4\t.$,..$\t>a^>\ncontig00001\t200\tT\t2\t,.\taR")
    reads = piles[0].reads
    reads2 = piles[1].reads

    assert_equal 4, reads.length
    assert_equal 2, reads2.length
    assert_equal 'AT', reads2[0].sequence
    assert_equal '-', reads2[0].direction
    assert_equal '+', reads2[1].direction
  end

  def test_deletion
    piles = pileups_from("contig00001\t199\tA\t4\t.-1T...$\t>a^>\ncontig00001\t200\tT\t2\t*..\taR")
    reads = piles[0].reads

    assert_equal 'A*', reads[0].sequence
    assert_equal Hash.new, reads[0].insertions
  end

  def test_substitution
    piles = pileups_from("contig00001\t199\tA\t4\t.G..$\t>a^>")
    reads = piles[0].reads

    assert_equal 'A', reads[0].sequence
    assert_equal 'G', reads[1].sequence
    # The read after the substitution matches the reference again.
    assert_equal 'A', reads[2].sequence
  end

  def test_substitution_with_insertion
    piles = pileups_from("contig00001\t199\tA\t4\tG-1T..$.\t>a^>\ncontig00001\t200\tT\t2\t*..\taR")
    reads = piles[0].reads
    reads2 = piles[1].reads

    assert_equal 2, piles.length
    assert_equal 4, reads.length
    assert_equal 3, reads2.length
    assert_equal 'G*', reads[0].sequence
    assert_equal 'AT', reads[1].sequence
    assert_equal 'A', reads[2].sequence
    assert_equal 'AT', reads[3].sequence
  end

  def test_start_read_warning_of_deletion_next
    line = "contig00001\t8\tG\t4\t..,^],-1g\ta!U!\n"+
      "contig00001\t9\tg\t4\t..,*\ta!aU"
    piles = pileups_from(line)
    reads = piles[0].reads
    reads2 = piles[1].reads

    assert_equal 2, piles.length
    assert_equal 4, reads.length
    assert_equal 4, reads2.length
    # The fourth read starts at the first column on the reverse strand and
    # shows the announced deletion as a gap at the second column.
    assert_equal '-', reads[3].direction
    assert_equal 'G*', reads[3].sequence
  end

  def test_star_then_insert
    piles = pileups_from("contig00001\t23\tC\t40\t.*+1G..\t~~~~\n")
    reads = piles[0].reads

    assert_equal 4, reads.length
    assert_equal({}, reads[0].insertions)
    assert_equal '*', reads[1].sequence
    assert_equal({23 => 'G'}, reads[1].insertions)
  end

  def test_star_finishing_a_read
    line = "contig00001\t717\tC\t47\t,$.$,$*$,$,$,$,$,$,$,$*$*$*$*$,$.$*$*$*$*$.$.$.$,$,$*$,$,$,$,$.$.$*$*$.$,$,$,$,$.$,$.$,$*$,$,$\t0..~2-.-.,#~~~~+,~~~~+**,!~!!!!!!~~(((((((((~!!\n"
    piles = pileups_from(line)
    assert_equal '*', piles[0].reads[3].sequence
  end

  def test_start_finishing_a_read
    piles = pileups_from("contig00002\t1\tC\t47\t^],$\t~\n")
    assert_equal 'C', piles[0].reads[0].sequence
  end

  def test_start_with_a_gap
    piles = pileups_from("contig00075\t503\tT\t24\t,^]*\tU\n")
    assert_equal 'T', piles[0].reads[0].sequence
    assert_equal '*', piles[0].reads[1].sequence
  end

  def test_start_then_insert_then_end
    piles = pileups_from("contig00075\t503\tG\t24\t^].+1T$^].\t~~\n")
    assert_equal 'G', piles[0].reads[0].sequence
    assert_equal({503 => 'T'}, piles[0].reads[0].insertions)
    assert_equal 'G', piles[0].reads[1].sequence
  end

  def test_star_then_insert2
    piles = pileups_from("contig00075\t503\tG\t24\t,*+1g.\t~~\n")
    assert_equal 'G', piles[0].reads[0].sequence
    assert_equal '*', piles[0].reads[1].sequence
    assert_equal 'G', piles[0].reads[2].sequence
  end

  def test_start_with_gap_then_insertion
    line = "contig00075\t503\tG\t24\t,,.^]*+1g\tE~\n"+
      "contig00075\t504\tA\t24\t,,.,\tE~\n"
    piles = pileups_from(line)

    assert_equal 'GA', piles[0].reads[0].sequence
    assert_equal 'GA', piles[0].reads[1].sequence
    assert_equal 'GA', piles[0].reads[2].sequence
    assert_equal '*A', piles[0].reads[3].sequence
    assert_equal({503 => 'g'}, piles[0].reads[3].insertions)
  end

  def test_double_insertion
    piles = pileups_from("contig00075\t503\tG\t24\t*+1gg\tE\n")
    assert_equal({503 => 'gg'}, piles[0].reads[0].insertions)
  end

  def test_non_perfect_starting_read
    piles = pileups_from("contig00075\t503\tG\t24\t^F.*+1gg\tE\n")
    assert_equal '+', piles[0].reads[0].direction
    assert_equal 'G', piles[0].reads[0].sequence
    assert_equal '*', piles[0].reads[1].sequence
  end

  def test_non_matching_finish
    line = "contig00002\t6317\tC\t2\ta$.\t!B\n"+
      "contig00002\t6318\tT\t1\t.\tA\n"
    piles = pileups_from(line)

    assert_equal 2, piles[0].reads.length
    assert_equal 'a', piles[0].reads[0].sequence
    assert_equal 'CT', piles[0].reads[1].sequence
  end

  def test_insertion_then_mismatch
    line = "contig00044\t867\tC\t6\t,,,,,.\t!:!!:=\n"+
      "contig00044\t868\tG\t6\tt,+1ttt,.\t!A!!C9\n"
    piles = pileups_from(line)

    assert_equal 6, piles[0].reads.length
    assert_equal 'Ct', piles[0].reads[0].sequence
    assert_equal 'CG', piles[0].reads[1].sequence
    hash = {868=>'t'}
    assert_equal hash, piles[0].reads[1].insertions
    assert_equal 'Ct', piles[0].reads[2].sequence
  end

  private

  # Parses +pileup_text+ (one pileup line per text line) and returns every
  # yielded Bio::DB::Pileup as an Array.
  def pileups_from(pileup_text)
    Bio::DB::PileupIterator.new(pileup_text).to_a
  end
end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-pileup_iterator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ben J. Woodcroft
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bio
16
+ requirement: &84151300 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.4.2
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *84151300
25
+ - !ruby/object:Gem::Dependency
26
+ name: bio-samtools
27
+ requirement: &84150960 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 0.5.3
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *84150960
36
+ - !ruby/object:Gem::Dependency
37
+ name: bio-logger
38
+ requirement: &84150510 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: 1.0.0
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *84150510
47
+ - !ruby/object:Gem::Dependency
48
+ name: shoulda
49
+ requirement: &84150270 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *84150270
58
+ - !ruby/object:Gem::Dependency
59
+ name: rdoc
60
+ requirement: &84166240 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: '3.12'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *84166240
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: &84165520 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: 1.0.0
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *84165520
80
+ - !ruby/object:Gem::Dependency
81
+ name: jeweler
82
+ requirement: &84165000 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ version: 1.8.3
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *84165000
91
+ - !ruby/object:Gem::Dependency
92
+ name: bio
93
+ requirement: &84164380 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: 1.4.2
99
+ type: :development
100
+ prerelease: false
101
+ version_requirements: *84164380
102
+ - !ruby/object:Gem::Dependency
103
+ name: rdoc
104
+ requirement: &84163460 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '3.12'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: *84163460
113
+ description: Iterate through a samtools pileup file
114
+ email: donttrustben near gmail.com
115
+ executables: []
116
+ extensions: []
117
+ extra_rdoc_files:
118
+ - LICENSE.txt
119
+ - README.rdoc
120
+ files:
121
+ - .document
122
+ - .travis.yml
123
+ - Gemfile
124
+ - LICENSE.txt
125
+ - README.rdoc
126
+ - Rakefile
127
+ - VERSION
128
+ - lib/bio-pileup_iterator.rb
129
+ - lib/bio/db/pileup_iterator.rb
130
+ - test/helper.rb
131
+ - test/test_bio-pileup_iterator.rb
132
+ homepage: http://github.com/wwood/bioruby-pileup_iterator
133
+ licenses:
134
+ - MIT
135
+ post_install_message:
136
+ rdoc_options: []
137
+ require_paths:
138
+ - lib
139
+ required_ruby_version: !ruby/object:Gem::Requirement
140
+ none: false
141
+ requirements:
142
+ - - ! '>='
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ segments:
146
+ - 0
147
+ hash: -115500147
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubyforge_project:
156
+ rubygems_version: 1.8.17
157
+ signing_key:
158
+ specification_version: 3
159
+ summary: Iterate through a samtools pileup file
160
+ test_files: []