bio-pileup_iterator 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.travis.yml +12 -0
- data/Gemfile +18 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +39 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/lib/bio-pileup_iterator.rb +6 -0
- data/lib/bio/db/pileup_iterator.rb +263 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-pileup_iterator.rb +212 -0
- metadata +160 -0
data/.document
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- jruby-19mode # JRuby in 1.9 mode
|
6
|
+
- rbx-19mode
|
7
|
+
# - 1.8.7
|
8
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
9
|
+
# - rbx-18mode
|
10
|
+
|
11
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
12
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies: gems required to use this library.
# Example:
#   gem "activesupport", ">= 2.3.5"
gem 'bio', '>=1.4.2'
gem 'bio-samtools', '>=0.5.3'
gem 'bio-logger', '>=1.0.0'

# Development dependencies: everything needed to run rake, tests, features, etc.
# Each gem is declared once only. "bio" is not repeated here because it is
# already declared above as a runtime dependency.
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "bundler", ">= 1.0.0"
  gem "jeweler", "~> 1.8.3"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Ben J. Woodcroft
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
= bio-pileup_iterator
|
2
|
+
|
3
|
+
Full description goes here
|
4
|
+
|
5
|
+
Note: this software is under active development!
|
6
|
+
|
7
|
+
== Installation
|
8
|
+
|
9
|
+
gem install bio-pileup_iterator
|
10
|
+
|
11
|
+
== Usage
|
12
|
+
|
13
|
+
== Developers
|
14
|
+
|
15
|
+
To use the library
|
16
|
+
|
17
|
+
require 'bio-pileup_iterator'
|
18
|
+
|
19
|
+
The API doc is online. For more code examples see also the test files in
|
20
|
+
the source tree.
|
21
|
+
|
22
|
+
== Project home page
|
23
|
+
|
24
|
+
For information on the source tree, documentation, issues and how to contribute, see
|
25
|
+
|
26
|
+
http://github.com/wwood/bioruby-pileup_iterator
|
27
|
+
|
28
|
+
== Cite
|
29
|
+
|
30
|
+
If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
31
|
+
|
32
|
+
== Biogems.info
|
33
|
+
|
34
|
+
This Biogem is published at http://biogems.info/index.html#bio-pileup_iterator
|
35
|
+
|
36
|
+
== Copyright
|
37
|
+
|
38
|
+
Copyright (c) 2012 Ben J. Woodcroft. See LICENSE.txt for further details.
|
39
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "bio-pileup_iterator"
|
18
|
+
gem.homepage = "http://github.com/wwood/bioruby-pileup_iterator"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Iterate through a samtools pileup file}
|
21
|
+
gem.description = %Q{Iterate through a samtools pileup file}
|
22
|
+
gem.email = "donttrustben near gmail.com"
|
23
|
+
gem.authors = ["Ben J. Woodcroft"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/test_*.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
task :default => :test
|
36
|
+
|
37
|
+
require 'rdoc/task'
|
38
|
+
Rake::RDocTask.new do |rdoc|
|
39
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
+
|
41
|
+
rdoc.rdoc_dir = 'rdoc'
|
42
|
+
rdoc.title = "bio-pileup_iterator #{version}"
|
43
|
+
rdoc.rdoc_files.include('README*')
|
44
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,263 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
class Bio::DB::Pileup
|
4
|
+
# Bio::DB::PileupIterator::PileupRead objects that occur at this position
|
5
|
+
attr_accessor :reads
|
6
|
+
|
7
|
+
# Counts the deletion placeholders ('*') among the read bases at this position.
#
# A '^' read-start marker is followed by exactly one mapping-quality character,
# and that character can itself be '*'. Those two-character markers are removed
# before counting so that a quality character is never reported as a deletion.
#
# Returns the Integer number of '*' placeholders in read_bases.
def num_deletions
  read_bases.gsub(/\^./, '').count('*')
end
|
10
|
+
end
|
11
|
+
|
12
|
+
class Bio::DB::PileupIterator
|
13
|
+
include Enumerable
|
14
|
+
|
15
|
+
# Wraps a pileup source for iteration.
#
# io - the pileup text source. It is stored in @io as-is and is not read here;
#      #each later calls each_line on it, so it must respond to each_line.
def initialize(io)
|
16
|
+
@io = io
|
17
|
+
end
|
18
|
+
|
19
|
+
# Iterates through the positions (columns) of a pileup, yielding one
# Bio::DB::Pileup per input line. Each yielded pileup has its +reads+ attribute
# set to an Array of PileupRead objects, one per read present in that column, in
# the order the reads appear in the column.
#
# PileupRead objects are shared between consecutive columns: a read that
# continues into the next column is the same object, so its sequence keeps
# growing while later columns are parsed. Only the Array itself is copied for
# each column.
#
# Returns an Enumerator when called without a block.
#
# Raises RuntimeError when a column cannot be parsed, when an indel length does
# not fit the bases that follow it, or when a read seen on one strand shows up
# on the other strand later on. (Two of these sites used to raise Exception
# itself; RuntimeError is still caught by an existing `rescue Exception`.)
#
# Known problems:
# * Doesn't record start or ends of each read
# * Doesn't lookahead to determine the sequence of each read (though it does give the preceding bases)
# * Mismatching bases are appended to the read sequence as-is; nothing else is recorded about them
# * NOTE(review): when an indel follows a read start ('^') or a gap ('*'), the
#   inserted bases are matched greedily rather than by the declared length, unlike
#   the plain base+indel case - confirm whether that is intended.
def each
  return enum_for(:each) unless block_given?

  # The lookup result is only used on the error path, and only if present.
  log = Bio::Log::LoggerPlus['bio-pileup_iterator']

  # Reads that are still open, in the order they appear in a pileup column.
  current_ordered_reads = []

  @io.each_line do |line|
    pileup = Bio::DB::Pileup.new(line.strip)
    current_read_index = 0
    reads_ending = [] # indices into current_ordered_reads of reads that end in this column
    bases = pileup.read_bases

    # Each pass consumes the symbols belonging to exactly one read from the
    # front of +bases+.
    while bases.length > 0
      # Firstly, which read are we working with: an open one, or a new one?
      current_read = current_ordered_reads[current_read_index]
      if current_read.nil?
        current_read = PileupRead.new
        current_ordered_reads.push current_read
      end

      read_ends = false

      # The order of these tests matters: the longer, more specific patterns
      # have to be tried before the single-character ones.
      if matches = bases.match(/^([ACGTNacgtn\.\,])([\+\-])([0-9]+)([ACGTNacgtn]+)(\${0,1})/)
        # A base followed by an insertion/deletion. Match again so that exactly
        # the declared number of indel bases is consumed; anything after that
        # belongs to the following reads.
        num_inserted = matches[3].to_i
        matches = bases.match(/^([ACGTNacgtn\.\,])([\+\-])([0-9]+)([ACGTNacgtn]{#{num_inserted}})(\${0,1})/)
        raise "Indel of declared length #{num_inserted} is not followed by that many bases, starting here: #{bases}, from #{pileup.inspect}" unless matches

        append_continuing_base current_read, matches[1], pileup, bases, log
        current_read.add_insertion pileup.pos, matches[3], matches[4] if matches[2] == '+'
        read_ends = !matches[5].empty?

      elsif matches = bases.match(/^([\.\,])\$/)
        # Regular match in some direction, and the read ends here.
        append_continuing_base current_read, matches[1], pileup, bases, log
        read_ends = true

      elsif matches = bases.match(/^\./)
        # Regular match in the forward direction, continuing onwards.
        append_continuing_base current_read, '.', pileup, bases, log

      elsif matches = bases.match(/^\,/)
        # Regular match in the reverse direction, continuing onwards.
        append_continuing_base current_read, ',', pileup, bases, log

      elsif matches = bases.match(/^\^.([ACGTNacgtn\.\,\*])([\+\-])([0-9]+)([ACGTNacgtn]+)(\${0,1})/)
        # Start of a new read (possibly with a gap), with an accompanying
        # insertion/deletion.
        append_starting_base current_read, matches[1], pileup.ref_base
        current_read.add_insertion pileup.pos, matches[3], matches[4] if matches[2] == '+'
        read_ends = !matches[5].empty?

      elsif matches = bases.match(/^\^.([ACGTNacgtn\.\,\*])(\${0,1})/)
        # Start of a new read. It may end in the same column it started in.
        append_starting_base current_read, matches[1], pileup.ref_base
        read_ends = !matches[2].empty?

      elsif matches = bases.match(/^\*([\+\-])([0-9]+)([ACGTNacgtn=]+)(\${0,1})/)
        # A gap followed by an insertion/deletion.
        current_read.sequence = "#{current_read.sequence}*"
        current_read.add_insertion pileup.pos, matches[2], matches[3] if matches[1] == '+'
        read_ends = !matches[4].empty?

      elsif matches = bases.match(/(^[ACGTNacgtn\*])(\${0,1})/)
        # A simple mismatch, or a gap.
        append_continuing_base current_read, matches[1], pileup, bases, log
        read_ends = !matches[2].empty?
      end

      raise "Unexpected Pileup format bases, starting here: #{bases}, from #{pileup.inspect}" if matches.nil?

      reads_ending.push current_read_index if read_ends

      # Remove the matched part from the base string for the next pass.
      bases = bases[matches.to_s.length..-1]
      current_read_index += 1
    end

    # Yield a copy of the array, otherwise the reads deleted below would
    # disappear from the array the caller was given as well.
    pileup.reads = Array.new(current_ordered_reads)
    yield pileup

    # Remove the reads that ended. This goes in reverse order, since removing
    # the last ones first leaves the earlier indices untouched.
    reads_ending.reverse.each do |i|
      current_ordered_reads.delete_at i
    end
  end
end

# Extends +read+ by one position for a read that was already open (or whose
# start marker was not seen). '.' and ',' add the reference base and set the
# strand; any other symbol is appended unchanged and leaves the strand alone.
def append_continuing_base(read, symbol, pileup, remaining_bases, log)
  case symbol
  when '.'
    confirm_direction read, PileupRead::FORWARD_DIRECTION, pileup, remaining_bases, log
    read.sequence = "#{read.sequence}#{pileup.ref_base}"
  when ','
    confirm_direction read, PileupRead::REVERSE_DIRECTION, pileup, remaining_bases, log
    read.sequence = "#{read.sequence}#{pileup.ref_base}"
  else
    # Could sanity check the direction here by detecting case, but eh
    read.sequence = "#{read.sequence}#{symbol}"
  end
end

# Extends +read+ by the first base after a '^' read-start marker. The strand
# comes from the symbol: '.' or an upper-case base is forward, ',' or a
# lower-case base is reverse, and a gap ('*') leaves the strand unknown.
def append_starting_base(read, symbol, ref_base)
  if symbol == '.'
    read.direction = PileupRead::FORWARD_DIRECTION
    read.sequence = "#{read.sequence}#{ref_base}"
  elsif symbol == ','
    read.direction = PileupRead::REVERSE_DIRECTION
    read.sequence = "#{read.sequence}#{ref_base}"
  elsif symbol == '*'
    read.sequence = "#{read.sequence}#{symbol}"
  elsif symbol == symbol.upcase
    read.direction = PileupRead::FORWARD_DIRECTION
    read.sequence = "#{read.sequence}#{symbol}"
  else
    read.direction = PileupRead::REVERSE_DIRECTION
    read.sequence = "#{read.sequence}#{symbol}"
  end
end

# Sets the strand of +read+ to +direction+. Raises RuntimeError (after logging,
# when a logger was found) if the read was already recorded on the other strand.
def confirm_direction(read, direction, pileup, remaining_bases, log)
  if !read.direction.nil? && read.direction != direction
    error_msg = "Unexpectedly found a #{direction} direction base for a read previously recorded with #{read.direction} direction. This suggests there is a problem with either the pileup file or this pileup parser. Current pileup column #{pileup.inspect}, read #{read.inspect}, chomped until #{remaining_bases}"
    log.error error_msg if log
    raise error_msg
  end
  read.direction = direction
end

private :append_continuing_base, :append_starting_base, :confirm_direction
|
238
|
+
|
239
|
+
# A single read, as pieced together while walking the columns of a pileup.
class PileupRead
  # Directions relative to reference. These exact objects are assigned to
  # #direction, so they are frozen: mutating one read's direction in place must
  # not be able to change the constant seen by every other read.
  FORWARD_DIRECTION = '+'.freeze
  REVERSE_DIRECTION = '-'.freeze

  # sequence is always in the direction of the start of the reference to the
  # end - only direction gives direction information. direction is nil until
  # it has been set.
  attr_accessor :direction, :sequence

  # A hash of recorded insertions. Key of hash is the position in the consensus
  # that it has been added to in the alignment, value is the bases that have
  # been inserted.
  attr_reader :insertions

  # Starts with an empty sequence, no insertions and no direction.
  def initialize
    @sequence = ''
    @insertions = {}
  end

  # Returns the last two characters of the sequence (the single character when
  # the sequence has length one), or nil when the sequence is empty.
  def read
    @sequence[@sequence.length - 2..@sequence.length - 1]
  end

  # Records +insertion_bases+ as inserted at +position+, replacing anything
  # recorded at that position before. +insertion_length+ is accepted for the
  # callers' convenience but is not stored.
  def add_insertion(position, insertion_length, insertion_bases)
    @insertions[position] = insertion_bases
  end
end
|
263
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-pileup_iterator'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestBioPileupIterator < Test::Unit::TestCase
|
4
|
+
def test_pileup_parsing
|
5
|
+
line = "contig00001\t199\tA\t4\t.$...$\t>a^>"
|
6
|
+
#contig00001\t200\tT\t2\t..\taR"
|
7
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
8
|
+
pileup = piles[0]
|
9
|
+
reads = piles[0].reads
|
10
|
+
|
11
|
+
assert_equal 'A', reads[0].sequence
|
12
|
+
assert_equal 4, reads.length
|
13
|
+
assert_kind_of Bio::DB::Pileup, pileup
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_2_pileup_columns
|
17
|
+
line = "contig00001\t199\tA\t4\t.$...$\t>a^>\ncontig00001\t200\tT\t2\t..\taR"
|
18
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
19
|
+
|
20
|
+
pileup = piles[0]
|
21
|
+
reads = piles[0].reads
|
22
|
+
reads2 = piles[1].reads
|
23
|
+
|
24
|
+
assert_equal 'A', piles[0].ref_base
|
25
|
+
assert_equal 'T', piles[1].ref_base
|
26
|
+
assert_equal 4, reads.length
|
27
|
+
assert_equal 2, reads2.length
|
28
|
+
assert_equal 'AT', reads2[0].sequence
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_fwd_rev
|
32
|
+
line = "contig00001\t199\tA\t4\t.$,..$\t>a^>\ncontig00001\t200\tT\t2\t,.\taR"
|
33
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
34
|
+
|
35
|
+
pileup = piles[0]
|
36
|
+
reads = piles[0].reads
|
37
|
+
reads2 = piles[1].reads
|
38
|
+
|
39
|
+
assert_equal 4, reads.length
|
40
|
+
assert_equal 2, reads2.length
|
41
|
+
assert_equal 'AT', reads2[0].sequence
|
42
|
+
assert_equal '-', reads2[0].direction
|
43
|
+
assert_equal '+', reads2[1].direction
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_deletion
|
47
|
+
line = "contig00001\t199\tA\t4\t.-1T...$\t>a^>\ncontig00001\t200\tT\t2\t*..\taR"
|
48
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
49
|
+
|
50
|
+
pileup = piles[0]
|
51
|
+
reads = piles[0].reads
|
52
|
+
reads2 = piles[1].reads
|
53
|
+
|
54
|
+
assert_equal 'A*', reads[0].sequence
|
55
|
+
assert_equal Hash.new, reads[0].insertions
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_substitution
|
59
|
+
line = "contig00001\t199\tA\t4\t.G..$\t>a^>"
|
60
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
61
|
+
|
62
|
+
pileup = piles[0]
|
63
|
+
reads = piles[0].reads
|
64
|
+
|
65
|
+
assert_equal 'A', reads[0].sequence
|
66
|
+
assert_equal 'G', reads[1].sequence
|
67
|
+
assert_equal 'A', reads[0].sequence
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_substitution_with_insertion
|
71
|
+
line = "contig00001\t199\tA\t4\tG-1T..$.\t>a^>\ncontig00001\t200\tT\t2\t*..\taR"
|
72
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
73
|
+
|
74
|
+
pileup = piles[0]
|
75
|
+
reads = piles[0].reads
|
76
|
+
reads2 = piles[1].reads
|
77
|
+
|
78
|
+
assert_equal 2, piles.length
|
79
|
+
assert_equal 4, reads.length
|
80
|
+
assert_equal 3, reads2.length
|
81
|
+
assert_equal 'G*', reads[0].sequence
|
82
|
+
assert_equal 'AT', reads[1].sequence
|
83
|
+
assert_equal 'A', reads[2].sequence
|
84
|
+
assert_equal 'AT', reads[3].sequence
|
85
|
+
end
|
86
|
+
|
87
|
+
def test_start_read_warning_of_deletion_next
|
88
|
+
line = "contig00001\t8\tG\t4\t..,^],-1g\ta!U!\n"+
|
89
|
+
"contig00001\t9\tg\t4\t..,*\ta!aU"
|
90
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
91
|
+
|
92
|
+
pileup = piles[0]
|
93
|
+
reads = piles[0].reads
|
94
|
+
reads2 = piles[1].reads
|
95
|
+
end
|
96
|
+
|
97
|
+
def test_star_then_insert
|
98
|
+
line = "contig00001\t23\tC\t40\t.*+1G..\t~~~~\n"
|
99
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
100
|
+
|
101
|
+
pileup = piles[0]
|
102
|
+
reads = piles[0].reads
|
103
|
+
|
104
|
+
assert_equal 4, reads.length
|
105
|
+
assert_equal({}, reads[0].insertions)
|
106
|
+
assert_equal '*', reads[1].sequence
|
107
|
+
assert_equal({23 => 'G'}, reads[1].insertions)
|
108
|
+
end
|
109
|
+
|
110
|
+
def test_star_finishing_a_read
|
111
|
+
line = "contig00001\t717\tC\t47\t,$.$,$*$,$,$,$,$,$,$,$*$*$*$*$,$.$*$*$*$*$.$.$.$,$,$*$,$,$,$,$.$.$*$*$.$,$,$,$,$.$,$.$,$*$,$,$\t0..~2-.-.,#~~~~+,~~~~+**,!~!!!!!!~~(((((((((~!!\n"
|
112
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
113
|
+
assert_equal '*', piles[0].reads[3].sequence
|
114
|
+
end
|
115
|
+
|
116
|
+
def test_start_finishing_a_read
|
117
|
+
line = "contig00002\t1\tC\t47\t^],$\t~\n"
|
118
|
+
piles = Bio::DB::PileupIterator.new(line).to_a
|
119
|
+
assert_equal 'C', piles[0].reads[0].sequence
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_start_with_a_gap
|
123
|
+
line = "contig00075\t503\tT\t24\t,^]*\tU\n"
|
124
|
+
piles = Bio::DB::PileupIterator.new(line)
|
125
|
+
# piles.log.level = Bio::Log::DEBUG
|
126
|
+
piles = piles.to_a
|
127
|
+
assert_equal 'T', piles[0].reads[0].sequence
|
128
|
+
assert_equal '*', piles[0].reads[1].sequence
|
129
|
+
end
|
130
|
+
|
131
|
+
def test_start_then_insert_then_end
|
132
|
+
line = "contig00075\t503\tG\t24\t^].+1T$^].\t~~\n"
|
133
|
+
piles = Bio::DB::PileupIterator.new(line)
|
134
|
+
# piles.log.level = Bio::Log::DEBUG
|
135
|
+
piles = piles.to_a
|
136
|
+
assert_equal 'G', piles[0].reads[0].sequence
|
137
|
+
assert_equal({503 => 'T'}, piles[0].reads[0].insertions)
|
138
|
+
assert_equal 'G', piles[0].reads[1].sequence
|
139
|
+
end
|
140
|
+
|
141
|
+
def test_star_then_insert2
|
142
|
+
line = "contig00075\t503\tG\t24\t,*+1g.\t~~\n"
|
143
|
+
piles = Bio::DB::PileupIterator.new(line)
|
144
|
+
# piles.log.level = Bio::Log::DEBUG
|
145
|
+
piles = piles.to_a
|
146
|
+
assert_equal 'G', piles[0].reads[0].sequence
|
147
|
+
assert_equal '*', piles[0].reads[1].sequence
|
148
|
+
assert_equal 'G', piles[0].reads[2].sequence
|
149
|
+
end
|
150
|
+
|
151
|
+
def test_start_with_gap_then_insertion
|
152
|
+
line = "contig00075\t503\tG\t24\t,,.^]*+1g\tE~\n"+
|
153
|
+
"contig00075\t504\tA\t24\t,,.,\tE~\n"
|
154
|
+
|
155
|
+
piles = Bio::DB::PileupIterator.new(line)
|
156
|
+
# piles.log.level = Bio::Log::DEBUG
|
157
|
+
piles = piles.to_a
|
158
|
+
assert_equal 'GA', piles[0].reads[0].sequence
|
159
|
+
assert_equal 'GA', piles[0].reads[1].sequence
|
160
|
+
assert_equal 'GA', piles[0].reads[2].sequence
|
161
|
+
assert_equal '*A', piles[0].reads[3].sequence
|
162
|
+
assert_equal({503 => 'g'}, piles[0].reads[3].insertions)
|
163
|
+
end
|
164
|
+
|
165
|
+
def test_double_insertion
|
166
|
+
line = "contig00075\t503\tG\t24\t*+1gg\tE\n"
|
167
|
+
|
168
|
+
piles = Bio::DB::PileupIterator.new(line)
|
169
|
+
# piles.log.level = Bio::Log::DEBUG
|
170
|
+
piles = piles.to_a
|
171
|
+
assert_equal({503 => 'gg'}, piles[0].reads[0].insertions)
|
172
|
+
end
|
173
|
+
|
174
|
+
def test_non_perfect_starting_read
|
175
|
+
line = "contig00075\t503\tG\t24\t^F.*+1gg\tE\n"
|
176
|
+
|
177
|
+
piles = Bio::DB::PileupIterator.new(line)
|
178
|
+
# piles.log.level = Bio::Log::DEBUG
|
179
|
+
piles = piles.to_a
|
180
|
+
assert_equal '+', piles[0].reads[0].direction
|
181
|
+
assert_equal 'G', piles[0].reads[0].sequence
|
182
|
+
assert_equal '*', piles[0].reads[1].sequence
|
183
|
+
end
|
184
|
+
|
185
|
+
def test_non_matching_finish
|
186
|
+
line = "contig00002\t6317\tC\t2\ta$.\t!B\n"+
|
187
|
+
"contig00002\t6318\tT\t1\t.\tA\n"
|
188
|
+
|
189
|
+
|
190
|
+
piles = Bio::DB::PileupIterator.new(line)
|
191
|
+
# piles.log.level = Bio::Log::DEBUG
|
192
|
+
piles = piles.to_a
|
193
|
+
assert_equal 2, piles[0].reads.length
|
194
|
+
assert_equal 'a', piles[0].reads[0].sequence
|
195
|
+
assert_equal 'CT', piles[0].reads[1].sequence
|
196
|
+
end
|
197
|
+
|
198
|
+
def test_insertion_then_mismatch
|
199
|
+
line = "contig00044\t867\tC\t6\t,,,,,.\t!:!!:=\n"+
|
200
|
+
"contig00044\t868\tG\t6\tt,+1ttt,.\t!A!!C9\n"
|
201
|
+
|
202
|
+
piles = Bio::DB::PileupIterator.new(line)
|
203
|
+
|
204
|
+
piles = piles.to_a
|
205
|
+
assert_equal 6, piles[0].reads.length
|
206
|
+
assert_equal 'Ct', piles[0].reads[0].sequence
|
207
|
+
assert_equal 'CG', piles[0].reads[1].sequence
|
208
|
+
hash = {868=>'t'}
|
209
|
+
assert_equal hash, piles[0].reads[1].insertions
|
210
|
+
assert_equal 'Ct', piles[0].reads[2].sequence
|
211
|
+
end
|
212
|
+
end
|
metadata
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-pileup_iterator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ben J. Woodcroft
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bio
|
16
|
+
requirement: &84151300 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.4.2
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *84151300
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: bio-samtools
|
27
|
+
requirement: &84150960 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.5.3
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *84150960
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: bio-logger
|
38
|
+
requirement: &84150510 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.0.0
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *84150510
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: shoulda
|
49
|
+
requirement: &84150270 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *84150270
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: rdoc
|
60
|
+
requirement: &84166240 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ~>
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '3.12'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *84166240
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: bundler
|
71
|
+
requirement: &84165520 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 1.0.0
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *84165520
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: jeweler
|
82
|
+
requirement: &84165000 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: 1.8.3
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *84165000
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: bio
|
93
|
+
requirement: &84164380 !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: 1.4.2
|
99
|
+
type: :development
|
100
|
+
prerelease: false
|
101
|
+
version_requirements: *84164380
|
102
|
+
- !ruby/object:Gem::Dependency
|
103
|
+
name: rdoc
|
104
|
+
requirement: &84163460 !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '3.12'
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: *84163460
|
113
|
+
description: Iterate through a samtools pileup file
|
114
|
+
email: donttrustben near gmail.com
|
115
|
+
executables: []
|
116
|
+
extensions: []
|
117
|
+
extra_rdoc_files:
|
118
|
+
- LICENSE.txt
|
119
|
+
- README.rdoc
|
120
|
+
files:
|
121
|
+
- .document
|
122
|
+
- .travis.yml
|
123
|
+
- Gemfile
|
124
|
+
- LICENSE.txt
|
125
|
+
- README.rdoc
|
126
|
+
- Rakefile
|
127
|
+
- VERSION
|
128
|
+
- lib/bio-pileup_iterator.rb
|
129
|
+
- lib/bio/db/pileup_iterator.rb
|
130
|
+
- test/helper.rb
|
131
|
+
- test/test_bio-pileup_iterator.rb
|
132
|
+
homepage: http://github.com/wwood/bioruby-pileup_iterator
|
133
|
+
licenses:
|
134
|
+
- MIT
|
135
|
+
post_install_message:
|
136
|
+
rdoc_options: []
|
137
|
+
require_paths:
|
138
|
+
- lib
|
139
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
140
|
+
none: false
|
141
|
+
requirements:
|
142
|
+
- - ! '>='
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '0'
|
145
|
+
segments:
|
146
|
+
- 0
|
147
|
+
hash: -115500147
|
148
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
|
+
none: false
|
150
|
+
requirements:
|
151
|
+
- - ! '>='
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
requirements: []
|
155
|
+
rubyforge_project:
|
156
|
+
rubygems_version: 1.8.17
|
157
|
+
signing_key:
|
158
|
+
specification_version: 3
|
159
|
+
summary: Iterate through a samtools pileup file
|
160
|
+
test_files: []
|