bio-pileup_iterator 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.travis.yml +12 -0
- data/Gemfile +18 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +39 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/lib/bio-pileup_iterator.rb +6 -0
- data/lib/bio/db/pileup_iterator.rb +263 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-pileup_iterator.rb +212 -0
- metadata +160 -0
data/.document
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- jruby-19mode # JRuby in 1.9 mode
|
6
|
+
- rbx-19mode
|
7
|
+
# - 1.8.7
|
8
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
9
|
+
# - rbx-18mode
|
10
|
+
|
11
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
12
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies: required to use the gem.
gem 'bio', '>=1.4.2'
gem 'bio-samtools', '>=0.5.3'
gem 'bio-logger', '>=1.0.0'

# Development dependencies: everything needed to run rake, tests, features, etc.
# Each gem is declared exactly once; "bio" is already a runtime dependency
# above, so it is not repeated here (Bundler warns about duplicate entries).
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "bundler", ">= 1.0.0"
  gem "jeweler", "~> 1.8.3"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Ben J. Woodcroft
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
= bio-pileup_iterator
|
2
|
+
|
3
|
+
Full description goes here
|
4
|
+
|
5
|
+
Note: this software is under active development!
|
6
|
+
|
7
|
+
== Installation
|
8
|
+
|
9
|
+
gem install bio-pileup_iterator
|
10
|
+
|
11
|
+
== Usage
|
12
|
+
|
13
|
+
== Developers
|
14
|
+
|
15
|
+
To use the library
|
16
|
+
|
17
|
+
require 'bio-pileup_iterator'
|
18
|
+
|
19
|
+
The API doc is online. For more code examples see also the test files in
|
20
|
+
the source tree.
|
21
|
+
|
22
|
+
== Project home page
|
23
|
+
|
24
|
+
For information on the source tree, documentation, issues and how to contribute, see
|
25
|
+
|
26
|
+
http://github.com/wwood/bioruby-pileup_iterator
|
27
|
+
|
28
|
+
== Cite
|
29
|
+
|
30
|
+
If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
31
|
+
|
32
|
+
== Biogems.info
|
33
|
+
|
34
|
+
This Biogem is published at http://biogems.info/index.html#bio-pileup_iterator
|
35
|
+
|
36
|
+
== Copyright
|
37
|
+
|
38
|
+
Copyright (c) 2012 Ben J. Woodcroft. See LICENSE.txt for further details.
|
39
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8

# Rake tasks for the bio-pileup_iterator gem: packaging/release (jeweler),
# unit tests (the default task) and RDoc generation.

require 'rubygems'
require 'bundler'
begin
  # Activate the gems declared in the Gemfile before anything else is loaded.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-pileup_iterator"
  gem.homepage = "http://github.com/wwood/bioruby-pileup_iterator"
  gem.license = "MIT"
  gem.summary = %Q{Iterate through a samtools pileup file}
  gem.description = %Q{Iterate through a samtools pileup file}
  gem.email = "donttrustben near gmail.com"
  gem.authors = ["Ben J. Woodcroft"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

task :default => :test

require 'rdoc/task'
# RDoc::Task is the class defined by 'rdoc/task'; Rake::RDocTask is only a
# backward-compatibility alias for it.
RDoc::Task.new do |rdoc|
  # Strip the trailing newline so it does not end up inside the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-pileup_iterator #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,263 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Additions to Bio::DB::Pileup (the class is reopened here; it is assumed to be
# defined by a dependency loaded beforehand - TODO confirm it is bio-samtools).
class Bio::DB::Pileup
  # Bio::DB::PileupIterator::PileupRead objects that occur at this position
  attr_accessor :reads

  # Number of '*' (deleted base) placeholders in this column's read bases.
  #
  # A read-start marker is written as '^' followed by one mapping-quality
  # character, and that character can itself be '*'. Those two-character
  # markers are removed first so that they are not counted as deletions.
  def num_deletions
    read_bases.gsub(/\^./, '').count('*')
  end
end
|
11
|
+
|
12
|
+
# Walks through the columns (positions) of a samtools pileup and keeps track of
# the individual reads that span consecutive columns.
class Bio::DB::PileupIterator
  include Enumerable

  # io:: source of the pileup text. Only #each_line is called on it, so an IO
  #      or a (possibly multi-line) String both work.
  def initialize(io)
    @io = io
  end

  # Iterates through the positions of a pileup, yielding an instance of
  # Bio::DB::Pileup for each line, complete with its +reads+ attribute set to an
  # Array of PileupRead objects (the reads present in that column, in column
  # order). Returns an Enumerator when called without a block.
  #
  # The PileupRead objects are shared between successive columns: a read that
  # continues into the next column is the same object, with its sequence
  # extended. The Array itself is a fresh copy per column.
  #
  # Raises RuntimeError when the read bases cannot be parsed, when an indel is
  # shorter than its declared length, or when a read appears to change strand.
  #
  # Known problems:
  # * Doesn't record start or ends of each read
  # * Doesn't lookahead to determine the sequence of each read (though it does give the preceding bases)
  # * Gives no information with mismatches
  def each
    return enum_for(:each) unless block_given?

    current_ordered_reads = []
    log = Bio::Log::LoggerPlus['bio-pileup_iterator']

    @io.each_line do |line|
      pileup = Bio::DB::Pileup.new(line.strip)
      current_read_index = 0
      reads_ending = []
      bases = pileup.read_bases

      # Consume the read-bases string one read at a time, from the left.
      until bases.empty?
        # Firstly, what is the current read we are working with
        current_read = current_ordered_reads[current_read_index]
        if current_read.nil?
          # No open read at this index: a new read begins in this column
          current_read = PileupRead.new
          current_ordered_reads.push current_read
        end

        # Base followed by an insertion/deletion announcement, e.g. ".+2AG"
        if matches = bases.match(/^([ACGTNacgtn\.\,])([\+\-])([0-9]+)([ACGTNacgtn]+)(\${0,1})/)
          # match again so that only the declared number of indel bases is consumed;
          # anything after that belongs to the following read(s)
          matches = bases.match(/^([ACGTNacgtn\.\,])([\+\-])([0-9]+)([ACGTNacgtn]{#{matches[3].to_i}})(\${0,1})/)
          raise "Indel shorter than its declared length, starting here: #{bases}, from #{pileup.inspect}" unless matches

          case matches[1]
          when '.'
            assign_direction(current_read, PileupRead::FORWARD_DIRECTION, pileup, bases, log)
            append_base(current_read, pileup.ref_base)
          when ','
            assign_direction(current_read, PileupRead::REVERSE_DIRECTION, pileup, bases, log)
            append_base(current_read, pileup.ref_base)
          else
            # Mismatch base. Could sanity check the direction here by detecting case, but eh
            append_base(current_read, matches[1])
          end

          # record the insertion (deletions are not recorded)
          current_read.add_insertion(pileup.pos, matches[3], matches[4]) if matches[2] == '+'
          reads_ending.push current_read_index if matches[5].length > 0

        # regular match in some direction, end of read
        elsif matches = bases.match(/^([\.\,])\$/)
          direction = matches[1] == '.' ? PileupRead::FORWARD_DIRECTION : PileupRead::REVERSE_DIRECTION
          assign_direction(current_read, direction, pileup, bases, log)
          append_base(current_read, pileup.ref_base)
          reads_ending.push current_read_index

        # regular match in the forward direction, continuing onwards
        elsif matches = bases.match(/^\./)
          assign_direction(current_read, PileupRead::FORWARD_DIRECTION, pileup, bases, log)
          append_base(current_read, pileup.ref_base)

        # regular match in the reverse direction, continuing onwards
        elsif matches = bases.match(/^\,/)
          assign_direction(current_read, PileupRead::REVERSE_DIRECTION, pileup, bases, log)
          append_base(current_read, pileup.ref_base)

        # starting a new read (possibly with a gap), with an accompanying insertion/deletion
        elsif matches = bases.match(/^\^.([ACGTNacgtn\.\,\*])([\+\-])([0-9]+)([ACGTNacgtn]+)(\${0,1})/)
          # as above, consume exactly the declared number of indel bases
          matches = bases.match(/^\^.([ACGTNacgtn\.\,\*])([\+\-])([0-9]+)([ACGTNacgtn]{#{matches[3].to_i}})(\${0,1})/)
          raise "Indel shorter than its declared length, starting here: #{bases}, from #{pileup.inspect}" unless matches

          start_read(current_read, matches[1], pileup.ref_base)
          current_read.add_insertion(pileup.pos, matches[3], matches[4]) if matches[2] == '+'
          reads_ending.push current_read_index if matches[5].length > 0

        # regular match, starting a new read (which may also end here)
        elsif matches = bases.match(/^\^.([ACGTNacgtn\.\,\*])(\${0,1})/)
          start_read(current_read, matches[1], pileup.ref_base)
          reads_ending.push current_read_index if matches[2].length > 0

        # gap then insert/delete - the gap should already be known from the last position
        # NOTE(review): unlike the indel branches above, the declared indel length
        # is not enforced here (all following bases are captured). Left unchanged
        # because existing callers may rely on it - confirm the intended behaviour.
        elsif matches = bases.match(/^\*([\+\-])([0-9]+)([ACGTNacgtn=]+)(\${0,1})/)
          append_base(current_read, '*')
          reads_ending.push current_read_index if matches[4].length > 0
          current_read.add_insertion(pileup.pos, matches[2], matches[3]) if matches[1] == '+'

        # simple mismatch (or deletion placeholder)
        elsif matches = bases.match(/(^[ACGTNacgtn\*])(\${0,1})/)
          append_base(current_read, matches[1])
          reads_ending.push current_read_index if matches[2].length > 0
        end

        raise "Unexpected Pileup format bases, starting here: #{bases}, from #{pileup.inspect}" if matches.nil?

        # remove the matched part from the base string for next time
        # (every pattern above is anchored at the start of the string)
        bases = matches.post_match
        current_read_index += 1
      end

      # Yield a copy of the array, otherwise when reads get deleted below they
      # would be removed from the yielded array as well (which is unwanted)
      pileup.reads = current_ordered_reads.dup
      yield pileup

      # Remove reads that ended. In reverse order, since removing the last ones
      # first doesn't shift the indices of the ones still to be removed
      reads_ending.reverse_each { |i| current_ordered_reads.delete_at i }
    end
  end

  # One read of the pileup, accumulated column by column.
  class PileupRead
    # Directions relative to reference
    FORWARD_DIRECTION = '+'
    REVERSE_DIRECTION = '-'

    # sequence is always in the direction of the start of the reference to the end - only @direction gives direction information
    attr_accessor :direction, :sequence

    # A hash of recorded insertions. Key of hash is the position in the consensus that it has been added to in the alignment, value is the bases that have been inserted
    attr_reader :insertions

    def initialize
      @sequence = ''
      @insertions = {}
    end

    # Returns the last two characters of the sequence (the single character
    # when the sequence has length 1, nil when it is empty).
    # NOTE(review): the intent of this method is not evident from this file.
    def read
      @sequence[@sequence.length-2..@sequence.length-1]
    end

    # Records +insertion_bases+ as inserted at +position+, replacing any
    # insertion previously recorded there. +insertion_length+ is accepted for
    # interface compatibility but is not used.
    def add_insertion(position, insertion_length, insertion_bases)
      insertions[position] = insertion_bases
    end
  end

  private

  # Appends +base+ to the read's sequence. A new String is assigned rather than
  # mutating the old one, so sequence strings handed out earlier stay unchanged.
  def append_base(read, base)
    read.sequence = "#{read.sequence}#{base}"
  end

  # Sets the strand of a continuing read, raising (after logging) if the read
  # was previously seen on the other strand.
  def assign_direction(read, direction, pileup, bases, log)
    if read.direction && read.direction != direction
      error_msg = "Unexpectedly found a #{read.direction} direction read when expecting a #{direction} direction one. This suggests there is a problem with either the pileup file or this pileup parser. Current pileup column #{pileup.inspect}, read #{read.inspect}, chomped until #{bases}"
      # guard in case no logger has been registered under this name
      log.error error_msg if log
      raise error_msg
    end
    read.direction = direction
  end

  # Handles the first base of a read (the character after the '^X' marker):
  # '.' / ',' give the reference base and the strand, '*' is a gap with the
  # strand left unknown, and any other letter is a mismatch whose case gives
  # the strand (upper case forward, lower case reverse).
  def start_read(read, symbol, ref_base)
    case symbol
    when '.'
      read.direction = PileupRead::FORWARD_DIRECTION
      append_base(read, ref_base)
    when ','
      read.direction = PileupRead::REVERSE_DIRECTION
      append_base(read, ref_base)
    when '*'
      append_base(read, symbol)
    else
      read.direction = symbol == symbol.upcase ? PileupRead::FORWARD_DIRECTION : PileupRead::REVERSE_DIRECTION
      append_base(read, symbol)
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Shared set-up for the test suite: activates the bundled gems, loads the test
# libraries and puts lib/ and test/ on the load path before requiring the gem.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Report what is missing and exit with Bundler's own status code.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

# lib/ is added first and test/ second, so test/ ends up at the front.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'bio-pileup_iterator'

class Test::Unit::TestCase
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Unit tests for Bio::DB::PileupIterator. Each test feeds one or two pileup
# lines (as a String) to the iterator and checks the reads it reconstructs.
# Read objects are shared between columns, so a read inspected through the
# first column already carries the bases added by later columns.
class TestBioPileupIterator < Test::Unit::TestCase
  def test_pileup_parsing
    line = "contig00001\t199\tA\t4\t.$...$\t>a^>"
    piles = Bio::DB::PileupIterator.new(line).to_a
    pileup = piles[0]
    reads = pileup.reads

    assert_equal 'A', reads[0].sequence
    assert_equal 4, reads.length
    assert_kind_of Bio::DB::Pileup, pileup
  end

  def test_2_pileup_columns
    line = "contig00001\t199\tA\t4\t.$...$\t>a^>\ncontig00001\t200\tT\t2\t..\taR"
    piles = Bio::DB::PileupIterator.new(line).to_a

    reads = piles[0].reads
    reads2 = piles[1].reads

    assert_equal 'A', piles[0].ref_base
    assert_equal 'T', piles[1].ref_base
    assert_equal 4, reads.length
    assert_equal 2, reads2.length
    assert_equal 'AT', reads2[0].sequence
  end

  def test_fwd_rev
    line = "contig00001\t199\tA\t4\t.$,..$\t>a^>\ncontig00001\t200\tT\t2\t,.\taR"
    piles = Bio::DB::PileupIterator.new(line).to_a

    reads = piles[0].reads
    reads2 = piles[1].reads

    assert_equal 4, reads.length
    assert_equal 2, reads2.length
    assert_equal 'AT', reads2[0].sequence
    assert_equal '-', reads2[0].direction
    assert_equal '+', reads2[1].direction
  end

  def test_deletion
    line = "contig00001\t199\tA\t4\t.-1T...$\t>a^>\ncontig00001\t200\tT\t2\t*..\taR"
    piles = Bio::DB::PileupIterator.new(line).to_a

    reads = piles[0].reads

    assert_equal 'A*', reads[0].sequence
    assert_equal Hash.new, reads[0].insertions
  end

  def test_substitution
    line = "contig00001\t199\tA\t4\t.G..$\t>a^>"
    piles = Bio::DB::PileupIterator.new(line).to_a

    reads = piles[0].reads

    assert_equal 'A', reads[0].sequence
    assert_equal 'G', reads[1].sequence
    # the read after the substitution must be unaffected by it
    assert_equal 'A', reads[2].sequence
  end

  def test_substitution_with_insertion
    line = "contig00001\t199\tA\t4\tG-1T..$.\t>a^>\ncontig00001\t200\tT\t2\t*..\taR"
    piles = Bio::DB::PileupIterator.new(line).to_a

    reads = piles[0].reads
    reads2 = piles[1].reads

    assert_equal 2, piles.length
    assert_equal 4, reads.length
    assert_equal 3, reads2.length
    assert_equal 'G*', reads[0].sequence
    assert_equal 'AT', reads[1].sequence
    assert_equal 'A', reads[2].sequence
    assert_equal 'AT', reads[3].sequence
  end

  def test_start_read_warning_of_deletion_next
    line = "contig00001\t8\tG\t4\t..,^],-1g\ta!U!\n"+
      "contig00001\t9\tg\t4\t..,*\ta!aU"
    piles = Bio::DB::PileupIterator.new(line).to_a

    reads = piles[0].reads
    reads2 = piles[1].reads

    assert_equal 4, reads.length
    assert_equal 4, reads2.length
    # the read starting at position 8 is reverse and shows the gap at position 9
    assert_equal '-', reads[3].direction
    assert_equal 'G*', reads[3].sequence
  end

  def test_star_then_insert
    line = "contig00001\t23\tC\t40\t.*+1G..\t~~~~\n"
    piles = Bio::DB::PileupIterator.new(line).to_a

    reads = piles[0].reads

    assert_equal 4, reads.length
    assert_equal({}, reads[0].insertions)
    assert_equal '*', reads[1].sequence
    assert_equal({23 => 'G'}, reads[1].insertions)
  end

  def test_star_finishing_a_read
    line = "contig00001\t717\tC\t47\t,$.$,$*$,$,$,$,$,$,$,$*$*$*$*$,$.$*$*$*$*$.$.$.$,$,$*$,$,$,$,$.$.$*$*$.$,$,$,$,$.$,$.$,$*$,$,$\t0..~2-.-.,#~~~~+,~~~~+**,!~!!!!!!~~(((((((((~!!\n"
    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal '*', piles[0].reads[3].sequence
  end

  def test_start_finishing_a_read
    line = "contig00002\t1\tC\t47\t^],$\t~\n"
    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal 'C', piles[0].reads[0].sequence
  end

  def test_start_with_a_gap
    line = "contig00075\t503\tT\t24\t,^]*\tU\n"
    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal 'T', piles[0].reads[0].sequence
    assert_equal '*', piles[0].reads[1].sequence
  end

  def test_start_then_insert_then_end
    line = "contig00075\t503\tG\t24\t^].+1T$^].\t~~\n"
    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal 'G', piles[0].reads[0].sequence
    assert_equal({503 => 'T'}, piles[0].reads[0].insertions)
    assert_equal 'G', piles[0].reads[1].sequence
  end

  def test_star_then_insert2
    line = "contig00075\t503\tG\t24\t,*+1g.\t~~\n"
    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal 'G', piles[0].reads[0].sequence
    assert_equal '*', piles[0].reads[1].sequence
    assert_equal 'G', piles[0].reads[2].sequence
  end

  def test_start_with_gap_then_insertion
    line = "contig00075\t503\tG\t24\t,,.^]*+1g\tE~\n"+
      "contig00075\t504\tA\t24\t,,.,\tE~\n"

    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal 'GA', piles[0].reads[0].sequence
    assert_equal 'GA', piles[0].reads[1].sequence
    assert_equal 'GA', piles[0].reads[2].sequence
    assert_equal '*A', piles[0].reads[3].sequence
    assert_equal({503 => 'g'}, piles[0].reads[3].insertions)
  end

  def test_double_insertion
    line = "contig00075\t503\tG\t24\t*+1gg\tE\n"

    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal({503 => 'gg'}, piles[0].reads[0].insertions)
  end

  def test_non_perfect_starting_read
    line = "contig00075\t503\tG\t24\t^F.*+1gg\tE\n"

    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal '+', piles[0].reads[0].direction
    assert_equal 'G', piles[0].reads[0].sequence
    assert_equal '*', piles[0].reads[1].sequence
  end

  def test_non_matching_finish
    line = "contig00002\t6317\tC\t2\ta$.\t!B\n"+
      "contig00002\t6318\tT\t1\t.\tA\n"

    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal 2, piles[0].reads.length
    assert_equal 'a', piles[0].reads[0].sequence
    assert_equal 'CT', piles[0].reads[1].sequence
  end

  def test_insertion_then_mismatch
    line = "contig00044\t867\tC\t6\t,,,,,.\t!:!!:=\n"+
      "contig00044\t868\tG\t6\tt,+1ttt,.\t!A!!C9\n"

    piles = Bio::DB::PileupIterator.new(line).to_a
    assert_equal 6, piles[0].reads.length
    assert_equal 'Ct', piles[0].reads[0].sequence
    assert_equal 'CG', piles[0].reads[1].sequence
    hash = {868=>'t'}
    assert_equal hash, piles[0].reads[1].insertions
    assert_equal 'Ct', piles[0].reads[2].sequence
  end
end
|
metadata
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-pileup_iterator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ben J. Woodcroft
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bio
|
16
|
+
requirement: &84151300 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.4.2
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *84151300
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: bio-samtools
|
27
|
+
requirement: &84150960 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.5.3
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *84150960
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: bio-logger
|
38
|
+
requirement: &84150510 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.0.0
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *84150510
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: shoulda
|
49
|
+
requirement: &84150270 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *84150270
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: rdoc
|
60
|
+
requirement: &84166240 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ~>
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '3.12'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *84166240
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: bundler
|
71
|
+
requirement: &84165520 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 1.0.0
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *84165520
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: jeweler
|
82
|
+
requirement: &84165000 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: 1.8.3
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *84165000
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: bio
|
93
|
+
requirement: &84164380 !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: 1.4.2
|
99
|
+
type: :development
|
100
|
+
prerelease: false
|
101
|
+
version_requirements: *84164380
|
102
|
+
- !ruby/object:Gem::Dependency
|
103
|
+
name: rdoc
|
104
|
+
requirement: &84163460 !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '3.12'
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: *84163460
|
113
|
+
description: Iterate through a samtools pileup file
|
114
|
+
email: donttrustben near gmail.com
|
115
|
+
executables: []
|
116
|
+
extensions: []
|
117
|
+
extra_rdoc_files:
|
118
|
+
- LICENSE.txt
|
119
|
+
- README.rdoc
|
120
|
+
files:
|
121
|
+
- .document
|
122
|
+
- .travis.yml
|
123
|
+
- Gemfile
|
124
|
+
- LICENSE.txt
|
125
|
+
- README.rdoc
|
126
|
+
- Rakefile
|
127
|
+
- VERSION
|
128
|
+
- lib/bio-pileup_iterator.rb
|
129
|
+
- lib/bio/db/pileup_iterator.rb
|
130
|
+
- test/helper.rb
|
131
|
+
- test/test_bio-pileup_iterator.rb
|
132
|
+
homepage: http://github.com/wwood/bioruby-pileup_iterator
|
133
|
+
licenses:
|
134
|
+
- MIT
|
135
|
+
post_install_message:
|
136
|
+
rdoc_options: []
|
137
|
+
require_paths:
|
138
|
+
- lib
|
139
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
140
|
+
none: false
|
141
|
+
requirements:
|
142
|
+
- - ! '>='
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '0'
|
145
|
+
segments:
|
146
|
+
- 0
|
147
|
+
hash: -115500147
|
148
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
|
+
none: false
|
150
|
+
requirements:
|
151
|
+
- - ! '>='
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
requirements: []
|
155
|
+
rubyforge_project:
|
156
|
+
rubygems_version: 1.8.17
|
157
|
+
signing_key:
|
158
|
+
specification_version: 3
|
159
|
+
summary: Iterate through a samtools pileup file
|
160
|
+
test_files: []
|