bio-pileup_iterator 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +1 -1
- data/VERSION +1 -1
- data/lib/bio/db/pileup_iterator.rb +23 -10
- data/test/test_bio-pileup_iterator.rb +45 -0
- metadata +67 -22
data/.travis.yml
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.3
|
1
|
+
0.0.4
|
@@ -1,12 +1,8 @@
|
|
1
|
-
|
1
|
+
require 'pp'
|
2
2
|
|
3
3
|
class Bio::DB::Pileup
|
4
4
|
# Bio::DB::PileupIterator::PileupRead objects that occur at this position
|
5
5
|
attr_accessor :reads
|
6
|
-
|
7
|
-
def num_deletions
|
8
|
-
return read_bases.gsub(/[^\*]/,'').length
|
9
|
-
end
|
10
6
|
end
|
11
7
|
|
12
8
|
class Bio::DB::PileupIterator
|
@@ -21,6 +17,7 @@ class Bio::DB::PileupIterator
|
|
21
17
|
# Known problems:
|
22
18
|
# * Doesn't record start or ends of each read
|
23
19
|
# * Doesn't lookahead to determine the sequence of each read (though it does give the preceding bases)
|
20
|
+
# * Doesn't record the mapping quality of each read
|
24
21
|
def each
|
25
22
|
current_ordered_reads = []
|
26
23
|
log = Bio::Log::LoggerPlus['bio-pileup_iterator']
|
@@ -35,6 +32,7 @@ class Bio::DB::PileupIterator
|
|
35
32
|
log.debug "new column's read_bases: #{bases.inspect}" if log.debug?
|
36
33
|
log.debug "pileup entry parsed: #{pileup.inspect}" if log.debug?
|
37
34
|
while bases.length > 0
|
35
|
+
log.debug "==== new read within a single pileup being parsed. Starting with #{bases}" if log.debug?
|
38
36
|
|
39
37
|
# Firstly, what is the current read we are working with
|
40
38
|
current_read = current_ordered_reads[current_read_index]
|
@@ -47,9 +45,11 @@ class Bio::DB::PileupIterator
|
|
47
45
|
matches = nil
|
48
46
|
|
49
47
|
# if starting, remove it
|
48
|
+
log.debug "before read start removal, pileup is #{bases}, read is #{current_read}" if log.debug?
|
50
49
|
matched_string = ''
|
51
|
-
if bases[0
|
52
|
-
|
50
|
+
if bases[0]=='^'
|
51
|
+
# Match the ^ and the mapping quality
|
52
|
+
matched_string += bases[0..1]
|
53
53
|
bases = bases[2...bases.length]
|
54
54
|
end
|
55
55
|
log.debug "after read start removal, pileup is #{bases}" if log.debug?
|
@@ -58,15 +58,28 @@ class Bio::DB::PileupIterator
|
|
58
58
|
if matches = bases.match(/^([ACGTNacgtn\.\,\*])/)
|
59
59
|
matched_string += bases[0]
|
60
60
|
if matches[1] == '.'
|
61
|
-
|
61
|
+
if !current_read.direction.nil? and current_read.direction != PileupRead::FORWARD_DIRECTION
|
62
|
+
pp current_read
|
63
|
+
raise "Unexpectedly found direction #{current_read.direction}, expected #{PileupRead::FORWARD_DIRECTION}, in starting at '#{bases}'(EndOfLine) in '#{line}', in the read above"
|
64
|
+
end
|
62
65
|
current_read.direction = PileupRead::FORWARD_DIRECTION
|
63
66
|
current_read.sequence = "#{current_read.sequence}#{pileup.ref_base}"
|
64
67
|
elsif matches[1] == ','
|
65
|
-
|
68
|
+
if !current_read.direction.nil? and current_read.direction != PileupRead::REVERSE_DIRECTION
|
69
|
+
pp current_read
|
70
|
+
raise "Unexpectedly found direction #{current_read.direction}, expected #{PileupRead::REVERSE_DIRECTION}, in starting at '#{bases}'(EndOfLine) in '#{line}', in the read above"
|
71
|
+
end
|
66
72
|
current_read.direction = PileupRead::REVERSE_DIRECTION
|
67
73
|
current_read.sequence = "#{current_read.sequence}#{pileup.ref_base}"
|
68
74
|
else
|
69
|
-
|
75
|
+
if current_read.direction.nil? and matches[1] != '*'
|
76
|
+
# Could sanity check the direction here by detecting case, but eh
|
77
|
+
if matches[1].upcase == matches[1] or matches[1]== '.'
|
78
|
+
current_read.direction = PileupRead::FORWARD_DIRECTION
|
79
|
+
else
|
80
|
+
current_read.direction = PileupRead::REVERSE_DIRECTION
|
81
|
+
end
|
82
|
+
end
|
70
83
|
current_read.sequence = "#{current_read.sequence}#{matches[1]}"
|
71
84
|
end
|
72
85
|
# remove the matched base
|
@@ -223,4 +223,49 @@ class TestBioPileupIterator < Test::Unit::TestCase
|
|
223
223
|
assert_equal 10, piles[0].coverage
|
224
224
|
assert_equal 10, piles[0].reads.length
|
225
225
|
end
|
226
|
+
|
227
|
+
def test_optional_mapping_quality
|
228
|
+
line = "gi|308171891|ref|NC_014551.1|\t2\tA\t2\t^:,^~,\t!!\n"
|
229
|
+
piles = Bio::DB::PileupIterator.new(line).to_a #parse, it should fail otherwise
|
230
|
+
assert_equal 2, piles[0].coverage
|
231
|
+
assert_equal 2, piles[0].reads.length
|
232
|
+
assert_equal 'A', piles[0].reads[0].sequence
|
233
|
+
assert_equal '-', piles[0].reads[0].direction
|
234
|
+
|
235
|
+
line_without_mapping_quality = "gi|308171891|ref|NC_014551.1|\t2\tA\t2\t^,^,\t!!\n"
|
236
|
+
piles = Bio::DB::PileupIterator.new(line).to_a #parse, it should fail otherwise
|
237
|
+
assert_equal 2, piles[0].coverage
|
238
|
+
assert_equal 2, piles[0].reads.length
|
239
|
+
assert_equal 'A', piles[0].reads[0].sequence
|
240
|
+
assert_equal '-', piles[0].reads[0].direction
|
241
|
+
end
|
242
|
+
|
243
|
+
def test_when_read_mapping_quality_is_dot
|
244
|
+
lines = "gi|308171891|ref|NC_014551.1|\t2\tA\t2\t^:,^~,\t!!\n"+
|
245
|
+
"gi|308171891|ref|NC_014551.1|\t3\tT\t4\t,,^!.^.,\t!!!!\n"+ # This is the line that is really being tested
|
246
|
+
"gi|308171891|ref|NC_014551.1|\t4\tT\t4\t,,.,\t!!!!\n"
|
247
|
+
piles = Bio::DB::PileupIterator.new(lines).to_a #parse, it should fail otherwise
|
248
|
+
assert_equal 2, piles[0].coverage
|
249
|
+
assert_equal 4, piles[2].coverage
|
250
|
+
assert_equal 2, piles[0].reads.length
|
251
|
+
assert_equal 'ATT', piles[0].reads[0].sequence
|
252
|
+
assert_equal '-', piles[0].reads[0].direction
|
253
|
+
end
|
254
|
+
|
255
|
+
def test_n
|
256
|
+
lines = "gi|308171891|ref|NC_014551.1|\t111565\tN\t7\taaAaAAA\t~~~~~~~\n"
|
257
|
+
piles = Bio::DB::PileupIterator.new(lines).to_a #parse, it should fail otherwise
|
258
|
+
assert_equal 1, piles.length
|
259
|
+
assert_equal 7, piles[0].coverage
|
260
|
+
assert_equal 'a', piles[0].reads[1].sequence
|
261
|
+
assert_equal 'N', piles[0].ref_base
|
262
|
+
end
|
263
|
+
|
264
|
+
def test_direction_without_starting_a_read
|
265
|
+
lines = "contig00074\t4243\tN\t8\tg$GGGgGgg\t~~~~~~~~\n"
|
266
|
+
|
267
|
+
piles = Bio::DB::PileupIterator.new(lines).to_a
|
268
|
+
assert_equal '-', piles[0].reads[0].direction
|
269
|
+
assert_equal '+', piles[0].reads[1].direction
|
270
|
+
end
|
226
271
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-pileup_iterator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-11-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: 1.4.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.2
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: bio-samtools
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ! '>='
|
@@ -32,10 +37,15 @@ dependencies:
|
|
32
37
|
version: 0.5.3
|
33
38
|
type: :runtime
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.5.3
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: bio-logger
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
39
49
|
none: false
|
40
50
|
requirements:
|
41
51
|
- - ! '>='
|
@@ -43,10 +53,15 @@ dependencies:
|
|
43
53
|
version: 1.0.0
|
44
54
|
type: :runtime
|
45
55
|
prerelease: false
|
46
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.0.0
|
47
62
|
- !ruby/object:Gem::Dependency
|
48
63
|
name: shoulda
|
49
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
50
65
|
none: false
|
51
66
|
requirements:
|
52
67
|
- - ! '>='
|
@@ -54,10 +69,15 @@ dependencies:
|
|
54
69
|
version: '0'
|
55
70
|
type: :development
|
56
71
|
prerelease: false
|
57
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
58
78
|
- !ruby/object:Gem::Dependency
|
59
79
|
name: rdoc
|
60
|
-
requirement:
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
61
81
|
none: false
|
62
82
|
requirements:
|
63
83
|
- - ~>
|
@@ -65,10 +85,15 @@ dependencies:
|
|
65
85
|
version: '3.12'
|
66
86
|
type: :development
|
67
87
|
prerelease: false
|
68
|
-
version_requirements:
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '3.12'
|
69
94
|
- !ruby/object:Gem::Dependency
|
70
95
|
name: bundler
|
71
|
-
requirement:
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
72
97
|
none: false
|
73
98
|
requirements:
|
74
99
|
- - ! '>='
|
@@ -76,10 +101,15 @@ dependencies:
|
|
76
101
|
version: 1.0.0
|
77
102
|
type: :development
|
78
103
|
prerelease: false
|
79
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.0.0
|
80
110
|
- !ruby/object:Gem::Dependency
|
81
111
|
name: jeweler
|
82
|
-
requirement:
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
83
113
|
none: false
|
84
114
|
requirements:
|
85
115
|
- - ~>
|
@@ -87,10 +117,15 @@ dependencies:
|
|
87
117
|
version: 1.8.3
|
88
118
|
type: :development
|
89
119
|
prerelease: false
|
90
|
-
version_requirements:
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ~>
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.8.3
|
91
126
|
- !ruby/object:Gem::Dependency
|
92
127
|
name: bio
|
93
|
-
requirement:
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
94
129
|
none: false
|
95
130
|
requirements:
|
96
131
|
- - ! '>='
|
@@ -98,10 +133,15 @@ dependencies:
|
|
98
133
|
version: 1.4.2
|
99
134
|
type: :development
|
100
135
|
prerelease: false
|
101
|
-
version_requirements:
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 1.4.2
|
102
142
|
- !ruby/object:Gem::Dependency
|
103
143
|
name: rdoc
|
104
|
-
requirement:
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
105
145
|
none: false
|
106
146
|
requirements:
|
107
147
|
- - ~>
|
@@ -109,7 +149,12 @@ dependencies:
|
|
109
149
|
version: '3.12'
|
110
150
|
type: :development
|
111
151
|
prerelease: false
|
112
|
-
version_requirements:
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ~>
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '3.12'
|
113
158
|
description: Iterate through a samtools pileup file
|
114
159
|
email: donttrustben near gmail.com
|
115
160
|
executables: []
|
@@ -144,7 +189,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
144
189
|
version: '0'
|
145
190
|
segments:
|
146
191
|
- 0
|
147
|
-
hash:
|
192
|
+
hash: -965124785
|
148
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
194
|
none: false
|
150
195
|
requirements:
|
@@ -153,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
153
198
|
version: '0'
|
154
199
|
requirements: []
|
155
200
|
rubyforge_project:
|
156
|
-
rubygems_version: 1.8.
|
201
|
+
rubygems_version: 1.8.24
|
157
202
|
signing_key:
|
158
203
|
specification_version: 3
|
159
204
|
summary: Iterate through a samtools pileup file
|