bio-pileup_iterator 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +1 -1
- data/VERSION +1 -1
- data/lib/bio/db/pileup_iterator.rb +23 -10
- data/test/test_bio-pileup_iterator.rb +45 -0
- metadata +67 -22
data/.travis.yml
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.3
|
1
|
+
0.0.4
|
@@ -1,12 +1,8 @@
|
|
1
|
-
|
1
|
+
require 'pp'
|
2
2
|
|
3
3
|
class Bio::DB::Pileup
|
4
4
|
# Bio::DB::PileupIterator::PileupRead objects that occur at this position
|
5
5
|
attr_accessor :reads
|
6
|
-
|
7
|
-
def num_deletions
|
8
|
-
return read_bases.gsub(/[^\*]/,'').length
|
9
|
-
end
|
10
6
|
end
|
11
7
|
|
12
8
|
class Bio::DB::PileupIterator
|
@@ -21,6 +17,7 @@ class Bio::DB::PileupIterator
|
|
21
17
|
# Known problems:
|
22
18
|
# * Doesn't record start or ends of each read
|
23
19
|
# * Doesn't lookahead to determine the sequence of each read (though it does give the preceding bases)
|
20
|
+
# * Doesn't record the mapping quality of each read
|
24
21
|
def each
|
25
22
|
current_ordered_reads = []
|
26
23
|
log = Bio::Log::LoggerPlus['bio-pileup_iterator']
|
@@ -35,6 +32,7 @@ class Bio::DB::PileupIterator
|
|
35
32
|
log.debug "new column's read_bases: #{bases.inspect}" if log.debug?
|
36
33
|
log.debug "pileup entry parsed: #{pileup.inspect}" if log.debug?
|
37
34
|
while bases.length > 0
|
35
|
+
log.debug "==== new read within a single pileup being parsed. Starting with #{bases}" if log.debug?
|
38
36
|
|
39
37
|
# Firstly, what is the current read we are working with
|
40
38
|
current_read = current_ordered_reads[current_read_index]
|
@@ -47,9 +45,11 @@ class Bio::DB::PileupIterator
|
|
47
45
|
matches = nil
|
48
46
|
|
49
47
|
# if starting, remove it
|
48
|
+
log.debug "before read start removal, pileup is #{bases}, read is #{current_read}" if log.debug?
|
50
49
|
matched_string = ''
|
51
|
-
if bases[0
|
52
|
-
|
50
|
+
if bases[0]=='^'
|
51
|
+
# Match the ^ and the mapping quality
|
52
|
+
matched_string += bases[0..1]
|
53
53
|
bases = bases[2...bases.length]
|
54
54
|
end
|
55
55
|
log.debug "after read start removal, pileup is #{bases}" if log.debug?
|
@@ -58,15 +58,28 @@ class Bio::DB::PileupIterator
|
|
58
58
|
if matches = bases.match(/^([ACGTNacgtn\.\,\*])/)
|
59
59
|
matched_string += bases[0]
|
60
60
|
if matches[1] == '.'
|
61
|
-
|
61
|
+
if !current_read.direction.nil? and current_read.direction != PileupRead::FORWARD_DIRECTION
|
62
|
+
pp current_read
|
63
|
+
raise "Unexpectedly found direction #{current_read.direction}, expected #{PileupRead::FORWARD_DIRECTION}, in starting at '#{bases}'(EndOfLine) in '#{line}', in the read above"
|
64
|
+
end
|
62
65
|
current_read.direction = PileupRead::FORWARD_DIRECTION
|
63
66
|
current_read.sequence = "#{current_read.sequence}#{pileup.ref_base}"
|
64
67
|
elsif matches[1] == ','
|
65
|
-
|
68
|
+
if !current_read.direction.nil? and current_read.direction != PileupRead::REVERSE_DIRECTION
|
69
|
+
pp current_read
|
70
|
+
raise "Unexpectedly found direction #{current_read.direction}, expected #{PileupRead::REVERSE_DIRECTION}, in starting at '#{bases}'(EndOfLine) in '#{line}', in the read above"
|
71
|
+
end
|
66
72
|
current_read.direction = PileupRead::REVERSE_DIRECTION
|
67
73
|
current_read.sequence = "#{current_read.sequence}#{pileup.ref_base}"
|
68
74
|
else
|
69
|
-
|
75
|
+
if current_read.direction.nil? and matches[1] != '*'
|
76
|
+
# Could sanity check the direction here by detecting case, but eh
|
77
|
+
if matches[1].upcase == matches[1] or matches[1]== '.'
|
78
|
+
current_read.direction = PileupRead::FORWARD_DIRECTION
|
79
|
+
else
|
80
|
+
current_read.direction = PileupRead::REVERSE_DIRECTION
|
81
|
+
end
|
82
|
+
end
|
70
83
|
current_read.sequence = "#{current_read.sequence}#{matches[1]}"
|
71
84
|
end
|
72
85
|
# remove the matched base
|
@@ -223,4 +223,49 @@ class TestBioPileupIterator < Test::Unit::TestCase
|
|
223
223
|
assert_equal 10, piles[0].coverage
|
224
224
|
assert_equal 10, piles[0].reads.length
|
225
225
|
end
|
226
|
+
|
227
|
+
def test_optional_mapping_quality
|
228
|
+
line = "gi|308171891|ref|NC_014551.1|\t2\tA\t2\t^:,^~,\t!!\n"
|
229
|
+
piles = Bio::DB::PileupIterator.new(line).to_a #parse, it should fail otherwise
|
230
|
+
assert_equal 2, piles[0].coverage
|
231
|
+
assert_equal 2, piles[0].reads.length
|
232
|
+
assert_equal 'A', piles[0].reads[0].sequence
|
233
|
+
assert_equal '-', piles[0].reads[0].direction
|
234
|
+
|
235
|
+
line_without_mapping_quality = "gi|308171891|ref|NC_014551.1|\t2\tA\t2\t^,^,\t!!\n"
|
236
|
+
piles = Bio::DB::PileupIterator.new(line).to_a #parse, it should fail otherwise
|
237
|
+
assert_equal 2, piles[0].coverage
|
238
|
+
assert_equal 2, piles[0].reads.length
|
239
|
+
assert_equal 'A', piles[0].reads[0].sequence
|
240
|
+
assert_equal '-', piles[0].reads[0].direction
|
241
|
+
end
|
242
|
+
|
243
|
+
def test_when_read_mapping_quality_is_dot
|
244
|
+
lines = "gi|308171891|ref|NC_014551.1|\t2\tA\t2\t^:,^~,\t!!\n"+
|
245
|
+
"gi|308171891|ref|NC_014551.1|\t3\tT\t4\t,,^!.^.,\t!!!!\n"+ # This is the line that is really being tested
|
246
|
+
"gi|308171891|ref|NC_014551.1|\t4\tT\t4\t,,.,\t!!!!\n"
|
247
|
+
piles = Bio::DB::PileupIterator.new(lines).to_a #parse, it should fail otherwise
|
248
|
+
assert_equal 2, piles[0].coverage
|
249
|
+
assert_equal 4, piles[2].coverage
|
250
|
+
assert_equal 2, piles[0].reads.length
|
251
|
+
assert_equal 'ATT', piles[0].reads[0].sequence
|
252
|
+
assert_equal '-', piles[0].reads[0].direction
|
253
|
+
end
|
254
|
+
|
255
|
+
def test_n
|
256
|
+
lines = "gi|308171891|ref|NC_014551.1|\t111565\tN\t7\taaAaAAA\t~~~~~~~\n"
|
257
|
+
piles = Bio::DB::PileupIterator.new(lines).to_a #parse, it should fail otherwise
|
258
|
+
assert_equal 1, piles.length
|
259
|
+
assert_equal 7, piles[0].coverage
|
260
|
+
assert_equal 'a', piles[0].reads[1].sequence
|
261
|
+
assert_equal 'N', piles[0].ref_base
|
262
|
+
end
|
263
|
+
|
264
|
+
def test_direction_without_starting_a_read
|
265
|
+
lines = "contig00074\t4243\tN\t8\tg$GGGgGgg\t~~~~~~~~\n"
|
266
|
+
|
267
|
+
piles = Bio::DB::PileupIterator.new(lines).to_a
|
268
|
+
assert_equal '-', piles[0].reads[0].direction
|
269
|
+
assert_equal '+', piles[0].reads[1].direction
|
270
|
+
end
|
226
271
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-pileup_iterator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-11-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: 1.4.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.2
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: bio-samtools
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ! '>='
|
@@ -32,10 +37,15 @@ dependencies:
|
|
32
37
|
version: 0.5.3
|
33
38
|
type: :runtime
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.5.3
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: bio-logger
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
39
49
|
none: false
|
40
50
|
requirements:
|
41
51
|
- - ! '>='
|
@@ -43,10 +53,15 @@ dependencies:
|
|
43
53
|
version: 1.0.0
|
44
54
|
type: :runtime
|
45
55
|
prerelease: false
|
46
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.0.0
|
47
62
|
- !ruby/object:Gem::Dependency
|
48
63
|
name: shoulda
|
49
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
50
65
|
none: false
|
51
66
|
requirements:
|
52
67
|
- - ! '>='
|
@@ -54,10 +69,15 @@ dependencies:
|
|
54
69
|
version: '0'
|
55
70
|
type: :development
|
56
71
|
prerelease: false
|
57
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
58
78
|
- !ruby/object:Gem::Dependency
|
59
79
|
name: rdoc
|
60
|
-
requirement:
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
61
81
|
none: false
|
62
82
|
requirements:
|
63
83
|
- - ~>
|
@@ -65,10 +85,15 @@ dependencies:
|
|
65
85
|
version: '3.12'
|
66
86
|
type: :development
|
67
87
|
prerelease: false
|
68
|
-
version_requirements:
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '3.12'
|
69
94
|
- !ruby/object:Gem::Dependency
|
70
95
|
name: bundler
|
71
|
-
requirement:
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
72
97
|
none: false
|
73
98
|
requirements:
|
74
99
|
- - ! '>='
|
@@ -76,10 +101,15 @@ dependencies:
|
|
76
101
|
version: 1.0.0
|
77
102
|
type: :development
|
78
103
|
prerelease: false
|
79
|
-
version_requirements:
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.0.0
|
80
110
|
- !ruby/object:Gem::Dependency
|
81
111
|
name: jeweler
|
82
|
-
requirement:
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
83
113
|
none: false
|
84
114
|
requirements:
|
85
115
|
- - ~>
|
@@ -87,10 +117,15 @@ dependencies:
|
|
87
117
|
version: 1.8.3
|
88
118
|
type: :development
|
89
119
|
prerelease: false
|
90
|
-
version_requirements:
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ~>
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.8.3
|
91
126
|
- !ruby/object:Gem::Dependency
|
92
127
|
name: bio
|
93
|
-
requirement:
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
94
129
|
none: false
|
95
130
|
requirements:
|
96
131
|
- - ! '>='
|
@@ -98,10 +133,15 @@ dependencies:
|
|
98
133
|
version: 1.4.2
|
99
134
|
type: :development
|
100
135
|
prerelease: false
|
101
|
-
version_requirements:
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 1.4.2
|
102
142
|
- !ruby/object:Gem::Dependency
|
103
143
|
name: rdoc
|
104
|
-
requirement:
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
105
145
|
none: false
|
106
146
|
requirements:
|
107
147
|
- - ~>
|
@@ -109,7 +149,12 @@ dependencies:
|
|
109
149
|
version: '3.12'
|
110
150
|
type: :development
|
111
151
|
prerelease: false
|
112
|
-
version_requirements:
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ~>
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '3.12'
|
113
158
|
description: Iterate through a samtools pileup file
|
114
159
|
email: donttrustben near gmail.com
|
115
160
|
executables: []
|
@@ -144,7 +189,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
144
189
|
version: '0'
|
145
190
|
segments:
|
146
191
|
- 0
|
147
|
-
hash:
|
192
|
+
hash: -965124785
|
148
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
194
|
none: false
|
150
195
|
requirements:
|
@@ -153,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
153
198
|
version: '0'
|
154
199
|
requirements: []
|
155
200
|
rubyforge_project:
|
156
|
-
rubygems_version: 1.8.
|
201
|
+
rubygems_version: 1.8.24
|
157
202
|
signing_key:
|
158
203
|
specification_version: 3
|
159
204
|
summary: Iterate through a samtools pileup file
|