bio-fastqc 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/bio/fastqc/parser.rb +71 -63
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d5242df4f3dd1aa1468109a0345a4d23490f9831
|
4
|
+
data.tar.gz: 29387a46f7ad03e0a54d609e6c176ec35c0fdd3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2b043c738683cf72a8751c7fa17f32fae49ccfcd71e29e2f774aaf0dc2a950667fcd7eb195c19dd99055b391036a846a5d9e695bbdf8ce850a3a39ad16000587
|
7
|
+
data.tar.gz: 9d035cf0add01562bf6eb106ce32392d63b71b565a261b0746908183fb1ac96885d17144a0ad522e297771017f35815ebdadd4968cb0e7521babd10c85636a94
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.4.1
|
data/lib/bio/fastqc/parser.rb
CHANGED
@@ -77,135 +77,143 @@ module Bio
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def per_base_sequence_quality
|
80
|
-
node = @object.select{|a| a.first.first == ">>Per base sequence quality" }
|
81
|
-
node.
|
80
|
+
node = @object.select{|a| a.first.first == ">>Per base sequence quality" }.first
|
81
|
+
node.select{|n| n.first != ">>Per base sequence quality" } if node
|
82
82
|
end
|
83
83
|
|
84
84
|
## Custom module: overall mean base call quality indicator
|
85
85
|
def overall_mean_quality_score
|
86
86
|
per_base = self.per_base_sequence_quality
|
87
|
-
|
88
|
-
|
87
|
+
if per_base
|
88
|
+
v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
|
89
|
+
-10 * Math.log10(v.reduce(:+) / v.size)
|
90
|
+
end
|
89
91
|
end
|
90
92
|
|
91
93
|
## Custom module: overall median base call quality indicator
|
92
94
|
def overall_median_quality_score
|
93
95
|
per_base = self.per_base_sequence_quality
|
94
|
-
|
95
|
-
|
96
|
+
if per_base
|
97
|
+
v = per_base.map{|c| (10**(c[2].to_f/-10)).to_f }
|
98
|
+
-10 * Math.log10(v.reduce(:+) / v.size)
|
99
|
+
end
|
96
100
|
end
|
97
101
|
|
98
102
|
def per_tile_sequence_quality
|
99
|
-
node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }
|
100
|
-
node.
|
101
|
-
rescue
|
102
|
-
[]
|
103
|
+
node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }.first
|
104
|
+
node.select{|n| n.first != ">>Per tile sequence quality" } if node
|
103
105
|
end
|
104
106
|
|
105
107
|
def per_sequence_quality_scores
|
106
|
-
node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }
|
107
|
-
node.
|
108
|
+
node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }.first
|
109
|
+
node.select{|n| n.first != ">>Per sequence quality scores" } if node
|
108
110
|
end
|
109
111
|
|
110
112
|
def per_base_sequence_content
|
111
|
-
node = @object.select{|a| a.first.first == ">>Per base sequence content" }
|
112
|
-
node.
|
113
|
+
node = @object.select{|a| a.first.first == ">>Per base sequence content" }.first
|
114
|
+
node.select{|n| n.first != ">>Per base sequence content" } if node
|
113
115
|
end
|
114
116
|
|
115
117
|
def per_sequence_gc_content
|
116
|
-
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
|
117
|
-
node.
|
118
|
+
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }.first
|
119
|
+
node.select{|n| n.first != ">>Per sequence GC content" } if node
|
118
120
|
end
|
119
121
|
|
120
122
|
def per_sequence_gc_content
|
121
|
-
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
|
122
|
-
node.
|
123
|
+
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }.first
|
124
|
+
node.select{|n| n.first != ">>Per sequence GC content" } if node
|
123
125
|
end
|
124
126
|
|
125
127
|
def per_base_n_content
|
126
|
-
node = @object.select{|a| a.first.first == ">>Per base N content" }
|
127
|
-
node.
|
128
|
+
node = @object.select{|a| a.first.first == ">>Per base N content" }.first
|
129
|
+
node.select{|n| n.first != ">>Per base N content" } if node
|
128
130
|
end
|
129
131
|
|
130
132
|
## Custom module: overall N content
|
131
133
|
def overall_n_content
|
132
134
|
per_base = self.per_base_n_content
|
133
|
-
|
134
|
-
|
135
|
+
if per_base
|
136
|
+
v = per_base.map{|c| c[1].to_f }
|
137
|
+
v.reduce(:+) / v.size
|
138
|
+
end
|
135
139
|
end
|
136
140
|
|
137
141
|
def sequence_length_distribution
|
138
|
-
node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }
|
139
|
-
node.
|
142
|
+
node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }.first
|
143
|
+
node.select{|n| n.first != ">>Sequence Length Distribution" } if node
|
140
144
|
end
|
141
145
|
|
142
146
|
## Custom module: mean sequence length calculated from distribution
|
143
147
|
def mean_sequence_length
|
144
148
|
distribution = self.sequence_length_distribution
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
149
|
+
if distribution
|
150
|
+
sum = distribution.map do |length_count|
|
151
|
+
length = length_count[0]
|
152
|
+
count = length_count[1].to_f
|
153
|
+
if length =~ /\d-\d/
|
154
|
+
f = length.sub(/-\d+$/,"").to_i
|
155
|
+
b = length.sub(/^\d+-/,"").to_i
|
156
|
+
mean = (f + b) / 2
|
157
|
+
mean * count
|
158
|
+
else
|
159
|
+
length.to_i * count
|
160
|
+
end
|
155
161
|
end
|
162
|
+
sum.reduce(:+) / self.total_sequences
|
156
163
|
end
|
157
|
-
sum.reduce(:+) / self.total_sequences
|
158
164
|
end
|
159
165
|
|
160
166
|
## Custom module: median sequence length calculated from distribution
|
161
167
|
def median_sequence_length
|
162
168
|
distribution = self.sequence_length_distribution
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
169
|
+
if distribution
|
170
|
+
array = distribution.map do |length_count|
|
171
|
+
length = length_count[0]
|
172
|
+
count = length_count[1].to_i
|
173
|
+
if length =~ /\d-\d/
|
174
|
+
f = length.sub(/-\d+$/,"").to_i
|
175
|
+
b = length.sub(/^\d+-/,"").to_i
|
176
|
+
mean = (f + b) / 2
|
177
|
+
[mean.to_f] * count
|
178
|
+
else
|
179
|
+
[length.to_f] * count
|
180
|
+
end
|
181
|
+
end
|
182
|
+
sorted = array.flatten.sort
|
183
|
+
quot = sorted.size / 2
|
184
|
+
if !sorted.size.even?
|
185
|
+
sorted[quot]
|
171
186
|
else
|
172
|
-
[
|
187
|
+
f = sorted[quot]
|
188
|
+
b = sorted[quot - 1]
|
189
|
+
(f + b) / 2
|
173
190
|
end
|
174
191
|
end
|
175
|
-
sorted = array.flatten.sort
|
176
|
-
quot = sorted.size / 2
|
177
|
-
if !sorted.size.even?
|
178
|
-
sorted[quot]
|
179
|
-
else
|
180
|
-
f = sorted[quot]
|
181
|
-
b = sorted[quot - 1]
|
182
|
-
(f + b) / 2
|
183
|
-
end
|
184
192
|
end
|
185
193
|
|
186
194
|
def sequence_duplication_levels
|
187
|
-
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
|
188
|
-
node.
|
195
|
+
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }.first
|
196
|
+
node.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" } if node
|
189
197
|
end
|
190
198
|
|
191
199
|
def total_duplicate_percentage
|
192
|
-
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
|
193
|
-
node.
|
200
|
+
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }.first
|
201
|
+
node.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f if node
|
194
202
|
end
|
195
203
|
|
196
204
|
def overrepresented_sequences
|
197
|
-
node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }
|
198
|
-
node.
|
205
|
+
node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }.first
|
206
|
+
node.select{|n| n.first != ">>Overrepresented sequences" } if node
|
199
207
|
end
|
200
208
|
|
201
209
|
def adapter_content
|
202
|
-
node = @object.select{|a| a.first.first == ">>Adapter Content" }
|
203
|
-
node.
|
210
|
+
node = @object.select{|a| a.first.first == ">>Adapter Content" }.first
|
211
|
+
node.select{|n| n.first != ">>Adapter Content" } if node
|
204
212
|
end
|
205
213
|
|
206
214
|
def kmer_content
|
207
|
-
node = @object.select{|a| a.first.first == ">>Kmer Content" }
|
208
|
-
node.
|
215
|
+
node = @object.select{|a| a.first.first == ">>Kmer Content" }.first
|
216
|
+
node.select{|n| n.first != ">>Kmer Content" } if node
|
209
217
|
end
|
210
218
|
|
211
219
|
def summary
|