bio-fastqc 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/bio/fastqc/parser.rb +71 -63
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d5242df4f3dd1aa1468109a0345a4d23490f9831
|
4
|
+
data.tar.gz: 29387a46f7ad03e0a54d609e6c176ec35c0fdd3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2b043c738683cf72a8751c7fa17f32fae49ccfcd71e29e2f774aaf0dc2a950667fcd7eb195c19dd99055b391036a846a5d9e695bbdf8ce850a3a39ad16000587
|
7
|
+
data.tar.gz: 9d035cf0add01562bf6eb106ce32392d63b71b565a261b0746908183fb1ac96885d17144a0ad522e297771017f35815ebdadd4968cb0e7521babd10c85636a94
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.4.1
|
data/lib/bio/fastqc/parser.rb
CHANGED
@@ -77,135 +77,143 @@ module Bio
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def per_base_sequence_quality
|
80
|
-
node = @object.select{|a| a.first.first == ">>Per base sequence quality" }
|
81
|
-
node.
|
80
|
+
node = @object.select{|a| a.first.first == ">>Per base sequence quality" }.first
|
81
|
+
node.select{|n| n.first != ">>Per base sequence quality" } if node
|
82
82
|
end
|
83
83
|
|
84
84
|
## Custom module: overall mean base call quality indicator
|
85
85
|
def overall_mean_quality_score
|
86
86
|
per_base = self.per_base_sequence_quality
|
87
|
-
|
88
|
-
|
87
|
+
if per_base
|
88
|
+
v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
|
89
|
+
-10 * Math.log10(v.reduce(:+) / v.size)
|
90
|
+
end
|
89
91
|
end
|
90
92
|
|
91
93
|
## Custom module: overall median base call quality indicator
|
92
94
|
def overall_median_quality_score
|
93
95
|
per_base = self.per_base_sequence_quality
|
94
|
-
|
95
|
-
|
96
|
+
if per_base
|
97
|
+
v = per_base.map{|c| (10**(c[2].to_f/-10)).to_f }
|
98
|
+
-10 * Math.log10(v.reduce(:+) / v.size)
|
99
|
+
end
|
96
100
|
end
|
97
101
|
|
98
102
|
def per_tile_sequence_quality
|
99
|
-
node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }
|
100
|
-
node.
|
101
|
-
rescue
|
102
|
-
[]
|
103
|
+
node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }.first
|
104
|
+
node.select{|n| n.first != ">>Per tile sequence quality" } if node
|
103
105
|
end
|
104
106
|
|
105
107
|
def per_sequence_quality_scores
|
106
|
-
node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }
|
107
|
-
node.
|
108
|
+
node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }.first
|
109
|
+
node.select{|n| n.first != ">>Per sequence quality scores" } if node
|
108
110
|
end
|
109
111
|
|
110
112
|
def per_base_sequence_content
|
111
|
-
node = @object.select{|a| a.first.first == ">>Per base sequence content" }
|
112
|
-
node.
|
113
|
+
node = @object.select{|a| a.first.first == ">>Per base sequence content" }.first
|
114
|
+
node.select{|n| n.first != ">>Per base sequence content" } if node
|
113
115
|
end
|
114
116
|
|
115
117
|
def per_sequence_gc_content
|
116
|
-
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
|
117
|
-
node.
|
118
|
+
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }.first
|
119
|
+
node.select{|n| n.first != ">>Per sequence GC content" } if node
|
118
120
|
end
|
119
121
|
|
120
122
|
def per_sequence_gc_content
|
121
|
-
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
|
122
|
-
node.
|
123
|
+
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }.first
|
124
|
+
node.select{|n| n.first != ">>Per sequence GC content" } if node
|
123
125
|
end
|
124
126
|
|
125
127
|
def per_base_n_content
|
126
|
-
node = @object.select{|a| a.first.first == ">>Per base N content" }
|
127
|
-
node.
|
128
|
+
node = @object.select{|a| a.first.first == ">>Per base N content" }.first
|
129
|
+
node.select{|n| n.first != ">>Per base N content" } if node
|
128
130
|
end
|
129
131
|
|
130
132
|
## Custom module: overall N content
|
131
133
|
def overall_n_content
|
132
134
|
per_base = self.per_base_n_content
|
133
|
-
|
134
|
-
|
135
|
+
if per_base
|
136
|
+
v = per_base.map{|c| c[1].to_f }
|
137
|
+
v.reduce(:+) / v.size
|
138
|
+
end
|
135
139
|
end
|
136
140
|
|
137
141
|
def sequence_length_distribution
|
138
|
-
node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }
|
139
|
-
node.
|
142
|
+
node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }.first
|
143
|
+
node.select{|n| n.first != ">>Sequence Length Distribution" } if node
|
140
144
|
end
|
141
145
|
|
142
146
|
## Custom module: mean sequence length calculated from distribution
|
143
147
|
def mean_sequence_length
|
144
148
|
distribution = self.sequence_length_distribution
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
149
|
+
if distribution
|
150
|
+
sum = distribution.map do |length_count|
|
151
|
+
length = length_count[0]
|
152
|
+
count = length_count[1].to_f
|
153
|
+
if length =~ /\d-\d/
|
154
|
+
f = length.sub(/-\d+$/,"").to_i
|
155
|
+
b = length.sub(/^\d+-/,"").to_i
|
156
|
+
mean = (f + b) / 2
|
157
|
+
mean * count
|
158
|
+
else
|
159
|
+
length.to_i * count
|
160
|
+
end
|
155
161
|
end
|
162
|
+
sum.reduce(:+) / self.total_sequences
|
156
163
|
end
|
157
|
-
sum.reduce(:+) / self.total_sequences
|
158
164
|
end
|
159
165
|
|
160
166
|
## Custom module: median sequence length calculated from distribution
|
161
167
|
def median_sequence_length
|
162
168
|
distribution = self.sequence_length_distribution
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
169
|
+
if distribution
|
170
|
+
array = distribution.map do |length_count|
|
171
|
+
length = length_count[0]
|
172
|
+
count = length_count[1].to_i
|
173
|
+
if length =~ /\d-\d/
|
174
|
+
f = length.sub(/-\d+$/,"").to_i
|
175
|
+
b = length.sub(/^\d+-/,"").to_i
|
176
|
+
mean = (f + b) / 2
|
177
|
+
[mean.to_f] * count
|
178
|
+
else
|
179
|
+
[length.to_f] * count
|
180
|
+
end
|
181
|
+
end
|
182
|
+
sorted = array.flatten.sort
|
183
|
+
quot = sorted.size / 2
|
184
|
+
if !sorted.size.even?
|
185
|
+
sorted[quot]
|
171
186
|
else
|
172
|
-
[
|
187
|
+
f = sorted[quot]
|
188
|
+
b = sorted[quot - 1]
|
189
|
+
(f + b) / 2
|
173
190
|
end
|
174
191
|
end
|
175
|
-
sorted = array.flatten.sort
|
176
|
-
quot = sorted.size / 2
|
177
|
-
if !sorted.size.even?
|
178
|
-
sorted[quot]
|
179
|
-
else
|
180
|
-
f = sorted[quot]
|
181
|
-
b = sorted[quot - 1]
|
182
|
-
(f + b) / 2
|
183
|
-
end
|
184
192
|
end
|
185
193
|
|
186
194
|
def sequence_duplication_levels
|
187
|
-
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
|
188
|
-
node.
|
195
|
+
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }.first
|
196
|
+
node.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" } if node
|
189
197
|
end
|
190
198
|
|
191
199
|
def total_duplicate_percentage
|
192
|
-
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
|
193
|
-
node.
|
200
|
+
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }.first
|
201
|
+
node.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f if node
|
194
202
|
end
|
195
203
|
|
196
204
|
def overrepresented_sequences
|
197
|
-
node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }
|
198
|
-
node.
|
205
|
+
node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }.first
|
206
|
+
node.select{|n| n.first != ">>Overrepresented sequences" } if node
|
199
207
|
end
|
200
208
|
|
201
209
|
def adapter_content
|
202
|
-
node = @object.select{|a| a.first.first == ">>Adapter Content" }
|
203
|
-
node.
|
210
|
+
node = @object.select{|a| a.first.first == ">>Adapter Content" }.first
|
211
|
+
node.select{|n| n.first != ">>Adapter Content" } if node
|
204
212
|
end
|
205
213
|
|
206
214
|
def kmer_content
|
207
|
-
node = @object.select{|a| a.first.first == ">>Kmer Content" }
|
208
|
-
node.
|
215
|
+
node = @object.select{|a| a.first.first == ">>Kmer Content" }.first
|
216
|
+
node.select{|n| n.first != ">>Kmer Content" } if node
|
209
217
|
end
|
210
218
|
|
211
219
|
def summary
|