bio-fastqc 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/bio/fastqc/parser.rb +71 -63
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f33c80f37ab9c976d1cdfd3e12735bcc1c6216a0
4
- data.tar.gz: 1e72aa79ffbbe2e8622434b3f81333e4c222066b
3
+ metadata.gz: d5242df4f3dd1aa1468109a0345a4d23490f9831
4
+ data.tar.gz: 29387a46f7ad03e0a54d609e6c176ec35c0fdd3d
5
5
  SHA512:
6
- metadata.gz: 1c6b23deab46efbe45a64beef5575127e1edcbaae9903b30d9d7b795b3d0151ec9d11ed14bcd2dc66b31127e9f5a3abaf7ba6c843b46a4bfd86a9446d735412f
7
- data.tar.gz: 5bd1f534bb2dc492ca6d98b3d4b6f614e75cafb72c69f1b917acb4284087d456dc776eb6b944a8707c5f6f9401412a4b5903cdd8a4673dfb31ccb7fcad3617ec
6
+ metadata.gz: 2b043c738683cf72a8751c7fa17f32fae49ccfcd71e29e2f774aaf0dc2a950667fcd7eb195c19dd99055b391036a846a5d9e695bbdf8ce850a3a39ad16000587
7
+ data.tar.gz: 9d035cf0add01562bf6eb106ce32392d63b71b565a261b0746908183fb1ac96885d17144a0ad522e297771017f35815ebdadd4968cb0e7521babd10c85636a94
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.4.1
data/lib/bio/fastqc/parser.rb CHANGED
@@ -77,135 +77,143 @@ module Bio
77
77
  end
78
78
 
79
79
  def per_base_sequence_quality
80
- node = @object.select{|a| a.first.first == ">>Per base sequence quality" }
81
- node.first.select{|n| n.first != ">>Per base sequence quality" }
80
+ node = @object.select{|a| a.first.first == ">>Per base sequence quality" }.first
81
+ node.select{|n| n.first != ">>Per base sequence quality" } if node
82
82
  end
83
83
 
84
84
  ## Custom module: overall mean base call quality indicator
85
85
  def overall_mean_quality_score
86
86
  per_base = self.per_base_sequence_quality
87
- v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
88
- -10 * Math.log10(v.reduce(:+) / v.size)
87
+ if per_base
88
+ v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
89
+ -10 * Math.log10(v.reduce(:+) / v.size)
90
+ end
89
91
  end
90
92
 
91
93
  ## Custom module: overall median base call quality indicator
92
94
  def overall_median_quality_score
93
95
  per_base = self.per_base_sequence_quality
94
- v = per_base.map{|c| (10**(c[2].to_f/-10)).to_f }
95
- -10 * Math.log10(v.reduce(:+) / v.size)
96
+ if per_base
97
+ v = per_base.map{|c| (10**(c[2].to_f/-10)).to_f }
98
+ -10 * Math.log10(v.reduce(:+) / v.size)
99
+ end
96
100
  end
97
101
 
98
102
  def per_tile_sequence_quality
99
- node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }
100
- node.first.select{|n| n.first != ">>Per tile sequence quality" }
101
- rescue
102
- []
103
+ node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }.first
104
+ node.select{|n| n.first != ">>Per tile sequence quality" } if node
103
105
  end
104
106
 
105
107
  def per_sequence_quality_scores
106
- node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }
107
- node.first.select{|n| n.first != ">>Per sequence quality scores" }
108
+ node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }.first
109
+ node.select{|n| n.first != ">>Per sequence quality scores" } if node
108
110
  end
109
111
 
110
112
  def per_base_sequence_content
111
- node = @object.select{|a| a.first.first == ">>Per base sequence content" }
112
- node.first.select{|n| n.first != ">>Per base sequence content" }
113
+ node = @object.select{|a| a.first.first == ">>Per base sequence content" }.first
114
+ node.select{|n| n.first != ">>Per base sequence content" } if node
113
115
  end
114
116
 
115
117
  def per_sequence_gc_content
116
- node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
117
- node.first.select{|n| n.first != ">>Per sequence GC content" }
118
+ node = @object.select{|a| a.first.first == ">>Per sequence GC content" }.first
119
+ node.select{|n| n.first != ">>Per sequence GC content" } if node
118
120
  end
119
121
 
120
122
  def per_sequence_gc_content
121
- node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
122
- node.first.select{|n| n.first != ">>Per sequence GC content" }
123
+ node = @object.select{|a| a.first.first == ">>Per sequence GC content" }.first
124
+ node.select{|n| n.first != ">>Per sequence GC content" } if node
123
125
  end
124
126
 
125
127
  def per_base_n_content
126
- node = @object.select{|a| a.first.first == ">>Per base N content" }
127
- node.first.select{|n| n.first != ">>Per base N content" }
128
+ node = @object.select{|a| a.first.first == ">>Per base N content" }.first
129
+ node.select{|n| n.first != ">>Per base N content" } if node
128
130
  end
129
131
 
130
132
  ## Custom module: overall N content
131
133
  def overall_n_content
132
134
  per_base = self.per_base_n_content
133
- v = per_base.map{|c| c[1].to_f }
134
- v.reduce(:+) / v.size
135
+ if per_base
136
+ v = per_base.map{|c| c[1].to_f }
137
+ v.reduce(:+) / v.size
138
+ end
135
139
  end
136
140
 
137
141
  def sequence_length_distribution
138
- node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }
139
- node.first.select{|n| n.first != ">>Sequence Length Distribution" }
142
+ node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }.first
143
+ node.select{|n| n.first != ">>Sequence Length Distribution" } if node
140
144
  end
141
145
 
142
146
  ## Custom module: mean sequence length calculated from distribution
143
147
  def mean_sequence_length
144
148
  distribution = self.sequence_length_distribution
145
- sum = distribution.map do |length_count|
146
- length = length_count[0]
147
- count = length_count[1].to_f
148
- if length =~ /\d-\d/
149
- f = length.sub(/-\d+$/,"").to_i
150
- b = length.sub(/^\d+-/,"").to_i
151
- mean = (f + b) / 2
152
- mean * count
153
- else
154
- length.to_i * count
149
+ if distribution
150
+ sum = distribution.map do |length_count|
151
+ length = length_count[0]
152
+ count = length_count[1].to_f
153
+ if length =~ /\d-\d/
154
+ f = length.sub(/-\d+$/,"").to_i
155
+ b = length.sub(/^\d+-/,"").to_i
156
+ mean = (f + b) / 2
157
+ mean * count
158
+ else
159
+ length.to_i * count
160
+ end
155
161
  end
162
+ sum.reduce(:+) / self.total_sequences
156
163
  end
157
- sum.reduce(:+) / self.total_sequences
158
164
  end
159
165
 
160
166
  ## Custom module: median sequence length calculated from distribution
161
167
  def median_sequence_length
162
168
  distribution = self.sequence_length_distribution
163
- array = distribution.map do |length_count|
164
- length = length_count[0]
165
- count = length_count[1].to_i
166
- if length =~ /\d-\d/
167
- f = length.sub(/-\d+$/,"").to_i
168
- b = length.sub(/^\d+-/,"").to_i
169
- mean = (f + b) / 2
170
- [mean.to_f] * count
169
+ if distribution
170
+ array = distribution.map do |length_count|
171
+ length = length_count[0]
172
+ count = length_count[1].to_i
173
+ if length =~ /\d-\d/
174
+ f = length.sub(/-\d+$/,"").to_i
175
+ b = length.sub(/^\d+-/,"").to_i
176
+ mean = (f + b) / 2
177
+ [mean.to_f] * count
178
+ else
179
+ [length.to_f] * count
180
+ end
181
+ end
182
+ sorted = array.flatten.sort
183
+ quot = sorted.size / 2
184
+ if !sorted.size.even?
185
+ sorted[quot]
171
186
  else
172
- [length.to_f] * count
187
+ f = sorted[quot]
188
+ b = sorted[quot - 1]
189
+ (f + b) / 2
173
190
  end
174
191
  end
175
- sorted = array.flatten.sort
176
- quot = sorted.size / 2
177
- if !sorted.size.even?
178
- sorted[quot]
179
- else
180
- f = sorted[quot]
181
- b = sorted[quot - 1]
182
- (f + b) / 2
183
- end
184
192
  end
185
193
 
186
194
  def sequence_duplication_levels
187
- node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
188
- node.first.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" }
195
+ node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }.first
196
+ node.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" } if node
189
197
  end
190
198
 
191
199
  def total_duplicate_percentage
192
- node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
193
- node.first.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f
200
+ node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }.first
201
+ node.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f if node
194
202
  end
195
203
 
196
204
  def overrepresented_sequences
197
- node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }
198
- node.first.select{|n| n.first != ">>Overrepresented sequences" }
205
+ node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }.first
206
+ node.select{|n| n.first != ">>Overrepresented sequences" } if node
199
207
  end
200
208
 
201
209
  def adapter_content
202
- node = @object.select{|a| a.first.first == ">>Adapter Content" }
203
- node.first.select{|n| n.first != ">>Adapter Content" }
210
+ node = @object.select{|a| a.first.first == ">>Adapter Content" }.first
211
+ node.select{|n| n.first != ">>Adapter Content" } if node
204
212
  end
205
213
 
206
214
  def kmer_content
207
- node = @object.select{|a| a.first.first == ">>Kmer Content" }
208
- node.first.select{|n| n.first != ">>Kmer Content" }
215
+ node = @object.select{|a| a.first.first == ">>Kmer Content" }.first
216
+ node.select{|n| n.first != ">>Kmer Content" } if node
209
217
  end
210
218
 
211
219
  def summary
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-fastqc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tazro Inutano Ohta