bio-fastqc 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -8
- data/Gemfile +1 -1
- data/README.md +5 -1
- data/VERSION +1 -1
- data/lib/bio/fastqc/data.rb +23 -23
- data/lib/bio/fastqc/parser.rb +224 -213
- data/spec/bio-fastqc_spec.rb +288 -9
- data/spec/spec_helper.rb +12 -0
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47531c30911b727ad8b3bdc50e26ca8a3a6467e1
|
4
|
+
data.tar.gz: 68bfdb57eac8779b634abcce1bed1cd40986a602
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5b9b66d01c11669db71c96eb3c7e87c74e7ab45b55605481b6d75b14f5c06b7bb2813dff5d20833068d17bf7246aef663356523b46278621b863675ebc9186e
|
7
|
+
data.tar.gz: e81822150a02e8e7634e0a5942fe0806297c5a86211dc2bb5609d2504b47d64e77c4e7122526069037e6af1166857ba1934444fb51aa4e241690f598e9afc051
|
data/.travis.yml
CHANGED
@@ -1,13 +1,6 @@
|
|
1
1
|
language: ruby
|
2
2
|
rvm:
|
3
|
-
-
|
4
|
-
- 1.9.3
|
5
|
-
- jruby-19mode # JRuby in 1.9 mode
|
6
|
-
|
7
|
-
# - rbx-19mode
|
8
|
-
# - 1.8.7
|
9
|
-
# - jruby-18mode # JRuby in 1.8 mode
|
10
|
-
# - rbx-18mode
|
3
|
+
- 2.2.1
|
11
4
|
|
12
5
|
# uncomment this line if your project needs to run something other than `rake`:
|
13
6
|
# script: bundle exec rspec spec
|
data/Gemfile
CHANGED
@@ -9,7 +9,7 @@ gem 'thor', "~> 0.19.1"
|
|
9
9
|
# Add dependencies to develop your gem here.
|
10
10
|
# Include everything needed to run rake, tests, features, etc.
|
11
11
|
group :development do
|
12
|
-
gem 'bundler', '
|
12
|
+
gem 'bundler', '>= 1.8.0'
|
13
13
|
gem 'rake', '~> 10.0'
|
14
14
|
gem 'rspec', '~> 3.3'
|
15
15
|
gem 'jeweler', '~> 2.0'
|
data/README.md
CHANGED
@@ -2,7 +2,11 @@
|
|
2
2
|
|
3
3
|
[Build Status](http://travis-ci.org/inutano/bioruby-fastqc)
|
4
4
|
|
5
|
-
A ruby parser for [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) data.
|
5
|
+
A ruby parser for [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) data.
|
6
|
+
|
7
|
+
## Requirements
|
8
|
+
|
9
|
+
Ruby 2.0 or later
|
6
10
|
|
7
11
|
## Installation
|
8
12
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.2.1
|
data/lib/bio/fastqc/data.rb
CHANGED
@@ -4,33 +4,33 @@ require 'zip'
|
|
4
4
|
|
5
5
|
module Bio
|
6
6
|
module FastQC
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
class Data
|
8
|
+
class << self
|
9
|
+
def read(file)
|
10
|
+
read_zipfile(file)
|
11
|
+
rescue Zip::Error
|
12
|
+
read_flatfile(file)
|
13
13
|
rescue Errno::EISDIR
|
14
14
|
read_dir(file)
|
15
|
-
|
15
|
+
end
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
17
|
+
def read_zipfile(file)
|
18
|
+
Zip::File.open(file) do |zipfile|
|
19
|
+
zipfile.glob('*/fastqc_data.txt').first.get_input_stream.read
|
20
|
+
end
|
21
|
+
end
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
def read_flatfile(file)
|
24
|
+
open(file).read
|
25
|
+
end
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
27
|
+
def read_dir(file)
|
28
|
+
open(File.join(file, "fastqc_data.txt")).read
|
29
|
+
rescue Errno::ENOENT
|
30
|
+
puts "FastQC data file fastqc_data.txt not found"
|
31
|
+
exit
|
32
|
+
end
|
33
33
|
end
|
34
|
-
|
35
|
-
|
34
|
+
end
|
35
|
+
end
|
36
36
|
end
|
data/lib/bio/fastqc/parser.rb
CHANGED
@@ -2,94 +2,98 @@
|
|
2
2
|
|
3
3
|
module Bio
|
4
4
|
module FastQC
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
5
|
+
class Parser
|
6
|
+
def initialize(fastqc_data_txt)
|
7
|
+
@data = fastqc_data_txt
|
8
|
+
@object = parse(@data)
|
9
|
+
@base = self.basic_statistics
|
10
|
+
end
|
11
|
+
|
12
|
+
def parse(data)
|
13
|
+
modules = data.split(">>END_MODULE\n")
|
14
|
+
modules.map do |node|
|
15
|
+
lines = node.split("\n")
|
16
|
+
rm_header = lines.map do |line|
|
17
|
+
if line !~ /^\#/ || line =~ /^#Total Duplicate Percentage/
|
18
|
+
line.split("\t")
|
19
|
+
end
|
20
|
+
end
|
21
|
+
rm_header.compact
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def fastqc_version
|
26
|
+
@data.split("\n").first.split("\t").last
|
27
|
+
end
|
28
|
+
|
29
|
+
def basic_statistics
|
30
|
+
Hash[*@object.select{|a| a.first.first == ">>Basic Statistics" }.flatten]
|
31
|
+
end
|
32
|
+
|
33
|
+
def filename
|
34
|
+
@base["Filename"]
|
35
|
+
end
|
36
|
+
|
37
|
+
def file_type
|
38
|
+
@base["File type"]
|
39
|
+
end
|
40
|
+
|
41
|
+
def encoding
|
42
|
+
@base["Encoding"]
|
43
|
+
end
|
44
|
+
|
45
|
+
def total_sequences
|
46
|
+
@base["Total Sequences"].to_i
|
47
|
+
end
|
48
|
+
|
49
|
+
def filtered_sequences
|
50
|
+
@base["Filtered Sequences"].to_i
|
51
|
+
end
|
52
|
+
|
53
|
+
def sequence_length
|
54
|
+
@base["Sequence length"]
|
55
|
+
end
|
56
|
+
|
57
|
+
def min_length
|
58
|
+
l = @base["Sequence length"]
|
59
|
+
if l =~ /\d-\d/
|
60
|
+
l.sub(/-\d+$/,"").to_i
|
61
|
+
else
|
62
|
+
l.to_i
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def max_length
|
67
|
+
l = @base["Sequence length"]
|
68
|
+
if l =~ /\d-\d/
|
69
|
+
l.sub(/^\d+-/,"").to_i
|
70
|
+
else
|
71
|
+
l.to_i
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def percent_gc
|
76
|
+
@base["%GC"].to_i
|
77
|
+
end
|
78
|
+
|
79
|
+
def per_base_sequence_quality
|
80
|
+
node = @object.select{|a| a.first.first == ">>Per base sequence quality" }
|
81
|
+
node.first.select{|n| n.first != ">>Per base sequence quality" }
|
82
|
+
end
|
83
|
+
|
84
|
+
## Custom module: overall mean base call quality indicator
|
85
|
+
def overall_mean_quality_score
|
86
|
+
per_base = self.per_base_sequence_quality
|
87
|
+
v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
|
88
|
+
-10 * Math.log10(v.reduce(:+) / v.size)
|
89
|
+
end
|
86
90
|
|
87
91
|
## Custom module: overall median base call quality indicator
|
88
|
-
|
89
|
-
|
92
|
+
def overall_median_quality_score
|
93
|
+
per_base = self.per_base_sequence_quality
|
90
94
|
v = per_base.map{|c| (10**(c[2].to_f/-10)).to_f }
|
91
|
-
|
92
|
-
|
95
|
+
-10 * Math.log10(v.reduce(:+) / v.size)
|
96
|
+
end
|
93
97
|
|
94
98
|
def per_tile_sequence_quality
|
95
99
|
node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }
|
@@ -98,135 +102,142 @@ module Bio
|
|
98
102
|
[]
|
99
103
|
end
|
100
104
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
105
|
+
def per_sequence_quality_scores
|
106
|
+
node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }
|
107
|
+
node.first.select{|n| n.first != ">>Per sequence quality scores" }
|
108
|
+
end
|
109
|
+
|
110
|
+
def per_base_sequence_content
|
111
|
+
node = @object.select{|a| a.first.first == ">>Per base sequence content" }
|
112
|
+
node.first.select{|n| n.first != ">>Per base sequence content" }
|
113
|
+
end
|
114
|
+
|
115
|
+
def per_sequence_gc_content
|
116
|
+
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
|
117
|
+
node.first.select{|n| n.first != ">>Per sequence GC content" }
|
118
|
+
end
|
119
|
+
|
120
|
+
def per_sequence_gc_content
|
121
|
+
node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
|
122
|
+
node.first.select{|n| n.first != ">>Per sequence GC content" }
|
123
|
+
end
|
124
|
+
|
125
|
+
def per_base_n_content
|
126
|
+
node = @object.select{|a| a.first.first == ">>Per base N content" }
|
127
|
+
node.first.select{|n| n.first != ">>Per base N content" }
|
128
|
+
end
|
129
|
+
|
130
|
+
## Custom module: overall N content
|
131
|
+
def overall_n_content
|
132
|
+
per_base = self.per_base_n_content
|
133
|
+
v = per_base.map{|c| c[1].to_f }
|
134
|
+
v.reduce(:+) / v.size
|
135
|
+
end
|
136
|
+
|
137
|
+
def sequence_length_distribution
|
138
|
+
node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }
|
139
|
+
node.first.select{|n| n.first != ">>Sequence Length Distribution" }
|
140
|
+
end
|
141
|
+
|
142
|
+
## Custom module: mean sequence length calculated from distribution
|
143
|
+
def mean_sequence_length
|
144
|
+
distribution = self.sequence_length_distribution
|
145
|
+
sum = distribution.map do |length_count|
|
146
|
+
length = length_count[0]
|
147
|
+
count = length_count[1].to_f
|
148
|
+
if length =~ /\d-\d/
|
149
|
+
f = length.sub(/-\d+$/,"").to_i
|
150
|
+
b = length.sub(/^\d+-/,"").to_i
|
151
|
+
mean = (f + b) / 2
|
152
|
+
mean * count
|
153
|
+
else
|
154
|
+
length.to_i * count
|
155
|
+
end
|
156
|
+
end
|
157
|
+
sum.reduce(:+) / self.total_sequences
|
158
|
+
end
|
159
|
+
|
160
|
+
## Custom module: median sequence length calculated from distribution
|
161
|
+
def median_sequence_length
|
162
|
+
distribution = self.sequence_length_distribution
|
163
|
+
array = distribution.map do |length_count|
|
164
|
+
length = length_count[0]
|
165
|
+
count = length_count[1].to_i
|
166
|
+
if length =~ /\d-\d/
|
167
|
+
f = length.sub(/-\d+$/,"").to_i
|
168
|
+
b = length.sub(/^\d+-/,"").to_i
|
169
|
+
mean = (f + b) / 2
|
170
|
+
[mean.to_f] * count
|
171
|
+
else
|
172
|
+
[length.to_f] * count
|
173
|
+
end
|
174
|
+
end
|
175
|
+
sorted = array.flatten.sort
|
176
|
+
quot = sorted.size / 2
|
177
|
+
if !sorted.size.even?
|
178
|
+
sorted[quot]
|
179
|
+
else
|
180
|
+
f = sorted[quot]
|
181
|
+
b = sorted[quot - 1]
|
182
|
+
(f + b) / 2
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
def sequence_duplication_levels
|
187
|
+
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
|
188
|
+
node.first.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" }
|
189
|
+
end
|
190
|
+
|
191
|
+
def total_duplicate_percentage
|
192
|
+
node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
|
193
|
+
node.first.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f
|
194
|
+
end
|
195
|
+
|
196
|
+
def overrepresented_sequences
|
197
|
+
node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }
|
198
|
+
node.first.select{|n| n.first != ">>Overrepresented sequences" }
|
199
|
+
end
|
200
|
+
|
201
|
+
def adapter_content
|
202
|
+
node = @object.select{|a| a.first.first == ">>Adapter Content" }
|
203
|
+
node.first.select{|n| n.first != ">>Adapter Content" }
|
204
|
+
end
|
205
|
+
|
206
|
+
def kmer_content
|
207
|
+
node = @object.select{|a| a.first.first == ">>Kmer Content" }
|
208
|
+
node.first.select{|n| n.first != ">>Kmer Content" }
|
209
|
+
end
|
210
|
+
|
211
|
+
def summary
|
212
|
+
{
|
213
|
+
fastqc_version: self.fastqc_version,
|
204
214
|
filename: self.filename,
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
215
|
+
file_type: self.file_type,
|
216
|
+
encoding: self.encoding,
|
217
|
+
total_sequences: self.total_sequences,
|
218
|
+
filtered_sequences: self.filtered_sequences,
|
219
|
+
sequence_length: self.sequence_length,
|
220
|
+
percent_gc: self.percent_gc,
|
221
|
+
per_base_sequence_quality: self.per_base_sequence_quality,
|
212
222
|
per_tile_sequence_quality: self.per_tile_sequence_quality,
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
223
|
+
per_sequnce_quality_scores: self.per_sequence_quality_scores,
|
224
|
+
per_base_sequence_content: self.per_base_sequence_content,
|
225
|
+
per_sequence_gc_content: self.per_sequence_gc_content,
|
226
|
+
per_base_n_content: self.per_base_n_content,
|
227
|
+
sequence_length_distribution: self.sequence_length_distribution,
|
228
|
+
total_duplicate_percentage: self.total_duplicate_percentage, sequence_duplication_levels: self.sequence_duplication_levels,
|
229
|
+
overrepresented_sequences: self.overrepresented_sequences,
|
230
|
+
adapter_content: self.adapter_content,
|
231
|
+
kmer_content: self.kmer_content,
|
232
|
+
min_length: self.min_length,
|
233
|
+
max_length: self.max_length,
|
234
|
+
overall_mean_quality_score: self.overall_mean_quality_score,
|
235
|
+
overall_median_quality_score: self.overall_median_quality_score,
|
236
|
+
overall_n_content: self.overall_n_content,
|
237
|
+
mean_sequence_length: self.mean_sequence_length,
|
238
|
+
median_sequence_length: self.median_sequence_length,
|
228
239
|
}
|
229
|
-
|
230
|
-
|
231
|
-
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
232
243
|
end
|
data/spec/bio-fastqc_spec.rb
CHANGED
@@ -3,17 +3,296 @@ require 'spec_helper'
|
|
3
3
|
describe Bio::FastQC do
|
4
4
|
context "with an example data" do
|
5
5
|
before do
|
6
|
-
zipfile = File.join(__dir__, "example_fastqc.zip")
|
7
|
-
@data = Bio::FastQC::Data.read(zipfile)
|
6
|
+
@zipfile = File.join(__dir__, "example_fastqc.zip")
|
8
7
|
end
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
|
9
|
+
describe Bio::FastQC::Data do
|
10
|
+
before do
|
11
|
+
@data = Bio::FastQC::Data.read(@zipfile)
|
12
|
+
end
|
13
|
+
|
14
|
+
describe '#read' do
|
15
|
+
it 'returns parsed data from zipfile' do
|
16
|
+
expect(@data).not_to be_empty
|
17
|
+
end
|
18
|
+
end
|
12
19
|
end
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
20
|
+
|
21
|
+
describe Bio::FastQC::Parser do
|
22
|
+
before do
|
23
|
+
@data = Bio::FastQC::Data.read(@zipfile)
|
24
|
+
@parser = Bio::FastQC::Parser.new(@data)
|
25
|
+
end
|
26
|
+
|
27
|
+
describe '#fastqc_version' do
|
28
|
+
it 'returns fastqc version as String and not empty' do
|
29
|
+
expect(@parser.fastqc_version).to be_instance_of(String)
|
30
|
+
expect(@parser.fastqc_version).not_to be_empty
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#filename' do
|
35
|
+
it 'returns filename as String and not empty' do
|
36
|
+
expect(@parser.filename).to be_instance_of(String)
|
37
|
+
expect(@parser.filename).not_to be_empty
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe '#file_type' do
|
42
|
+
it 'returns file type as String and not empty' do
|
43
|
+
expect(@parser.file_type).to be_instance_of(String)
|
44
|
+
expect(@parser.file_type).not_to be_empty
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe '#encoding' do
|
49
|
+
it 'returns encoding type as String and not empty' do
|
50
|
+
expect(@parser.encoding).to be_instance_of(String)
|
51
|
+
expect(@parser.encoding).not_to be_empty
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
describe '#total_sequences' do
|
56
|
+
it 'returns total number of sequences as Fixnum' do
|
57
|
+
expect(@parser.total_sequences).to be_instance_of(Fixnum)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
describe '#filtered_sequences' do
|
62
|
+
it 'returns number of filtered sequence as Fixnum and not empty' do
|
63
|
+
expect(@parser.filtered_sequences).to be_instance_of(Fixnum)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe '#sequence_length' do
|
68
|
+
it 'returns length of sequence as String and not empty' do
|
69
|
+
expect(@parser.sequence_length).to be_instance_of(String)
|
70
|
+
expect(@parser.sequence_length).not_to be_empty
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
describe '#percent_gc' do
|
75
|
+
it 'returns percentage of GC content as Fixnum and not empty' do
|
76
|
+
expect(@parser.percent_gc).to be_instance_of(Fixnum)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe '#per_base_sequence_quality' do
|
81
|
+
before do
|
82
|
+
@value = @parser.per_base_sequence_quality
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'returns data frame as Array' do
|
86
|
+
expect(@value).to be_instance_of(Array)
|
87
|
+
end
|
88
|
+
|
89
|
+
it 'returns an array with depth 2' do
|
90
|
+
expect(@value.depth).to eq(2)
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'returns an array of an array with 7 elements' do
|
94
|
+
sizes = @value.map{|a| a.size }.uniq
|
95
|
+
expect(sizes).to eq([7])
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe '#per_tile_sequence_quality' do
|
100
|
+
it 'returns data frame as Array' do
|
101
|
+
expect(@parser.per_tile_sequence_quality).to be_instance_of(Array)
|
102
|
+
end
|
103
|
+
|
104
|
+
it 'returns array with depth 2' do
|
105
|
+
expect(@parser.per_tile_sequence_quality.depth).to eq(2)
|
106
|
+
end
|
107
|
+
|
108
|
+
it 'returns an array of an array with 3 elements' do
|
109
|
+
sizes = @parser.per_tile_sequence_quality.map{|a| a.size }.uniq
|
110
|
+
expect(sizes).to eq([3])
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
describe '#per_sequence_quality_scores' do
|
115
|
+
it 'returns data frame as Array' do
|
116
|
+
expect(@parser.per_sequence_quality_scores).to be_instance_of(Array)
|
117
|
+
end
|
118
|
+
|
119
|
+
it 'returns array with depth 2' do
|
120
|
+
expect(@parser.per_sequence_quality_scores.depth).to eq(2)
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'returns an array of an array with 2 elements' do
|
124
|
+
sizes = @parser.per_sequence_quality_scores.map{|a| a.size }.uniq
|
125
|
+
expect(sizes).to eq([2])
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
describe '#per_base_sequence_content' do
|
130
|
+
it 'returns data frame as Array' do
|
131
|
+
expect(@parser.per_base_sequence_content).to be_instance_of(Array)
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'returns array with depth 2' do
|
135
|
+
expect(@parser.per_base_sequence_content.depth).to eq(2)
|
136
|
+
end
|
137
|
+
|
138
|
+
it 'returns an array of an array with 5 elements' do
|
139
|
+
sizes = @parser.per_base_sequence_content.map{|a| a.size }.uniq
|
140
|
+
expect(sizes).to eq([5])
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
describe '#per_sequence_gc_content' do
|
145
|
+
it 'returns data frame as Array' do
|
146
|
+
expect(@parser.per_sequence_gc_content).to be_instance_of(Array)
|
147
|
+
end
|
148
|
+
|
149
|
+
it 'returns array with depth 2' do
|
150
|
+
expect(@parser.per_sequence_gc_content.depth).to eq(2)
|
151
|
+
end
|
152
|
+
|
153
|
+
it 'returns an array of an array with 2 elements' do
|
154
|
+
sizes = @parser.per_sequence_gc_content.map{|a| a.size }.uniq
|
155
|
+
expect(sizes).to eq([2])
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe '#per_base_n_content' do
|
160
|
+
it 'returns data frame as Array' do
|
161
|
+
expect(@parser.per_base_n_content).to be_instance_of(Array)
|
162
|
+
end
|
163
|
+
|
164
|
+
it 'returns array with depth 2' do
|
165
|
+
expect(@parser.per_base_n_content.depth).to eq(2)
|
166
|
+
end
|
167
|
+
|
168
|
+
it 'returns an array of an array with 2 elements' do
|
169
|
+
sizes = @parser.per_base_n_content.map{|a| a.size }.uniq
|
170
|
+
expect(sizes).to eq([2])
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
describe '#sequence_length_distribution' do
|
175
|
+
it 'returns data frame as Array' do
|
176
|
+
expect(@parser.sequence_length_distribution).to be_instance_of(Array)
|
177
|
+
end
|
178
|
+
|
179
|
+
it 'returns array with depth 2' do
|
180
|
+
expect(@parser.sequence_length_distribution.depth).to eq(2)
|
181
|
+
end
|
182
|
+
|
183
|
+
it 'returns an array of an array with 2 elements' do
|
184
|
+
sizes = @parser.sequence_length_distribution.map{|a| a.size }.uniq
|
185
|
+
expect(sizes).to eq([2])
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
describe '#total_duplicate_percentage' do
|
190
|
+
it 'returns duplicate percentage as Float and not empty' do
|
191
|
+
expect(@parser.total_duplicate_percentage).to be_instance_of(Float)
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
describe '#sequence_duplication_levels' do
|
196
|
+
it 'returns data frame as Array' do
|
197
|
+
expect(@parser.sequence_duplication_levels).to be_instance_of(Array)
|
198
|
+
end
|
199
|
+
|
200
|
+
it 'returns array with depth 2' do
|
201
|
+
expect(@parser.sequence_duplication_levels.depth).to eq(2)
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'returns an array of an array with 3 elements' do
|
205
|
+
sizes = @parser.sequence_duplication_levels.map{|a| a.size }.uniq
|
206
|
+
expect(sizes).to eq([3])
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
describe '#overrepresented_sequences' do
|
211
|
+
it 'returns data frame as Array' do
|
212
|
+
expect(@parser.overrepresented_sequences).to be_instance_of(Array)
|
213
|
+
end
|
214
|
+
|
215
|
+
it 'returns array with depth 2' do
|
216
|
+
expect(@parser.overrepresented_sequences.depth).to eq(2)
|
217
|
+
end
|
218
|
+
|
219
|
+
it 'returns an array of an array with 4 elements' do
|
220
|
+
sizes = @parser.overrepresented_sequences.map{|a| a.size }.uniq
|
221
|
+
expect(sizes).to eq([4])
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
describe '#adapter_content' do
|
226
|
+
it 'returns data frame as Array' do
|
227
|
+
expect(@parser.adapter_content).to be_instance_of(Array)
|
228
|
+
end
|
229
|
+
|
230
|
+
it 'returns array with depth 2' do
|
231
|
+
expect(@parser.adapter_content.depth).to eq(2)
|
232
|
+
end
|
233
|
+
|
234
|
+
it 'returns an array of an array with 5 elements' do
|
235
|
+
sizes = @parser.adapter_content.map{|a| a.size }.uniq
|
236
|
+
expect(sizes).to eq([5])
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
describe '#kmer_content' do
|
241
|
+
it 'returns data frame as Array' do
|
242
|
+
expect(@parser.kmer_content).to be_instance_of(Array)
|
243
|
+
end
|
244
|
+
|
245
|
+
it 'returns array with depth 2' do
|
246
|
+
expect(@parser.kmer_content.depth).to eq(2)
|
247
|
+
end
|
248
|
+
|
249
|
+
it 'returns an array of an array with 5 elements' do
|
250
|
+
sizes = @parser.kmer_content.map{|a| a.size }.uniq
|
251
|
+
expect(sizes).to eq([5])
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
describe '#min_length' do
|
256
|
+
it 'returns minimum read length as Fixnum and not empty' do
|
257
|
+
expect(@parser.min_length).to be_instance_of(Fixnum)
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
261
|
+
describe '#max_length' do
|
262
|
+
it 'returns maximum read length as Fixnum and not empty' do
|
263
|
+
expect(@parser.max_length).to be_instance_of(Fixnum)
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
describe '#overall_mean_quality_score' do
|
268
|
+
it 'returns overall mean quality score as Float and not empty' do
|
269
|
+
expect(@parser.overall_mean_quality_score).to be_instance_of(Float)
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
describe '#overall_median_quality_score' do
|
274
|
+
it 'returns overall median quality score as Float and not empty' do
|
275
|
+
expect(@parser.overall_median_quality_score).to be_instance_of(Float)
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
describe '#overall_n_content' do
|
280
|
+
it 'returns overall N content as Float and not empty' do
|
281
|
+
expect(@parser.overall_n_content).to be_instance_of(Float)
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
describe '#mean_sequence_length' do
|
286
|
+
it 'returns mean sequence length from read length distribution as Float and not empty' do
|
287
|
+
expect(@parser.mean_sequence_length).to be_instance_of(Float)
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
describe '#median_sequence_length' do
|
292
|
+
it 'returns median sequence length from read length distribution as Float and not empty' do
|
293
|
+
expect(@parser.median_sequence_length).to be_instance_of(Float)
|
294
|
+
end
|
295
|
+
end
|
17
296
|
end
|
18
297
|
end
|
19
298
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-fastqc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tazro Inutano Ohta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -48,16 +48,16 @@ dependencies:
|
|
48
48
|
name: bundler
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
50
50
|
requirements:
|
51
|
-
- - "
|
51
|
+
- - ">="
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
53
|
+
version: 1.8.0
|
54
54
|
type: :development
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
requirements:
|
58
|
-
- - "
|
58
|
+
- - ">="
|
59
59
|
- !ruby/object:Gem::Version
|
60
|
-
version:
|
60
|
+
version: 1.8.0
|
61
61
|
- !ruby/object:Gem::Dependency
|
62
62
|
name: rake
|
63
63
|
requirement: !ruby/object:Gem::Requirement
|