bio-fastqc 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eadb40fda6d28b9a545f14241ba2faec3b3a6125
4
- data.tar.gz: 22920fae6a3f69315e7ef1d497816b00efb2518f
3
+ metadata.gz: 47531c30911b727ad8b3bdc50e26ca8a3a6467e1
4
+ data.tar.gz: 68bfdb57eac8779b634abcce1bed1cd40986a602
5
5
  SHA512:
6
- metadata.gz: 37fb1d7b6e7124ff9c3678ca01402d9cb68422b5630bb55fd15df4e57c3697acac118e53e5d7b4a753487be693fcfd53c67e62f9d5442a2a0b350ddf1719a670
7
- data.tar.gz: 71ecd9ac350ef2aecdd8048de2f76faeb5371b0c468f781eb71f934ae5a2cd1bbecb8a4a66db8a24c0d1a022bcc74aa06291c5795f651ba6b179b0dc6fda8d7c
6
+ metadata.gz: b5b9b66d01c11669db71c96eb3c7e87c74e7ab45b55605481b6d75b14f5c06b7bb2813dff5d20833068d17bf7246aef663356523b46278621b863675ebc9186e
7
+ data.tar.gz: e81822150a02e8e7634e0a5942fe0806297c5a86211dc2bb5609d2504b47d64e77c4e7122526069037e6af1166857ba1934444fb51aa4e241690f598e9afc051
@@ -1,13 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 1.9.2
4
- - 1.9.3
5
- - jruby-19mode # JRuby in 1.9 mode
6
-
7
- # - rbx-19mode
8
- # - 1.8.7
9
- # - jruby-18mode # JRuby in 1.8 mode
10
- # - rbx-18mode
3
+ - 2.2.1
11
4
 
12
5
  # uncomment this line if your project needs to run something other than `rake`:
13
6
  # script: bundle exec rspec spec
data/Gemfile CHANGED
@@ -9,7 +9,7 @@ gem 'thor', "~> 0.19.1"
9
9
  # Add dependencies to develop your gem here.
10
10
  # Include everything needed to run rake, tests, features, etc.
11
11
  group :development do
12
- gem 'bundler', '~> 1.10'
12
+ gem 'bundler', '>= 1.8.0'
13
13
  gem 'rake', '~> 10.0'
14
14
  gem 'rspec', '~> 3.3'
15
15
  gem 'jeweler', '~> 2.0'
data/README.md CHANGED
@@ -2,7 +2,11 @@
2
2
 
3
3
  [![Build Status](https://secure.travis-ci.org/inutano/bioruby-fastqc.png)](http://travis-ci.org/inutano/bioruby-fastqc)
4
4
 
5
- A ruby parser for [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) data.
5
+ A ruby parser for [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) data.
6
+
7
+ ## Requirements
8
+
9
+ Ruby 2.0 or later
6
10
 
7
11
  ## Installation
8
12
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.2.1
@@ -4,33 +4,33 @@ require 'zip'
4
4
 
5
5
  module Bio
6
6
  module FastQC
7
- class Data
8
- class << self
9
- def read(file)
10
- read_zipfile(file)
11
- rescue Zip::Error
12
- read_flatfile(file)
7
+ class Data
8
+ class << self
9
+ def read(file)
10
+ read_zipfile(file)
11
+ rescue Zip::Error
12
+ read_flatfile(file)
13
13
  rescue Errno::EISDIR
14
14
  read_dir(file)
15
- end
15
+ end
16
16
 
17
- def read_zipfile(file)
18
- Zip::File.open(file) do |zipfile|
19
- zipfile.glob('*/fastqc_data.txt').first.get_input_stream.read
20
- end
21
- end
17
+ def read_zipfile(file)
18
+ Zip::File.open(file) do |zipfile|
19
+ zipfile.glob('*/fastqc_data.txt').first.get_input_stream.read
20
+ end
21
+ end
22
22
 
23
- def read_flatfile(file)
24
- open(file).read
25
- end
23
+ def read_flatfile(file)
24
+ open(file).read
25
+ end
26
26
 
27
- def read_dir(file)
28
- open(File.join(file, "fastqc_data.txt")).read
29
- rescue Errno::ENOENT
30
- puts "FastQC data file fastqc_data.txt not found"
31
- exit
32
- end
27
+ def read_dir(file)
28
+ open(File.join(file, "fastqc_data.txt")).read
29
+ rescue Errno::ENOENT
30
+ puts "FastQC data file fastqc_data.txt not found"
31
+ exit
32
+ end
33
33
  end
34
- end
35
- end
34
+ end
35
+ end
36
36
  end
@@ -2,94 +2,98 @@
2
2
 
3
3
  module Bio
4
4
  module FastQC
5
- class Parser
6
- def initialize(fastqc_data_txt)
7
- @data = fastqc_data_txt
8
- @object = parse(@data)
9
- @base = self.basic_statistics
10
- end
11
-
12
- def parse(data)
13
- modules = data.split(">>END_MODULE\n")
14
- modules.map do |node|
15
- lines = node.split("\n")
16
- rm_header = lines.map do |line|
17
- if line !~ /^\#/ || line =~ /^#Total Duplicate Percentage/
18
- line.split("\t")
19
- end
20
- end
21
- rm_header.compact
22
- end
23
- end
24
-
25
- def basic_statistics
26
- Hash[*@object.select{|a| a.first.first == ">>Basic Statistics" }.flatten]
27
- end
28
-
29
- def filename
30
- @base["Filename"]
31
- end
32
-
33
- def file_type
34
- @base["File type"]
35
- end
36
-
37
- def encoding
38
- @base["Encoding"]
39
- end
40
-
41
- def total_sequences
42
- @base["Total Sequences"].to_i
43
- end
44
-
45
- def filtered_sequences
46
- @base["Filtered Sequences"].to_i
47
- end
48
-
49
- def sequence_length
50
- @base["Sequence length"]
51
- end
52
-
53
- def min_length
54
- l = @base["Sequence length"]
55
- if l =~ /\d-\d/
56
- l.sub(/-\d+$/,"").to_i
57
- else
58
- l.to_i
59
- end
60
- end
61
-
62
- def max_length
63
- l = @base["Sequence length"]
64
- if l =~ /\d-\d/
65
- l.sub(/^\d+-/,"").to_i
66
- else
67
- l.to_i
68
- end
69
- end
70
-
71
- def percent_gc
72
- @base["%GC"].to_i
73
- end
74
-
75
- def per_base_sequence_quality
76
- node = @object.select{|a| a.first.first == ">>Per base sequence quality" }
77
- node.first.select{|n| n.first != ">>Per base sequence quality" }
78
- end
79
-
80
- ## Custom module: overall mean base call quality indicator
81
- def overall_mean_quality_score
82
- per_base = self.per_base_sequence_quality
83
- v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
84
- -10 * Math.log10(v.reduce(:+) / v.size)
85
- end
5
+ class Parser
6
+ def initialize(fastqc_data_txt)
7
+ @data = fastqc_data_txt
8
+ @object = parse(@data)
9
+ @base = self.basic_statistics
10
+ end
11
+
12
+ def parse(data)
13
+ modules = data.split(">>END_MODULE\n")
14
+ modules.map do |node|
15
+ lines = node.split("\n")
16
+ rm_header = lines.map do |line|
17
+ if line !~ /^\#/ || line =~ /^#Total Duplicate Percentage/
18
+ line.split("\t")
19
+ end
20
+ end
21
+ rm_header.compact
22
+ end
23
+ end
24
+
25
+ def fastqc_version
26
+ @data.split("\n").first.split("\t").last
27
+ end
28
+
29
+ def basic_statistics
30
+ Hash[*@object.select{|a| a.first.first == ">>Basic Statistics" }.flatten]
31
+ end
32
+
33
+ def filename
34
+ @base["Filename"]
35
+ end
36
+
37
+ def file_type
38
+ @base["File type"]
39
+ end
40
+
41
+ def encoding
42
+ @base["Encoding"]
43
+ end
44
+
45
+ def total_sequences
46
+ @base["Total Sequences"].to_i
47
+ end
48
+
49
+ def filtered_sequences
50
+ @base["Filtered Sequences"].to_i
51
+ end
52
+
53
+ def sequence_length
54
+ @base["Sequence length"]
55
+ end
56
+
57
+ def min_length
58
+ l = @base["Sequence length"]
59
+ if l =~ /\d-\d/
60
+ l.sub(/-\d+$/,"").to_i
61
+ else
62
+ l.to_i
63
+ end
64
+ end
65
+
66
+ def max_length
67
+ l = @base["Sequence length"]
68
+ if l =~ /\d-\d/
69
+ l.sub(/^\d+-/,"").to_i
70
+ else
71
+ l.to_i
72
+ end
73
+ end
74
+
75
+ def percent_gc
76
+ @base["%GC"].to_i
77
+ end
78
+
79
+ def per_base_sequence_quality
80
+ node = @object.select{|a| a.first.first == ">>Per base sequence quality" }
81
+ node.first.select{|n| n.first != ">>Per base sequence quality" }
82
+ end
83
+
84
+ ## Custom module: overall mean base call quality indicator
85
+ def overall_mean_quality_score
86
+ per_base = self.per_base_sequence_quality
87
+ v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
88
+ -10 * Math.log10(v.reduce(:+) / v.size)
89
+ end
86
90
 
87
91
  ## Custom module: overall median base call quality indicator
88
- def overall_median_quality_score
89
- per_base = self.per_base_sequence_quality
92
+ def overall_median_quality_score
93
+ per_base = self.per_base_sequence_quality
90
94
  v = per_base.map{|c| (10**(c[2].to_f/-10)).to_f }
91
- -10 * Math.log10(v.reduce(:+) / v.size)
92
- end
95
+ -10 * Math.log10(v.reduce(:+) / v.size)
96
+ end
93
97
 
94
98
  def per_tile_sequence_quality
95
99
  node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }
@@ -98,135 +102,142 @@ module Bio
98
102
  []
99
103
  end
100
104
 
101
- def per_sequence_quality_scores
102
- node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }
103
- node.first.select{|n| n.first != ">>Per sequence quality scores" }
104
- end
105
-
106
- def per_base_sequence_content
107
- node = @object.select{|a| a.first.first == ">>Per base sequence content" }
108
- node.first.select{|n| n.first != ">>Per base sequence content" }
109
- end
110
-
111
- def per_sequence_gc_content
112
- node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
113
- node.first.select{|n| n.first != ">>Per sequence GC content" }
114
- end
115
-
116
- def per_sequence_gc_content
117
- node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
118
- node.first.select{|n| n.first != ">>Per sequence GC content" }
119
- end
120
-
121
- def per_base_n_content
122
- node = @object.select{|a| a.first.first == ">>Per base N content" }
123
- node.first.select{|n| n.first != ">>Per base N content" }
124
- end
125
-
126
- ## Custom module: overall N content
127
- def overall_n_content
128
- per_base = self.per_base_n_content
129
- v = per_base.map{|c| c[1].to_f }
130
- v.reduce(:+) / v.size
131
- end
132
-
133
- def sequence_length_distribution
134
- node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }
135
- node.first.select{|n| n.first != ">>Sequence Length Distribution" }
136
- end
137
-
138
- ## Custom module: mean sequence length calculated from distribution
139
- def mean_sequence_length
140
- distribution = self.sequence_length_distribution
141
- sum = distribution.map do |length_count|
142
- length = length_count[0]
143
- count = length_count[1].to_f
144
- if length =~ /\d-\d/
145
- f = length.sub(/-\d+$/,"").to_i
146
- b = length.sub(/^\d+-/,"").to_i
147
- mean = (f + b) / 2
148
- mean * count
149
- else
150
- length.to_i * count
151
- end
152
- end
153
- sum.reduce(:+) / self.total_sequences
154
- end
155
-
156
- ## Custom module: median sequence length calculated from distribution
157
- def median_sequence_length
158
- distribution = self.sequence_length_distribution
159
- array = distribution.map do |length_count|
160
- length = length_count[0]
161
- count = length_count[1].to_i
162
- if length =~ /\d-\d/
163
- f = length.sub(/-\d+$/,"").to_i
164
- b = length.sub(/^\d+-/,"").to_i
165
- mean = (f + b) / 2
166
- [mean] * count
167
- else
168
- [length.to_i] * count
169
- end
170
- end
171
- sorted = array.flatten.sort
172
- quot = sorted.size / 2
173
- if !sorted.size.even?
174
- sorted[quot]
175
- else
176
- f = sorted[quot]
177
- b = sorted[quot - 1]
178
- (f + b) / 2
179
- end
180
- end
181
-
182
- def sequence_duplication_levels
183
- node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
184
- node.first.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" }
185
- end
186
-
187
- def total_duplicate_percentage
188
- node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
189
- node.first.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f
190
- end
191
-
192
- def overrepresented_sequences
193
- node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }
194
- node.first.select{|n| n.first != ">>Overrepresented sequences" }
195
- end
196
-
197
- def kmer_content
198
- node = @object.select{|a| a.first.first == ">>Kmer Content" }
199
- node.first.select{|n| n.first != ">>Kmer Content" }
200
- end
201
-
202
- def summary
203
- {
105
+ def per_sequence_quality_scores
106
+ node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }
107
+ node.first.select{|n| n.first != ">>Per sequence quality scores" }
108
+ end
109
+
110
+ def per_base_sequence_content
111
+ node = @object.select{|a| a.first.first == ">>Per base sequence content" }
112
+ node.first.select{|n| n.first != ">>Per base sequence content" }
113
+ end
114
+
115
+ def per_sequence_gc_content
116
+ node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
117
+ node.first.select{|n| n.first != ">>Per sequence GC content" }
118
+ end
119
+
120
+ def per_sequence_gc_content
121
+ node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
122
+ node.first.select{|n| n.first != ">>Per sequence GC content" }
123
+ end
124
+
125
+ def per_base_n_content
126
+ node = @object.select{|a| a.first.first == ">>Per base N content" }
127
+ node.first.select{|n| n.first != ">>Per base N content" }
128
+ end
129
+
130
+ ## Custom module: overall N content
131
+ def overall_n_content
132
+ per_base = self.per_base_n_content
133
+ v = per_base.map{|c| c[1].to_f }
134
+ v.reduce(:+) / v.size
135
+ end
136
+
137
+ def sequence_length_distribution
138
+ node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }
139
+ node.first.select{|n| n.first != ">>Sequence Length Distribution" }
140
+ end
141
+
142
+ ## Custom module: mean sequence length calculated from distribution
143
+ def mean_sequence_length
144
+ distribution = self.sequence_length_distribution
145
+ sum = distribution.map do |length_count|
146
+ length = length_count[0]
147
+ count = length_count[1].to_f
148
+ if length =~ /\d-\d/
149
+ f = length.sub(/-\d+$/,"").to_i
150
+ b = length.sub(/^\d+-/,"").to_i
151
+ mean = (f + b) / 2
152
+ mean * count
153
+ else
154
+ length.to_i * count
155
+ end
156
+ end
157
+ sum.reduce(:+) / self.total_sequences
158
+ end
159
+
160
+ ## Custom module: median sequence length calculated from distribution
161
+ def median_sequence_length
162
+ distribution = self.sequence_length_distribution
163
+ array = distribution.map do |length_count|
164
+ length = length_count[0]
165
+ count = length_count[1].to_i
166
+ if length =~ /\d-\d/
167
+ f = length.sub(/-\d+$/,"").to_i
168
+ b = length.sub(/^\d+-/,"").to_i
169
+ mean = (f + b) / 2
170
+ [mean.to_f] * count
171
+ else
172
+ [length.to_f] * count
173
+ end
174
+ end
175
+ sorted = array.flatten.sort
176
+ quot = sorted.size / 2
177
+ if !sorted.size.even?
178
+ sorted[quot]
179
+ else
180
+ f = sorted[quot]
181
+ b = sorted[quot - 1]
182
+ (f + b) / 2
183
+ end
184
+ end
185
+
186
+ def sequence_duplication_levels
187
+ node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
188
+ node.first.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" }
189
+ end
190
+
191
+ def total_duplicate_percentage
192
+ node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
193
+ node.first.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f
194
+ end
195
+
196
+ def overrepresented_sequences
197
+ node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }
198
+ node.first.select{|n| n.first != ">>Overrepresented sequences" }
199
+ end
200
+
201
+ def adapter_content
202
+ node = @object.select{|a| a.first.first == ">>Adapter Content" }
203
+ node.first.select{|n| n.first != ">>Adapter Content" }
204
+ end
205
+
206
+ def kmer_content
207
+ node = @object.select{|a| a.first.first == ">>Kmer Content" }
208
+ node.first.select{|n| n.first != ">>Kmer Content" }
209
+ end
210
+
211
+ def summary
212
+ {
213
+ fastqc_version: self.fastqc_version,
204
214
  filename: self.filename,
205
- file_type: self.file_type,
206
- encoding: self.encoding,
207
- total_sequences: self.total_sequences,
208
- filtered_sequences: self.filtered_sequences,
209
- sequence_length: self.sequence_length,
210
- percent_gc: self.percent_gc,
211
- per_base_sequence_quality: self.per_base_sequence_quality,
215
+ file_type: self.file_type,
216
+ encoding: self.encoding,
217
+ total_sequences: self.total_sequences,
218
+ filtered_sequences: self.filtered_sequences,
219
+ sequence_length: self.sequence_length,
220
+ percent_gc: self.percent_gc,
221
+ per_base_sequence_quality: self.per_base_sequence_quality,
212
222
  per_tile_sequence_quality: self.per_tile_sequence_quality,
213
- per_sequnce_quality_scores: self.per_sequence_quality_scores,
214
- per_base_sequence_content: self.per_base_sequence_content,
215
- per_sequence_gc_content: self.per_sequence_gc_content,
216
- per_base_n_content: self.per_base_n_content,
217
- sequence_length_distribution: self.sequence_length_distribution,
218
- total_duplicate_percentage: self.total_duplicate_percentage, sequence_duplication_levels: self.sequence_duplication_levels,
219
- overrepresented_sequences: self.overrepresented_sequences,
220
- kmer_content: self.kmer_content,
221
- min_length: self.min_length,
222
- max_length: self.max_length,
223
- overall_mean_quality_score: self.overall_mean_quality_score,
224
- overall_median_quality_score: self.overall_median_quality_score,
225
- overall_n_content: self.overall_n_content,
226
- mean_sequence_length: self.mean_sequence_length,
227
- median_sequence_length: self.median_sequence_length,
223
+ per_sequnce_quality_scores: self.per_sequence_quality_scores,
224
+ per_base_sequence_content: self.per_base_sequence_content,
225
+ per_sequence_gc_content: self.per_sequence_gc_content,
226
+ per_base_n_content: self.per_base_n_content,
227
+ sequence_length_distribution: self.sequence_length_distribution,
228
+ total_duplicate_percentage: self.total_duplicate_percentage, sequence_duplication_levels: self.sequence_duplication_levels,
229
+ overrepresented_sequences: self.overrepresented_sequences,
230
+ adapter_content: self.adapter_content,
231
+ kmer_content: self.kmer_content,
232
+ min_length: self.min_length,
233
+ max_length: self.max_length,
234
+ overall_mean_quality_score: self.overall_mean_quality_score,
235
+ overall_median_quality_score: self.overall_median_quality_score,
236
+ overall_n_content: self.overall_n_content,
237
+ mean_sequence_length: self.mean_sequence_length,
238
+ median_sequence_length: self.median_sequence_length,
228
239
  }
229
- end
230
- end
231
- end
240
+ end
241
+ end
242
+ end
232
243
  end
@@ -3,17 +3,296 @@ require 'spec_helper'
3
3
  describe Bio::FastQC do
4
4
  context "with an example data" do
5
5
  before do
6
- zipfile = File.join(__dir__, "example_fastqc.zip")
7
- @data = Bio::FastQC::Data.read(zipfile)
6
+ @zipfile = File.join(__dir__, "example_fastqc.zip")
8
7
  end
9
-
10
- it 'extracts data from zip file' do
11
- expect(@data).not_to be_empty
8
+
9
+ describe Bio::FastQC::Data do
10
+ before do
11
+ @data = Bio::FastQC::Data.read(@zipfile)
12
+ end
13
+
14
+ describe '#read' do
15
+ it 'returns parsed data from zipfile' do
16
+ expect(@data).not_to be_empty
17
+ end
18
+ end
12
19
  end
13
-
14
- it 'parses a fastqc data and returns json' do
15
- p = Bio::FastQC::Parser.new(@data)
16
- expect(p.summary).not_to be_empty
20
+
21
+ describe Bio::FastQC::Parser do
22
+ before do
23
+ @data = Bio::FastQC::Data.read(@zipfile)
24
+ @parser = Bio::FastQC::Parser.new(@data)
25
+ end
26
+
27
+ describe '#fastqc_version' do
28
+ it 'returns fastqc version as String and not empty' do
29
+ expect(@parser.fastqc_version).to be_instance_of(String)
30
+ expect(@parser.fastqc_version).not_to be_empty
31
+ end
32
+ end
33
+
34
+ describe '#filename' do
35
+ it 'returns filename as String and not empty' do
36
+ expect(@parser.filename).to be_instance_of(String)
37
+ expect(@parser.filename).not_to be_empty
38
+ end
39
+ end
40
+
41
+ describe '#file_type' do
42
+ it 'returns file type as String and not empty' do
43
+ expect(@parser.file_type).to be_instance_of(String)
44
+ expect(@parser.file_type).not_to be_empty
45
+ end
46
+ end
47
+
48
+ describe '#encoding' do
49
+ it 'returns encoding type as String and not empty' do
50
+ expect(@parser.encoding).to be_instance_of(String)
51
+ expect(@parser.encoding).not_to be_empty
52
+ end
53
+ end
54
+
55
+ describe '#total_sequences' do
56
+ it 'returns total number of sequences as Fixnum' do
57
+ expect(@parser.total_sequences).to be_instance_of(Fixnum)
58
+ end
59
+ end
60
+
61
+ describe '#filtered_sequences' do
62
+ it 'returns number of filtered sequence as Fixnum and not empty' do
63
+ expect(@parser.filtered_sequences).to be_instance_of(Fixnum)
64
+ end
65
+ end
66
+
67
+ describe '#sequence_length' do
68
+ it 'returns length of sequence as String and not empty' do
69
+ expect(@parser.sequence_length).to be_instance_of(String)
70
+ expect(@parser.sequence_length).not_to be_empty
71
+ end
72
+ end
73
+
74
+ describe '#percent_gc' do
75
+ it 'returns percentage of GC content as Fixnum and not empty' do
76
+ expect(@parser.percent_gc).to be_instance_of(Fixnum)
77
+ end
78
+ end
79
+
80
+ describe '#per_base_sequence_quality' do
81
+ before do
82
+ @value = @parser.per_base_sequence_quality
83
+ end
84
+
85
+ it 'returns data frame as Array' do
86
+ expect(@value).to be_instance_of(Array)
87
+ end
88
+
89
+ it 'returns an array with depth 2' do
90
+ expect(@value.depth).to eq(2)
91
+ end
92
+
93
+ it 'returns an array of an array with 7 elements' do
94
+ sizes = @value.map{|a| a.size }.uniq
95
+ expect(sizes).to eq([7])
96
+ end
97
+ end
98
+
99
+ describe '#per_tile_sequence_quality' do
100
+ it 'returns data frame as Array' do
101
+ expect(@parser.per_tile_sequence_quality).to be_instance_of(Array)
102
+ end
103
+
104
+ it 'returns array with depth 2' do
105
+ expect(@parser.per_tile_sequence_quality.depth).to eq(2)
106
+ end
107
+
108
+ it 'returns an array of an array with 3 elements' do
109
+ sizes = @parser.per_tile_sequence_quality.map{|a| a.size }.uniq
110
+ expect(sizes).to eq([3])
111
+ end
112
+ end
113
+
114
+ describe '#per_sequence_quality_scores' do
115
+ it 'returns data frame as Array' do
116
+ expect(@parser.per_sequence_quality_scores).to be_instance_of(Array)
117
+ end
118
+
119
+ it 'returns array with depth 2' do
120
+ expect(@parser.per_sequence_quality_scores.depth).to eq(2)
121
+ end
122
+
123
+ it 'returns an array of an array with 2 elements' do
124
+ sizes = @parser.per_sequence_quality_scores.map{|a| a.size }.uniq
125
+ expect(sizes).to eq([2])
126
+ end
127
+ end
128
+
129
+ describe '#per_base_sequence_content' do
130
+ it 'returns data frame as Array' do
131
+ expect(@parser.per_base_sequence_content).to be_instance_of(Array)
132
+ end
133
+
134
+ it 'returns array with depth 2' do
135
+ expect(@parser.per_base_sequence_content.depth).to eq(2)
136
+ end
137
+
138
+ it 'returns an array of an array with 5 elements' do
139
+ sizes = @parser.per_base_sequence_content.map{|a| a.size }.uniq
140
+ expect(sizes).to eq([5])
141
+ end
142
+ end
143
+
144
+ describe '#per_sequence_gc_content' do
145
+ it 'returns data frame as Array' do
146
+ expect(@parser.per_sequence_gc_content).to be_instance_of(Array)
147
+ end
148
+
149
+ it 'returns array with depth 2' do
150
+ expect(@parser.per_sequence_gc_content.depth).to eq(2)
151
+ end
152
+
153
+ it 'returns an array of an array with 2 elements' do
154
+ sizes = @parser.per_sequence_gc_content.map{|a| a.size }.uniq
155
+ expect(sizes).to eq([2])
156
+ end
157
+ end
158
+
159
+ describe '#per_base_n_content' do
160
+ it 'returns data frame as Array' do
161
+ expect(@parser.per_base_n_content).to be_instance_of(Array)
162
+ end
163
+
164
+ it 'returns array with depth 2' do
165
+ expect(@parser.per_base_n_content.depth).to eq(2)
166
+ end
167
+
168
+ it 'returns an array of an array with 2 elements' do
169
+ sizes = @parser.per_base_n_content.map{|a| a.size }.uniq
170
+ expect(sizes).to eq([2])
171
+ end
172
+ end
173
+
174
+ describe '#sequence_length_distribution' do
175
+ it 'returns data frame as Array' do
176
+ expect(@parser.sequence_length_distribution).to be_instance_of(Array)
177
+ end
178
+
179
+ it 'returns array with depth 2' do
180
+ expect(@parser.sequence_length_distribution.depth).to eq(2)
181
+ end
182
+
183
+ it 'returns an array of an array with 2 elements' do
184
+ sizes = @parser.sequence_length_distribution.map{|a| a.size }.uniq
185
+ expect(sizes).to eq([2])
186
+ end
187
+ end
188
+
189
+ describe '#total_duplicate_percentage' do
190
+ it 'returns duplicate percentage as Float and not empty' do
191
+ expect(@parser.total_duplicate_percentage).to be_instance_of(Float)
192
+ end
193
+ end
194
+
195
+ describe '#sequence_duplication_levels' do
196
+ it 'returns data frame as Array' do
197
+ expect(@parser.sequence_duplication_levels).to be_instance_of(Array)
198
+ end
199
+
200
+ it 'returns array with depth 2' do
201
+ expect(@parser.sequence_duplication_levels.depth).to eq(2)
202
+ end
203
+
204
+ it 'returns an array of an array with 3 elements' do
205
+ sizes = @parser.sequence_duplication_levels.map{|a| a.size }.uniq
206
+ expect(sizes).to eq([3])
207
+ end
208
+ end
209
+
210
+ describe '#overrepresented_sequences' do
211
+ it 'returns data frame as Array' do
212
+ expect(@parser.overrepresented_sequences).to be_instance_of(Array)
213
+ end
214
+
215
+ it 'returns array with depth 2' do
216
+ expect(@parser.overrepresented_sequences.depth).to eq(2)
217
+ end
218
+
219
+ it 'returns an array of an array with 4 elements' do
220
+ sizes = @parser.overrepresented_sequences.map{|a| a.size }.uniq
221
+ expect(sizes).to eq([4])
222
+ end
223
+ end
224
+
225
+ describe '#adapter_content' do
226
+ it 'returns data frame as Array' do
227
+ expect(@parser.adapter_content).to be_instance_of(Array)
228
+ end
229
+
230
+ it 'returns array with depth 2' do
231
+ expect(@parser.adapter_content.depth).to eq(2)
232
+ end
233
+
234
+ it 'returns an array of an array with 5 elements' do
235
+ sizes = @parser.adapter_content.map{|a| a.size }.uniq
236
+ expect(sizes).to eq([5])
237
+ end
238
+ end
239
+
240
+ describe '#kmer_content' do
241
+ it 'returns data frame as Array' do
242
+ expect(@parser.kmer_content).to be_instance_of(Array)
243
+ end
244
+
245
+ it 'returns array with depth 2' do
246
+ expect(@parser.kmer_content.depth).to eq(2)
247
+ end
248
+
249
+ it 'returns an array of an array with 5 elements' do
250
+ sizes = @parser.kmer_content.map{|a| a.size }.uniq
251
+ expect(sizes).to eq([5])
252
+ end
253
+ end
254
+
255
+ describe '#min_length' do
256
+ it 'returns minimum read length as Fixnum and not empty' do
257
+ expect(@parser.min_length).to be_instance_of(Fixnum)
258
+ end
259
+ end
260
+
261
+ describe '#max_length' do
262
+ it 'returns maximum read length as Fixnum and not empty' do
263
+ expect(@parser.max_length).to be_instance_of(Fixnum)
264
+ end
265
+ end
266
+
267
+ describe '#overall_mean_quality_score' do
268
+ it 'returns overall mean quality score as Float and not empty' do
269
+ expect(@parser.overall_mean_quality_score).to be_instance_of(Float)
270
+ end
271
+ end
272
+
273
+ describe '#overall_median_quality_score' do
274
+ it 'returns overall median quality score as Float and not empty' do
275
+ expect(@parser.overall_median_quality_score).to be_instance_of(Float)
276
+ end
277
+ end
278
+
279
+ describe '#overall_n_content' do
280
+ it 'returns overall N content as Float and not empty' do
281
+ expect(@parser.overall_n_content).to be_instance_of(Float)
282
+ end
283
+ end
284
+
285
+ describe '#mean_sequence_length' do
286
+ it 'returns mean sequence length from read length distribution as Float and not empty' do
287
+ expect(@parser.mean_sequence_length).to be_instance_of(Float)
288
+ end
289
+ end
290
+
291
+ describe '#median_sequence_length' do
292
+ it 'returns median sequence length from read length distribution as Float and not empty' do
293
+ expect(@parser.median_sequence_length).to be_instance_of(Float)
294
+ end
295
+ end
17
296
  end
18
297
  end
19
298
  end
@@ -1,2 +1,14 @@
1
1
  $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
2
  require 'bio-fastqc'
3
+
4
+ class Array
5
+ def depth
6
+ map {|element| element.depth + 1 }.max
7
+ end
8
+ end
9
+
10
+ class Object
11
+ def depth
12
+ 0
13
+ end
14
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-fastqc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tazro Inutano Ohta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-06 00:00:00.000000000 Z
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -48,16 +48,16 @@ dependencies:
48
48
  name: bundler
49
49
  requirement: !ruby/object:Gem::Requirement
50
50
  requirements:
51
- - - "~>"
51
+ - - ">="
52
52
  - !ruby/object:Gem::Version
53
- version: '1.10'
53
+ version: 1.8.0
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - "~>"
58
+ - - ">="
59
59
  - !ruby/object:Gem::Version
60
- version: '1.10'
60
+ version: 1.8.0
61
61
  - !ruby/object:Gem::Dependency
62
62
  name: rake
63
63
  requirement: !ruby/object:Gem::Requirement