bio-fastqc 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eadb40fda6d28b9a545f14241ba2faec3b3a6125
4
- data.tar.gz: 22920fae6a3f69315e7ef1d497816b00efb2518f
3
+ metadata.gz: 47531c30911b727ad8b3bdc50e26ca8a3a6467e1
4
+ data.tar.gz: 68bfdb57eac8779b634abcce1bed1cd40986a602
5
5
  SHA512:
6
- metadata.gz: 37fb1d7b6e7124ff9c3678ca01402d9cb68422b5630bb55fd15df4e57c3697acac118e53e5d7b4a753487be693fcfd53c67e62f9d5442a2a0b350ddf1719a670
7
- data.tar.gz: 71ecd9ac350ef2aecdd8048de2f76faeb5371b0c468f781eb71f934ae5a2cd1bbecb8a4a66db8a24c0d1a022bcc74aa06291c5795f651ba6b179b0dc6fda8d7c
6
+ metadata.gz: b5b9b66d01c11669db71c96eb3c7e87c74e7ab45b55605481b6d75b14f5c06b7bb2813dff5d20833068d17bf7246aef663356523b46278621b863675ebc9186e
7
+ data.tar.gz: e81822150a02e8e7634e0a5942fe0806297c5a86211dc2bb5609d2504b47d64e77c4e7122526069037e6af1166857ba1934444fb51aa4e241690f598e9afc051
@@ -1,13 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 1.9.2
4
- - 1.9.3
5
- - jruby-19mode # JRuby in 1.9 mode
6
-
7
- # - rbx-19mode
8
- # - 1.8.7
9
- # - jruby-18mode # JRuby in 1.8 mode
10
- # - rbx-18mode
3
+ - 2.2.1
11
4
 
12
5
  # uncomment this line if your project needs to run something other than `rake`:
13
6
  # script: bundle exec rspec spec
data/Gemfile CHANGED
@@ -9,7 +9,7 @@ gem 'thor', "~> 0.19.1"
9
9
  # Add dependencies to develop your gem here.
10
10
  # Include everything needed to run rake, tests, features, etc.
11
11
  group :development do
12
- gem 'bundler', '~> 1.10'
12
+ gem 'bundler', '>= 1.8.0'
13
13
  gem 'rake', '~> 10.0'
14
14
  gem 'rspec', '~> 3.3'
15
15
  gem 'jeweler', '~> 2.0'
data/README.md CHANGED
@@ -2,7 +2,11 @@
2
2
 
3
3
  [![Build Status](https://secure.travis-ci.org/inutano/bioruby-fastqc.png)](http://travis-ci.org/inutano/bioruby-fastqc)
4
4
 
5
- A ruby parser for [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) data.
5
+ A ruby parser for [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) data.
6
+
7
+ ## Requirements
8
+
9
+ Ruby 2.0 or later
6
10
 
7
11
  ## Installation
8
12
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.2.1
@@ -4,33 +4,33 @@ require 'zip'
4
4
 
5
5
  module Bio
6
6
  module FastQC
7
- class Data
8
- class << self
9
- def read(file)
10
- read_zipfile(file)
11
- rescue Zip::Error
12
- read_flatfile(file)
7
+ class Data
8
+ class << self
9
+ def read(file)
10
+ read_zipfile(file)
11
+ rescue Zip::Error
12
+ read_flatfile(file)
13
13
  rescue Errno::EISDIR
14
14
  read_dir(file)
15
- end
15
+ end
16
16
 
17
- def read_zipfile(file)
18
- Zip::File.open(file) do |zipfile|
19
- zipfile.glob('*/fastqc_data.txt').first.get_input_stream.read
20
- end
21
- end
17
+ def read_zipfile(file)
18
+ Zip::File.open(file) do |zipfile|
19
+ zipfile.glob('*/fastqc_data.txt').first.get_input_stream.read
20
+ end
21
+ end
22
22
 
23
- def read_flatfile(file)
24
- open(file).read
25
- end
23
+ def read_flatfile(file)
24
+ open(file).read
25
+ end
26
26
 
27
- def read_dir(file)
28
- open(File.join(file, "fastqc_data.txt")).read
29
- rescue Errno::ENOENT
30
- puts "FastQC data file fastqc_data.txt not found"
31
- exit
32
- end
27
+ def read_dir(file)
28
+ open(File.join(file, "fastqc_data.txt")).read
29
+ rescue Errno::ENOENT
30
+ puts "FastQC data file fastqc_data.txt not found"
31
+ exit
32
+ end
33
33
  end
34
- end
35
- end
34
+ end
35
+ end
36
36
  end
@@ -2,94 +2,98 @@
2
2
 
3
3
  module Bio
4
4
  module FastQC
5
- class Parser
6
- def initialize(fastqc_data_txt)
7
- @data = fastqc_data_txt
8
- @object = parse(@data)
9
- @base = self.basic_statistics
10
- end
11
-
12
- def parse(data)
13
- modules = data.split(">>END_MODULE\n")
14
- modules.map do |node|
15
- lines = node.split("\n")
16
- rm_header = lines.map do |line|
17
- if line !~ /^\#/ || line =~ /^#Total Duplicate Percentage/
18
- line.split("\t")
19
- end
20
- end
21
- rm_header.compact
22
- end
23
- end
24
-
25
- def basic_statistics
26
- Hash[*@object.select{|a| a.first.first == ">>Basic Statistics" }.flatten]
27
- end
28
-
29
- def filename
30
- @base["Filename"]
31
- end
32
-
33
- def file_type
34
- @base["File type"]
35
- end
36
-
37
- def encoding
38
- @base["Encoding"]
39
- end
40
-
41
- def total_sequences
42
- @base["Total Sequences"].to_i
43
- end
44
-
45
- def filtered_sequences
46
- @base["Filtered Sequences"].to_i
47
- end
48
-
49
- def sequence_length
50
- @base["Sequence length"]
51
- end
52
-
53
- def min_length
54
- l = @base["Sequence length"]
55
- if l =~ /\d-\d/
56
- l.sub(/-\d+$/,"").to_i
57
- else
58
- l.to_i
59
- end
60
- end
61
-
62
- def max_length
63
- l = @base["Sequence length"]
64
- if l =~ /\d-\d/
65
- l.sub(/^\d+-/,"").to_i
66
- else
67
- l.to_i
68
- end
69
- end
70
-
71
- def percent_gc
72
- @base["%GC"].to_i
73
- end
74
-
75
- def per_base_sequence_quality
76
- node = @object.select{|a| a.first.first == ">>Per base sequence quality" }
77
- node.first.select{|n| n.first != ">>Per base sequence quality" }
78
- end
79
-
80
- ## Custom module: overall mean base call quality indicator
81
- def overall_mean_quality_score
82
- per_base = self.per_base_sequence_quality
83
- v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
84
- -10 * Math.log10(v.reduce(:+) / v.size)
85
- end
5
+ class Parser
6
+ def initialize(fastqc_data_txt)
7
+ @data = fastqc_data_txt
8
+ @object = parse(@data)
9
+ @base = self.basic_statistics
10
+ end
11
+
12
+ def parse(data)
13
+ modules = data.split(">>END_MODULE\n")
14
+ modules.map do |node|
15
+ lines = node.split("\n")
16
+ rm_header = lines.map do |line|
17
+ if line !~ /^\#/ || line =~ /^#Total Duplicate Percentage/
18
+ line.split("\t")
19
+ end
20
+ end
21
+ rm_header.compact
22
+ end
23
+ end
24
+
25
+ def fastqc_version
26
+ @data.split("\n").first.split("\t").last
27
+ end
28
+
29
+ def basic_statistics
30
+ Hash[*@object.select{|a| a.first.first == ">>Basic Statistics" }.flatten]
31
+ end
32
+
33
+ def filename
34
+ @base["Filename"]
35
+ end
36
+
37
+ def file_type
38
+ @base["File type"]
39
+ end
40
+
41
+ def encoding
42
+ @base["Encoding"]
43
+ end
44
+
45
+ def total_sequences
46
+ @base["Total Sequences"].to_i
47
+ end
48
+
49
+ def filtered_sequences
50
+ @base["Filtered Sequences"].to_i
51
+ end
52
+
53
+ def sequence_length
54
+ @base["Sequence length"]
55
+ end
56
+
57
+ def min_length
58
+ l = @base["Sequence length"]
59
+ if l =~ /\d-\d/
60
+ l.sub(/-\d+$/,"").to_i
61
+ else
62
+ l.to_i
63
+ end
64
+ end
65
+
66
+ def max_length
67
+ l = @base["Sequence length"]
68
+ if l =~ /\d-\d/
69
+ l.sub(/^\d+-/,"").to_i
70
+ else
71
+ l.to_i
72
+ end
73
+ end
74
+
75
+ def percent_gc
76
+ @base["%GC"].to_i
77
+ end
78
+
79
+ def per_base_sequence_quality
80
+ node = @object.select{|a| a.first.first == ">>Per base sequence quality" }
81
+ node.first.select{|n| n.first != ">>Per base sequence quality" }
82
+ end
83
+
84
+ ## Custom module: overall mean base call quality indicator
85
+ def overall_mean_quality_score
86
+ per_base = self.per_base_sequence_quality
87
+ v = per_base.map{|c| (10**(c[1].to_f/-10)).to_f }
88
+ -10 * Math.log10(v.reduce(:+) / v.size)
89
+ end
86
90
 
87
91
  ## Custom module: overall median base call quality indicator
88
- def overall_median_quality_score
89
- per_base = self.per_base_sequence_quality
92
+ def overall_median_quality_score
93
+ per_base = self.per_base_sequence_quality
90
94
  v = per_base.map{|c| (10**(c[2].to_f/-10)).to_f }
91
- -10 * Math.log10(v.reduce(:+) / v.size)
92
- end
95
+ -10 * Math.log10(v.reduce(:+) / v.size)
96
+ end
93
97
 
94
98
  def per_tile_sequence_quality
95
99
  node = @object.select{|a| a.first.first == ">>Per tile sequence quality" }
@@ -98,135 +102,142 @@ module Bio
98
102
  []
99
103
  end
100
104
 
101
- def per_sequence_quality_scores
102
- node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }
103
- node.first.select{|n| n.first != ">>Per sequence quality scores" }
104
- end
105
-
106
- def per_base_sequence_content
107
- node = @object.select{|a| a.first.first == ">>Per base sequence content" }
108
- node.first.select{|n| n.first != ">>Per base sequence content" }
109
- end
110
-
111
- def per_sequence_gc_content
112
- node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
113
- node.first.select{|n| n.first != ">>Per sequence GC content" }
114
- end
115
-
116
- def per_sequence_gc_content
117
- node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
118
- node.first.select{|n| n.first != ">>Per sequence GC content" }
119
- end
120
-
121
- def per_base_n_content
122
- node = @object.select{|a| a.first.first == ">>Per base N content" }
123
- node.first.select{|n| n.first != ">>Per base N content" }
124
- end
125
-
126
- ## Custom module: overall N content
127
- def overall_n_content
128
- per_base = self.per_base_n_content
129
- v = per_base.map{|c| c[1].to_f }
130
- v.reduce(:+) / v.size
131
- end
132
-
133
- def sequence_length_distribution
134
- node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }
135
- node.first.select{|n| n.first != ">>Sequence Length Distribution" }
136
- end
137
-
138
- ## Custom module: mean sequence length calculated from distribution
139
- def mean_sequence_length
140
- distribution = self.sequence_length_distribution
141
- sum = distribution.map do |length_count|
142
- length = length_count[0]
143
- count = length_count[1].to_f
144
- if length =~ /\d-\d/
145
- f = length.sub(/-\d+$/,"").to_i
146
- b = length.sub(/^\d+-/,"").to_i
147
- mean = (f + b) / 2
148
- mean * count
149
- else
150
- length.to_i * count
151
- end
152
- end
153
- sum.reduce(:+) / self.total_sequences
154
- end
155
-
156
- ## Custom module: median sequence length calculated from distribution
157
- def median_sequence_length
158
- distribution = self.sequence_length_distribution
159
- array = distribution.map do |length_count|
160
- length = length_count[0]
161
- count = length_count[1].to_i
162
- if length =~ /\d-\d/
163
- f = length.sub(/-\d+$/,"").to_i
164
- b = length.sub(/^\d+-/,"").to_i
165
- mean = (f + b) / 2
166
- [mean] * count
167
- else
168
- [length.to_i] * count
169
- end
170
- end
171
- sorted = array.flatten.sort
172
- quot = sorted.size / 2
173
- if !sorted.size.even?
174
- sorted[quot]
175
- else
176
- f = sorted[quot]
177
- b = sorted[quot - 1]
178
- (f + b) / 2
179
- end
180
- end
181
-
182
- def sequence_duplication_levels
183
- node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
184
- node.first.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" }
185
- end
186
-
187
- def total_duplicate_percentage
188
- node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
189
- node.first.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f
190
- end
191
-
192
- def overrepresented_sequences
193
- node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }
194
- node.first.select{|n| n.first != ">>Overrepresented sequences" }
195
- end
196
-
197
- def kmer_content
198
- node = @object.select{|a| a.first.first == ">>Kmer Content" }
199
- node.first.select{|n| n.first != ">>Kmer Content" }
200
- end
201
-
202
- def summary
203
- {
105
+ def per_sequence_quality_scores
106
+ node = @object.select{|a| a.first.first == ">>Per sequence quality scores" }
107
+ node.first.select{|n| n.first != ">>Per sequence quality scores" }
108
+ end
109
+
110
+ def per_base_sequence_content
111
+ node = @object.select{|a| a.first.first == ">>Per base sequence content" }
112
+ node.first.select{|n| n.first != ">>Per base sequence content" }
113
+ end
114
+
115
+ def per_sequence_gc_content
116
+ node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
117
+ node.first.select{|n| n.first != ">>Per sequence GC content" }
118
+ end
119
+
120
+ def per_sequence_gc_content
121
+ node = @object.select{|a| a.first.first == ">>Per sequence GC content" }
122
+ node.first.select{|n| n.first != ">>Per sequence GC content" }
123
+ end
124
+
125
+ def per_base_n_content
126
+ node = @object.select{|a| a.first.first == ">>Per base N content" }
127
+ node.first.select{|n| n.first != ">>Per base N content" }
128
+ end
129
+
130
+ ## Custom module: overall N content
131
+ def overall_n_content
132
+ per_base = self.per_base_n_content
133
+ v = per_base.map{|c| c[1].to_f }
134
+ v.reduce(:+) / v.size
135
+ end
136
+
137
+ def sequence_length_distribution
138
+ node = @object.select{|a| a.first.first == ">>Sequence Length Distribution" }
139
+ node.first.select{|n| n.first != ">>Sequence Length Distribution" }
140
+ end
141
+
142
+ ## Custom module: mean sequence length calculated from distribution
143
+ def mean_sequence_length
144
+ distribution = self.sequence_length_distribution
145
+ sum = distribution.map do |length_count|
146
+ length = length_count[0]
147
+ count = length_count[1].to_f
148
+ if length =~ /\d-\d/
149
+ f = length.sub(/-\d+$/,"").to_i
150
+ b = length.sub(/^\d+-/,"").to_i
151
+ mean = (f + b) / 2
152
+ mean * count
153
+ else
154
+ length.to_i * count
155
+ end
156
+ end
157
+ sum.reduce(:+) / self.total_sequences
158
+ end
159
+
160
+ ## Custom module: median sequence length calculated from distribution
161
+ def median_sequence_length
162
+ distribution = self.sequence_length_distribution
163
+ array = distribution.map do |length_count|
164
+ length = length_count[0]
165
+ count = length_count[1].to_i
166
+ if length =~ /\d-\d/
167
+ f = length.sub(/-\d+$/,"").to_i
168
+ b = length.sub(/^\d+-/,"").to_i
169
+ mean = (f + b) / 2
170
+ [mean.to_f] * count
171
+ else
172
+ [length.to_f] * count
173
+ end
174
+ end
175
+ sorted = array.flatten.sort
176
+ quot = sorted.size / 2
177
+ if !sorted.size.even?
178
+ sorted[quot]
179
+ else
180
+ f = sorted[quot]
181
+ b = sorted[quot - 1]
182
+ (f + b) / 2
183
+ end
184
+ end
185
+
186
+ def sequence_duplication_levels
187
+ node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
188
+ node.first.select{|n| n.first != ">>Sequence Duplication Levels" && n.first != "\#Total Duplicate Percentage" }
189
+ end
190
+
191
+ def total_duplicate_percentage
192
+ node = @object.select{|a| a.first.first == ">>Sequence Duplication Levels" }
193
+ node.first.select{|n| n.first == "\#Total Duplicate Percentage" }.flatten[1].to_f
194
+ end
195
+
196
+ def overrepresented_sequences
197
+ node = @object.select{|a| a.first.first == ">>Overrepresented sequences" }
198
+ node.first.select{|n| n.first != ">>Overrepresented sequences" }
199
+ end
200
+
201
+ def adapter_content
202
+ node = @object.select{|a| a.first.first == ">>Adapter Content" }
203
+ node.first.select{|n| n.first != ">>Adapter Content" }
204
+ end
205
+
206
+ def kmer_content
207
+ node = @object.select{|a| a.first.first == ">>Kmer Content" }
208
+ node.first.select{|n| n.first != ">>Kmer Content" }
209
+ end
210
+
211
+ def summary
212
+ {
213
+ fastqc_version: self.fastqc_version,
204
214
  filename: self.filename,
205
- file_type: self.file_type,
206
- encoding: self.encoding,
207
- total_sequences: self.total_sequences,
208
- filtered_sequences: self.filtered_sequences,
209
- sequence_length: self.sequence_length,
210
- percent_gc: self.percent_gc,
211
- per_base_sequence_quality: self.per_base_sequence_quality,
215
+ file_type: self.file_type,
216
+ encoding: self.encoding,
217
+ total_sequences: self.total_sequences,
218
+ filtered_sequences: self.filtered_sequences,
219
+ sequence_length: self.sequence_length,
220
+ percent_gc: self.percent_gc,
221
+ per_base_sequence_quality: self.per_base_sequence_quality,
212
222
  per_tile_sequence_quality: self.per_tile_sequence_quality,
213
- per_sequnce_quality_scores: self.per_sequence_quality_scores,
214
- per_base_sequence_content: self.per_base_sequence_content,
215
- per_sequence_gc_content: self.per_sequence_gc_content,
216
- per_base_n_content: self.per_base_n_content,
217
- sequence_length_distribution: self.sequence_length_distribution,
218
- total_duplicate_percentage: self.total_duplicate_percentage, sequence_duplication_levels: self.sequence_duplication_levels,
219
- overrepresented_sequences: self.overrepresented_sequences,
220
- kmer_content: self.kmer_content,
221
- min_length: self.min_length,
222
- max_length: self.max_length,
223
- overall_mean_quality_score: self.overall_mean_quality_score,
224
- overall_median_quality_score: self.overall_median_quality_score,
225
- overall_n_content: self.overall_n_content,
226
- mean_sequence_length: self.mean_sequence_length,
227
- median_sequence_length: self.median_sequence_length,
223
+ per_sequnce_quality_scores: self.per_sequence_quality_scores,
224
+ per_base_sequence_content: self.per_base_sequence_content,
225
+ per_sequence_gc_content: self.per_sequence_gc_content,
226
+ per_base_n_content: self.per_base_n_content,
227
+ sequence_length_distribution: self.sequence_length_distribution,
228
+ total_duplicate_percentage: self.total_duplicate_percentage, sequence_duplication_levels: self.sequence_duplication_levels,
229
+ overrepresented_sequences: self.overrepresented_sequences,
230
+ adapter_content: self.adapter_content,
231
+ kmer_content: self.kmer_content,
232
+ min_length: self.min_length,
233
+ max_length: self.max_length,
234
+ overall_mean_quality_score: self.overall_mean_quality_score,
235
+ overall_median_quality_score: self.overall_median_quality_score,
236
+ overall_n_content: self.overall_n_content,
237
+ mean_sequence_length: self.mean_sequence_length,
238
+ median_sequence_length: self.median_sequence_length,
228
239
  }
229
- end
230
- end
231
- end
240
+ end
241
+ end
242
+ end
232
243
  end
@@ -3,17 +3,296 @@ require 'spec_helper'
3
3
  describe Bio::FastQC do
4
4
  context "with an example data" do
5
5
  before do
6
- zipfile = File.join(__dir__, "example_fastqc.zip")
7
- @data = Bio::FastQC::Data.read(zipfile)
6
+ @zipfile = File.join(__dir__, "example_fastqc.zip")
8
7
  end
9
-
10
- it 'extracts data from zip file' do
11
- expect(@data).not_to be_empty
8
+
9
+ describe Bio::FastQC::Data do
10
+ before do
11
+ @data = Bio::FastQC::Data.read(@zipfile)
12
+ end
13
+
14
+ describe '#read' do
15
+ it 'returns parsed data from zipfile' do
16
+ expect(@data).not_to be_empty
17
+ end
18
+ end
12
19
  end
13
-
14
- it 'parses a fastqc data and returns json' do
15
- p = Bio::FastQC::Parser.new(@data)
16
- expect(p.summary).not_to be_empty
20
+
21
+ describe Bio::FastQC::Parser do
22
+ before do
23
+ @data = Bio::FastQC::Data.read(@zipfile)
24
+ @parser = Bio::FastQC::Parser.new(@data)
25
+ end
26
+
27
+ describe '#fastqc_version' do
28
+ it 'returns fastqc version as String and not empty' do
29
+ expect(@parser.fastqc_version).to be_instance_of(String)
30
+ expect(@parser.fastqc_version).not_to be_empty
31
+ end
32
+ end
33
+
34
+ describe '#filename' do
35
+ it 'returns filename as String and not empty' do
36
+ expect(@parser.filename).to be_instance_of(String)
37
+ expect(@parser.filename).not_to be_empty
38
+ end
39
+ end
40
+
41
+ describe '#file_type' do
42
+ it 'returns file type as String and not empty' do
43
+ expect(@parser.file_type).to be_instance_of(String)
44
+ expect(@parser.file_type).not_to be_empty
45
+ end
46
+ end
47
+
48
+ describe '#encoding' do
49
+ it 'returns encoding type as String and not empty' do
50
+ expect(@parser.encoding).to be_instance_of(String)
51
+ expect(@parser.encoding).not_to be_empty
52
+ end
53
+ end
54
+
55
+ describe '#total_sequences' do
56
+ it 'returns total number of sequences as Fixnum' do
57
+ expect(@parser.total_sequences).to be_instance_of(Fixnum)
58
+ end
59
+ end
60
+
61
+ describe '#filtered_sequences' do
62
+ it 'returns number of filtered sequence as Fixnum and not empty' do
63
+ expect(@parser.filtered_sequences).to be_instance_of(Fixnum)
64
+ end
65
+ end
66
+
67
+ describe '#sequence_length' do
68
+ it 'returns length of sequence as String and not empty' do
69
+ expect(@parser.sequence_length).to be_instance_of(String)
70
+ expect(@parser.sequence_length).not_to be_empty
71
+ end
72
+ end
73
+
74
+ describe '#percent_gc' do
75
+ it 'returns percentage of GC content as Fixnum and not empty' do
76
+ expect(@parser.percent_gc).to be_instance_of(Fixnum)
77
+ end
78
+ end
79
+
80
+ describe '#per_base_sequence_quality' do
81
+ before do
82
+ @value = @parser.per_base_sequence_quality
83
+ end
84
+
85
+ it 'returns data frame as Array' do
86
+ expect(@value).to be_instance_of(Array)
87
+ end
88
+
89
+ it 'returns an array with depth 2' do
90
+ expect(@value.depth).to eq(2)
91
+ end
92
+
93
+ it 'returns an array of an array with 7 elements' do
94
+ sizes = @value.map{|a| a.size }.uniq
95
+ expect(sizes).to eq([7])
96
+ end
97
+ end
98
+
99
+ describe '#per_tile_sequence_quality' do
100
+ it 'returns data frame as Array' do
101
+ expect(@parser.per_tile_sequence_quality).to be_instance_of(Array)
102
+ end
103
+
104
+ it 'returns array with depth 2' do
105
+ expect(@parser.per_tile_sequence_quality.depth).to eq(2)
106
+ end
107
+
108
+ it 'returns an array of an array with 3 elements' do
109
+ sizes = @parser.per_tile_sequence_quality.map{|a| a.size }.uniq
110
+ expect(sizes).to eq([3])
111
+ end
112
+ end
113
+
114
+ describe '#per_sequence_quality_scores' do
115
+ it 'returns data frame as Array' do
116
+ expect(@parser.per_sequence_quality_scores).to be_instance_of(Array)
117
+ end
118
+
119
+ it 'returns array with depth 2' do
120
+ expect(@parser.per_sequence_quality_scores.depth).to eq(2)
121
+ end
122
+
123
+ it 'returns an array of an array with 2 elements' do
124
+ sizes = @parser.per_sequence_quality_scores.map{|a| a.size }.uniq
125
+ expect(sizes).to eq([2])
126
+ end
127
+ end
128
+
129
+ describe '#per_base_sequence_content' do
130
+ it 'returns data frame as Array' do
131
+ expect(@parser.per_base_sequence_content).to be_instance_of(Array)
132
+ end
133
+
134
+ it 'returns array with depth 2' do
135
+ expect(@parser.per_base_sequence_content.depth).to eq(2)
136
+ end
137
+
138
+ it 'returns an array of an array with 5 elements' do
139
+ sizes = @parser.per_base_sequence_content.map{|a| a.size }.uniq
140
+ expect(sizes).to eq([5])
141
+ end
142
+ end
143
+
144
+ describe '#per_sequence_gc_content' do
145
+ it 'returns data frame as Array' do
146
+ expect(@parser.per_sequence_gc_content).to be_instance_of(Array)
147
+ end
148
+
149
+ it 'returns array with depth 2' do
150
+ expect(@parser.per_sequence_gc_content.depth).to eq(2)
151
+ end
152
+
153
+ it 'returns an array of an array with 2 elements' do
154
+ sizes = @parser.per_sequence_gc_content.map{|a| a.size }.uniq
155
+ expect(sizes).to eq([2])
156
+ end
157
+ end
158
+
159
+ describe '#per_base_n_content' do
160
+ it 'returns data frame as Array' do
161
+ expect(@parser.per_base_n_content).to be_instance_of(Array)
162
+ end
163
+
164
+ it 'returns array with depth 2' do
165
+ expect(@parser.per_base_n_content.depth).to eq(2)
166
+ end
167
+
168
+ it 'returns an array of an array with 2 elements' do
169
+ sizes = @parser.per_base_n_content.map{|a| a.size }.uniq
170
+ expect(sizes).to eq([2])
171
+ end
172
+ end
173
+
174
+ describe '#sequence_length_distribution' do
175
+ it 'returns data frame as Array' do
176
+ expect(@parser.sequence_length_distribution).to be_instance_of(Array)
177
+ end
178
+
179
+ it 'returns array with depth 2' do
180
+ expect(@parser.sequence_length_distribution.depth).to eq(2)
181
+ end
182
+
183
+ it 'returns an array of an array with 2 elements' do
184
+ sizes = @parser.sequence_length_distribution.map{|a| a.size }.uniq
185
+ expect(sizes).to eq([2])
186
+ end
187
+ end
188
+
189
+ describe '#total_duplicate_percentage' do
190
+ it 'returns duplicate percentage as Float and not empty' do
191
+ expect(@parser.total_duplicate_percentage).to be_instance_of(Float)
192
+ end
193
+ end
194
+
195
+ describe '#sequence_duplication_levels' do
196
+ it 'returns data frame as Array' do
197
+ expect(@parser.sequence_duplication_levels).to be_instance_of(Array)
198
+ end
199
+
200
+ it 'returns array with depth 2' do
201
+ expect(@parser.sequence_duplication_levels.depth).to eq(2)
202
+ end
203
+
204
+ it 'returns an array of an array with 3 elements' do
205
+ sizes = @parser.sequence_duplication_levels.map{|a| a.size }.uniq
206
+ expect(sizes).to eq([3])
207
+ end
208
+ end
209
+
210
+ describe '#overrepresented_sequences' do
211
+ it 'returns data frame as Array' do
212
+ expect(@parser.overrepresented_sequences).to be_instance_of(Array)
213
+ end
214
+
215
+ it 'returns array with depth 2' do
216
+ expect(@parser.overrepresented_sequences.depth).to eq(2)
217
+ end
218
+
219
+ it 'returns an array of an array with 4 elements' do
220
+ sizes = @parser.overrepresented_sequences.map{|a| a.size }.uniq
221
+ expect(sizes).to eq([4])
222
+ end
223
+ end
224
+
225
+ describe '#adapter_content' do
226
+ it 'returns data frame as Array' do
227
+ expect(@parser.adapter_content).to be_instance_of(Array)
228
+ end
229
+
230
+ it 'returns array with depth 2' do
231
+ expect(@parser.adapter_content.depth).to eq(2)
232
+ end
233
+
234
+ it 'returns an array of an array with 5 elements' do
235
+ sizes = @parser.adapter_content.map{|a| a.size }.uniq
236
+ expect(sizes).to eq([5])
237
+ end
238
+ end
239
+
240
+ describe '#kmer_content' do
241
+ it 'returns data frame as Array' do
242
+ expect(@parser.kmer_content).to be_instance_of(Array)
243
+ end
244
+
245
+ it 'returns array with depth 2' do
246
+ expect(@parser.kmer_content.depth).to eq(2)
247
+ end
248
+
249
+ it 'returns an array of an array with 5 elements' do
250
+ sizes = @parser.kmer_content.map{|a| a.size }.uniq
251
+ expect(sizes).to eq([5])
252
+ end
253
+ end
254
+
255
+ describe '#min_length' do
256
+ it 'returns minimum read length as Fixnum and not empty' do
257
+ expect(@parser.min_length).to be_instance_of(Fixnum)
258
+ end
259
+ end
260
+
261
+ describe '#max_length' do
262
+ it 'returns maximum read length as Fixnum and not empty' do
263
+ expect(@parser.max_length).to be_instance_of(Fixnum)
264
+ end
265
+ end
266
+
267
+ describe '#overall_mean_quality_score' do
268
+ it 'returns overall mean quality score as Float and not empty' do
269
+ expect(@parser.overall_mean_quality_score).to be_instance_of(Float)
270
+ end
271
+ end
272
+
273
+ describe '#overall_median_quality_score' do
274
+ it 'returns overall median quality score as Float and not empty' do
275
+ expect(@parser.overall_median_quality_score).to be_instance_of(Float)
276
+ end
277
+ end
278
+
279
+ describe '#overall_n_content' do
280
+ it 'returns overall N content as Float and not empty' do
281
+ expect(@parser.overall_n_content).to be_instance_of(Float)
282
+ end
283
+ end
284
+
285
+ describe '#mean_sequence_length' do
286
+ it 'returns mean sequence length from read length distribution as Float and not empty' do
287
+ expect(@parser.mean_sequence_length).to be_instance_of(Float)
288
+ end
289
+ end
290
+
291
+ describe '#median_sequence_length' do
292
+ it 'returns median sequence length from read length distribution as Float and not empty' do
293
+ expect(@parser.median_sequence_length).to be_instance_of(Float)
294
+ end
295
+ end
17
296
  end
18
297
  end
19
298
  end
@@ -1,2 +1,14 @@
1
1
  $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
2
  require 'bio-fastqc'
3
+
4
+ class Array
5
+ def depth
6
+ map {|element| element.depth + 1 }.max
7
+ end
8
+ end
9
+
10
+ class Object
11
+ def depth
12
+ 0
13
+ end
14
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-fastqc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tazro Inutano Ohta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-06 00:00:00.000000000 Z
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -48,16 +48,16 @@ dependencies:
48
48
  name: bundler
49
49
  requirement: !ruby/object:Gem::Requirement
50
50
  requirements:
51
- - - "~>"
51
+ - - ">="
52
52
  - !ruby/object:Gem::Version
53
- version: '1.10'
53
+ version: 1.8.0
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - "~>"
58
+ - - ">="
59
59
  - !ruby/object:Gem::Version
60
- version: '1.10'
60
+ version: 1.8.0
61
61
  - !ruby/object:Gem::Dependency
62
62
  name: rake
63
63
  requirement: !ruby/object:Gem::Requirement