parse_fasta 1.9.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/README.md +42 -215
  6. data/Rakefile +2 -4
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/lib/parse_fasta/error.rb +39 -0
  10. data/lib/parse_fasta/record.rb +88 -0
  11. data/lib/parse_fasta/seq_file.rb +221 -114
  12. data/lib/parse_fasta/version.rb +2 -2
  13. data/lib/parse_fasta.rb +5 -20
  14. data/spec/parse_fasta/record_spec.rb +115 -0
  15. data/spec/parse_fasta/seq_file_spec.rb +238 -0
  16. data/spec/parse_fasta_spec.rb +25 -0
  17. data/spec/spec_helper.rb +2 -44
  18. data/spec/test_files/cr.fa +1 -0
  19. data/spec/test_files/cr.fa.gz +0 -0
  20. data/spec/test_files/cr.fq +3 -0
  21. data/spec/test_files/cr.fq.gz +0 -0
  22. data/spec/test_files/cr_nl.fa +4 -0
  23. data/spec/test_files/cr_nl.fa.gz +0 -0
  24. data/spec/test_files/cr_nl.fq +8 -0
  25. data/spec/test_files/cr_nl.fq.gz +0 -0
  26. data/spec/test_files/multi_blob.fa.gz +0 -0
  27. data/spec/test_files/multi_blob.fq.gz +0 -0
  28. data/spec/test_files/not_a_seq_file.txt +1 -0
  29. data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
  30. data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
  31. data/spec/test_files/seqs.fa.gz +0 -0
  32. data/spec/test_files/seqs.fq +8 -0
  33. data/spec/test_files/seqs.fq.gz +0 -0
  34. metadata +49 -24
  35. data/lib/parse_fasta/fasta_file.rb +0 -232
  36. data/lib/parse_fasta/fastq_file.rb +0 -160
  37. data/lib/parse_fasta/quality.rb +0 -54
  38. data/lib/parse_fasta/sequence.rb +0 -174
  39. data/spec/lib/fasta_file_spec.rb +0 -212
  40. data/spec/lib/fastq_file_spec.rb +0 -143
  41. data/spec/lib/quality_spec.rb +0 -51
  42. data/spec/lib/seq_file_spec.rb +0 -357
  43. data/spec/lib/sequence_spec.rb +0 -188
  44. data/test_files/benchmark.rb +0 -99
  45. data/test_files/bogus.txt +0 -2
  46. data/test_files/test.fa.gz +0 -0
  47. data/test_files/test.fq +0 -8
  48. data/test_files/test.fq.gz +0 -0
@@ -1,357 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
-
21
- describe SeqFile do
22
-
23
- describe "#to_hash" do
24
- context "when input is a fasta file" do
25
- let(:records) { Helpers::RECORDS_MAP }
26
- let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
27
- let(:fasta) { SeqFile.open(fname) }
28
-
29
- context "with badly catted fasta" do
30
- it "raises ParseFasta::SequenceFormatError" do
31
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
32
-
33
- expect { FastaFile.open(fname).to_hash }.
34
- to raise_error ParseFasta::SequenceFormatError
35
- end
36
- end
37
-
38
- it "reads the records into a hash: header as key and seq as val" do
39
- expect(fasta.to_hash).to eq records
40
- end
41
-
42
- it "passes the values as Sequence objects" do
43
- expect(
44
- fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
45
- ).to eq true
46
- end
47
- end
48
-
49
- context "when input is a fastq file" do
50
- let(:records) {
51
- { "seq1" => { head: "seq1",
52
- seq: "AACCTTGG",
53
- desc: "",
54
- qual: ")#3gTqN8" },
55
- "seq2 apples" => { head: "seq2 apples",
56
- seq: "ACTG",
57
- desc: "seq2 apples",
58
- qual: "*ujM" }
59
- }
60
- }
61
- let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
62
- let(:fastq) { SeqFile.open(fname) }
63
-
64
- it "reads the records into a hash: header as key and seq as val" do
65
- expect(fastq.to_hash).to eq records
66
- end
67
-
68
- it "passes the seqs as Sequence objects" do
69
- expect(
70
- fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
71
- ).to eq true
72
- end
73
-
74
- it "passes the quals as Quality objects" do
75
- expect(
76
- fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
77
- ).to eq true
78
- end
79
- end
80
- end
81
-
82
- context "when input is a fasta file" do
83
- describe "#each_record" do
84
- let(:records) { Helpers::RECORDS }
85
-
86
- let(:f_handle) { SeqFile.open(@fname).each_record { |s| } }
87
-
88
- context "with badly catted fasta" do
89
- it "raises ParseFasta::SequenceFormatError" do
90
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
91
-
92
- expect { FastaFile.open(fname).to_hash }.
93
- to raise_error ParseFasta::SequenceFormatError
94
- end
95
- end
96
-
97
- shared_examples_for "parsing a fasta file" do
98
- it "yields proper header and sequence for each record" do
99
- expect { |b|
100
- SeqFile.open(@fname).each_record(&b)
101
- }.to yield_successive_args(*records)
102
- end
103
-
104
- it "yields the sequence as a Sequence class" do
105
- SeqFile.open(@fname).each_record do |_, seq|
106
- expect(seq).to be_an_instance_of Sequence
107
- end
108
- end
109
- end
110
-
111
- context "with a gzipped file" do
112
- before(:each) do
113
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
114
- end
115
-
116
- it_behaves_like "parsing a fasta file"
117
-
118
- it "closes the GzipReader" do
119
- expect(f_handle).to be_closed
120
- end
121
-
122
- it "returns GzipReader object" do
123
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
124
- end
125
- end
126
-
127
- context "with a non-gzipped file" do
128
- before(:each) do
129
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
130
- end
131
-
132
- it_behaves_like "parsing a fasta file"
133
-
134
- it "doesn't close the File (approx regular file behavior)" do
135
- expect(f_handle).not_to be_closed
136
- end
137
-
138
- it "returns FastaFile object" do
139
- expect(f_handle).to be_a FastaFile
140
- end
141
- end
142
- end
143
- end
144
-
145
- context "when input is a fastq file" do
146
- let(:records) {
147
- [["seq1", "AACCTTGG"],
148
- ["seq2 apples", "ACTG"]] }
149
- let(:f_handle) { SeqFile.open(@fname).each_record { |s| } }
150
-
151
- shared_examples_for "parsing a fastq file" do
152
- it "yields only header & sequence" do
153
- expect { |b|
154
- SeqFile.open(@fname).each_record(&b)
155
- }.to yield_successive_args(records[0], records[1])
156
- end
157
-
158
- it "yields the sequence as a Sequence class" do
159
- SeqFile.open(@fname).each_record do |_, seq, _, _|
160
- expect(seq).to be_an_instance_of Sequence
161
- end
162
- end
163
- end
164
-
165
- context "with a 4 line per record fastq file" do
166
- describe "#each_record" do
167
- context "with a gzipped file" do
168
- before(:each) do
169
- @fname =
170
- "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
171
- end
172
-
173
- it_behaves_like "parsing a fastq file"
174
-
175
- it "closes the GzipReader" do
176
- expect(f_handle).to be_closed
177
- end
178
-
179
- it "returns GzipReader object" do
180
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
181
- end
182
- end
183
-
184
- context "with a non-gzipped file" do
185
- before(:each) do
186
- @fname =
187
- "#{File.dirname(__FILE__)}/../../test_files/test.fq"
188
- end
189
-
190
- it_behaves_like "parsing a fastq file"
191
-
192
- it "doesn't close the SeqFile (approx reg file behav)" do
193
- expect(f_handle).not_to be_closed
194
- end
195
-
196
- it "returns FastqFile object" do
197
- expect(f_handle).to be_a FastqFile
198
- end
199
- end
200
- end
201
- end
202
- end
203
-
204
- context "when input is bogus" do
205
- describe "#each_record" do
206
- it "raises an ArgumentError with message" do
207
- fname = "#{File.dirname(__FILE__)}/../../test_files/bogus.txt"
208
- err_msg = "Input does not look like FASTA or FASTQ"
209
-
210
- expect { SeqFile.open(fname).each_record do |h, s|
211
- puts [h, s].join ' '
212
- end
213
- }.to raise_error(ArgumentError, err_msg)
214
- end
215
- end
216
- end
217
-
218
- #####
219
-
220
- context "when input is a fasta file" do
221
- describe "#each_record_fast" do
222
- let(:records) { Helpers::RECORDS_FAST }
223
-
224
- let(:f_handle) { SeqFile.open(@fname).each_record_fast { |s| } }
225
-
226
- context "with badly catted fasta" do
227
- it "raises ParseFasta::SequenceFormatError" do
228
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
229
-
230
- expect { FastaFile.open(fname).to_hash }.
231
- to raise_error ParseFasta::SequenceFormatError
232
- end
233
- end
234
-
235
- shared_examples_for "parsing a fasta file" do
236
- it "yields proper header and sequence for each record" do
237
- expect { |b|
238
- SeqFile.open(@fname).each_record_fast(&b)
239
- }.to yield_successive_args(*records)
240
- end
241
-
242
- it "yields the sequence as a String class" do
243
- SeqFile.open(@fname).each_record_fast do |_, seq|
244
- expect(seq).to be_an_instance_of String
245
- end
246
- end
247
- end
248
-
249
- context "with a gzipped file" do
250
- before(:each) do
251
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
252
- end
253
-
254
- it_behaves_like "parsing a fasta file"
255
-
256
- it "closes the GzipReader" do
257
- expect(f_handle).to be_closed
258
- end
259
-
260
- it "returns GzipReader object" do
261
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
262
- end
263
- end
264
-
265
- context "with a non-gzipped file" do
266
- before(:each) do
267
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
268
- end
269
-
270
- it_behaves_like "parsing a fasta file"
271
-
272
- it "doesn't close the File (approx regular file behavior)" do
273
- expect(f_handle).not_to be_closed
274
- end
275
-
276
- it "returns FastaFile object" do
277
- expect(f_handle).to be_a FastaFile
278
- end
279
- end
280
- end
281
- end
282
-
283
- context "when input is a fastq file" do
284
- let(:records) {
285
- [["seq1", "AA CC TT GG"],
286
- ["seq2 apples", "ACTG"]] }
287
- let(:f_handle) { SeqFile.open(@fname).each_record_fast { |s| } }
288
-
289
- shared_examples_for "parsing a fastq file" do
290
- it "yields only header & sequence" do
291
- expect { |b|
292
- SeqFile.open(@fname).each_record_fast(&b)
293
- }.to yield_successive_args(records[0], records[1])
294
- end
295
-
296
- it "yields the sequence as a String class" do
297
- SeqFile.open(@fname).each_record_fast do |_, seq, _, _|
298
- expect(seq).to be_an_instance_of String
299
- end
300
- end
301
- end
302
-
303
- context "with a 4 line per record fastq file" do
304
- describe "#each_record_fast" do
305
- context "with a gzipped file" do
306
- before(:each) do
307
- @fname =
308
- "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
309
- end
310
-
311
- it_behaves_like "parsing a fastq file"
312
-
313
- it "closes the GzipReader" do
314
- expect(f_handle).to be_closed
315
- end
316
-
317
- it "returns GzipReader object" do
318
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
319
- end
320
- end
321
-
322
- context "with a non-gzipped file" do
323
- before(:each) do
324
- @fname =
325
- "#{File.dirname(__FILE__)}/../../test_files/test.fq"
326
- end
327
-
328
- it_behaves_like "parsing a fastq file"
329
-
330
- it "doesn't close the SeqFile (approx reg file behav)" do
331
- expect(f_handle).not_to be_closed
332
- end
333
-
334
- it "returns FastqFile object" do
335
- expect(f_handle).to be_a FastqFile
336
- end
337
- end
338
- end
339
- end
340
- end
341
-
342
- context "when input is bogus" do
343
- describe "#each_record_fast" do
344
- it "raises an ArgumentError with message" do
345
- fname = "#{File.dirname(__FILE__)}/../../test_files/bogus.txt"
346
- err_msg = "Input does not look like FASTA or FASTQ"
347
-
348
- expect { SeqFile.open(fname).each_record_fast do |h, s|
349
- puts [h, s].join ' '
350
- end
351
- }.to raise_error(ArgumentError, err_msg)
352
- end
353
- end
354
- end
355
-
356
-
357
- end
@@ -1,188 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
- require 'bio'
21
-
22
- describe Sequence do
23
-
24
- # it "has AmbiguousSequenceError" do
25
- # expect(Sequence::AmbiguousSequenceError).not_to be nil
26
- # end
27
-
28
- it "inherits from String" do
29
- expect(Sequence.new('ACTG')).to be_a String
30
- end
31
-
32
- describe "::new" do
33
- it "removes any spaces in the sequence" do
34
- s = "ACT ACT ACT GCT "
35
- s_no_spaces = "ACTACTACTGCT"
36
- expect(Sequence.new(s)).to eq s_no_spaces
37
- end
38
-
39
- context "when sequence has a '>' in it" do
40
- it "raises SequenceFormatError" do
41
- s = "actg>sequence 3"
42
- expect { Sequence.new(s) }.
43
- to raise_error ParseFasta::SequenceFormatError
44
- end
45
- end
46
- end
47
-
48
- describe "#gc" do
49
- it "gives the same answer as BioRuby" do
50
- s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
51
- bioruby_gc = Bio::Sequence::NA.new(s).gc_content
52
- expect(Sequence.new(s).gc).to eq bioruby_gc
53
- end
54
-
55
- context "when sequence isn't empty" do
56
- it "calculates gc" do
57
- s = Sequence.new('ActGnu')
58
- expect(s.gc).to eq(2 / 5.to_f)
59
- end
60
- end
61
-
62
- context "when sequence is empty" do
63
- it "returns 0" do
64
- s = Sequence.new('')
65
- expect(s.gc).to eq 0
66
- end
67
- end
68
-
69
- context "there are no A, C, T, G or U (ie only N)" do
70
- it "returns 0" do
71
- s = Sequence.new('NNNNNnn')
72
- expect(s.gc).to eq 0
73
- end
74
- end
75
- end
76
-
77
- describe "#base_counts" do
78
- context "for a DNA sequence with default or falsy argument" do
79
- it "returns a map of A, C, T, and G counts" do
80
- s = Sequence.new('ACTGactg')
81
- expect(s.base_counts).to eq({ a: 2, c: 2, t: 2, g: 2 })
82
- end
83
- end
84
-
85
- context "for a DNA sequence with truthy argument" do
86
- it "returns a map of A, C, T, G and N counts" do
87
- s = Sequence.new('ACTGNactgn')
88
- expect(s.base_counts(1)).to eq({ a: 2, c: 2, t: 2, g: 2, n: 2 })
89
- end
90
- end
91
-
92
- context "for an RNA sequence with falsy or default argument" do
93
- it "returns a map of A, C, U, G counts" do
94
- s = Sequence.new('ACUGacug')
95
- expect(s.base_counts).to eq({ a: 2, c: 2, u: 2, g: 2 })
96
- end
97
- end
98
-
99
- context "for an RNA sequence with truthy argument" do
100
- it "returns a map of A, C, U, G and N counts" do
101
- s = Sequence.new('ACUGNacugn')
102
- expect(s.base_counts(1)).to eq({ a: 2, c: 2, u: 2, g: 2, n: 2 })
103
- end
104
- end
105
-
106
- context "for a sequence with both U and T present" do
107
- s = Sequence.new('AaCcTtGgNnUu')
108
- err_message = 'ERROR: A sequence contains both T and U'
109
-
110
- it "warns the user about having both U and T present" do
111
- expect(s).to receive(:warn).with(err_message)
112
- s.base_counts
113
- end
114
-
115
- it "returns a map that counts both U's and T's" do
116
- expect(s.base_counts).to eq({ a: 2, c: 2, t: 2, u: 2, g: 2 })
117
- end
118
-
119
- it "returns a map with T, U and N if truthy argument given" do
120
- base_counts = { a: 2, c: 2, t: 2, u: 2, g: 2, n: 2 }
121
- expect(s.base_counts(1)).to eq(base_counts)
122
- end
123
- end
124
- end
125
-
126
- describe "#base_frequencies" do
127
- context "with falsy argument" do
128
- it "doesn't count ambiguous bases in total bases" do
129
- s = Sequence.new('ACTTn')
130
- base_freqs = { a: 0.25, c: 0.25, t: 0.5, g: 0.0 }
131
- expect(s.base_frequencies).to eq(base_freqs)
132
- end
133
- end
134
-
135
- context "when counting ambiguous bases" do
136
- it "does count ambiguous bases in total bases" do
137
- s = Sequence.new('ACTTn')
138
- base_freqs = { a: 0.2, c: 0.2, t: 0.4, g: 0.0, n: 0.2 }
139
- expect(s.base_frequencies(1)).to eq(base_freqs)
140
- end
141
- end
142
- end
143
-
144
- describe "#rev_comp" do
145
- # it "raises error if both T and U are present" do
146
- # s = Sequence.new("actGU")
147
- # err = Sequence::AmbiguousSequenceError
148
- # msg = "Sequence is ambiguous -- both T and U present"
149
- # expect { s.rev_comp }.to raise_error(err, msg)
150
- # end
151
-
152
- # it "warns if non iupac characters are present" do
153
- # s = Sequence.new("--..9284ldkjfalsjf")
154
- # msg = "WARNING: Sequence contains non IUPAC characters"
155
- # expect(s).to receive(:warn).with(msg)
156
- # s.rev_comp
157
- # end
158
- it "returns a reverse complement of the Sequence" do
159
- s = Sequence.new("gARKbdctymvhu").rev_comp
160
- expect(s).to eq "adbkraghvMYTc"
161
-
162
- s = Sequence.new("ctyMVhgarKBda").rev_comp
163
- expect(s).to eq "thVMytcdBKrag"
164
- end
165
-
166
- it "leaves non-IUPAC characters alone" do
167
- s = Sequence.new("cccc--CCCcccga").rev_comp
168
- expect(s).to eq "tcgggGGG--gggg"
169
- end
170
-
171
- it "returns a Sequence" do
172
- s = Sequence.new("cccc--CCCcccga")
173
- expect(s.rev_comp).to be_an_instance_of(Sequence)
174
- end
175
-
176
- it "gives back original sequence when called in succession" do
177
- s = Sequence.new("cccc--CCCcccga")
178
- expect(s.rev_comp.rev_comp).to eq s
179
- end
180
-
181
- context "with an empty sequence" do
182
- it "returns an empty sequence" do
183
- s = Sequence.new("")
184
- expect(s.rev_comp).to be_empty
185
- end
186
- end
187
- end
188
- end
@@ -1,99 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # Copyright 2014, 2015 Ryan Moore
4
- # Contact: moorer@udel.edu
5
-
6
- # This file is part of parse_fasta.
7
-
8
- # parse_fasta is free software: you can redistribute it and/or modify
9
- # it under the terms of the GNU General Public License as published by
10
- # the Free Software Foundation, either version 3 of the License, or
11
- # (at your option) any later version.
12
-
13
- # parse_fasta is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
- # GNU General Public License for more details.
17
-
18
- # You should have received a copy of the GNU General Public License
19
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
20
-
21
- require 'parse_fasta'
22
- require 'bio'
23
- require 'benchmark'
24
-
25
- def this_parse_fasta fname
26
- FastaFile.open(fname, 'r').each_record do |header, sequence|
27
- [header, sequence.length].join("\t")
28
- end
29
- end
30
-
31
- def this_parse_fasta_fast fname
32
- FastaFile.open(fname, 'r').each_record_fast do |header, sequence|
33
- [header, sequence.length].join("\t")
34
- end
35
- end
36
-
37
- def bioruby_parse_fasta fname
38
- Bio::FastaFormat.open(fname).each do |entry|
39
- [entry.definition, entry.seq.length].join("\t")
40
- end
41
- end
42
-
43
- Benchmark.bmbm do |x|
44
- x.report('parse_fasta') { this_parse_fasta(ARGV.first) }
45
- x.report('parse_fasta fast') { this_parse_fasta_fast(ARGV.first) }
46
- x.report('bioruby') { bioruby_parse_fasta(ARGV.first) }
47
- end
48
-
49
- ####
50
-
51
- def this_gc(str)
52
- Sequence.new(str).gc
53
- end
54
-
55
- def bioruby_gc(str)
56
- Bio::Sequence::NA.new(str).gc_content
57
- end
58
-
59
- # make a random sequence of given length
60
- def make_seq(num)
61
- num.times.reduce('') { |str, n| str << %w[A a C c T t G g N n].sample }
62
- end
63
-
64
- # s1 = make_seq(2000000)
65
- # s2 = make_seq(4000000)
66
- # s3 = make_seq(8000000)
67
-
68
- # Benchmark.bmbm do |x|
69
- # x.report('this_gc 1') { this_gc(s1) }
70
- # x.report('bioruby_gc 1') { bioruby_gc(s1) }
71
-
72
- # x.report('this_gc 2') { this_gc(s2) }
73
- # x.report('bioruby_gc 2') { bioruby_gc(s2) }
74
-
75
- # x.report('this_gc 3') { this_gc(s3) }
76
- # x.report('bioruby_gc 3') { bioruby_gc(s3) }
77
- # end
78
-
79
- # fastq = ARGV.first
80
-
81
- def bioruby_fastq(fastq)
82
- Bio::FlatFile.open(Bio::Fastq, fastq) do |fq|
83
- fq.each do |entry|
84
- [entry.definition, entry.seq.length].join("\t")
85
- end
86
- end
87
- end
88
-
89
- def this_fastq(fastq)
90
- FastqFile.open(fastq).each_record do |head, seq, desc, qual|
91
- [head, seq.length].join("\t")
92
- end
93
- end
94
-
95
- # file is 4 million illumina reads (16,000,000 lines) 1.4gb
96
- # Benchmark.bmbm do |x|
97
- # x.report('this_fastq') { this_fastq(ARGV.first) }
98
- # x.report('bioruby_fastq') { bioruby_fastq(ARGV.first) }
99
- # end
data/test_files/bogus.txt DELETED
@@ -1,2 +0,0 @@
1
- this header is missing the '>'
2
- ACTGACTGATCGATCGTAGCTACGTAGCTACG
Binary file
data/test_files/test.fq DELETED
@@ -1,8 +0,0 @@
1
- @seq1
2
- AA CC TT GG
3
- +
4
- )# 3g Tq N8
5
- @seq2 apples
6
- ACTG
7
- +seq2 apples
8
- *ujM
Binary file