parse_fasta 1.9.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/README.md +42 -215
  6. data/Rakefile +2 -4
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/lib/parse_fasta/error.rb +39 -0
  10. data/lib/parse_fasta/record.rb +88 -0
  11. data/lib/parse_fasta/seq_file.rb +221 -114
  12. data/lib/parse_fasta/version.rb +2 -2
  13. data/lib/parse_fasta.rb +5 -20
  14. data/spec/parse_fasta/record_spec.rb +115 -0
  15. data/spec/parse_fasta/seq_file_spec.rb +238 -0
  16. data/spec/parse_fasta_spec.rb +25 -0
  17. data/spec/spec_helper.rb +2 -44
  18. data/spec/test_files/cr.fa +1 -0
  19. data/spec/test_files/cr.fa.gz +0 -0
  20. data/spec/test_files/cr.fq +3 -0
  21. data/spec/test_files/cr.fq.gz +0 -0
  22. data/spec/test_files/cr_nl.fa +4 -0
  23. data/spec/test_files/cr_nl.fa.gz +0 -0
  24. data/spec/test_files/cr_nl.fq +8 -0
  25. data/spec/test_files/cr_nl.fq.gz +0 -0
  26. data/spec/test_files/multi_blob.fa.gz +0 -0
  27. data/spec/test_files/multi_blob.fq.gz +0 -0
  28. data/spec/test_files/not_a_seq_file.txt +1 -0
  29. data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
  30. data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
  31. data/spec/test_files/seqs.fa.gz +0 -0
  32. data/spec/test_files/seqs.fq +8 -0
  33. data/spec/test_files/seqs.fq.gz +0 -0
  34. metadata +49 -24
  35. data/lib/parse_fasta/fasta_file.rb +0 -232
  36. data/lib/parse_fasta/fastq_file.rb +0 -160
  37. data/lib/parse_fasta/quality.rb +0 -54
  38. data/lib/parse_fasta/sequence.rb +0 -174
  39. data/spec/lib/fasta_file_spec.rb +0 -212
  40. data/spec/lib/fastq_file_spec.rb +0 -143
  41. data/spec/lib/quality_spec.rb +0 -51
  42. data/spec/lib/seq_file_spec.rb +0 -357
  43. data/spec/lib/sequence_spec.rb +0 -188
  44. data/test_files/benchmark.rb +0 -99
  45. data/test_files/bogus.txt +0 -2
  46. data/test_files/test.fa.gz +0 -0
  47. data/test_files/test.fq +0 -8
  48. data/test_files/test.fq.gz +0 -0
@@ -1,357 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
-
21
- describe SeqFile do
22
-
23
- describe "#to_hash" do
24
- context "when input is a fasta file" do
25
- let(:records) { Helpers::RECORDS_MAP }
26
- let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
27
- let(:fasta) { SeqFile.open(fname) }
28
-
29
- context "with badly catted fasta" do
30
- it "raises ParseFasta::SequenceFormatError" do
31
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
32
-
33
- expect { FastaFile.open(fname).to_hash }.
34
- to raise_error ParseFasta::SequenceFormatError
35
- end
36
- end
37
-
38
- it "reads the records into a hash: header as key and seq as val" do
39
- expect(fasta.to_hash).to eq records
40
- end
41
-
42
- it "passes the values as Sequence objects" do
43
- expect(
44
- fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
45
- ).to eq true
46
- end
47
- end
48
-
49
- context "when input is a fastq file" do
50
- let(:records) {
51
- { "seq1" => { head: "seq1",
52
- seq: "AACCTTGG",
53
- desc: "",
54
- qual: ")#3gTqN8" },
55
- "seq2 apples" => { head: "seq2 apples",
56
- seq: "ACTG",
57
- desc: "seq2 apples",
58
- qual: "*ujM" }
59
- }
60
- }
61
- let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
62
- let(:fastq) { SeqFile.open(fname) }
63
-
64
- it "reads the records into a hash: header as key and seq as val" do
65
- expect(fastq.to_hash).to eq records
66
- end
67
-
68
- it "passes the seqs as Sequence objects" do
69
- expect(
70
- fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
71
- ).to eq true
72
- end
73
-
74
- it "passes the quals as Quality objects" do
75
- expect(
76
- fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
77
- ).to eq true
78
- end
79
- end
80
- end
81
-
82
- context "when input is a fasta file" do
83
- describe "#each_record" do
84
- let(:records) { Helpers::RECORDS }
85
-
86
- let(:f_handle) { SeqFile.open(@fname).each_record { |s| } }
87
-
88
- context "with badly catted fasta" do
89
- it "raises ParseFasta::SequenceFormatError" do
90
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
91
-
92
- expect { FastaFile.open(fname).to_hash }.
93
- to raise_error ParseFasta::SequenceFormatError
94
- end
95
- end
96
-
97
- shared_examples_for "parsing a fasta file" do
98
- it "yields proper header and sequence for each record" do
99
- expect { |b|
100
- SeqFile.open(@fname).each_record(&b)
101
- }.to yield_successive_args(*records)
102
- end
103
-
104
- it "yields the sequence as a Sequence class" do
105
- SeqFile.open(@fname).each_record do |_, seq|
106
- expect(seq).to be_an_instance_of Sequence
107
- end
108
- end
109
- end
110
-
111
- context "with a gzipped file" do
112
- before(:each) do
113
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
114
- end
115
-
116
- it_behaves_like "parsing a fasta file"
117
-
118
- it "closes the GzipReader" do
119
- expect(f_handle).to be_closed
120
- end
121
-
122
- it "returns GzipReader object" do
123
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
124
- end
125
- end
126
-
127
- context "with a non-gzipped file" do
128
- before(:each) do
129
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
130
- end
131
-
132
- it_behaves_like "parsing a fasta file"
133
-
134
- it "doesn't close the File (approx regular file behavior)" do
135
- expect(f_handle).not_to be_closed
136
- end
137
-
138
- it "returns FastaFile object" do
139
- expect(f_handle).to be_a FastaFile
140
- end
141
- end
142
- end
143
- end
144
-
145
- context "when input is a fastq file" do
146
- let(:records) {
147
- [["seq1", "AACCTTGG"],
148
- ["seq2 apples", "ACTG"]] }
149
- let(:f_handle) { SeqFile.open(@fname).each_record { |s| } }
150
-
151
- shared_examples_for "parsing a fastq file" do
152
- it "yields only header & sequence" do
153
- expect { |b|
154
- SeqFile.open(@fname).each_record(&b)
155
- }.to yield_successive_args(records[0], records[1])
156
- end
157
-
158
- it "yields the sequence as a Sequence class" do
159
- SeqFile.open(@fname).each_record do |_, seq, _, _|
160
- expect(seq).to be_an_instance_of Sequence
161
- end
162
- end
163
- end
164
-
165
- context "with a 4 line per record fastq file" do
166
- describe "#each_record" do
167
- context "with a gzipped file" do
168
- before(:each) do
169
- @fname =
170
- "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
171
- end
172
-
173
- it_behaves_like "parsing a fastq file"
174
-
175
- it "closes the GzipReader" do
176
- expect(f_handle).to be_closed
177
- end
178
-
179
- it "returns GzipReader object" do
180
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
181
- end
182
- end
183
-
184
- context "with a non-gzipped file" do
185
- before(:each) do
186
- @fname =
187
- "#{File.dirname(__FILE__)}/../../test_files/test.fq"
188
- end
189
-
190
- it_behaves_like "parsing a fastq file"
191
-
192
- it "doesn't close the SeqFile (approx reg file behav)" do
193
- expect(f_handle).not_to be_closed
194
- end
195
-
196
- it "returns FastqFile object" do
197
- expect(f_handle).to be_a FastqFile
198
- end
199
- end
200
- end
201
- end
202
- end
203
-
204
- context "when input is bogus" do
205
- describe "#each_record" do
206
- it "raises an ArgumentError with message" do
207
- fname = "#{File.dirname(__FILE__)}/../../test_files/bogus.txt"
208
- err_msg = "Input does not look like FASTA or FASTQ"
209
-
210
- expect { SeqFile.open(fname).each_record do |h, s|
211
- puts [h, s].join ' '
212
- end
213
- }.to raise_error(ArgumentError, err_msg)
214
- end
215
- end
216
- end
217
-
218
- #####
219
-
220
- context "when input is a fasta file" do
221
- describe "#each_record_fast" do
222
- let(:records) { Helpers::RECORDS_FAST }
223
-
224
- let(:f_handle) { SeqFile.open(@fname).each_record_fast { |s| } }
225
-
226
- context "with badly catted fasta" do
227
- it "raises ParseFasta::SequenceFormatError" do
228
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
229
-
230
- expect { FastaFile.open(fname).to_hash }.
231
- to raise_error ParseFasta::SequenceFormatError
232
- end
233
- end
234
-
235
- shared_examples_for "parsing a fasta file" do
236
- it "yields proper header and sequence for each record" do
237
- expect { |b|
238
- SeqFile.open(@fname).each_record_fast(&b)
239
- }.to yield_successive_args(*records)
240
- end
241
-
242
- it "yields the sequence as a String class" do
243
- SeqFile.open(@fname).each_record_fast do |_, seq|
244
- expect(seq).to be_an_instance_of String
245
- end
246
- end
247
- end
248
-
249
- context "with a gzipped file" do
250
- before(:each) do
251
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
252
- end
253
-
254
- it_behaves_like "parsing a fasta file"
255
-
256
- it "closes the GzipReader" do
257
- expect(f_handle).to be_closed
258
- end
259
-
260
- it "returns GzipReader object" do
261
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
262
- end
263
- end
264
-
265
- context "with a non-gzipped file" do
266
- before(:each) do
267
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
268
- end
269
-
270
- it_behaves_like "parsing a fasta file"
271
-
272
- it "doesn't close the File (approx regular file behavior)" do
273
- expect(f_handle).not_to be_closed
274
- end
275
-
276
- it "returns FastaFile object" do
277
- expect(f_handle).to be_a FastaFile
278
- end
279
- end
280
- end
281
- end
282
-
283
- context "when input is a fastq file" do
284
- let(:records) {
285
- [["seq1", "AA CC TT GG"],
286
- ["seq2 apples", "ACTG"]] }
287
- let(:f_handle) { SeqFile.open(@fname).each_record_fast { |s| } }
288
-
289
- shared_examples_for "parsing a fastq file" do
290
- it "yields only header & sequence" do
291
- expect { |b|
292
- SeqFile.open(@fname).each_record_fast(&b)
293
- }.to yield_successive_args(records[0], records[1])
294
- end
295
-
296
- it "yields the sequence as a String class" do
297
- SeqFile.open(@fname).each_record_fast do |_, seq, _, _|
298
- expect(seq).to be_an_instance_of String
299
- end
300
- end
301
- end
302
-
303
- context "with a 4 line per record fastq file" do
304
- describe "#each_record_fast" do
305
- context "with a gzipped file" do
306
- before(:each) do
307
- @fname =
308
- "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
309
- end
310
-
311
- it_behaves_like "parsing a fastq file"
312
-
313
- it "closes the GzipReader" do
314
- expect(f_handle).to be_closed
315
- end
316
-
317
- it "returns GzipReader object" do
318
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
319
- end
320
- end
321
-
322
- context "with a non-gzipped file" do
323
- before(:each) do
324
- @fname =
325
- "#{File.dirname(__FILE__)}/../../test_files/test.fq"
326
- end
327
-
328
- it_behaves_like "parsing a fastq file"
329
-
330
- it "doesn't close the SeqFile (approx reg file behav)" do
331
- expect(f_handle).not_to be_closed
332
- end
333
-
334
- it "returns FastqFile object" do
335
- expect(f_handle).to be_a FastqFile
336
- end
337
- end
338
- end
339
- end
340
- end
341
-
342
- context "when input is bogus" do
343
- describe "#each_record_fast" do
344
- it "raises an ArgumentError with message" do
345
- fname = "#{File.dirname(__FILE__)}/../../test_files/bogus.txt"
346
- err_msg = "Input does not look like FASTA or FASTQ"
347
-
348
- expect { SeqFile.open(fname).each_record_fast do |h, s|
349
- puts [h, s].join ' '
350
- end
351
- }.to raise_error(ArgumentError, err_msg)
352
- end
353
- end
354
- end
355
-
356
-
357
- end
@@ -1,188 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
- require 'bio'
21
-
22
- describe Sequence do
23
-
24
- # it "has AmbiguousSequenceError" do
25
- # expect(Sequence::AmbiguousSequenceError).not_to be nil
26
- # end
27
-
28
- it "inherits from String" do
29
- expect(Sequence.new('ACTG')).to be_a String
30
- end
31
-
32
- describe "::new" do
33
- it "removes any spaces in the sequence" do
34
- s = "ACT ACT ACT GCT "
35
- s_no_spaces = "ACTACTACTGCT"
36
- expect(Sequence.new(s)).to eq s_no_spaces
37
- end
38
-
39
- context "when sequence has a '>' in it" do
40
- it "raises SequenceFormatError" do
41
- s = "actg>sequence 3"
42
- expect { Sequence.new(s) }.
43
- to raise_error ParseFasta::SequenceFormatError
44
- end
45
- end
46
- end
47
-
48
- describe "#gc" do
49
- it "gives the same answer as BioRuby" do
50
- s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
51
- bioruby_gc = Bio::Sequence::NA.new(s).gc_content
52
- expect(Sequence.new(s).gc).to eq bioruby_gc
53
- end
54
-
55
- context "when sequence isn't empty" do
56
- it "calculates gc" do
57
- s = Sequence.new('ActGnu')
58
- expect(s.gc).to eq(2 / 5.to_f)
59
- end
60
- end
61
-
62
- context "when sequence is empty" do
63
- it "returns 0" do
64
- s = Sequence.new('')
65
- expect(s.gc).to eq 0
66
- end
67
- end
68
-
69
- context "there are no A, C, T, G or U (ie only N)" do
70
- it "returns 0" do
71
- s = Sequence.new('NNNNNnn')
72
- expect(s.gc).to eq 0
73
- end
74
- end
75
- end
76
-
77
- describe "#base_counts" do
78
- context "for a DNA sequence with default or falsy argument" do
79
- it "returns a map of A, C, T, and G counts" do
80
- s = Sequence.new('ACTGactg')
81
- expect(s.base_counts).to eq({ a: 2, c: 2, t: 2, g: 2 })
82
- end
83
- end
84
-
85
- context "for a DNA sequence with truthy argument" do
86
- it "returns a map of A, C, T, G and N counts" do
87
- s = Sequence.new('ACTGNactgn')
88
- expect(s.base_counts(1)).to eq({ a: 2, c: 2, t: 2, g: 2, n: 2 })
89
- end
90
- end
91
-
92
- context "for an RNA sequence with falsy or default argument" do
93
- it "returns a map of A, C, U, G counts" do
94
- s = Sequence.new('ACUGacug')
95
- expect(s.base_counts).to eq({ a: 2, c: 2, u: 2, g: 2 })
96
- end
97
- end
98
-
99
- context "for an RNA sequence with truthy argument" do
100
- it "returns a map of A, C, U, G and N counts" do
101
- s = Sequence.new('ACUGNacugn')
102
- expect(s.base_counts(1)).to eq({ a: 2, c: 2, u: 2, g: 2, n: 2 })
103
- end
104
- end
105
-
106
- context "for a sequence with both U and T present" do
107
- s = Sequence.new('AaCcTtGgNnUu')
108
- err_message = 'ERROR: A sequence contains both T and U'
109
-
110
- it "warns the user about having both U and T present" do
111
- expect(s).to receive(:warn).with(err_message)
112
- s.base_counts
113
- end
114
-
115
- it "returns a map that counts both U's and T's" do
116
- expect(s.base_counts).to eq({ a: 2, c: 2, t: 2, u: 2, g: 2 })
117
- end
118
-
119
- it "returns a map with T, U and N if truthy argument given" do
120
- base_counts = { a: 2, c: 2, t: 2, u: 2, g: 2, n: 2 }
121
- expect(s.base_counts(1)).to eq(base_counts)
122
- end
123
- end
124
- end
125
-
126
- describe "#base_frequencies" do
127
- context "with falsy argument" do
128
- it "doesn't count ambiguous bases in total bases" do
129
- s = Sequence.new('ACTTn')
130
- base_freqs = { a: 0.25, c: 0.25, t: 0.5, g: 0.0 }
131
- expect(s.base_frequencies).to eq(base_freqs)
132
- end
133
- end
134
-
135
- context "when counting ambiguous bases" do
136
- it "does count ambiguous bases in total bases" do
137
- s = Sequence.new('ACTTn')
138
- base_freqs = { a: 0.2, c: 0.2, t: 0.4, g: 0.0, n: 0.2 }
139
- expect(s.base_frequencies(1)).to eq(base_freqs)
140
- end
141
- end
142
- end
143
-
144
- describe "#rev_comp" do
145
- # it "raises error if both T and U are present" do
146
- # s = Sequence.new("actGU")
147
- # err = Sequence::AmbiguousSequenceError
148
- # msg = "Sequence is ambiguous -- both T and U present"
149
- # expect { s.rev_comp }.to raise_error(err, msg)
150
- # end
151
-
152
- # it "warns if non iupac characters are present" do
153
- # s = Sequence.new("--..9284ldkjfalsjf")
154
- # msg = "WARNING: Sequence contains non IUPAC characters"
155
- # expect(s).to receive(:warn).with(msg)
156
- # s.rev_comp
157
- # end
158
- it "returns a reverse complement of the Sequence" do
159
- s = Sequence.new("gARKbdctymvhu").rev_comp
160
- expect(s).to eq "adbkraghvMYTc"
161
-
162
- s = Sequence.new("ctyMVhgarKBda").rev_comp
163
- expect(s).to eq "thVMytcdBKrag"
164
- end
165
-
166
- it "leaves non-IUPAC characters alone" do
167
- s = Sequence.new("cccc--CCCcccga").rev_comp
168
- expect(s).to eq "tcgggGGG--gggg"
169
- end
170
-
171
- it "returns a Sequence" do
172
- s = Sequence.new("cccc--CCCcccga")
173
- expect(s.rev_comp).to be_an_instance_of(Sequence)
174
- end
175
-
176
- it "gives back original sequence when called in succession" do
177
- s = Sequence.new("cccc--CCCcccga")
178
- expect(s.rev_comp.rev_comp).to eq s
179
- end
180
-
181
- context "with an empty sequence" do
182
- it "returns an empty sequence" do
183
- s = Sequence.new("")
184
- expect(s.rev_comp).to be_empty
185
- end
186
- end
187
- end
188
- end
@@ -1,99 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # Copyright 2014, 2015 Ryan Moore
4
- # Contact: moorer@udel.edu
5
-
6
- # This file is part of parse_fasta.
7
-
8
- # parse_fasta is free software: you can redistribute it and/or modify
9
- # it under the terms of the GNU General Public License as published by
10
- # the Free Software Foundation, either version 3 of the License, or
11
- # (at your option) any later version.
12
-
13
- # parse_fasta is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
- # GNU General Public License for more details.
17
-
18
- # You should have received a copy of the GNU General Public License
19
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
20
-
21
- require 'parse_fasta'
22
- require 'bio'
23
- require 'benchmark'
24
-
25
- def this_parse_fasta fname
26
- FastaFile.open(fname, 'r').each_record do |header, sequence|
27
- [header, sequence.length].join("\t")
28
- end
29
- end
30
-
31
- def this_parse_fasta_fast fname
32
- FastaFile.open(fname, 'r').each_record_fast do |header, sequence|
33
- [header, sequence.length].join("\t")
34
- end
35
- end
36
-
37
- def bioruby_parse_fasta fname
38
- Bio::FastaFormat.open(fname).each do |entry|
39
- [entry.definition, entry.seq.length].join("\t")
40
- end
41
- end
42
-
43
- Benchmark.bmbm do |x|
44
- x.report('parse_fasta') { this_parse_fasta(ARGV.first) }
45
- x.report('parse_fasta fast') { this_parse_fasta_fast(ARGV.first) }
46
- x.report('bioruby') { bioruby_parse_fasta(ARGV.first) }
47
- end
48
-
49
- ####
50
-
51
- def this_gc(str)
52
- Sequence.new(str).gc
53
- end
54
-
55
- def bioruby_gc(str)
56
- Bio::Sequence::NA.new(str).gc_content
57
- end
58
-
59
- # make a random sequence of given length
60
- def make_seq(num)
61
- num.times.reduce('') { |str, n| str << %w[A a C c T t G g N n].sample }
62
- end
63
-
64
- # s1 = make_seq(2000000)
65
- # s2 = make_seq(4000000)
66
- # s3 = make_seq(8000000)
67
-
68
- # Benchmark.bmbm do |x|
69
- # x.report('this_gc 1') { this_gc(s1) }
70
- # x.report('bioruby_gc 1') { bioruby_gc(s1) }
71
-
72
- # x.report('this_gc 2') { this_gc(s2) }
73
- # x.report('bioruby_gc 2') { bioruby_gc(s2) }
74
-
75
- # x.report('this_gc 3') { this_gc(s3) }
76
- # x.report('bioruby_gc 3') { bioruby_gc(s3) }
77
- # end
78
-
79
- # fastq = ARGV.first
80
-
81
- def bioruby_fastq(fastq)
82
- Bio::FlatFile.open(Bio::Fastq, fastq) do |fq|
83
- fq.each do |entry|
84
- [entry.definition, entry.seq.length].join("\t")
85
- end
86
- end
87
- end
88
-
89
- def this_fastq(fastq)
90
- FastqFile.open(fastq).each_record do |head, seq, desc, qual|
91
- [head, seq.length].join("\t")
92
- end
93
- end
94
-
95
- # file is 4 million illumina reads (16,000,000 lines) 1.4gb
96
- # Benchmark.bmbm do |x|
97
- # x.report('this_fastq') { this_fastq(ARGV.first) }
98
- # x.report('bioruby_fastq') { bioruby_fastq(ARGV.first) }
99
- # end
data/test_files/bogus.txt DELETED
@@ -1,2 +0,0 @@
1
- this header is missing the '>'
2
- ACTGACTGATCGATCGTAGCTACGTAGCTACG
Binary file
data/test_files/test.fq DELETED
@@ -1,8 +0,0 @@
1
- @seq1
2
- AA CC TT GG
3
- +
4
- )# 3g Tq N8
5
- @seq2 apples
6
- ACTG
7
- +seq2 apples
8
- *ujM
Binary file