parse_fasta 1.9.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/README.md +42 -215
  6. data/Rakefile +2 -4
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/lib/parse_fasta/error.rb +39 -0
  10. data/lib/parse_fasta/record.rb +88 -0
  11. data/lib/parse_fasta/seq_file.rb +221 -114
  12. data/lib/parse_fasta/version.rb +2 -2
  13. data/lib/parse_fasta.rb +5 -20
  14. data/spec/parse_fasta/record_spec.rb +115 -0
  15. data/spec/parse_fasta/seq_file_spec.rb +238 -0
  16. data/spec/parse_fasta_spec.rb +25 -0
  17. data/spec/spec_helper.rb +2 -44
  18. data/spec/test_files/cr.fa +1 -0
  19. data/spec/test_files/cr.fa.gz +0 -0
  20. data/spec/test_files/cr.fq +3 -0
  21. data/spec/test_files/cr.fq.gz +0 -0
  22. data/spec/test_files/cr_nl.fa +4 -0
  23. data/spec/test_files/cr_nl.fa.gz +0 -0
  24. data/spec/test_files/cr_nl.fq +8 -0
  25. data/spec/test_files/cr_nl.fq.gz +0 -0
  26. data/spec/test_files/multi_blob.fa.gz +0 -0
  27. data/spec/test_files/multi_blob.fq.gz +0 -0
  28. data/spec/test_files/not_a_seq_file.txt +1 -0
  29. data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
  30. data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
  31. data/spec/test_files/seqs.fa.gz +0 -0
  32. data/spec/test_files/seqs.fq +8 -0
  33. data/spec/test_files/seqs.fq.gz +0 -0
  34. metadata +49 -24
  35. data/lib/parse_fasta/fasta_file.rb +0 -232
  36. data/lib/parse_fasta/fastq_file.rb +0 -160
  37. data/lib/parse_fasta/quality.rb +0 -54
  38. data/lib/parse_fasta/sequence.rb +0 -174
  39. data/spec/lib/fasta_file_spec.rb +0 -212
  40. data/spec/lib/fastq_file_spec.rb +0 -143
  41. data/spec/lib/quality_spec.rb +0 -51
  42. data/spec/lib/seq_file_spec.rb +0 -357
  43. data/spec/lib/sequence_spec.rb +0 -188
  44. data/test_files/benchmark.rb +0 -99
  45. data/test_files/bogus.txt +0 -2
  46. data/test_files/test.fa.gz +0 -0
  47. data/test_files/test.fq +0 -8
  48. data/test_files/test.fq.gz +0 -0
@@ -1,212 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
-
21
- describe FastaFile do
22
- describe "::open" do
23
- context "when input is bogus" do
24
- it "raises a ParseFasta::DataFormatError with message" do
25
- fname = "#{File.dirname(__FILE__)}/../../test_files/bogus.txt"
26
-
27
- expect { FastaFile.open(fname).each_record do |h, s|
28
- puts [h, s].join ' '
29
- end
30
- }.to raise_error ParseFasta::DataFormatError
31
- end
32
- end
33
-
34
- let(:fasta) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }
35
-
36
- it "takes all the wacky args like IO.open" do
37
- expect {
38
- FastaFile.open(fasta, mode: 'r', cr_newline: true)
39
- }.not_to raise_error
40
- end
41
-
42
- it "returns a FastaFile" do
43
- expect(FastaFile.open(fasta)).to be_a FastaFile
44
- end
45
- end
46
-
47
- describe "#to_hash" do
48
- let(:records) { Helpers::RECORDS_MAP }
49
- let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
50
- let(:fasta) { FastaFile.open(fname) }
51
-
52
- context "with badly catted fasta" do
53
- it "raises ParseFasta::SequenceFormatError" do
54
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
55
-
56
- expect { FastaFile.open(fname).to_hash }.
57
- to raise_error ParseFasta::SequenceFormatError
58
- end
59
- end
60
-
61
- it "reads the records into a hash: header as key and seq as val" do
62
- expect(fasta.to_hash).to eq records
63
- end
64
-
65
- it "passes the values as Sequence objects" do
66
- expect(
67
- fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
68
- ).to eq true
69
- end
70
- end
71
-
72
- describe "#each_record" do
73
- let(:records) { Helpers::RECORDS }
74
-
75
- let(:truthy_records) { Helpers::TRUTHY_RECORDS }
76
- let(:f_handle) { FastaFile.open(@fname).each_record { |s| } }
77
-
78
- context "with badly catted fasta" do
79
- it "raises ParseFasta::SequenceFormatError" do
80
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
81
-
82
- expect { FastaFile.open(fname).each_record {} }.
83
- to raise_error ParseFasta::SequenceFormatError
84
- end
85
- end
86
-
87
- shared_examples_for "any FastaFile" do
88
- context "with no arguments" do
89
- it "yields proper header and sequence for each record" do
90
- expect { |b|
91
- FastaFile.open(@fname).each_record(&b)
92
- }.to yield_successive_args(*records)
93
- end
94
-
95
- it "yields the sequence as a Sequence class" do
96
- FastaFile.open(@fname).each_record do |_, seq|
97
- expect(seq).to be_an_instance_of Sequence
98
- end
99
- end
100
- end
101
-
102
- context "with a truthy argument" do
103
- it "yields proper header and sequence for each record" do
104
- expect { |b|
105
- FastaFile.open(@fname).each_record(1, &b)
106
- }.to yield_successive_args(*truthy_records)
107
- end
108
-
109
- it "yields the sequence as a Sequence class" do
110
- FastaFile.open(@fname).each_record(1) do |_, seq|
111
- all_Sequences = seq.map { |s| s.instance_of?(Sequence) }.all?
112
- expect(all_Sequences).to be true
113
- end
114
- end
115
-
116
- end
117
- end
118
-
119
- context "with a gzipped file" do
120
- before(:each) do
121
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
122
- end
123
-
124
- it_behaves_like "any FastaFile"
125
-
126
- it "closes the GzipReader" do
127
- expect(f_handle).to be_closed
128
- end
129
-
130
- it "returns GzipReader object" do
131
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
132
- end
133
- end
134
-
135
- context "with a non-gzipped file" do
136
- before(:each) do
137
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
138
- end
139
-
140
- it_behaves_like "any FastaFile"
141
-
142
- it "doesn't close the FastqFile (approx regular file behavior)" do
143
- expect(f_handle).not_to be_closed
144
- end
145
-
146
- it "returns FastaFile object" do
147
- expect(f_handle).to be_an_instance_of FastaFile
148
- end
149
- end
150
- end
151
-
152
- describe "#each_record_fast" do
153
- let(:records) { Helpers::RECORDS_FAST }
154
-
155
- let(:f_handle) { FastaFile.open(@fname).each_record_fast { |s| } }
156
-
157
- context "with badly catted fasta" do
158
- it "raises ParseFasta::SequenceFormatError" do
159
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
160
-
161
- expect { FastaFile.open(fname).each_record_fast {} }.
162
- to raise_error ParseFasta::SequenceFormatError
163
- end
164
- end
165
-
166
- shared_examples_for "any FastaFile" do
167
- it "yields proper header and sequence for each record" do
168
- expect { |b|
169
- FastaFile.open(@fname).each_record_fast(&b)
170
- }.to yield_successive_args(*records)
171
- end
172
-
173
- it "yields the sequence as a String class" do
174
- FastaFile.open(@fname).each_record_fast do |_, seq|
175
- expect(seq).to be_an_instance_of String
176
- end
177
- end
178
- end
179
-
180
- context "with a gzipped file" do
181
- before(:each) do
182
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
183
- end
184
-
185
- it_behaves_like "any FastaFile"
186
-
187
- it "closes the GzipReader" do
188
- expect(f_handle).to be_closed
189
- end
190
-
191
- it "returns GzipReader object" do
192
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
193
- end
194
- end
195
-
196
- context "with a non-gzipped file" do
197
- before(:each) do
198
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
199
- end
200
-
201
- it_behaves_like "any FastaFile"
202
-
203
- it "doesn't close the FastqFile (approx regular file behavior)" do
204
- expect(f_handle).not_to be_closed
205
- end
206
-
207
- it "returns FastaFile object" do
208
- expect(f_handle).to be_an_instance_of FastaFile
209
- end
210
- end
211
- end
212
- end
@@ -1,143 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
-
21
- describe FastqFile do
22
- let(:records) {
23
- [["seq1", "AACCTTGG", "", ")#3gTqN8"],
24
- ["seq2 apples", "ACTG", "seq2 apples", "*ujM"]]
25
- }
26
-
27
- let(:records_fast) {
28
- [["seq1", "AA CC TT GG", "", ")# 3g Tq N8"],
29
- ["seq2 apples", "ACTG", "seq2 apples", "*ujM"]]
30
- }
31
-
32
- let(:f_handle) { FastqFile.open(@fname).each_record { |s| } }
33
-
34
-
35
- shared_examples_for "any FastqFile" do
36
- it "yields proper header, sequence, description, and quality" do
37
- expect { |b|
38
- FastqFile.open(@fname).each_record(&b)
39
- }.to yield_successive_args(records[0], records[1])
40
- end
41
-
42
- it "yields the sequence as a Sequence class" do
43
- FastqFile.open(@fname).each_record do |_, seq, _, _|
44
- expect(seq).to be_an_instance_of Sequence
45
- end
46
- end
47
-
48
- it "yields the quality as a Quality class" do
49
- FastqFile.open(@fname).each_record do |_, _, _, qual|
50
- expect(qual).to be_an_instance_of Quality
51
- end
52
- end
53
- end
54
-
55
- describe "#each_record_fast" do
56
- before(:each) do
57
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
58
- end
59
-
60
- it "yields proper header, sequence, description, and quality" do
61
- expect { |b|
62
- FastqFile.open(@fname).each_record_fast(&b)
63
- }.to yield_successive_args(records_fast[0], records_fast[1])
64
- end
65
-
66
- it "yields all params as String" do
67
- FastqFile.open(@fname).each_record_fast do |h, s, d, q|
68
- expect(h).to be_an_instance_of String
69
- expect(s).to be_an_instance_of String
70
- expect(d).to be_an_instance_of String
71
- expect(q).to be_an_instance_of String
72
- end
73
- end
74
- end
75
-
76
- describe "#to_hash" do
77
- let(:records) {
78
- { "seq1" => { head: "seq1",
79
- seq: "AACCTTGG",
80
- desc: "",
81
- qual: ")#3gTqN8" },
82
- "seq2 apples" => { head: "seq2 apples",
83
- seq: "ACTG",
84
- desc: "seq2 apples",
85
- qual: "*ujM" }
86
- }
87
- }
88
- let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
89
- let(:fastq) { FastqFile.open(fname) }
90
-
91
- it "reads the records into a hash: header as key and seq as val" do
92
- expect(fastq.to_hash).to eq records
93
- end
94
-
95
- it "passes the seqs as Sequence objects" do
96
- expect(
97
- fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
98
- ).to eq true
99
- end
100
-
101
- it "passes the quals as Quality objects" do
102
- expect(
103
- fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
104
- ).to eq true
105
- end
106
- end
107
-
108
- context "with a 4 line per record fastq file" do
109
- describe "#each_record" do
110
- context "with a gzipped file" do
111
- before(:each) do
112
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
113
- end
114
-
115
- it_behaves_like "any FastqFile"
116
-
117
- it "closes the GzipReader" do
118
- expect(f_handle).to be_closed
119
- end
120
-
121
- it "returns GzipReader object" do
122
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
123
- end
124
- end
125
-
126
- context "with a non-gzipped file" do
127
- before(:each) do
128
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq"
129
- end
130
-
131
- it_behaves_like "any FastqFile"
132
-
133
- it "doesn't close the FastqFile (approx regular file behavior)" do
134
- expect(f_handle).not_to be_closed
135
- end
136
-
137
- it "returns FastqFile object" do
138
- expect(f_handle).to be_an_instance_of FastqFile
139
- end
140
- end
141
- end
142
- end
143
- end
@@ -1,51 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
- require 'bio'
21
-
22
- describe Quality do
23
- let(:qual_string) { qual_string = Quality.new('ab%63:K') }
24
- let(:bioruby_qual_scores) do
25
- Bio::Fastq.new("@seq1\nACTGACT\n+\n#{qual_string}").quality_scores
26
- end
27
-
28
- describe "::new" do
29
- it "removes any spaces in the quality string" do
30
- q = " ab # :m, ! "
31
- q_no_spaces = "ab#:m,!"
32
- expect(Quality.new(q)).to eq q_no_spaces
33
- end
34
- end
35
-
36
- describe "#qual_scores" do
37
- context "with illumina style quality scores" do
38
- it "returns an array of quality scores" do
39
- expect(qual_string.qual_scores).to eq bioruby_qual_scores
40
- end
41
- end
42
- end
43
-
44
- describe "#mean_qual" do
45
- it "returns the mean quality for the quality string" do
46
- len = qual_string.length.to_f
47
- mean_quality = qual_string.qual_scores.reduce(:+) / len
48
- expect(qual_string.mean_qual).to eq mean_quality
49
- end
50
- end
51
- end