parse_fasta 1.9.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +8 -8
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/README.md +42 -215
  6. data/Rakefile +2 -4
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/lib/parse_fasta/error.rb +39 -0
  10. data/lib/parse_fasta/record.rb +88 -0
  11. data/lib/parse_fasta/seq_file.rb +221 -114
  12. data/lib/parse_fasta/version.rb +2 -2
  13. data/lib/parse_fasta.rb +5 -20
  14. data/spec/parse_fasta/record_spec.rb +115 -0
  15. data/spec/parse_fasta/seq_file_spec.rb +238 -0
  16. data/spec/parse_fasta_spec.rb +25 -0
  17. data/spec/spec_helper.rb +2 -44
  18. data/spec/test_files/cr.fa +1 -0
  19. data/spec/test_files/cr.fa.gz +0 -0
  20. data/spec/test_files/cr.fq +3 -0
  21. data/spec/test_files/cr.fq.gz +0 -0
  22. data/spec/test_files/cr_nl.fa +4 -0
  23. data/spec/test_files/cr_nl.fa.gz +0 -0
  24. data/spec/test_files/cr_nl.fq +8 -0
  25. data/spec/test_files/cr_nl.fq.gz +0 -0
  26. data/spec/test_files/multi_blob.fa.gz +0 -0
  27. data/spec/test_files/multi_blob.fq.gz +0 -0
  28. data/spec/test_files/not_a_seq_file.txt +1 -0
  29. data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
  30. data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
  31. data/spec/test_files/seqs.fa.gz +0 -0
  32. data/spec/test_files/seqs.fq +8 -0
  33. data/spec/test_files/seqs.fq.gz +0 -0
  34. metadata +49 -24
  35. data/lib/parse_fasta/fasta_file.rb +0 -232
  36. data/lib/parse_fasta/fastq_file.rb +0 -160
  37. data/lib/parse_fasta/quality.rb +0 -54
  38. data/lib/parse_fasta/sequence.rb +0 -174
  39. data/spec/lib/fasta_file_spec.rb +0 -212
  40. data/spec/lib/fastq_file_spec.rb +0 -143
  41. data/spec/lib/quality_spec.rb +0 -51
  42. data/spec/lib/seq_file_spec.rb +0 -357
  43. data/spec/lib/sequence_spec.rb +0 -188
  44. data/test_files/benchmark.rb +0 -99
  45. data/test_files/bogus.txt +0 -2
  46. data/test_files/test.fa.gz +0 -0
  47. data/test_files/test.fq +0 -8
  48. data/test_files/test.fq.gz +0 -0
@@ -1,212 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
-
21
- describe FastaFile do
22
- describe "::open" do
23
- context "when input is bogus" do
24
- it "raises a ParseFasta::DataFormatError with message" do
25
- fname = "#{File.dirname(__FILE__)}/../../test_files/bogus.txt"
26
-
27
- expect { FastaFile.open(fname).each_record do |h, s|
28
- puts [h, s].join ' '
29
- end
30
- }.to raise_error ParseFasta::DataFormatError
31
- end
32
- end
33
-
34
- let(:fasta) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }
35
-
36
- it "takes all the wacky args like IO.open" do
37
- expect {
38
- FastaFile.open(fasta, mode: 'r', cr_newline: true)
39
- }.not_to raise_error
40
- end
41
-
42
- it "returns a FastaFile" do
43
- expect(FastaFile.open(fasta)).to be_a FastaFile
44
- end
45
- end
46
-
47
- describe "#to_hash" do
48
- let(:records) { Helpers::RECORDS_MAP }
49
- let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
50
- let(:fasta) { FastaFile.open(fname) }
51
-
52
- context "with badly catted fasta" do
53
- it "raises ParseFasta::SequenceFormatError" do
54
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
55
-
56
- expect { FastaFile.open(fname).to_hash }.
57
- to raise_error ParseFasta::SequenceFormatError
58
- end
59
- end
60
-
61
- it "reads the records into a hash: header as key and seq as val" do
62
- expect(fasta.to_hash).to eq records
63
- end
64
-
65
- it "passes the values as Sequence objects" do
66
- expect(
67
- fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
68
- ).to eq true
69
- end
70
- end
71
-
72
- describe "#each_record" do
73
- let(:records) { Helpers::RECORDS }
74
-
75
- let(:truthy_records) { Helpers::TRUTHY_RECORDS }
76
- let(:f_handle) { FastaFile.open(@fname).each_record { |s| } }
77
-
78
- context "with badly catted fasta" do
79
- it "raises ParseFasta::SequenceFormatError" do
80
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
81
-
82
- expect { FastaFile.open(fname).each_record {} }.
83
- to raise_error ParseFasta::SequenceFormatError
84
- end
85
- end
86
-
87
- shared_examples_for "any FastaFile" do
88
- context "with no arguments" do
89
- it "yields proper header and sequence for each record" do
90
- expect { |b|
91
- FastaFile.open(@fname).each_record(&b)
92
- }.to yield_successive_args(*records)
93
- end
94
-
95
- it "yields the sequence as a Sequence class" do
96
- FastaFile.open(@fname).each_record do |_, seq|
97
- expect(seq).to be_an_instance_of Sequence
98
- end
99
- end
100
- end
101
-
102
- context "with a truthy argument" do
103
- it "yields proper header and sequence for each record" do
104
- expect { |b|
105
- FastaFile.open(@fname).each_record(1, &b)
106
- }.to yield_successive_args(*truthy_records)
107
- end
108
-
109
- it "yields the sequence as a Sequence class" do
110
- FastaFile.open(@fname).each_record(1) do |_, seq|
111
- all_Sequences = seq.map { |s| s.instance_of?(Sequence) }.all?
112
- expect(all_Sequences).to be true
113
- end
114
- end
115
-
116
- end
117
- end
118
-
119
- context "with a gzipped file" do
120
- before(:each) do
121
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
122
- end
123
-
124
- it_behaves_like "any FastaFile"
125
-
126
- it "closes the GzipReader" do
127
- expect(f_handle).to be_closed
128
- end
129
-
130
- it "returns GzipReader object" do
131
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
132
- end
133
- end
134
-
135
- context "with a non-gzipped file" do
136
- before(:each) do
137
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
138
- end
139
-
140
- it_behaves_like "any FastaFile"
141
-
142
- it "doesn't close the FastqFile (approx regular file behavior)" do
143
- expect(f_handle).not_to be_closed
144
- end
145
-
146
- it "returns FastaFile object" do
147
- expect(f_handle).to be_an_instance_of FastaFile
148
- end
149
- end
150
- end
151
-
152
- describe "#each_record_fast" do
153
- let(:records) { Helpers::RECORDS_FAST }
154
-
155
- let(:f_handle) { FastaFile.open(@fname).each_record_fast { |s| } }
156
-
157
- context "with badly catted fasta" do
158
- it "raises ParseFasta::SequenceFormatError" do
159
- fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
160
-
161
- expect { FastaFile.open(fname).each_record_fast {} }.
162
- to raise_error ParseFasta::SequenceFormatError
163
- end
164
- end
165
-
166
- shared_examples_for "any FastaFile" do
167
- it "yields proper header and sequence for each record" do
168
- expect { |b|
169
- FastaFile.open(@fname).each_record_fast(&b)
170
- }.to yield_successive_args(*records)
171
- end
172
-
173
- it "yields the sequence as a String class" do
174
- FastaFile.open(@fname).each_record_fast do |_, seq|
175
- expect(seq).to be_an_instance_of String
176
- end
177
- end
178
- end
179
-
180
- context "with a gzipped file" do
181
- before(:each) do
182
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
183
- end
184
-
185
- it_behaves_like "any FastaFile"
186
-
187
- it "closes the GzipReader" do
188
- expect(f_handle).to be_closed
189
- end
190
-
191
- it "returns GzipReader object" do
192
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
193
- end
194
- end
195
-
196
- context "with a non-gzipped file" do
197
- before(:each) do
198
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
199
- end
200
-
201
- it_behaves_like "any FastaFile"
202
-
203
- it "doesn't close the FastqFile (approx regular file behavior)" do
204
- expect(f_handle).not_to be_closed
205
- end
206
-
207
- it "returns FastaFile object" do
208
- expect(f_handle).to be_an_instance_of FastaFile
209
- end
210
- end
211
- end
212
- end
@@ -1,143 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
-
21
- describe FastqFile do
22
- let(:records) {
23
- [["seq1", "AACCTTGG", "", ")#3gTqN8"],
24
- ["seq2 apples", "ACTG", "seq2 apples", "*ujM"]]
25
- }
26
-
27
- let(:records_fast) {
28
- [["seq1", "AA CC TT GG", "", ")# 3g Tq N8"],
29
- ["seq2 apples", "ACTG", "seq2 apples", "*ujM"]]
30
- }
31
-
32
- let(:f_handle) { FastqFile.open(@fname).each_record { |s| } }
33
-
34
-
35
- shared_examples_for "any FastqFile" do
36
- it "yields proper header, sequence, description, and quality" do
37
- expect { |b|
38
- FastqFile.open(@fname).each_record(&b)
39
- }.to yield_successive_args(records[0], records[1])
40
- end
41
-
42
- it "yields the sequence as a Sequence class" do
43
- FastqFile.open(@fname).each_record do |_, seq, _, _|
44
- expect(seq).to be_an_instance_of Sequence
45
- end
46
- end
47
-
48
- it "yields the quality as a Quality class" do
49
- FastqFile.open(@fname).each_record do |_, _, _, qual|
50
- expect(qual).to be_an_instance_of Quality
51
- end
52
- end
53
- end
54
-
55
- describe "#each_record_fast" do
56
- before(:each) do
57
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
58
- end
59
-
60
- it "yields proper header, sequence, description, and quality" do
61
- expect { |b|
62
- FastqFile.open(@fname).each_record_fast(&b)
63
- }.to yield_successive_args(records_fast[0], records_fast[1])
64
- end
65
-
66
- it "yields all params as String" do
67
- FastqFile.open(@fname).each_record_fast do |h, s, d, q|
68
- expect(h).to be_an_instance_of String
69
- expect(s).to be_an_instance_of String
70
- expect(d).to be_an_instance_of String
71
- expect(q).to be_an_instance_of String
72
- end
73
- end
74
- end
75
-
76
- describe "#to_hash" do
77
- let(:records) {
78
- { "seq1" => { head: "seq1",
79
- seq: "AACCTTGG",
80
- desc: "",
81
- qual: ")#3gTqN8" },
82
- "seq2 apples" => { head: "seq2 apples",
83
- seq: "ACTG",
84
- desc: "seq2 apples",
85
- qual: "*ujM" }
86
- }
87
- }
88
- let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
89
- let(:fastq) { FastqFile.open(fname) }
90
-
91
- it "reads the records into a hash: header as key and seq as val" do
92
- expect(fastq.to_hash).to eq records
93
- end
94
-
95
- it "passes the seqs as Sequence objects" do
96
- expect(
97
- fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
98
- ).to eq true
99
- end
100
-
101
- it "passes the quals as Quality objects" do
102
- expect(
103
- fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
104
- ).to eq true
105
- end
106
- end
107
-
108
- context "with a 4 line per record fastq file" do
109
- describe "#each_record" do
110
- context "with a gzipped file" do
111
- before(:each) do
112
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
113
- end
114
-
115
- it_behaves_like "any FastqFile"
116
-
117
- it "closes the GzipReader" do
118
- expect(f_handle).to be_closed
119
- end
120
-
121
- it "returns GzipReader object" do
122
- expect(f_handle).to be_an_instance_of Zlib::GzipReader
123
- end
124
- end
125
-
126
- context "with a non-gzipped file" do
127
- before(:each) do
128
- @fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq"
129
- end
130
-
131
- it_behaves_like "any FastqFile"
132
-
133
- it "doesn't close the FastqFile (approx regular file behavior)" do
134
- expect(f_handle).not_to be_closed
135
- end
136
-
137
- it "returns FastqFile object" do
138
- expect(f_handle).to be_an_instance_of FastqFile
139
- end
140
- end
141
- end
142
- end
143
- end
@@ -1,51 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'spec_helper'
20
- require 'bio'
21
-
22
- describe Quality do
23
- let(:qual_string) { qual_string = Quality.new('ab%63:K') }
24
- let(:bioruby_qual_scores) do
25
- Bio::Fastq.new("@seq1\nACTGACT\n+\n#{qual_string}").quality_scores
26
- end
27
-
28
- describe "::new" do
29
- it "removes any spaces in the quality string" do
30
- q = " ab # :m, ! "
31
- q_no_spaces = "ab#:m,!"
32
- expect(Quality.new(q)).to eq q_no_spaces
33
- end
34
- end
35
-
36
- describe "#qual_scores" do
37
- context "with illumina style quality scores" do
38
- it "returns an array of quality scores" do
39
- expect(qual_string.qual_scores).to eq bioruby_qual_scores
40
- end
41
- end
42
- end
43
-
44
- describe "#mean_qual" do
45
- it "returns the mean quality for the quality string" do
46
- len = qual_string.length.to_f
47
- mean_quality = qual_string.qual_scores.reduce(:+) / len
48
- expect(qual_string.mean_qual).to eq mean_quality
49
- end
50
- end
51
- end