parse_fasta 1.9.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/CHANGELOG.md +178 -0
- data/README.md +42 -215
- data/Rakefile +2 -4
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/parse_fasta/error.rb +39 -0
- data/lib/parse_fasta/record.rb +88 -0
- data/lib/parse_fasta/seq_file.rb +221 -114
- data/lib/parse_fasta/version.rb +2 -2
- data/lib/parse_fasta.rb +5 -20
- data/spec/parse_fasta/record_spec.rb +115 -0
- data/spec/parse_fasta/seq_file_spec.rb +238 -0
- data/spec/parse_fasta_spec.rb +25 -0
- data/spec/spec_helper.rb +2 -44
- data/spec/test_files/cr.fa +1 -0
- data/spec/test_files/cr.fa.gz +0 -0
- data/spec/test_files/cr.fq +3 -0
- data/spec/test_files/cr.fq.gz +0 -0
- data/spec/test_files/cr_nl.fa +4 -0
- data/spec/test_files/cr_nl.fa.gz +0 -0
- data/spec/test_files/cr_nl.fq +8 -0
- data/spec/test_files/cr_nl.fq.gz +0 -0
- data/spec/test_files/multi_blob.fa.gz +0 -0
- data/spec/test_files/multi_blob.fq.gz +0 -0
- data/spec/test_files/not_a_seq_file.txt +1 -0
- data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
- data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
- data/spec/test_files/seqs.fa.gz +0 -0
- data/spec/test_files/seqs.fq +8 -0
- data/spec/test_files/seqs.fq.gz +0 -0
- metadata +49 -24
- data/lib/parse_fasta/fasta_file.rb +0 -232
- data/lib/parse_fasta/fastq_file.rb +0 -160
- data/lib/parse_fasta/quality.rb +0 -54
- data/lib/parse_fasta/sequence.rb +0 -174
- data/spec/lib/fasta_file_spec.rb +0 -212
- data/spec/lib/fastq_file_spec.rb +0 -143
- data/spec/lib/quality_spec.rb +0 -51
- data/spec/lib/seq_file_spec.rb +0 -357
- data/spec/lib/sequence_spec.rb +0 -188
- data/test_files/benchmark.rb +0 -99
- data/test_files/bogus.txt +0 -2
- data/test_files/test.fa.gz +0 -0
- data/test_files/test.fq +0 -8
- data/test_files/test.fq.gz +0 -0
data/spec/lib/fasta_file_spec.rb
DELETED
@@ -1,212 +0,0 @@
|
|
1
|
-
# Copyright 2014, 2015 Ryan Moore
|
2
|
-
# Contact: moorer@udel.edu
|
3
|
-
#
|
4
|
-
# This file is part of parse_fasta.
|
5
|
-
#
|
6
|
-
# parse_fasta is free software: you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation, either version 3 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# parse_fasta is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
|
19
|
-
require 'spec_helper'
|
20
|
-
|
21
|
-
describe FastaFile do
|
22
|
-
describe "::open" do
|
23
|
-
context "when input is bogus" do
|
24
|
-
it "raises a ParseFasta::DataFormatError with message" do
|
25
|
-
fname = "#{File.dirname(__FILE__)}/../../test_files/bogus.txt"
|
26
|
-
|
27
|
-
expect { FastaFile.open(fname).each_record do |h, s|
|
28
|
-
puts [h, s].join ' '
|
29
|
-
end
|
30
|
-
}.to raise_error ParseFasta::DataFormatError
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
let(:fasta) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }
|
35
|
-
|
36
|
-
it "takes all the wacky args like IO.open" do
|
37
|
-
expect {
|
38
|
-
FastaFile.open(fasta, mode: 'r', cr_newline: true)
|
39
|
-
}.not_to raise_error
|
40
|
-
end
|
41
|
-
|
42
|
-
it "returns a FastaFile" do
|
43
|
-
expect(FastaFile.open(fasta)).to be_a FastaFile
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
describe "#to_hash" do
|
48
|
-
let(:records) { Helpers::RECORDS_MAP }
|
49
|
-
let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
|
50
|
-
let(:fasta) { FastaFile.open(fname) }
|
51
|
-
|
52
|
-
context "with badly catted fasta" do
|
53
|
-
it "raises ParseFasta::SequenceFormatError" do
|
54
|
-
fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
|
55
|
-
|
56
|
-
expect { FastaFile.open(fname).to_hash }.
|
57
|
-
to raise_error ParseFasta::SequenceFormatError
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
it "reads the records into a hash: header as key and seq as val" do
|
62
|
-
expect(fasta.to_hash).to eq records
|
63
|
-
end
|
64
|
-
|
65
|
-
it "passes the values as Sequence objects" do
|
66
|
-
expect(
|
67
|
-
fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
|
68
|
-
).to eq true
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
describe "#each_record" do
|
73
|
-
let(:records) { Helpers::RECORDS }
|
74
|
-
|
75
|
-
let(:truthy_records) { Helpers::TRUTHY_RECORDS }
|
76
|
-
let(:f_handle) { FastaFile.open(@fname).each_record { |s| } }
|
77
|
-
|
78
|
-
context "with badly catted fasta" do
|
79
|
-
it "raises ParseFasta::SequenceFormatError" do
|
80
|
-
fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
|
81
|
-
|
82
|
-
expect { FastaFile.open(fname).each_record {} }.
|
83
|
-
to raise_error ParseFasta::SequenceFormatError
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
shared_examples_for "any FastaFile" do
|
88
|
-
context "with no arguments" do
|
89
|
-
it "yields proper header and sequence for each record" do
|
90
|
-
expect { |b|
|
91
|
-
FastaFile.open(@fname).each_record(&b)
|
92
|
-
}.to yield_successive_args(*records)
|
93
|
-
end
|
94
|
-
|
95
|
-
it "yields the sequence as a Sequence class" do
|
96
|
-
FastaFile.open(@fname).each_record do |_, seq|
|
97
|
-
expect(seq).to be_an_instance_of Sequence
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
context "with a truthy argument" do
|
103
|
-
it "yields proper header and sequence for each record" do
|
104
|
-
expect { |b|
|
105
|
-
FastaFile.open(@fname).each_record(1, &b)
|
106
|
-
}.to yield_successive_args(*truthy_records)
|
107
|
-
end
|
108
|
-
|
109
|
-
it "yields the sequence as a Sequence class" do
|
110
|
-
FastaFile.open(@fname).each_record(1) do |_, seq|
|
111
|
-
all_Sequences = seq.map { |s| s.instance_of?(Sequence) }.all?
|
112
|
-
expect(all_Sequences).to be true
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
context "with a gzipped file" do
|
120
|
-
before(:each) do
|
121
|
-
@fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
|
122
|
-
end
|
123
|
-
|
124
|
-
it_behaves_like "any FastaFile"
|
125
|
-
|
126
|
-
it "closes the GzipReader" do
|
127
|
-
expect(f_handle).to be_closed
|
128
|
-
end
|
129
|
-
|
130
|
-
it "returns GzipReader object" do
|
131
|
-
expect(f_handle).to be_an_instance_of Zlib::GzipReader
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
context "with a non-gzipped file" do
|
136
|
-
before(:each) do
|
137
|
-
@fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
|
138
|
-
end
|
139
|
-
|
140
|
-
it_behaves_like "any FastaFile"
|
141
|
-
|
142
|
-
it "doesn't close the FastqFile (approx regular file behavior)" do
|
143
|
-
expect(f_handle).not_to be_closed
|
144
|
-
end
|
145
|
-
|
146
|
-
it "returns FastaFile object" do
|
147
|
-
expect(f_handle).to be_an_instance_of FastaFile
|
148
|
-
end
|
149
|
-
end
|
150
|
-
end
|
151
|
-
|
152
|
-
describe "#each_record_fast" do
|
153
|
-
let(:records) { Helpers::RECORDS_FAST }
|
154
|
-
|
155
|
-
let(:f_handle) { FastaFile.open(@fname).each_record_fast { |s| } }
|
156
|
-
|
157
|
-
context "with badly catted fasta" do
|
158
|
-
it "raises ParseFasta::SequenceFormatError" do
|
159
|
-
fname = "#{File.dirname(__FILE__)}/../../test_files/bad.fa"
|
160
|
-
|
161
|
-
expect { FastaFile.open(fname).each_record_fast {} }.
|
162
|
-
to raise_error ParseFasta::SequenceFormatError
|
163
|
-
end
|
164
|
-
end
|
165
|
-
|
166
|
-
shared_examples_for "any FastaFile" do
|
167
|
-
it "yields proper header and sequence for each record" do
|
168
|
-
expect { |b|
|
169
|
-
FastaFile.open(@fname).each_record_fast(&b)
|
170
|
-
}.to yield_successive_args(*records)
|
171
|
-
end
|
172
|
-
|
173
|
-
it "yields the sequence as a String class" do
|
174
|
-
FastaFile.open(@fname).each_record_fast do |_, seq|
|
175
|
-
expect(seq).to be_an_instance_of String
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
180
|
-
context "with a gzipped file" do
|
181
|
-
before(:each) do
|
182
|
-
@fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz"
|
183
|
-
end
|
184
|
-
|
185
|
-
it_behaves_like "any FastaFile"
|
186
|
-
|
187
|
-
it "closes the GzipReader" do
|
188
|
-
expect(f_handle).to be_closed
|
189
|
-
end
|
190
|
-
|
191
|
-
it "returns GzipReader object" do
|
192
|
-
expect(f_handle).to be_an_instance_of Zlib::GzipReader
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
context "with a non-gzipped file" do
|
197
|
-
before(:each) do
|
198
|
-
@fname = "#{File.dirname(__FILE__)}/../../test_files/test.fa"
|
199
|
-
end
|
200
|
-
|
201
|
-
it_behaves_like "any FastaFile"
|
202
|
-
|
203
|
-
it "doesn't close the FastqFile (approx regular file behavior)" do
|
204
|
-
expect(f_handle).not_to be_closed
|
205
|
-
end
|
206
|
-
|
207
|
-
it "returns FastaFile object" do
|
208
|
-
expect(f_handle).to be_an_instance_of FastaFile
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
212
|
-
end
|
data/spec/lib/fastq_file_spec.rb
DELETED
@@ -1,143 +0,0 @@
|
|
1
|
-
# Copyright 2014, 2015 Ryan Moore
|
2
|
-
# Contact: moorer@udel.edu
|
3
|
-
#
|
4
|
-
# This file is part of parse_fasta.
|
5
|
-
#
|
6
|
-
# parse_fasta is free software: you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation, either version 3 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# parse_fasta is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
|
19
|
-
require 'spec_helper'
|
20
|
-
|
21
|
-
describe FastqFile do
|
22
|
-
let(:records) {
|
23
|
-
[["seq1", "AACCTTGG", "", ")#3gTqN8"],
|
24
|
-
["seq2 apples", "ACTG", "seq2 apples", "*ujM"]]
|
25
|
-
}
|
26
|
-
|
27
|
-
let(:records_fast) {
|
28
|
-
[["seq1", "AA CC TT GG", "", ")# 3g Tq N8"],
|
29
|
-
["seq2 apples", "ACTG", "seq2 apples", "*ujM"]]
|
30
|
-
}
|
31
|
-
|
32
|
-
let(:f_handle) { FastqFile.open(@fname).each_record { |s| } }
|
33
|
-
|
34
|
-
|
35
|
-
shared_examples_for "any FastqFile" do
|
36
|
-
it "yields proper header, sequence, description, and quality" do
|
37
|
-
expect { |b|
|
38
|
-
FastqFile.open(@fname).each_record(&b)
|
39
|
-
}.to yield_successive_args(records[0], records[1])
|
40
|
-
end
|
41
|
-
|
42
|
-
it "yields the sequence as a Sequence class" do
|
43
|
-
FastqFile.open(@fname).each_record do |_, seq, _, _|
|
44
|
-
expect(seq).to be_an_instance_of Sequence
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
it "yields the quality as a Quality class" do
|
49
|
-
FastqFile.open(@fname).each_record do |_, _, _, qual|
|
50
|
-
expect(qual).to be_an_instance_of Quality
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
describe "#each_record_fast" do
|
56
|
-
before(:each) do
|
57
|
-
@fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
|
58
|
-
end
|
59
|
-
|
60
|
-
it "yields proper header, sequence, description, and quality" do
|
61
|
-
expect { |b|
|
62
|
-
FastqFile.open(@fname).each_record_fast(&b)
|
63
|
-
}.to yield_successive_args(records_fast[0], records_fast[1])
|
64
|
-
end
|
65
|
-
|
66
|
-
it "yields all params as String" do
|
67
|
-
FastqFile.open(@fname).each_record_fast do |h, s, d, q|
|
68
|
-
expect(h).to be_an_instance_of String
|
69
|
-
expect(s).to be_an_instance_of String
|
70
|
-
expect(d).to be_an_instance_of String
|
71
|
-
expect(q).to be_an_instance_of String
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
describe "#to_hash" do
|
77
|
-
let(:records) {
|
78
|
-
{ "seq1" => { head: "seq1",
|
79
|
-
seq: "AACCTTGG",
|
80
|
-
desc: "",
|
81
|
-
qual: ")#3gTqN8" },
|
82
|
-
"seq2 apples" => { head: "seq2 apples",
|
83
|
-
seq: "ACTG",
|
84
|
-
desc: "seq2 apples",
|
85
|
-
qual: "*ujM" }
|
86
|
-
}
|
87
|
-
}
|
88
|
-
let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
|
89
|
-
let(:fastq) { FastqFile.open(fname) }
|
90
|
-
|
91
|
-
it "reads the records into a hash: header as key and seq as val" do
|
92
|
-
expect(fastq.to_hash).to eq records
|
93
|
-
end
|
94
|
-
|
95
|
-
it "passes the seqs as Sequence objects" do
|
96
|
-
expect(
|
97
|
-
fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
|
98
|
-
).to eq true
|
99
|
-
end
|
100
|
-
|
101
|
-
it "passes the quals as Quality objects" do
|
102
|
-
expect(
|
103
|
-
fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
|
104
|
-
).to eq true
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
context "with a 4 line per record fastq file" do
|
109
|
-
describe "#each_record" do
|
110
|
-
context "with a gzipped file" do
|
111
|
-
before(:each) do
|
112
|
-
@fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz"
|
113
|
-
end
|
114
|
-
|
115
|
-
it_behaves_like "any FastqFile"
|
116
|
-
|
117
|
-
it "closes the GzipReader" do
|
118
|
-
expect(f_handle).to be_closed
|
119
|
-
end
|
120
|
-
|
121
|
-
it "returns GzipReader object" do
|
122
|
-
expect(f_handle).to be_an_instance_of Zlib::GzipReader
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
context "with a non-gzipped file" do
|
127
|
-
before(:each) do
|
128
|
-
@fname = "#{File.dirname(__FILE__)}/../../test_files/test.fq"
|
129
|
-
end
|
130
|
-
|
131
|
-
it_behaves_like "any FastqFile"
|
132
|
-
|
133
|
-
it "doesn't close the FastqFile (approx regular file behavior)" do
|
134
|
-
expect(f_handle).not_to be_closed
|
135
|
-
end
|
136
|
-
|
137
|
-
it "returns FastqFile object" do
|
138
|
-
expect(f_handle).to be_an_instance_of FastqFile
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
142
|
-
end
|
143
|
-
end
|
data/spec/lib/quality_spec.rb
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
# Copyright 2014, 2015 Ryan Moore
|
2
|
-
# Contact: moorer@udel.edu
|
3
|
-
#
|
4
|
-
# This file is part of parse_fasta.
|
5
|
-
#
|
6
|
-
# parse_fasta is free software: you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation, either version 3 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# parse_fasta is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
|
19
|
-
require 'spec_helper'
|
20
|
-
require 'bio'
|
21
|
-
|
22
|
-
describe Quality do
|
23
|
-
let(:qual_string) { qual_string = Quality.new('ab%63:K') }
|
24
|
-
let(:bioruby_qual_scores) do
|
25
|
-
Bio::Fastq.new("@seq1\nACTGACT\n+\n#{qual_string}").quality_scores
|
26
|
-
end
|
27
|
-
|
28
|
-
describe "::new" do
|
29
|
-
it "removes any spaces in the quality string" do
|
30
|
-
q = " ab # :m, ! "
|
31
|
-
q_no_spaces = "ab#:m,!"
|
32
|
-
expect(Quality.new(q)).to eq q_no_spaces
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
describe "#qual_scores" do
|
37
|
-
context "with illumina style quality scores" do
|
38
|
-
it "returns an array of quality scores" do
|
39
|
-
expect(qual_string.qual_scores).to eq bioruby_qual_scores
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
describe "#mean_qual" do
|
45
|
-
it "returns the mean quality for the quality string" do
|
46
|
-
len = qual_string.length.to_f
|
47
|
-
mean_quality = qual_string.qual_scores.reduce(:+) / len
|
48
|
-
expect(qual_string.mean_qual).to eq mean_quality
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|