parse_fasta 1.9.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/CHANGELOG.md +178 -0
- data/README.md +42 -215
- data/Rakefile +2 -4
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/parse_fasta/error.rb +39 -0
- data/lib/parse_fasta/record.rb +88 -0
- data/lib/parse_fasta/seq_file.rb +221 -114
- data/lib/parse_fasta/version.rb +2 -2
- data/lib/parse_fasta.rb +5 -20
- data/spec/parse_fasta/record_spec.rb +115 -0
- data/spec/parse_fasta/seq_file_spec.rb +238 -0
- data/spec/parse_fasta_spec.rb +25 -0
- data/spec/spec_helper.rb +2 -44
- data/spec/test_files/cr.fa +1 -0
- data/spec/test_files/cr.fa.gz +0 -0
- data/spec/test_files/cr.fq +3 -0
- data/spec/test_files/cr.fq.gz +0 -0
- data/spec/test_files/cr_nl.fa +4 -0
- data/spec/test_files/cr_nl.fa.gz +0 -0
- data/spec/test_files/cr_nl.fq +8 -0
- data/spec/test_files/cr_nl.fq.gz +0 -0
- data/spec/test_files/multi_blob.fa.gz +0 -0
- data/spec/test_files/multi_blob.fq.gz +0 -0
- data/spec/test_files/not_a_seq_file.txt +1 -0
- data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
- data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
- data/spec/test_files/seqs.fa.gz +0 -0
- data/spec/test_files/seqs.fq +8 -0
- data/spec/test_files/seqs.fq.gz +0 -0
- metadata +49 -24
- data/lib/parse_fasta/fasta_file.rb +0 -232
- data/lib/parse_fasta/fastq_file.rb +0 -160
- data/lib/parse_fasta/quality.rb +0 -54
- data/lib/parse_fasta/sequence.rb +0 -174
- data/spec/lib/fasta_file_spec.rb +0 -212
- data/spec/lib/fastq_file_spec.rb +0 -143
- data/spec/lib/quality_spec.rb +0 -51
- data/spec/lib/seq_file_spec.rb +0 -357
- data/spec/lib/sequence_spec.rb +0 -188
- data/test_files/benchmark.rb +0 -99
- data/test_files/bogus.txt +0 -2
- data/test_files/test.fa.gz +0 -0
- data/test_files/test.fq +0 -8
- data/test_files/test.fq.gz +0 -0
@@ -0,0 +1,238 @@
|
|
1
|
+
# Copyright 2014 - 2016 Ryan Moore
|
2
|
+
# Contact: moorer@udel.edu
|
3
|
+
#
|
4
|
+
# This file is part of parse_fasta.
|
5
|
+
#
|
6
|
+
# parse_fasta is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# parse_fasta is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require "spec_helper"
|
20
|
+
|
21
|
+
module ParseFasta
|
22
|
+
describe SeqFile do
|
23
|
+
let(:test_dir) {
|
24
|
+
File.join File.dirname(__FILE__), "..", "test_files"
|
25
|
+
}
|
26
|
+
|
27
|
+
let(:fasta) {
|
28
|
+
File.join test_dir, "seqs.fa"
|
29
|
+
}
|
30
|
+
let(:fasta_gz) {
|
31
|
+
File.join test_dir, "seqs.fa.gz"
|
32
|
+
}
|
33
|
+
let(:fastq) {
|
34
|
+
File.join test_dir, "seqs.fq"
|
35
|
+
}
|
36
|
+
let(:fastq_gz) {
|
37
|
+
File.join test_dir, "seqs.fq.gz"
|
38
|
+
}
|
39
|
+
|
40
|
+
let(:fasta_records) {
|
41
|
+
[Record.new(header: "empty seq at beginning",
|
42
|
+
seq: ""),
|
43
|
+
Record.new(header: "seq1 is fun",
|
44
|
+
seq: "AACTGGNNN"),
|
45
|
+
Record.new(header: "seq2",
|
46
|
+
seq: "AATCCTGNNN"),
|
47
|
+
Record.new(header: "empty seq 1",
|
48
|
+
seq: ""),
|
49
|
+
Record.new(header: "empty seq 2",
|
50
|
+
seq: ""),
|
51
|
+
Record.new(header: "seq3",
|
52
|
+
seq: "yyyyyyyyyyyyyyyNNN"),
|
53
|
+
Record.new(header: "seq 4 > has many '>' in header",
|
54
|
+
seq: "ACTGactg"),
|
55
|
+
Record.new(header: "empty seq at end",
|
56
|
+
seq: "")]
|
57
|
+
}
|
58
|
+
let(:fastq_records) {
|
59
|
+
[Record.new(header: "seq1",
|
60
|
+
seq: "AA CC TT GG",
|
61
|
+
desc: "",
|
62
|
+
qual: ")# 3g Tq N8"),
|
63
|
+
Record.new(header: "seq2 @pples",
|
64
|
+
seq: "ACTG",
|
65
|
+
desc: "seq2 +pples",
|
66
|
+
qual: "*ujM")]
|
67
|
+
}
|
68
|
+
|
69
|
+
# to test the line endings
|
70
|
+
let(:line_endings_fastq_records) {
|
71
|
+
[Record.new(header: "apple", seq: "ACTG", desc: "", qual: "IIII"),
|
72
|
+
Record.new(header: "pie", seq: "AACC", desc: "", qual: "BBBB"),]
|
73
|
+
}
|
74
|
+
let(:line_endings_fasta_records) {
|
75
|
+
[Record.new(header: "apple", seq: "ACTG"),
|
76
|
+
Record.new(header: "pie", seq: "AACC"),]
|
77
|
+
}
|
78
|
+
|
79
|
+
|
80
|
+
describe "::open" do
|
81
|
+
context "when the file doesn't exist" do
|
82
|
+
it "raises FileNotFoundError" do
|
83
|
+
expect { SeqFile.open "arstoien" }.
|
84
|
+
to raise_error ParseFasta::Error::FileNotFoundError
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
context "when input looks like neither fastA or fastQ" do
|
89
|
+
it "raises a DataFormatError" do
|
90
|
+
fname = File.join test_dir, "not_a_seq_file.txt"
|
91
|
+
|
92
|
+
expect { SeqFile.open(fname) }.
|
93
|
+
to raise_error ParseFasta::Error::DataFormatError
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
context "when input looks like fastA" do
|
98
|
+
it "sets @type to :fasta" do
|
99
|
+
expect(SeqFile.open(fasta).type).to eq :fasta
|
100
|
+
end
|
101
|
+
|
102
|
+
it "sets @type to :fasta (gzipped)" do
|
103
|
+
expect(SeqFile.open(fasta_gz).type).to eq :fasta
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
context "when input looks like fastQ" do
|
108
|
+
it "sets @type to :fastq" do
|
109
|
+
expect(SeqFile.open(fastq).type).to eq :fastq
|
110
|
+
end
|
111
|
+
|
112
|
+
it "sets @type to :fastq (gzipped)" do
|
113
|
+
expect(SeqFile.open(fastq_gz).type).to eq :fastq
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
it "returns a SeqFile" do
|
118
|
+
expect(SeqFile.open fasta).to be_a SeqFile
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
describe "#each_record" do
|
123
|
+
shared_examples "it yields the records" do
|
124
|
+
it "yields the records" do
|
125
|
+
expect { |b| SeqFile.open(fname).each_record &b }.
|
126
|
+
to yield_successive_args(*records)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
context "input is fastA" do
|
131
|
+
context "with gzipped fastA" do
|
132
|
+
let(:fname) { File.join test_dir, "seqs.fa.gz" }
|
133
|
+
let(:records) { fasta_records }
|
134
|
+
|
135
|
+
include_examples "it yields the records"
|
136
|
+
end
|
137
|
+
|
138
|
+
context "with gzipped fastA with multiple blobs" do
|
139
|
+
# e.g., $ gzip -c a.fa > c.fa.gz; gzip -c b.fa >> c.fa.gz
|
140
|
+
let(:fname) { File.join test_dir, "multi_blob.fa.gz" }
|
141
|
+
let(:records) { fasta_records + fasta_records }
|
142
|
+
|
143
|
+
include_examples "it yields the records"
|
144
|
+
end
|
145
|
+
|
146
|
+
context "with non-gzipped fastA" do
|
147
|
+
let(:fname) { File.join test_dir, "seqs.fa" }
|
148
|
+
let(:records) { fasta_records }
|
149
|
+
|
150
|
+
|
151
|
+
include_examples "it yields the records"
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
context "input is fastQ" do
|
156
|
+
context "with gzipped fastQ" do
|
157
|
+
let(:fname) { File.join test_dir, "seqs.fq.gz" }
|
158
|
+
let(:records) { fastq_records }
|
159
|
+
|
160
|
+
include_examples "it yields the records"
|
161
|
+
end
|
162
|
+
|
163
|
+
context "with gzipped fastQ with multiple blobs" do
|
164
|
+
# e.g., $ gzip -c a.fq > c.fq.gz; gzip -c b.fq >> c.fq.gz
|
165
|
+
let(:fname) { File.join test_dir, "multi_blob.fq.gz" }
|
166
|
+
let(:records) { fastq_records + fastq_records }
|
167
|
+
|
168
|
+
include_examples "it yields the records"
|
169
|
+
end
|
170
|
+
|
171
|
+
context "with non-gzipped fastQ" do
|
172
|
+
let(:fname) { File.join test_dir, "seqs.fq" }
|
173
|
+
let(:records) { fastq_records }
|
174
|
+
|
175
|
+
include_examples "it yields the records"
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
context "handles non newline line endings" do
|
180
|
+
context "fastQ, non-gz, carriage return only" do
|
181
|
+
let(:fname) { File.join test_dir, "cr.fq" }
|
182
|
+
let(:records) { line_endings_fastq_records }
|
183
|
+
|
184
|
+
include_examples "it yields the records"
|
185
|
+
end
|
186
|
+
|
187
|
+
context "fastQ, gz, carriage return only" do
|
188
|
+
let(:fname) { File.join test_dir, "cr.fq.gz" }
|
189
|
+
let(:records) { line_endings_fastq_records }
|
190
|
+
|
191
|
+
include_examples "it yields the records"
|
192
|
+
end
|
193
|
+
|
194
|
+
context "fastQ, non-gz, carriage return and newline" do
|
195
|
+
let(:fname) { File.join test_dir, "cr_nl.fq" }
|
196
|
+
let(:records) { line_endings_fastq_records }
|
197
|
+
|
198
|
+
include_examples "it yields the records"
|
199
|
+
end
|
200
|
+
|
201
|
+
context "fastQ, gz, carriage return and newline" do
|
202
|
+
let(:fname) { File.join test_dir, "cr_nl.fq.gz" }
|
203
|
+
let(:records) { line_endings_fastq_records }
|
204
|
+
|
205
|
+
include_examples "it yields the records"
|
206
|
+
end
|
207
|
+
|
208
|
+
context "fastA, non-gz, carriage return only" do
|
209
|
+
let(:fname) { File.join test_dir, "cr.fa" }
|
210
|
+
let(:records) { line_endings_fasta_records }
|
211
|
+
|
212
|
+
include_examples "it yields the records"
|
213
|
+
end
|
214
|
+
|
215
|
+
context "fastA, gz, carriage return only" do
|
216
|
+
let(:fname) { File.join test_dir, "cr.fa.gz" }
|
217
|
+
let(:records) { line_endings_fasta_records }
|
218
|
+
|
219
|
+
include_examples "it yields the records"
|
220
|
+
end
|
221
|
+
|
222
|
+
context "fastA, non-gz, carriage return and newline" do
|
223
|
+
let(:fname) { File.join test_dir, "cr_nl.fa" }
|
224
|
+
let(:records) { line_endings_fasta_records }
|
225
|
+
|
226
|
+
include_examples "it yields the records"
|
227
|
+
end
|
228
|
+
|
229
|
+
context "fastA, gz, carriage return and newline" do
|
230
|
+
let(:fname) { File.join test_dir, "cr_nl.fa.gz" }
|
231
|
+
let(:records) { line_endings_fasta_records }
|
232
|
+
|
233
|
+
include_examples "it yields the records"
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
238
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Copyright 2014 - 2016 Ryan Moore
|
2
|
+
# Contact: moorer@udel.edu
|
3
|
+
#
|
4
|
+
# This file is part of parse_fasta.
|
5
|
+
#
|
6
|
+
# parse_fasta is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# parse_fasta is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require 'spec_helper'
|
20
|
+
|
21
|
+
describe ParseFasta do
|
22
|
+
it 'has a version number' do
|
23
|
+
expect(ParseFasta::VERSION).not_to be nil
|
24
|
+
end
|
25
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2014
|
1
|
+
# Copyright 2014 - 2016 Ryan Moore
|
2
2
|
# Contact: moorer@udel.edu
|
3
3
|
#
|
4
4
|
# This file is part of parse_fasta.
|
@@ -19,47 +19,5 @@
|
|
19
19
|
require 'coveralls'
|
20
20
|
Coveralls.wear!
|
21
21
|
|
22
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
22
23
|
require 'parse_fasta'
|
23
|
-
|
24
|
-
module Helpers
|
25
|
-
|
26
|
-
RECORDS = [["empty seq at beginning", ""],
|
27
|
-
["seq1 is fun", "AACTGGNNN"],
|
28
|
-
["seq2", "AATCCTGNNN"],
|
29
|
-
["empty seq 1", ""],
|
30
|
-
["empty seq 2", ""],
|
31
|
-
["seq3", "yyyyyyyyyyyyyyyNNN"],
|
32
|
-
["seq 4 > has many '>' in header", "ACTGactg"],
|
33
|
-
["empty seq at end", ""]]
|
34
|
-
|
35
|
-
RECORDS_FAST = [["empty seq at beginning", ""],
|
36
|
-
["seq1 is fun", "AAC TGG NN N"],
|
37
|
-
["seq2", "AATCCTGNNN"],
|
38
|
-
["empty seq 1", ""],
|
39
|
-
["empty seq 2", ""],
|
40
|
-
["seq3", "yyyyyyyyyyyyyyyNNN"],
|
41
|
-
["seq 4 > has many '>' in header", "ACTGactg"],
|
42
|
-
["empty seq at end", ""]]
|
43
|
-
|
44
|
-
RECORDS_MAP = {
|
45
|
-
"empty seq at beginning" => "",
|
46
|
-
"seq1 is fun" => "AACTGGNNN",
|
47
|
-
"seq2" => "AATCCTGNNN",
|
48
|
-
"empty seq 1" => "",
|
49
|
-
"empty seq 2" => "",
|
50
|
-
"seq3" => "yyyyyyyyyyyyyyyNNN",
|
51
|
-
"seq 4 > has many '>' in header" => "ACTGactg",
|
52
|
-
"empty seq at end" => ""
|
53
|
-
}
|
54
|
-
|
55
|
-
TRUTHY_RECORDS = [["empty seq at beginning", []],
|
56
|
-
["seq1 is fun", ["AACTGGNNN"]],
|
57
|
-
["seq2", ["AAT", "CCTGNNN"]],
|
58
|
-
["empty seq 1", []],
|
59
|
-
["empty seq 2", []],
|
60
|
-
["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]],
|
61
|
-
["seq 4 > has many '>' in header", ["ACTG" ,
|
62
|
-
"actg"]],
|
63
|
-
["empty seq at end", []]]
|
64
|
-
|
65
|
-
end
|
@@ -0,0 +1 @@
|
|
1
|
+
>apple
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
this is not a seq file
|
File without changes
|
File without changes
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.2
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,39 +112,50 @@ description: Provides nice, programmatic access to fasta and fastq files, as wel
|
|
112
112
|
as providing Sequence and Quality helper classes. No need for BioRuby ;)
|
113
113
|
email:
|
114
114
|
- moorer@udel.edu
|
115
|
-
executables:
|
115
|
+
executables:
|
116
|
+
- console
|
117
|
+
- setup
|
116
118
|
extensions: []
|
117
119
|
extra_rdoc_files: []
|
118
120
|
files:
|
119
121
|
- .coveralls.yml
|
120
122
|
- .gitignore
|
123
|
+
- .rspec
|
121
124
|
- .travis.yml
|
122
125
|
- .yardopts
|
126
|
+
- CHANGELOG.md
|
123
127
|
- COPYING
|
124
128
|
- Gemfile
|
125
129
|
- README.md
|
126
130
|
- Rakefile
|
131
|
+
- bin/console
|
132
|
+
- bin/setup
|
127
133
|
- lib/parse_fasta.rb
|
128
|
-
- lib/parse_fasta/fasta_file.rb
|
129
|
-
- lib/parse_fasta/fastq_file.rb
|
130
|
-
- lib/parse_fasta/quality.rb
|
134
|
+
- lib/parse_fasta/error.rb
|
135
|
+
- lib/parse_fasta/record.rb
|
131
136
|
- lib/parse_fasta/seq_file.rb
|
132
|
-
- lib/parse_fasta/sequence.rb
|
133
137
|
- lib/parse_fasta/version.rb
|
134
138
|
- parse_fasta.gemspec
|
135
|
-
- spec/lib/fasta_file_spec.rb
|
136
|
-
- spec/lib/fastq_file_spec.rb
|
137
|
-
- spec/lib/quality_spec.rb
|
138
|
-
- spec/lib/seq_file_spec.rb
|
139
|
-
- spec/lib/sequence_spec.rb
|
139
|
+
- spec/parse_fasta/record_spec.rb
|
140
|
+
- spec/parse_fasta/seq_file_spec.rb
|
141
|
+
- spec/parse_fasta_spec.rb
|
140
142
|
- spec/spec_helper.rb
|
141
|
-
- test_files/bad.fa
|
142
|
-
- test_files/benchmark.rb
|
143
|
-
- test_files/bogus.txt
|
144
|
-
- test_files/test.fa
|
145
|
-
- test_files/test.fa.gz
|
146
|
-
- test_files/test.fq
|
147
|
-
- test_files/test.fq.gz
|
143
|
+
- spec/test_files/cr.fa
|
144
|
+
- spec/test_files/cr.fa.gz
|
145
|
+
- spec/test_files/cr.fq
|
146
|
+
- spec/test_files/cr.fq.gz
|
147
|
+
- spec/test_files/cr_nl.fa
|
148
|
+
- spec/test_files/cr_nl.fa.gz
|
149
|
+
- spec/test_files/cr_nl.fq
|
150
|
+
- spec/test_files/cr_nl.fq.gz
|
151
|
+
- spec/test_files/multi_blob.fa.gz
|
152
|
+
- spec/test_files/multi_blob.fq.gz
|
153
|
+
- spec/test_files/not_a_seq_file.txt
|
154
|
+
- spec/test_files/poorly_catted.fa
|
155
|
+
- spec/test_files/seqs.fa
|
156
|
+
- spec/test_files/seqs.fa.gz
|
157
|
+
- spec/test_files/seqs.fq
|
158
|
+
- spec/test_files/seqs.fq.gz
|
148
159
|
homepage: https://github.com/mooreryan/parse_fasta
|
149
160
|
licenses:
|
150
161
|
- ! 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
|
@@ -170,10 +181,24 @@ signing_key:
|
|
170
181
|
specification_version: 4
|
171
182
|
summary: Easy-peasy parsing of fasta & fastq files!
|
172
183
|
test_files:
|
173
|
-
- spec/lib/fasta_file_spec.rb
|
174
|
-
- spec/lib/fastq_file_spec.rb
|
175
|
-
- spec/lib/quality_spec.rb
|
176
|
-
- spec/lib/seq_file_spec.rb
|
177
|
-
- spec/lib/sequence_spec.rb
|
184
|
+
- spec/parse_fasta/record_spec.rb
|
185
|
+
- spec/parse_fasta/seq_file_spec.rb
|
186
|
+
- spec/parse_fasta_spec.rb
|
178
187
|
- spec/spec_helper.rb
|
188
|
+
- spec/test_files/cr.fa
|
189
|
+
- spec/test_files/cr.fa.gz
|
190
|
+
- spec/test_files/cr.fq
|
191
|
+
- spec/test_files/cr.fq.gz
|
192
|
+
- spec/test_files/cr_nl.fa
|
193
|
+
- spec/test_files/cr_nl.fa.gz
|
194
|
+
- spec/test_files/cr_nl.fq
|
195
|
+
- spec/test_files/cr_nl.fq.gz
|
196
|
+
- spec/test_files/multi_blob.fa.gz
|
197
|
+
- spec/test_files/multi_blob.fq.gz
|
198
|
+
- spec/test_files/not_a_seq_file.txt
|
199
|
+
- spec/test_files/poorly_catted.fa
|
200
|
+
- spec/test_files/seqs.fa
|
201
|
+
- spec/test_files/seqs.fa.gz
|
202
|
+
- spec/test_files/seqs.fq
|
203
|
+
- spec/test_files/seqs.fq.gz
|
179
204
|
has_rdoc:
|