parse_fasta 1.9.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/README.md +42 -215
  6. data/Rakefile +2 -4
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/lib/parse_fasta/error.rb +39 -0
  10. data/lib/parse_fasta/record.rb +88 -0
  11. data/lib/parse_fasta/seq_file.rb +221 -114
  12. data/lib/parse_fasta/version.rb +2 -2
  13. data/lib/parse_fasta.rb +5 -20
  14. data/spec/parse_fasta/record_spec.rb +115 -0
  15. data/spec/parse_fasta/seq_file_spec.rb +238 -0
  16. data/spec/parse_fasta_spec.rb +25 -0
  17. data/spec/spec_helper.rb +2 -44
  18. data/spec/test_files/cr.fa +1 -0
  19. data/spec/test_files/cr.fa.gz +0 -0
  20. data/spec/test_files/cr.fq +3 -0
  21. data/spec/test_files/cr.fq.gz +0 -0
  22. data/spec/test_files/cr_nl.fa +4 -0
  23. data/spec/test_files/cr_nl.fa.gz +0 -0
  24. data/spec/test_files/cr_nl.fq +8 -0
  25. data/spec/test_files/cr_nl.fq.gz +0 -0
  26. data/spec/test_files/multi_blob.fa.gz +0 -0
  27. data/spec/test_files/multi_blob.fq.gz +0 -0
  28. data/spec/test_files/not_a_seq_file.txt +1 -0
  29. data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
  30. data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
  31. data/spec/test_files/seqs.fa.gz +0 -0
  32. data/spec/test_files/seqs.fq +8 -0
  33. data/spec/test_files/seqs.fq.gz +0 -0
  34. metadata +49 -24
  35. data/lib/parse_fasta/fasta_file.rb +0 -232
  36. data/lib/parse_fasta/fastq_file.rb +0 -160
  37. data/lib/parse_fasta/quality.rb +0 -54
  38. data/lib/parse_fasta/sequence.rb +0 -174
  39. data/spec/lib/fasta_file_spec.rb +0 -212
  40. data/spec/lib/fastq_file_spec.rb +0 -143
  41. data/spec/lib/quality_spec.rb +0 -51
  42. data/spec/lib/seq_file_spec.rb +0 -357
  43. data/spec/lib/sequence_spec.rb +0 -188
  44. data/test_files/benchmark.rb +0 -99
  45. data/test_files/bogus.txt +0 -2
  46. data/test_files/test.fa.gz +0 -0
  47. data/test_files/test.fq +0 -8
  48. data/test_files/test.fq.gz +0 -0
@@ -0,0 +1,238 @@
1
+ # Copyright 2014 - 2016 Ryan Moore
2
+ # Contact: moorer@udel.edu
3
+ #
4
+ # This file is part of parse_fasta.
5
+ #
6
+ # parse_fasta is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # parse_fasta is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require "spec_helper"
20
+
21
+ module ParseFasta
22
+ describe SeqFile do
23
+ let(:test_dir) {
24
+ File.join File.dirname(__FILE__), "..", "test_files"
25
+ }
26
+
27
+ let(:fasta) {
28
+ File.join test_dir, "seqs.fa"
29
+ }
30
+ let(:fasta_gz) {
31
+ File.join test_dir, "seqs.fa.gz"
32
+ }
33
+ let(:fastq) {
34
+ File.join test_dir, "seqs.fq"
35
+ }
36
+ let(:fastq_gz) {
37
+ File.join test_dir, "seqs.fq.gz"
38
+ }
39
+
40
+ let(:fasta_records) {
41
+ [Record.new(header: "empty seq at beginning",
42
+ seq: ""),
43
+ Record.new(header: "seq1 is fun",
44
+ seq: "AACTGGNNN"),
45
+ Record.new(header: "seq2",
46
+ seq: "AATCCTGNNN"),
47
+ Record.new(header: "empty seq 1",
48
+ seq: ""),
49
+ Record.new(header: "empty seq 2",
50
+ seq: ""),
51
+ Record.new(header: "seq3",
52
+ seq: "yyyyyyyyyyyyyyyNNN"),
53
+ Record.new(header: "seq 4 > has many '>' in header",
54
+ seq: "ACTGactg"),
55
+ Record.new(header: "empty seq at end",
56
+ seq: "")]
57
+ }
58
+ let(:fastq_records) {
59
+ [Record.new(header: "seq1",
60
+ seq: "AA CC TT GG",
61
+ desc: "",
62
+ qual: ")# 3g Tq N8"),
63
+ Record.new(header: "seq2 @pples",
64
+ seq: "ACTG",
65
+ desc: "seq2 +pples",
66
+ qual: "*ujM")]
67
+ }
68
+
69
+ # to test the line endings
70
+ let(:line_endings_fastq_records) {
71
+ [Record.new(header: "apple", seq: "ACTG", desc: "", qual: "IIII"),
72
+ Record.new(header: "pie", seq: "AACC", desc: "", qual: "BBBB"),]
73
+ }
74
+ let(:line_endings_fasta_records) {
75
+ [Record.new(header: "apple", seq: "ACTG"),
76
+ Record.new(header: "pie", seq: "AACC"),]
77
+ }
78
+
79
+
80
+ describe "::open" do
81
+ context "when the file doesn't exist" do
82
+ it "raises FileNotFoundError" do
83
+ expect { SeqFile.open "arstoien" }.
84
+ to raise_error ParseFasta::Error::FileNotFoundError
85
+ end
86
+ end
87
+
88
+ context "when input looks like neither fastA or fastQ" do
89
+ it "raises a DataFormatError" do
90
+ fname = File.join test_dir, "not_a_seq_file.txt"
91
+
92
+ expect { SeqFile.open(fname) }.
93
+ to raise_error ParseFasta::Error::DataFormatError
94
+ end
95
+ end
96
+
97
+ context "when input looks like fastA" do
98
+ it "sets @type to :fasta" do
99
+ expect(SeqFile.open(fasta).type).to eq :fasta
100
+ end
101
+
102
+ it "sets @type to :fasta (gzipped)" do
103
+ expect(SeqFile.open(fasta_gz).type).to eq :fasta
104
+ end
105
+ end
106
+
107
+ context "when input looks like fastQ" do
108
+ it "sets @type to :fastq" do
109
+ expect(SeqFile.open(fastq).type).to eq :fastq
110
+ end
111
+
112
+ it "sets @type to :fastq (gzipped)" do
113
+ expect(SeqFile.open(fastq_gz).type).to eq :fastq
114
+ end
115
+ end
116
+
117
+ it "returns a SeqFile" do
118
+ expect(SeqFile.open fasta).to be_a SeqFile
119
+ end
120
+ end
121
+
122
+ describe "#each_record" do
123
+ shared_examples "it yields the records" do
124
+ it "yields the records" do
125
+ expect { |b| SeqFile.open(fname).each_record &b }.
126
+ to yield_successive_args(*records)
127
+ end
128
+ end
129
+
130
+ context "input is fastA" do
131
+ context "with gzipped fastA" do
132
+ let(:fname) { File.join test_dir, "seqs.fa.gz" }
133
+ let(:records) { fasta_records }
134
+
135
+ include_examples "it yields the records"
136
+ end
137
+
138
+ context "with gzipped fastA with multiple blobs" do
139
+ # e.g., $ gzip -c a.fa > c.fa.gz; gzip -c b.fa >> c.fa.gz
140
+ let(:fname) { File.join test_dir, "multi_blob.fa.gz" }
141
+ let(:records) { fasta_records + fasta_records }
142
+
143
+ include_examples "it yields the records"
144
+ end
145
+
146
+ context "with non-gzipped fastA" do
147
+ let(:fname) { File.join test_dir, "seqs.fa" }
148
+ let(:records) { fasta_records }
149
+
150
+
151
+ include_examples "it yields the records"
152
+ end
153
+ end
154
+
155
+ context "input is fastQ" do
156
+ context "with gzipped fastQ" do
157
+ let(:fname) { File.join test_dir, "seqs.fq.gz" }
158
+ let(:records) { fastq_records }
159
+
160
+ include_examples "it yields the records"
161
+ end
162
+
163
+ context "with gzipped fastQ with multiple blobs" do
164
+ # e.g., $ gzip -c a.fq > c.fq.gz; gzip -c b.fq >> c.fq.gz
165
+ let(:fname) { File.join test_dir, "multi_blob.fq.gz" }
166
+ let(:records) { fastq_records + fastq_records }
167
+
168
+ include_examples "it yields the records"
169
+ end
170
+
171
+ context "with non-gzipped fastQ" do
172
+ let(:fname) { File.join test_dir, "seqs.fq" }
173
+ let(:records) { fastq_records }
174
+
175
+ include_examples "it yields the records"
176
+ end
177
+ end
178
+
179
+ context "handles non newline line endings" do
180
+ context "fastQ, non-gz, carriage return only" do
181
+ let(:fname) { File.join test_dir, "cr.fq" }
182
+ let(:records) { line_endings_fastq_records }
183
+
184
+ include_examples "it yields the records"
185
+ end
186
+
187
+ context "fastQ, gz, carriage return only" do
188
+ let(:fname) { File.join test_dir, "cr.fq.gz" }
189
+ let(:records) { line_endings_fastq_records }
190
+
191
+ include_examples "it yields the records"
192
+ end
193
+
194
+ context "fastQ, non-gz, carriage return and newline" do
195
+ let(:fname) { File.join test_dir, "cr_nl.fq" }
196
+ let(:records) { line_endings_fastq_records }
197
+
198
+ include_examples "it yields the records"
199
+ end
200
+
201
+ context "fastQ, gz, carriage return and newline" do
202
+ let(:fname) { File.join test_dir, "cr_nl.fq.gz" }
203
+ let(:records) { line_endings_fastq_records }
204
+
205
+ include_examples "it yields the records"
206
+ end
207
+
208
+ context "fastA, non-gz, carriage return only" do
209
+ let(:fname) { File.join test_dir, "cr.fa" }
210
+ let(:records) { line_endings_fasta_records }
211
+
212
+ include_examples "it yields the records"
213
+ end
214
+
215
+ context "fastA, gz, carriage return only" do
216
+ let(:fname) { File.join test_dir, "cr.fa.gz" }
217
+ let(:records) { line_endings_fasta_records }
218
+
219
+ include_examples "it yields the records"
220
+ end
221
+
222
+ context "fastA, non-gz, carriage return and newline" do
223
+ let(:fname) { File.join test_dir, "cr_nl.fa" }
224
+ let(:records) { line_endings_fasta_records }
225
+
226
+ include_examples "it yields the records"
227
+ end
228
+
229
+ context "fastA, gz, carriage return and newline" do
230
+ let(:fname) { File.join test_dir, "cr_nl.fa.gz" }
231
+ let(:records) { line_endings_fasta_records }
232
+
233
+ include_examples "it yields the records"
234
+ end
235
+ end
236
+ end
237
+ end
238
+ end
@@ -0,0 +1,25 @@
1
+ # Copyright 2014 - 2016 Ryan Moore
2
+ # Contact: moorer@udel.edu
3
+ #
4
+ # This file is part of parse_fasta.
5
+ #
6
+ # parse_fasta is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # parse_fasta is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ require 'spec_helper'
20
+
21
+ describe ParseFasta do
22
+ it 'has a version number' do
23
+ expect(ParseFasta::VERSION).not_to be nil
24
+ end
25
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2014, 2015 Ryan Moore
1
+ # Copyright 2014 - 2016 Ryan Moore
2
2
  # Contact: moorer@udel.edu
3
3
  #
4
4
  # This file is part of parse_fasta.
@@ -19,47 +19,5 @@
19
19
  require 'coveralls'
20
20
  Coveralls.wear!
21
21
 
22
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
22
23
  require 'parse_fasta'
23
-
24
- module Helpers
25
-
26
- RECORDS = [["empty seq at beginning", ""],
27
- ["seq1 is fun", "AACTGGNNN"],
28
- ["seq2", "AATCCTGNNN"],
29
- ["empty seq 1", ""],
30
- ["empty seq 2", ""],
31
- ["seq3", "yyyyyyyyyyyyyyyNNN"],
32
- ["seq 4 > has many '>' in header", "ACTGactg"],
33
- ["empty seq at end", ""]]
34
-
35
- RECORDS_FAST = [["empty seq at beginning", ""],
36
- ["seq1 is fun", "AAC TGG NN N"],
37
- ["seq2", "AATCCTGNNN"],
38
- ["empty seq 1", ""],
39
- ["empty seq 2", ""],
40
- ["seq3", "yyyyyyyyyyyyyyyNNN"],
41
- ["seq 4 > has many '>' in header", "ACTGactg"],
42
- ["empty seq at end", ""]]
43
-
44
- RECORDS_MAP = {
45
- "empty seq at beginning" => "",
46
- "seq1 is fun" => "AACTGGNNN",
47
- "seq2" => "AATCCTGNNN",
48
- "empty seq 1" => "",
49
- "empty seq 2" => "",
50
- "seq3" => "yyyyyyyyyyyyyyyNNN",
51
- "seq 4 > has many '>' in header" => "ACTGactg",
52
- "empty seq at end" => ""
53
- }
54
-
55
- TRUTHY_RECORDS = [["empty seq at beginning", []],
56
- ["seq1 is fun", ["AACTGGNNN"]],
57
- ["seq2", ["AAT", "CCTGNNN"]],
58
- ["empty seq 1", []],
59
- ["empty seq 2", []],
60
- ["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]],
61
- ["seq 4 > has many '>' in header", ["ACTG" ,
62
- "actg"]],
63
- ["empty seq at end", []]]
64
-
65
- end
@@ -0,0 +1 @@
1
+ >apple
Binary file
@@ -0,0 +1 @@
1
+ @apple
2
+
3
+
Binary file
@@ -0,0 +1,4 @@
1
+ >apple
2
+ ACTG
3
+ >pie
4
+ AACC
Binary file
@@ -0,0 +1,8 @@
1
+ @apple
2
+ ACTG
3
+ +
4
+ IIII
5
+ @pie
6
+ AACC
7
+ +
8
+ BBBB
Binary file
Binary file
Binary file
@@ -0,0 +1 @@
1
+ this is not a seq file
File without changes
Binary file
@@ -0,0 +1,8 @@
1
+ @seq1
2
+ AA CC TT GG
3
+ +
4
+ )# 3g Tq N8
5
+ @seq2 @pples
6
+ ACTG
7
+ +seq2 +pples
8
+ *ujM
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.2
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-20 00:00:00.000000000 Z
11
+ date: 2016-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -112,39 +112,50 @@ description: Provides nice, programmatic access to fasta and fastq files, as wel
112
112
  as providing Sequence and Quality helper classes. No need for BioRuby ;)
113
113
  email:
114
114
  - moorer@udel.edu
115
- executables: []
115
+ executables:
116
+ - console
117
+ - setup
116
118
  extensions: []
117
119
  extra_rdoc_files: []
118
120
  files:
119
121
  - .coveralls.yml
120
122
  - .gitignore
123
+ - .rspec
121
124
  - .travis.yml
122
125
  - .yardopts
126
+ - CHANGELOG.md
123
127
  - COPYING
124
128
  - Gemfile
125
129
  - README.md
126
130
  - Rakefile
131
+ - bin/console
132
+ - bin/setup
127
133
  - lib/parse_fasta.rb
128
- - lib/parse_fasta/fasta_file.rb
129
- - lib/parse_fasta/fastq_file.rb
130
- - lib/parse_fasta/quality.rb
134
+ - lib/parse_fasta/error.rb
135
+ - lib/parse_fasta/record.rb
131
136
  - lib/parse_fasta/seq_file.rb
132
- - lib/parse_fasta/sequence.rb
133
137
  - lib/parse_fasta/version.rb
134
138
  - parse_fasta.gemspec
135
- - spec/lib/fasta_file_spec.rb
136
- - spec/lib/fastq_file_spec.rb
137
- - spec/lib/quality_spec.rb
138
- - spec/lib/seq_file_spec.rb
139
- - spec/lib/sequence_spec.rb
139
+ - spec/parse_fasta/record_spec.rb
140
+ - spec/parse_fasta/seq_file_spec.rb
141
+ - spec/parse_fasta_spec.rb
140
142
  - spec/spec_helper.rb
141
- - test_files/bad.fa
142
- - test_files/benchmark.rb
143
- - test_files/bogus.txt
144
- - test_files/test.fa
145
- - test_files/test.fa.gz
146
- - test_files/test.fq
147
- - test_files/test.fq.gz
143
+ - spec/test_files/cr.fa
144
+ - spec/test_files/cr.fa.gz
145
+ - spec/test_files/cr.fq
146
+ - spec/test_files/cr.fq.gz
147
+ - spec/test_files/cr_nl.fa
148
+ - spec/test_files/cr_nl.fa.gz
149
+ - spec/test_files/cr_nl.fq
150
+ - spec/test_files/cr_nl.fq.gz
151
+ - spec/test_files/multi_blob.fa.gz
152
+ - spec/test_files/multi_blob.fq.gz
153
+ - spec/test_files/not_a_seq_file.txt
154
+ - spec/test_files/poorly_catted.fa
155
+ - spec/test_files/seqs.fa
156
+ - spec/test_files/seqs.fa.gz
157
+ - spec/test_files/seqs.fq
158
+ - spec/test_files/seqs.fq.gz
148
159
  homepage: https://github.com/mooreryan/parse_fasta
149
160
  licenses:
150
161
  - ! 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
@@ -170,10 +181,24 @@ signing_key:
170
181
  specification_version: 4
171
182
  summary: Easy-peasy parsing of fasta & fastq files!
172
183
  test_files:
173
- - spec/lib/fasta_file_spec.rb
174
- - spec/lib/fastq_file_spec.rb
175
- - spec/lib/quality_spec.rb
176
- - spec/lib/seq_file_spec.rb
177
- - spec/lib/sequence_spec.rb
184
+ - spec/parse_fasta/record_spec.rb
185
+ - spec/parse_fasta/seq_file_spec.rb
186
+ - spec/parse_fasta_spec.rb
178
187
  - spec/spec_helper.rb
188
+ - spec/test_files/cr.fa
189
+ - spec/test_files/cr.fa.gz
190
+ - spec/test_files/cr.fq
191
+ - spec/test_files/cr.fq.gz
192
+ - spec/test_files/cr_nl.fa
193
+ - spec/test_files/cr_nl.fa.gz
194
+ - spec/test_files/cr_nl.fq
195
+ - spec/test_files/cr_nl.fq.gz
196
+ - spec/test_files/multi_blob.fa.gz
197
+ - spec/test_files/multi_blob.fq.gz
198
+ - spec/test_files/not_a_seq_file.txt
199
+ - spec/test_files/poorly_catted.fa
200
+ - spec/test_files/seqs.fa
201
+ - spec/test_files/seqs.fa.gz
202
+ - spec/test_files/seqs.fq
203
+ - spec/test_files/seqs.fq.gz
179
204
  has_rdoc: