parse_fasta 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/CHANGELOG.md +4 -0
- data/README.md +23 -7
- data/lib/parse_fasta/error.rb +4 -0
- data/lib/parse_fasta/record.rb +100 -2
- data/lib/parse_fasta/version.rb +1 -1
- data/spec/parse_fasta/record_spec.rb +113 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b6a779e6af25bc031bbb8e4b0f0819723c5cb35
|
4
|
+
data.tar.gz: a919f21792fe97bdf9b8f0fe9064950f9fcaf19a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7fac0a0ca2c64235d5bf74221fd72373282e8f296045da23aec834350d6d2532d06847c1916b3ac22593c35a1e528239f5d015d3159d8054a3523af45fc6e299
|
7
|
+
data.tar.gz: e89410b0f7172737f811a68d5fc858b5cd0b60734b46715bb859c6a32cafbc95191b92fc49817807a079275df54d4a86a8538faca8386807c86429e559155672
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -59,13 +59,29 @@ The `Record#desc` and `Record#qual` will be `nil` if the file you are parsing is
|
|
59
59
|
```ruby
|
60
60
|
ParseFasta::SeqFile.open(ARGV[0]).each_record do |rec|
|
61
61
|
if rec.qual
|
62
|
-
|
63
|
-
puts rec.seq
|
64
|
-
puts "+#{rec.desc}"
|
65
|
-
puts rec.qual
|
62
|
+
# it's a fastQ record
|
66
63
|
else
|
67
|
-
|
68
|
-
puts rec.sequence
|
64
|
+
# it's a fastA record
|
69
65
|
end
|
70
66
|
end
|
71
|
-
```
|
67
|
+
```
|
68
|
+
|
69
|
+
You can also check this with `Record#fastq?`
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
ParseFasta::SeqFile.open(ARGV[0]).each_record do |rec|
|
73
|
+
if rec.fastq?
|
74
|
+
# it's a fastQ record
|
75
|
+
else
|
76
|
+
# it's a fastA record
|
77
|
+
end
|
78
|
+
end
|
79
|
+
```
|
80
|
+
|
81
|
+
There is also a nice `#to_s` method that does what it should whether the record is fastA-like or fastQ-like. Check out the docs for info on the fancy `#to_fasta` and `#to_fastq` methods!
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
ParseFasta::SeqFile.open(ARGV[0]).each_record do |rec|
|
85
|
+
puts rec.to_s
|
86
|
+
end
|
87
|
+
```
|
data/lib/parse_fasta/error.rb
CHANGED
@@ -24,6 +24,10 @@ module ParseFasta
|
|
24
24
|
class ParseFastaError < StandardError
|
25
25
|
end
|
26
26
|
|
27
|
+
# Raised when a method has a bad argument
|
28
|
+
class ArgumentError < ParseFastaError
|
29
|
+
end
|
30
|
+
|
27
31
|
# Raised when the input file doesn't look like fastA or fastQ
|
28
32
|
class DataFormatError < ParseFastaError
|
29
33
|
end
|
data/lib/parse_fasta/record.rb
CHANGED
@@ -44,7 +44,7 @@ module ParseFasta
|
|
44
44
|
# @param desc [String] the description line of a fastQ record
|
45
45
|
# @param qual [String] the quality string of a fastQ record
|
46
46
|
#
|
47
|
-
# @raise [SequenceFormatError] if a fastA sequence has a '>'
|
47
|
+
# @raise [ParseFasta::Error::SequenceFormatError] if a fastA sequence has a '>'
|
48
48
|
# character in it
|
49
49
|
def initialize args = {}
|
50
50
|
@header = args.fetch :header
|
@@ -56,7 +56,7 @@ module ParseFasta
|
|
56
56
|
|
57
57
|
seq = args.fetch(:seq).gsub(/\s+/, "")
|
58
58
|
|
59
|
-
if
|
59
|
+
if fastq? # is fastQ
|
60
60
|
@seq = seq
|
61
61
|
else # is fastA
|
62
62
|
@seq = check_fasta_seq(seq)
|
@@ -73,6 +73,93 @@ module ParseFasta
|
|
73
73
|
self.desc == rec.desc && self.qual == rec.qual
|
74
74
|
end
|
75
75
|
|
76
|
+
# Return a fastA or fastQ record ready to print.
|
77
|
+
#
|
78
|
+
# If the Record is fastQ like then it returns a fastQ record
|
79
|
+
# string. If the record is fastA like, then it returns a fastA
|
80
|
+
# record string.
|
81
|
+
#
|
82
|
+
# @return [String] a printable sequence record
|
83
|
+
#
|
84
|
+
# @example When the record is fastA like
|
85
|
+
# rec = Record.new header: "Apple", seq: "ACTG"
|
86
|
+
# rec.to_s #=> ">Apple\nACTG"
|
87
|
+
#
|
88
|
+
# @example When the record is fastQ like
|
89
|
+
# rec = Record.new header: "Apple", seq: "ACTG", desc: "Hi", qual: "IIII"
|
90
|
+
# rec.to_s #=> "@Apple\nACTG\n+Hi\nIIII"
|
91
|
+
def to_s
|
92
|
+
if fastq?
|
93
|
+
to_fastq
|
94
|
+
else
|
95
|
+
to_fasta
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# Returns a fastA record ready to print.
|
100
|
+
#
|
101
|
+
# If the record is fastQ like, the desc and qual are dropped.
|
102
|
+
#
|
103
|
+
# @return [String] a printable fastA sequence record
|
104
|
+
#
|
105
|
+
# @example When the record is fastA like
|
106
|
+
# rec = Record.new header: "Apple", seq: "ACTG"
|
107
|
+
# rec.to_fasta #=> ">Apple\nACTG"
|
108
|
+
#
|
109
|
+
# @example When the record is fastQ like
|
110
|
+
# rec = Record.new header: "Apple", seq: "ACTG", desc: "Hi", qual: "IIII"
|
111
|
+
# rec.to_fasta #=> ">Apple\nACTG"
|
112
|
+
def to_fasta
|
113
|
+
">#{header}\n#{seq}"
|
114
|
+
end
|
115
|
+
|
116
|
+
# Returns a fastQ record ready to print.
|
117
|
+
#
|
118
|
+
# If the record is fastA like, the desc and qual can be specified.
|
119
|
+
#
|
120
|
+
# @return [String] a printable fastQ sequence record
|
121
|
+
#
|
122
|
+
# @example When the record is fastA like, no args
|
123
|
+
# rec = Record.new header: "Apple", seq: "ACTG"
|
124
|
+
# rec.to_fastq #=> "@Apple\nACTG\n+\nIIII"
|
125
|
+
#
|
126
|
+
# @example When the record is fastA like, desc and qual specified
|
127
|
+
# rec = Record.new header: "Apple", seq: "ACTG"
|
128
|
+
# rec.to_fastq desc: "Hi", qual: "A" #=> "@Apple\nACTG\n+Hi\nAAAA"
|
129
|
+
#
|
130
|
+
# @example When the record is fastA like, can specify fancy qual strings
|
131
|
+
# rec = Record.new header: "Apple", seq: "ACTGACTG"
|
132
|
+
# rec.to_fastq desc: "Hi", qual: "!a2" #=> "@Apple\nACTGACTG\n+Hi\n!a2!a2!a"
|
133
|
+
#
|
134
|
+
# @example When the record is fastQ like
|
135
|
+
# rec = Record.new header: "Apple", seq: "ACTG", desc: "Hi", qual: "IIII"
|
136
|
+
# rec.to_fastq #=> "@Apple\nACTG\n+Hi\nIIII"
|
137
|
+
#
|
138
|
+
# @raise [ParseFasta::Error::ArgumentError] if qual is ""
|
139
|
+
def to_fastq opts = {}
|
140
|
+
if fastq?
|
141
|
+
"@#{@header}\n#{@seq}\n+#{@desc}\n#{qual}"
|
142
|
+
else
|
143
|
+
qual = opts.fetch :qual, "I"
|
144
|
+
check_qual qual
|
145
|
+
|
146
|
+
desc = opts.fetch :desc, ""
|
147
|
+
|
148
|
+
qual_str = make_qual_str qual
|
149
|
+
|
150
|
+
"@#{@header}\n#{@seq}\n+#{desc}\n#{qual_str}"
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# Returns true if record is a fastQ record.
|
155
|
+
#
|
156
|
+
# This method returns true if the @qual instance variable is set.
|
157
|
+
#
|
158
|
+
# @return [true, nil] true if record is fastQ, nil if it is fastA
|
159
|
+
def fastq?
|
160
|
+
true if @qual
|
161
|
+
end
|
162
|
+
|
76
163
|
private
|
77
164
|
|
78
165
|
def check_fasta_seq seq
|
@@ -84,5 +171,16 @@ module ParseFasta
|
|
84
171
|
seq
|
85
172
|
end
|
86
173
|
end
|
174
|
+
|
175
|
+
def make_qual_str qual
|
176
|
+
(qual * (@seq.length / qual.length.to_f).ceil)[0, @seq.length]
|
177
|
+
end
|
178
|
+
|
179
|
+
def check_qual qual
|
180
|
+
if qual.length.zero?
|
181
|
+
raise ParseFasta::Error::ArgumentError,
|
182
|
+
":qual was '#{qual.inspect}', but it can't be empty"
|
183
|
+
end
|
184
|
+
end
|
87
185
|
end
|
88
186
|
end
|
data/lib/parse_fasta/version.rb
CHANGED
@@ -109,7 +109,120 @@ module ParseFasta
|
|
109
109
|
|
110
110
|
expect(rec1 == rec2).to eq false
|
111
111
|
end
|
112
|
+
end
|
113
|
+
|
114
|
+
describe "#fastq?" do
|
115
|
+
it "returns true if @qual is not nil" do
|
116
|
+
rec = Record.new header: "a", seq: "a", qual: "a"
|
117
|
+
|
118
|
+
expect(rec.fastq?).to eq true
|
119
|
+
end
|
120
|
+
|
121
|
+
it "returns nil if @qual is nil" do
|
122
|
+
rec = Record.new header: "a", seq: "a"
|
123
|
+
|
124
|
+
expect(rec.fastq?).to eq nil
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
describe "#to_s" do
|
129
|
+
context "when the record is fastA like" do
|
130
|
+
it "returns a string of the fastA record ready to print" do
|
131
|
+
rec = Record.new header: "apple", seq: "actg"
|
132
|
+
|
133
|
+
expect(rec.to_s).to eq ">apple\nactg"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
context "when the record is fastQ like" do
|
138
|
+
it "returns a string of the fastQ record ready to print" do
|
139
|
+
rec = Record.new header: "apple", seq: "actg", desc: "", qual: "IIII"
|
140
|
+
|
141
|
+
expect(rec.to_s).to eq "@apple\nactg\n+\nIIII"
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
describe "#to_fasta" do
|
147
|
+
context "when the record is fastA like" do
|
148
|
+
it "returns a string of the fastA record ready to print" do
|
149
|
+
rec = Record.new header: "apple", seq: "actg"
|
150
|
+
|
151
|
+
expect(rec.to_fasta).to eq ">apple\nactg"
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
context "when the record is fastQ like" do
|
156
|
+
it "returns a string of the fastQ record in fastA format" do
|
157
|
+
rec = Record.new header: "apple", seq: "actg", desc: "", qual: "IIII"
|
158
|
+
|
159
|
+
expect(rec.to_fasta).to eq ">apple\nactg"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
112
163
|
|
164
|
+
describe "#to_fastq" do
|
165
|
+
context "when the record is fastA like" do
|
166
|
+
let(:rec) {Record.new header: "apple", seq: "actg"}
|
167
|
+
|
168
|
+
it "has a default quality string (I) and description" do
|
169
|
+
expect(rec.to_fastq).to eq "@apple\nactg\n+\nIIII"
|
170
|
+
end
|
171
|
+
|
172
|
+
context "specifying values for qual string" do
|
173
|
+
it "can specify the qual string value (single value)" do
|
174
|
+
expect(rec.to_fastq qual: "A").to eq "@apple\nactg\n+\nAAAA"
|
175
|
+
end
|
176
|
+
|
177
|
+
it "can specify complex qual string values (uneven division)" do
|
178
|
+
seq = "actgactgactgactg"
|
179
|
+
qual = "1@c1@c1@c1@c1@c1"
|
180
|
+
rec = Record.new header: "apple", seq: seq
|
181
|
+
|
182
|
+
expect(rec.to_fastq qual: "1@c").to eq "@apple\n#{seq}\n+\n#{qual}"
|
183
|
+
end
|
184
|
+
|
185
|
+
it "can specify complex qual string values (even division)" do
|
186
|
+
seq = "actgactgactgactg"
|
187
|
+
qual = "a1a1a1a1a1a1a1a1"
|
188
|
+
rec = Record.new header: "apple", seq: seq
|
189
|
+
|
190
|
+
expect(rec.to_fastq qual: "a1").to eq "@apple\n#{seq}\n+\n#{qual}"
|
191
|
+
end
|
192
|
+
|
193
|
+
it "can specify complex qual string values (full length)" do
|
194
|
+
seq = "actgactgactgactg"
|
195
|
+
qual = "a1a1a1a1a1a1a1a1"
|
196
|
+
rec = Record.new header: "apple", seq: seq
|
197
|
+
|
198
|
+
expect(rec.to_fastq qual: qual).to eq "@apple\n#{seq}\n+\n#{qual}"
|
199
|
+
end
|
200
|
+
|
201
|
+
it "raises ParseFasta::Error::ArgumentError when :qual is empty" do
|
202
|
+
seq = "actgactgactgactg"
|
203
|
+
qual = "IIIIIIIIIIIIIIII"
|
204
|
+
rec = Record.new header: "apple", seq: seq
|
205
|
+
|
206
|
+
expect{rec.to_fastq qual: ""}.to raise_error ParseFasta::Error::ArgumentError
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
it "can specify the description" do
|
211
|
+
expect(rec.to_fastq desc: "pie").to eq "@apple\nactg\n+pie\nIIII"
|
212
|
+
end
|
213
|
+
|
214
|
+
it "can specify the both" do
|
215
|
+
expect(rec.to_fastq qual: "A", desc: "pie").to eq "@apple\nactg\n+pie\nAAAA"
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
context "when the record is fastQ like" do
|
220
|
+
it "returns a string of the fastQ format" do
|
221
|
+
rec = Record.new header: "apple", seq: "actg", desc: "", qual: "IIII"
|
222
|
+
|
223
|
+
expect(rec.to_fastq).to eq "@apple\nactg\n+\nIIII"
|
224
|
+
end
|
225
|
+
end
|
113
226
|
end
|
114
227
|
end
|
115
228
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|