parse_fasta 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -2
- data/CHANGELOG.md +4 -0
- data/README.md +23 -7
- data/lib/parse_fasta/error.rb +4 -0
- data/lib/parse_fasta/record.rb +100 -2
- data/lib/parse_fasta/version.rb +1 -1
- data/spec/parse_fasta/record_spec.rb +113 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b6a779e6af25bc031bbb8e4b0f0819723c5cb35
|
4
|
+
data.tar.gz: a919f21792fe97bdf9b8f0fe9064950f9fcaf19a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7fac0a0ca2c64235d5bf74221fd72373282e8f296045da23aec834350d6d2532d06847c1916b3ac22593c35a1e528239f5d015d3159d8054a3523af45fc6e299
|
7
|
+
data.tar.gz: e89410b0f7172737f811a68d5fc858b5cd0b60734b46715bb859c6a32cafbc95191b92fc49817807a079275df54d4a86a8538faca8386807c86429e559155672
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -59,13 +59,29 @@ The `Record#desc` and `Record#qual` will be `nil` if the file you are parsing is
|
|
59
59
|
```ruby
|
60
60
|
ParseFasta::SeqFile.open(ARGV[0]).each_record do |rec|
|
61
61
|
if rec.qual
|
62
|
-
|
63
|
-
puts rec.seq
|
64
|
-
puts "+#{rec.desc}"
|
65
|
-
puts rec.qual
|
62
|
+
# it's a fastQ record
|
66
63
|
else
|
67
|
-
|
68
|
-
puts rec.sequence
|
64
|
+
# it's a fastA record
|
69
65
|
end
|
70
66
|
end
|
71
|
-
```
|
67
|
+
```
|
68
|
+
|
69
|
+
You can also check this with `Record#fastq?`
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
ParseFasta::SeqFile.open(ARGV[0]).each_record do |rec|
|
73
|
+
if rec.fastq?
|
74
|
+
# it's a fastQ record
|
75
|
+
else
|
76
|
+
# it's a fastA record
|
77
|
+
end
|
78
|
+
end
|
79
|
+
```
|
80
|
+
|
81
|
+
There is also a handy `#to_s` method that does the right thing whether the record is fastA-like or fastQ-like. Check out the docs for info on the fancy `#to_fasta` and `#to_fastq` methods!
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
ParseFasta::SeqFile.open(ARGV[0]).each_record do |rec|
|
85
|
+
puts rec.to_s
|
86
|
+
end
|
87
|
+
```
|
data/lib/parse_fasta/error.rb
CHANGED
@@ -24,6 +24,10 @@ module ParseFasta
|
|
24
24
|
class ParseFastaError < StandardError
|
25
25
|
end
|
26
26
|
|
27
|
+
# Raised when a method has a bad argument
|
28
|
+
class ArgumentError < ParseFastaError
|
29
|
+
end
|
30
|
+
|
27
31
|
# Raised when the input file doesn't look like fastA or fastQ
|
28
32
|
class DataFormatError < ParseFastaError
|
29
33
|
end
|
data/lib/parse_fasta/record.rb
CHANGED
@@ -44,7 +44,7 @@ module ParseFasta
|
|
44
44
|
# @param desc [String] the description line of a fastQ record
|
45
45
|
# @param qual [String] the quality string of a fastQ record
|
46
46
|
#
|
47
|
-
# @raise [SequenceFormatError] if a fastA sequence has a '>'
|
47
|
+
# @raise [ParseFasta::Error::SequenceFormatError] if a fastA sequence has a '>'
|
48
48
|
# character in it
|
49
49
|
def initialize args = {}
|
50
50
|
@header = args.fetch :header
|
@@ -56,7 +56,7 @@ module ParseFasta
|
|
56
56
|
|
57
57
|
seq = args.fetch(:seq).gsub(/\s+/, "")
|
58
58
|
|
59
|
-
if
|
59
|
+
if fastq? # is fastQ
|
60
60
|
@seq = seq
|
61
61
|
else # is fastA
|
62
62
|
@seq = check_fasta_seq(seq)
|
@@ -73,6 +73,93 @@ module ParseFasta
|
|
73
73
|
self.desc == rec.desc && self.qual == rec.qual
|
74
74
|
end
|
75
75
|
|
76
|
+
# Return a fastA or fastQ record ready to print.
|
77
|
+
#
|
78
|
+
# If the Record is fastQ like then it returns a fastQ record
|
79
|
+
# string. If the record is fastA like, then it returns a fastA
|
80
|
+
# record string.
|
81
|
+
#
|
82
|
+
# @return [String] a printable sequence record
|
83
|
+
#
|
84
|
+
# @example When the record is fastA like
|
85
|
+
# rec = Record.new header: "Apple", seq: "ACTG"
|
86
|
+
# rec.to_s #=> ">Apple\nACTG"
|
87
|
+
#
|
88
|
+
# @example When the record is fastQ like
|
89
|
+
# rec = Record.new header: "Apple", seq: "ACTG", desc: "Hi", qual: "IIII"
|
90
|
+
# rec.to_s #=> "@Apple\nACTG\n+Hi\nIIII"
|
91
|
+
def to_s
|
92
|
+
if fastq?
|
93
|
+
to_fastq
|
94
|
+
else
|
95
|
+
to_fasta
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# Returns a fastA record ready to print.
|
100
|
+
#
|
101
|
+
# If the record is fastQ like, the desc and qual are dropped.
|
102
|
+
#
|
103
|
+
# @return [String] a printable fastA sequence record
|
104
|
+
#
|
105
|
+
# @example When the record is fastA like
|
106
|
+
# rec = Record.new header: "Apple", seq: "ACTG"
|
107
|
+
# rec.to_fasta #=> ">Apple\nACTG"
|
108
|
+
#
|
109
|
+
# @example When the record is fastQ like
|
110
|
+
# rec = Record.new header: "Apple", seq: "ACTG", desc: "Hi", qual: "IIII"
|
111
|
+
# rec.to_fasta #=> ">Apple\nACTG"
|
112
|
+
def to_fasta
|
113
|
+
">#{header}\n#{seq}"
|
114
|
+
end
|
115
|
+
|
116
|
+
# Returns a fastQ record ready to print.
|
117
|
+
#
|
118
|
+
# If the record is fastA like, the desc and qual can be specified.
|
119
|
+
#
|
120
|
+
# @return [String] a printable fastQ sequence record
|
121
|
+
#
|
122
|
+
# @example When the record is fastA like, no args
|
123
|
+
# rec = Record.new header: "Apple", seq: "ACTG"
|
124
|
+
# rec.to_fastq #=> "@Apple\nACTG\n+\nIIII"
|
125
|
+
#
|
126
|
+
# @example When the record is fastA like, desc and qual specified
|
127
|
+
# rec = Record.new header: "Apple", seq: "ACTG"
|
128
|
+
# rec.to_fastq desc: "Hi", qual: "A" #=> "@Apple\nACTG\n+Hi\nAAAA"
|
129
|
+
#
|
130
|
+
# @example When the record is fastA like, can specify fancy qual strings
|
131
|
+
# rec = Record.new header: "Apple", seq: "ACTGACTG"
|
132
|
+
# rec.to_fastq desc: "Hi", qual: "!a2" #=> "@Apple\nACTGACTG\n+Hi\n!a2!a2!a"
|
133
|
+
#
|
134
|
+
# @example When the record is fastQ like
|
135
|
+
# rec = Record.new header: "Apple", seq: "ACTG", desc: "Hi", qual: "IIII"
|
136
|
+
# rec.to_fastq #=> "@Apple\nACTG\n+Hi\nIIII"
|
137
|
+
#
|
138
|
+
# @raise [ParseFasta::Error::ArgumentError] if qual is ""
|
139
|
+
def to_fastq opts = {}
|
140
|
+
if fastq?
|
141
|
+
"@#{@header}\n#{@seq}\n+#{@desc}\n#{qual}"
|
142
|
+
else
|
143
|
+
qual = opts.fetch :qual, "I"
|
144
|
+
check_qual qual
|
145
|
+
|
146
|
+
desc = opts.fetch :desc, ""
|
147
|
+
|
148
|
+
qual_str = make_qual_str qual
|
149
|
+
|
150
|
+
"@#{@header}\n#{@seq}\n+#{desc}\n#{qual_str}"
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# Returns true if record is a fastQ record.
|
155
|
+
#
|
156
|
+
# This method returns true if the @qual instance variable is set.
|
157
|
+
#
|
158
|
+
# @return [true, nil] true if record is fastQ, nil if it is fastA
|
159
|
+
def fastq?
|
160
|
+
true if @qual
|
161
|
+
end
|
162
|
+
|
76
163
|
private
|
77
164
|
|
78
165
|
def check_fasta_seq seq
|
@@ -84,5 +171,16 @@ module ParseFasta
|
|
84
171
|
seq
|
85
172
|
end
|
86
173
|
end
|
174
|
+
|
175
|
+
def make_qual_str qual
|
176
|
+
(qual * (@seq.length / qual.length.to_f).ceil)[0, @seq.length]
|
177
|
+
end
|
178
|
+
|
179
|
+
def check_qual qual
|
180
|
+
if qual.length.zero?
|
181
|
+
raise ParseFasta::Error::ArgumentError,
|
182
|
+
":qual was '#{qual.inspect}', but it can't be empty"
|
183
|
+
end
|
184
|
+
end
|
87
185
|
end
|
88
186
|
end
|
data/lib/parse_fasta/version.rb
CHANGED
@@ -109,7 +109,120 @@ module ParseFasta
|
|
109
109
|
|
110
110
|
expect(rec1 == rec2).to eq false
|
111
111
|
end
|
112
|
+
end
|
113
|
+
|
114
|
+
describe "#fastq?" do
|
115
|
+
it "returns true if @qual is not nil" do
|
116
|
+
rec = Record.new header: "a", seq: "a", qual: "a"
|
117
|
+
|
118
|
+
expect(rec.fastq?).to eq true
|
119
|
+
end
|
120
|
+
|
121
|
+
it "returns nil if @qual is nil" do
|
122
|
+
rec = Record.new header: "a", seq: "a"
|
123
|
+
|
124
|
+
expect(rec.fastq?).to eq nil
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
describe "#to_s" do
|
129
|
+
context "when the record is fastA like" do
|
130
|
+
it "returns a string of the fastA record ready to print" do
|
131
|
+
rec = Record.new header: "apple", seq: "actg"
|
132
|
+
|
133
|
+
expect(rec.to_s).to eq ">apple\nactg"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
context "when the record is fastQ like" do
|
138
|
+
it "returns a string of the fastQ record ready to print" do
|
139
|
+
rec = Record.new header: "apple", seq: "actg", desc: "", qual: "IIII"
|
140
|
+
|
141
|
+
expect(rec.to_s).to eq "@apple\nactg\n+\nIIII"
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
describe "#to_fasta" do
|
147
|
+
context "when the record is fastA like" do
|
148
|
+
it "returns a string of the fastA record ready to print" do
|
149
|
+
rec = Record.new header: "apple", seq: "actg"
|
150
|
+
|
151
|
+
expect(rec.to_fasta).to eq ">apple\nactg"
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
context "when the record is fastQ like" do
|
156
|
+
it "returns a string of the fastQ record in fastA format" do
|
157
|
+
rec = Record.new header: "apple", seq: "actg", desc: "", qual: "IIII"
|
158
|
+
|
159
|
+
expect(rec.to_fasta).to eq ">apple\nactg"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
112
163
|
|
164
|
+
describe "#to_fastq" do
|
165
|
+
context "when the record is fastA like" do
|
166
|
+
let(:rec) {Record.new header: "apple", seq: "actg"}
|
167
|
+
|
168
|
+
it "has a default quality string (I) and description" do
|
169
|
+
expect(rec.to_fastq).to eq "@apple\nactg\n+\nIIII"
|
170
|
+
end
|
171
|
+
|
172
|
+
context "specifying values for qual string" do
|
173
|
+
it "can specify the qual string value (single value)" do
|
174
|
+
expect(rec.to_fastq qual: "A").to eq "@apple\nactg\n+\nAAAA"
|
175
|
+
end
|
176
|
+
|
177
|
+
it "can specify complex qual string values (uneven division)" do
|
178
|
+
seq = "actgactgactgactg"
|
179
|
+
qual = "1@c1@c1@c1@c1@c1"
|
180
|
+
rec = Record.new header: "apple", seq: seq
|
181
|
+
|
182
|
+
expect(rec.to_fastq qual: "1@c").to eq "@apple\n#{seq}\n+\n#{qual}"
|
183
|
+
end
|
184
|
+
|
185
|
+
it "can specify complex qual string values (even division)" do
|
186
|
+
seq = "actgactgactgactg"
|
187
|
+
qual = "a1a1a1a1a1a1a1a1"
|
188
|
+
rec = Record.new header: "apple", seq: seq
|
189
|
+
|
190
|
+
expect(rec.to_fastq qual: "a1").to eq "@apple\n#{seq}\n+\n#{qual}"
|
191
|
+
end
|
192
|
+
|
193
|
+
it "can specify complex qual string values (full length)" do
|
194
|
+
seq = "actgactgactgactg"
|
195
|
+
qual = "a1a1a1a1a1a1a1a1"
|
196
|
+
rec = Record.new header: "apple", seq: seq
|
197
|
+
|
198
|
+
expect(rec.to_fastq qual: qual).to eq "@apple\n#{seq}\n+\n#{qual}"
|
199
|
+
end
|
200
|
+
|
201
|
+
it "raises ParseFasta::Error::ArgumentError when :qual is empty" do
|
202
|
+
seq = "actgactgactgactg"
|
203
|
+
qual = "IIIIIIIIIIIIIIII"
|
204
|
+
rec = Record.new header: "apple", seq: seq
|
205
|
+
|
206
|
+
expect{rec.to_fastq qual: ""}.to raise_error ParseFasta::Error::ArgumentError
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
it "can specify the description" do
|
211
|
+
expect(rec.to_fastq desc: "pie").to eq "@apple\nactg\n+pie\nIIII"
|
212
|
+
end
|
213
|
+
|
214
|
+
it "can specify the both" do
|
215
|
+
expect(rec.to_fastq qual: "A", desc: "pie").to eq "@apple\nactg\n+pie\nAAAA"
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
context "when the record is fastQ like" do
|
220
|
+
it "returns a string of the fastQ format" do
|
221
|
+
rec = Record.new header: "apple", seq: "actg", desc: "", qual: "IIII"
|
222
|
+
|
223
|
+
expect(rec.to_fastq).to eq "@apple\nactg\n+\nIIII"
|
224
|
+
end
|
225
|
+
end
|
113
226
|
end
|
114
227
|
end
|
115
228
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|