parse_fasta 1.7.2 → 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/README.md +8 -1
- data/lib/parse_fasta/sequence.rb +35 -0
- data/lib/parse_fasta/version.rb +1 -1
- data/spec/lib/sequence_spec.rb +49 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MmIxNzgzM2E2MzkzYTc1ZjM2MWE0NzIxN2ZmZmRkMGUxMDQ0Y2MzYw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
Njc4ZTVlN2EwMTVjZDkwNWZmYTM2MzcwY2FkY2NlNjVmNmJiNWFmYw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZjNiOTc2NTM0OTc0M2U4NDNkYzhlNTBlNjM3MzI4Mjc4YWY4YmE4MjRhMDdj
|
10
|
+
MGRiNWNiMmExZTZjYmRhZDk0YzcxMTQ1OGQ0NzdkYTVmMmQ0Mzg1YmU0NmFl
|
11
|
+
NmNjYWM1MmY4ZWRhNTY5MDAzNTk1YzcyN2IzYWE2MTNkMzAwNDg=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
Mjk5ZjU3YWI3YTJlN2Q4NWJjMWY3NDczOTBhNzI3NzlkMDViZjRhZGFkN2I4
|
14
|
+
NmU5YzFhYTI4ZDc2N2RhYTE5ODdkODE5NTQ5ZjJmNzNmNjEyNzY3NzJiZTk3
|
15
|
+
ODA2MWI3YjUzZDdmMTE5MGM1MDA3ZTk1NmMyNGU3NjFmOTIyMWY=
|
data/README.md
CHANGED
@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
|
|
27
27
|
## Documentation ##
|
28
28
|
|
29
29
|
Check out
|
30
|
-
[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.2/frames)
|
30
|
+
[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.8.0/frames)
|
31
31
|
for the full api documentation.
|
32
32
|
|
33
33
|
## Usage ##
|
@@ -66,6 +66,13 @@ Read fasta file into a hash.
|
|
66
66
|
|
67
67
|
## Versions ##
|
68
68
|
|
69
|
+
### 1.8 ###
|
70
|
+
|
71
|
+
Add `Sequence#rev_comp`. It can handle IUPAC characters. Since
|
72
|
+
`parse_fasta` doesn't check whether the seq is AA or NA, if called on
|
73
|
+
an amino acid string, things will get weird as it will complement the
|
74
|
+
IUPAC characters in the AA string and leave others.
|
75
|
+
|
69
76
|
### 1.7 ###
|
70
77
|
|
71
78
|
Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
|
data/lib/parse_fasta/sequence.rb
CHANGED
@@ -20,6 +20,15 @@
|
|
20
20
|
# nucleotide sequences.
|
21
21
|
class Sequence < String
|
22
22
|
|
23
|
+
# # Error raised if both T and U are present
|
24
|
+
# #
|
25
|
+
# # @note This is NOT checked on every call to Sequence.new
|
26
|
+
# class AmbiguousSequenceError < StandardError
|
27
|
+
# def message
|
28
|
+
# "Sequence is ambiguous -- both T and U present"
|
29
|
+
# end
|
30
|
+
# end
|
31
|
+
|
23
32
|
# Strips whitespace from the str argument before calling super
|
24
33
|
#
|
25
34
|
# @return [Sequence] A Sequence string
|
@@ -130,4 +139,30 @@ class Sequence < String
|
|
130
139
|
base_counts.map { |base, count| [base, count/total_bases] }.flatten
|
131
140
|
Hash[*base_freqs]
|
132
141
|
end
|
142
|
+
|
143
|
+
# Returns a reverse complement of self
|
144
|
+
#
|
145
|
+
# @return [Sequence] a Sequence that is the reverse complement of
|
146
|
+
# self
|
147
|
+
#
|
148
|
+
# @example Handles any IUPAC character and capitalization properly
|
149
|
+
# Sequence.new("gARKbdctymvhu").rev_comp #=> "adbkraghvMYTc"
|
150
|
+
#
|
151
|
+
# @example Leaves non IUPAC characters
|
152
|
+
# Sequence.new("cccc--CCCcccga").rev_comp #=> "tcgggGGG--gggg"
|
153
|
+
#
|
154
|
+
# @note If Sequence contains non-IUPAC characters, these are not
|
155
|
+
# complemented
|
156
|
+
def rev_comp
|
157
|
+
# if self.match(/T/i) && self.match(/U/i)
|
158
|
+
# raise Sequence::AmbiguousSequenceError
|
159
|
+
# end
|
160
|
+
|
161
|
+
# if self.match(/[^ATUGCYRSWKMBDHVN]/i)
|
162
|
+
# warn "WARNING: Sequence contains non IUPAC characters"
|
163
|
+
# end
|
164
|
+
|
165
|
+
self.reverse.tr("ATUGCYRSWKMBDHVNatugcyrswkmbdhvn",
|
166
|
+
"TAACGRYSWMKVHDBNtaacgryswmkvhdbn")
|
167
|
+
end
|
133
168
|
end
|
data/lib/parse_fasta/version.rb
CHANGED
data/spec/lib/sequence_spec.rb
CHANGED
@@ -21,6 +21,10 @@ require 'bio'
|
|
21
21
|
|
22
22
|
describe Sequence do
|
23
23
|
|
24
|
+
# it "has AmbiguousSequenceError" do
|
25
|
+
# expect(Sequence::AmbiguousSequenceError).not_to be nil
|
26
|
+
# end
|
27
|
+
|
24
28
|
it "inherits from String" do
|
25
29
|
expect(Sequence.new('ACTG')).to be_a String
|
26
30
|
end
|
@@ -128,4 +132,49 @@ describe Sequence do
|
|
128
132
|
end
|
129
133
|
end
|
130
134
|
end
|
135
|
+
|
136
|
+
describe "#rev_comp" do
|
137
|
+
# it "raises error if both T and U are present" do
|
138
|
+
# s = Sequence.new("actGU")
|
139
|
+
# err = Sequence::AmbiguousSequenceError
|
140
|
+
# msg = "Sequence is ambiguous -- both T and U present"
|
141
|
+
# expect { s.rev_comp }.to raise_error(err, msg)
|
142
|
+
# end
|
143
|
+
|
144
|
+
# it "warns if non iupac characters are present" do
|
145
|
+
# s = Sequence.new("--..9284ldkjfalsjf")
|
146
|
+
# msg = "WARNING: Sequence contains non IUPAC characters"
|
147
|
+
# expect(s).to receive(:warn).with(msg)
|
148
|
+
# s.rev_comp
|
149
|
+
# end
|
150
|
+
it "returns a reverse complement of the Sequence" do
|
151
|
+
s = Sequence.new("gARKbdctymvhu").rev_comp
|
152
|
+
expect(s).to eq "adbkraghvMYTc"
|
153
|
+
|
154
|
+
s = Sequence.new("ctyMVhgarKBda").rev_comp
|
155
|
+
expect(s).to eq "thVMytcdBKrag"
|
156
|
+
end
|
157
|
+
|
158
|
+
it "leaves non-IUPAC characters alone" do
|
159
|
+
s = Sequence.new("cccc--CCCcccga").rev_comp
|
160
|
+
expect(s).to eq "tcgggGGG--gggg"
|
161
|
+
end
|
162
|
+
|
163
|
+
it "returns a Sequence" do
|
164
|
+
s = Sequence.new("cccc--CCCcccga")
|
165
|
+
expect(s.rev_comp).to be_an_instance_of(Sequence)
|
166
|
+
end
|
167
|
+
|
168
|
+
it "gives back original sequence when called in succession" do
|
169
|
+
s = Sequence.new("cccc--CCCcccga")
|
170
|
+
expect(s.rev_comp.rev_comp).to eq s
|
171
|
+
end
|
172
|
+
|
173
|
+
context "with an empty sequence" do
|
174
|
+
it "returns an empty sequence" do
|
175
|
+
s = Sequence.new("")
|
176
|
+
expect(s.rev_comp).to be_empty
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
131
180
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.2
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|