parse_fasta 1.7.2 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +8 -1
- data/lib/parse_fasta/sequence.rb +35 -0
- data/lib/parse_fasta/version.rb +1 -1
- data/spec/lib/sequence_spec.rb +49 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MmIxNzgzM2E2MzkzYTc1ZjM2MWE0NzIxN2ZmZmRkMGUxMDQ0Y2MzYw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
Njc4ZTVlN2EwMTVjZDkwNWZmYTM2MzcwY2FkY2NlNjVmNmJiNWFmYw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZjNiOTc2NTM0OTc0M2U4NDNkYzhlNTBlNjM3MzI4Mjc4YWY4YmE4MjRhMDdj
|
10
|
+
MGRiNWNiMmExZTZjYmRhZDk0YzcxMTQ1OGQ0NzdkYTVmMmQ0Mzg1YmU0NmFl
|
11
|
+
NmNjYWM1MmY4ZWRhNTY5MDAzNTk1YzcyN2IzYWE2MTNkMzAwNDg=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
Mjk5ZjU3YWI3YTJlN2Q4NWJjMWY3NDczOTBhNzI3NzlkMDViZjRhZGFkN2I4
|
14
|
+
NmU5YzFhYTI4ZDc2N2RhYTE5ODdkODE5NTQ5ZjJmNzNmNjEyNzY3NzJiZTk3
|
15
|
+
ODA2MWI3YjUzZDdmMTE5MGM1MDA3ZTk1NmMyNGU3NjFmOTIyMWY=
|
data/README.md
CHANGED
@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
|
|
27
27
|
## Documentation ##
|
28
28
|
|
29
29
|
Check out
|
30
|
-
[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.
|
30
|
+
[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.8.0/frames)
|
31
31
|
for the full api documentation.
|
32
32
|
|
33
33
|
## Usage ##
|
@@ -66,6 +66,13 @@ Read fasta file into a hash.
|
|
66
66
|
|
67
67
|
## Versions ##
|
68
68
|
|
69
|
+
### 1.8 ###
|
70
|
+
|
71
|
+
Add `Sequence#rev_comp`. It can handle IUPAC characters. Since
|
72
|
+
`parse_fasta` doesn't check whether the seq is AA or NA, if called on
|
73
|
+
an amino acid string, things will get weird as it will complement the
|
74
|
+
IUPAC characters in the AA string and leave others.
|
75
|
+
|
69
76
|
### 1.7 ###
|
70
77
|
|
71
78
|
Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
|
data/lib/parse_fasta/sequence.rb
CHANGED
@@ -20,6 +20,15 @@
|
|
20
20
|
# nucleotide sequences.
|
21
21
|
class Sequence < String
|
22
22
|
|
23
|
+
# # Error raised if both T and U are present
|
24
|
+
# #
|
25
|
+
# # @note This is NOT checked on every call to Sequence.new
|
26
|
+
# class AmbiguousSequenceError < StandardError
|
27
|
+
# def message
|
28
|
+
# "Sequence is ambiguous -- both T and U present"
|
29
|
+
# end
|
30
|
+
# end
|
31
|
+
|
23
32
|
# Strips whitespace from the str argument before calling super
|
24
33
|
#
|
25
34
|
# @return [Sequence] A Sequence string
|
@@ -130,4 +139,30 @@ class Sequence < String
|
|
130
139
|
base_counts.map { |base, count| [base, count/total_bases] }.flatten
|
131
140
|
Hash[*base_freqs]
|
132
141
|
end
|
142
|
+
|
143
|
+
# Returns a reverse complement of self
|
144
|
+
#
|
145
|
+
# @return [Sequence] a Sequence that is the reverse complement of
|
146
|
+
# self
|
147
|
+
#
|
148
|
+
# @example Handles any IUPAC character and capitalization properly
|
149
|
+
# Sequence.new("gARKbdctymvhu").rev_comp #=> "adbkraghvMYTc"
|
150
|
+
#
|
151
|
+
# @example Leaves non IUPAC characters
|
152
|
+
# Sequence.new("cccc--CCCcccga").rev_comp #=> "tcgggGGG--gggg"
|
153
|
+
#
|
154
|
+
# @note If Sequence contains non-IUPAC characters, these are not
|
155
|
+
# complemented
|
156
|
+
def rev_comp
|
157
|
+
# if self.match(/T/i) && self.match(/U/i)
|
158
|
+
# raise Sequence::AmbiguousSequenceError
|
159
|
+
# end
|
160
|
+
|
161
|
+
# if self.match(/[^ATUGCYRSWKMBDHVN]/i)
|
162
|
+
# warn "WARNING: Sequence contains non IUPAC characters"
|
163
|
+
# end
|
164
|
+
|
165
|
+
self.reverse.tr("ATUGCYRSWKMBDHVNatugcyrswkmbdhvn",
|
166
|
+
"TAACGRYSWMKVHDBNtaacgryswmkvhdbn")
|
167
|
+
end
|
133
168
|
end
|
data/lib/parse_fasta/version.rb
CHANGED
data/spec/lib/sequence_spec.rb
CHANGED
@@ -21,6 +21,10 @@ require 'bio'
|
|
21
21
|
|
22
22
|
describe Sequence do
|
23
23
|
|
24
|
+
# it "has AmbiguousSequenceError" do
|
25
|
+
# expect(Sequence::AmbiguousSequenceError).not_to be nil
|
26
|
+
# end
|
27
|
+
|
24
28
|
it "inherits from String" do
|
25
29
|
expect(Sequence.new('ACTG')).to be_a String
|
26
30
|
end
|
@@ -128,4 +132,49 @@ describe Sequence do
|
|
128
132
|
end
|
129
133
|
end
|
130
134
|
end
|
135
|
+
|
136
|
+
describe "#rev_comp" do
|
137
|
+
# it "raises error if both T and U are present" do
|
138
|
+
# s = Sequence.new("actGU")
|
139
|
+
# err = Sequence::AmbiguousSequenceError
|
140
|
+
# msg = "Sequence is ambiguous -- both T and U present"
|
141
|
+
# expect { s.rev_comp }.to raise_error(err, msg)
|
142
|
+
# end
|
143
|
+
|
144
|
+
# it "warns if non iupac characters are present" do
|
145
|
+
# s = Sequence.new("--..9284ldkjfalsjf")
|
146
|
+
# msg = "WARNING: Sequence contains non IUPAC characters"
|
147
|
+
# expect(s).to receive(:warn).with(msg)
|
148
|
+
# s.rev_comp
|
149
|
+
# end
|
150
|
+
it "returns a reverse complement of the Sequence" do
|
151
|
+
s = Sequence.new("gARKbdctymvhu").rev_comp
|
152
|
+
expect(s).to eq "adbkraghvMYTc"
|
153
|
+
|
154
|
+
s = Sequence.new("ctyMVhgarKBda").rev_comp
|
155
|
+
expect(s).to eq "thVMytcdBKrag"
|
156
|
+
end
|
157
|
+
|
158
|
+
it "leaves non-IUPAC characters alone" do
|
159
|
+
s = Sequence.new("cccc--CCCcccga").rev_comp
|
160
|
+
expect(s).to eq "tcgggGGG--gggg"
|
161
|
+
end
|
162
|
+
|
163
|
+
it "returns a Sequence" do
|
164
|
+
s = Sequence.new("cccc--CCCcccga")
|
165
|
+
expect(s.rev_comp).to be_an_instance_of(Sequence)
|
166
|
+
end
|
167
|
+
|
168
|
+
it "gives back original sequence when called in succession" do
|
169
|
+
s = Sequence.new("cccc--CCCcccga")
|
170
|
+
expect(s.rev_comp.rev_comp).to eq s
|
171
|
+
end
|
172
|
+
|
173
|
+
context "with an empty sequence" do
|
174
|
+
it "returns an empty sequence" do
|
175
|
+
s = Sequence.new("")
|
176
|
+
expect(s.rev_comp).to be_empty
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
131
180
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|