parse_fasta 1.7.2 → 1.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NzY3MWVlYjVmYzM4NzhjZjIzNDUyYjFlNWZhMjMxMGE4NWQ3YTI4Yw==
4
+ MmIxNzgzM2E2MzkzYTc1ZjM2MWE0NzIxN2ZmZmRkMGUxMDQ0Y2MzYw==
5
5
  data.tar.gz: !binary |-
6
- OTVjMThlYzg0NzUwZTdmMzIyZmJlYjc5NjBiNDUzMmE2NjJjMGVkOA==
6
+ Njc4ZTVlN2EwMTVjZDkwNWZmYTM2MzcwY2FkY2NlNjVmNmJiNWFmYw==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ZTVkOTQ4MGMzMTQ4ZDg5MzczMDllMzgwNDY3Y2NmNzU4NDc5MzMwOGIwYzA3
10
- Y2FjMzQ1YzA4NDhiMjY0OWMyYjMyMWFhOWM5MTI5OGFiMTJiMDFhYmJlNTI3
11
- NjBkZDU5MzMzOTJlOGY0NzdiZWRiYWI5ZWE2YTBiZjJkYTk1ZTU=
9
+ ZjNiOTc2NTM0OTc0M2U4NDNkYzhlNTBlNjM3MzI4Mjc4YWY4YmE4MjRhMDdj
10
+ MGRiNWNiMmExZTZjYmRhZDk0YzcxMTQ1OGQ0NzdkYTVmMmQ0Mzg1YmU0NmFl
11
+ NmNjYWM1MmY4ZWRhNTY5MDAzNTk1YzcyN2IzYWE2MTNkMzAwNDg=
12
12
  data.tar.gz: !binary |-
13
- MWJkZjY5ZWY5MmYyNTJjMjIyODE0ZmQ4YWU4OTZlMTY5MzIyNGNlZGJkNjU2
14
- YjBjNmI5YTIyYTdmMGJhZmVlNTExZWJkN2U4OWRlMDE0NzM3MGI4ZGE4M2Rj
15
- MzNjODU3YzYzMDFmZDg1MjU1YzQ2MDgxODIwNmQ3ZDJkZDU3MWQ=
13
+ Mjk5ZjU3YWI3YTJlN2Q4NWJjMWY3NDczOTBhNzI3NzlkMDViZjRhZGFkN2I4
14
+ NmU5YzFhYTI4ZDc2N2RhYTE5ODdkODE5NTQ5ZjJmNzNmNjEyNzY3NzJiZTk3
15
+ ODA2MWI3YjUzZDdmMTE5MGM1MDA3ZTk1NmMyNGU3NjFmOTIyMWY=
data/README.md CHANGED
@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
27
27
  ## Documentation ##
28
28
 
29
29
  Check out
30
- [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.2/frames)
30
+ [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.8.0/frames)
31
31
  for the full api documentation.
32
32
 
33
33
  ## Usage ##
@@ -66,6 +66,13 @@ Read fasta file into a hash.
66
66
 
67
67
  ## Versions ##
68
68
 
69
+ ### 1.8 ###
70
+
71
+ Add `Sequence#rev_comp`. It can handle IUPAC characters. Since
72
+ `parse_fasta` doesn't check whether the seq is AA or NA, if called on
73
+ an amino acid string, things will get weird as it will complement the
74
+ IUPAC characters in the AA string and leave others.
75
+
69
76
  ### 1.7 ###
70
77
 
71
78
  Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
@@ -20,6 +20,15 @@
20
20
  # nucleotide sequences.
21
21
  class Sequence < String
22
22
 
23
+ # # Error raised if both T and U are present
24
+ # #
25
+ # # @note This is NOT checked on every call to Sequence.new
26
+ # class AmbiguousSequenceError < StandardError
27
+ # def message
28
+ # "Sequence is ambiguous -- both T and U present"
29
+ # end
30
+ # end
31
+
23
32
  # Strips whitespace from the str argument before calling super
24
33
  #
25
34
  # @return [Sequence] A Sequence string
@@ -130,4 +139,30 @@ class Sequence < String
130
139
  base_counts.map { |base, count| [base, count/total_bases] }.flatten
131
140
  Hash[*base_freqs]
132
141
  end
142
+
143
+ # Returns a reverse complement of self
144
+ #
145
+ # @return [Sequence] a Sequence that is the reverse complement of
146
+ # self
147
+ #
148
+ # @example Handles any IUPAC character and capitalization properly
149
+ # Sequence.new("gARKbdctymvhu").rev_comp #=> "adbkraghvMYTc"
150
+ #
151
+ # @example Leaves non IUPAC characters
152
+ # Sequence.new("cccc--CCCcccga").rev_comp #=> "tcgggGGG--gggg"
153
+ #
154
+ # @note If Sequence contains non-IUPAC characters, these are not
155
+ # complemented
156
+ def rev_comp
157
+ # if self.match(/T/i) && self.match(/U/i)
158
+ # raise Sequence::AmbiguousSequenceError
159
+ # end
160
+
161
+ # if self.match(/[^ATUGCYRSWKMBDHVN]/i)
162
+ # warn "WARNING: Sequence contains non IUPAC characters"
163
+ # end
164
+
165
+ self.reverse.tr("ATUGCYRSWKMBDHVNatugcyrswkmbdhvn",
166
+ "TAACGRYSWMKVHDBNtaacgryswmkvhdbn")
167
+ end
133
168
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.7.2"
20
+ VERSION = "1.8.0"
21
21
  end
@@ -21,6 +21,10 @@ require 'bio'
21
21
 
22
22
  describe Sequence do
23
23
 
24
+ # it "has AmbiguousSequenceError" do
25
+ # expect(Sequence::AmbiguousSequenceError).not_to be nil
26
+ # end
27
+
24
28
  it "inherits from String" do
25
29
  expect(Sequence.new('ACTG')).to be_a String
26
30
  end
@@ -128,4 +132,49 @@ describe Sequence do
128
132
  end
129
133
  end
130
134
  end
135
+
136
+ describe "#rev_comp" do
137
+ # it "raises error if both T and U are present" do
138
+ # s = Sequence.new("actGU")
139
+ # err = Sequence::AmbiguousSequenceError
140
+ # msg = "Sequence is ambiguous -- both T and U present"
141
+ # expect { s.rev_comp }.to raise_error(err, msg)
142
+ # end
143
+
144
+ # it "warns if non iupac characters are present" do
145
+ # s = Sequence.new("--..9284ldkjfalsjf")
146
+ # msg = "WARNING: Sequence contains non IUPAC characters"
147
+ # expect(s).to receive(:warn).with(msg)
148
+ # s.rev_comp
149
+ # end
150
+ it "returns a reverse complement of the Sequence" do
151
+ s = Sequence.new("gARKbdctymvhu").rev_comp
152
+ expect(s).to eq "adbkraghvMYTc"
153
+
154
+ s = Sequence.new("ctyMVhgarKBda").rev_comp
155
+ expect(s).to eq "thVMytcdBKrag"
156
+ end
157
+
158
+ it "leaves non-IUPAC characters alone" do
159
+ s = Sequence.new("cccc--CCCcccga").rev_comp
160
+ expect(s).to eq "tcgggGGG--gggg"
161
+ end
162
+
163
+ it "returns a Sequence" do
164
+ s = Sequence.new("cccc--CCCcccga")
165
+ expect(s.rev_comp).to be_an_instance_of(Sequence)
166
+ end
167
+
168
+ it "gives back original sequence when called in succession" do
169
+ s = Sequence.new("cccc--CCCcccga")
170
+ expect(s.rev_comp.rev_comp).to eq s
171
+ end
172
+
173
+ context "with an empty sequence" do
174
+ it "returns an empty sequence" do
175
+ s = Sequence.new("")
176
+ expect(s.rev_comp).to be_empty
177
+ end
178
+ end
179
+ end
131
180
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.2
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-19 00:00:00.000000000 Z
11
+ date: 2015-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler