parse_fasta 1.9.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/README.md +42 -215
  6. data/Rakefile +2 -4
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/lib/parse_fasta/error.rb +39 -0
  10. data/lib/parse_fasta/record.rb +88 -0
  11. data/lib/parse_fasta/seq_file.rb +221 -114
  12. data/lib/parse_fasta/version.rb +2 -2
  13. data/lib/parse_fasta.rb +5 -20
  14. data/spec/parse_fasta/record_spec.rb +115 -0
  15. data/spec/parse_fasta/seq_file_spec.rb +238 -0
  16. data/spec/parse_fasta_spec.rb +25 -0
  17. data/spec/spec_helper.rb +2 -44
  18. data/spec/test_files/cr.fa +1 -0
  19. data/spec/test_files/cr.fa.gz +0 -0
  20. data/spec/test_files/cr.fq +3 -0
  21. data/spec/test_files/cr.fq.gz +0 -0
  22. data/spec/test_files/cr_nl.fa +4 -0
  23. data/spec/test_files/cr_nl.fa.gz +0 -0
  24. data/spec/test_files/cr_nl.fq +8 -0
  25. data/spec/test_files/cr_nl.fq.gz +0 -0
  26. data/spec/test_files/multi_blob.fa.gz +0 -0
  27. data/spec/test_files/multi_blob.fq.gz +0 -0
  28. data/spec/test_files/not_a_seq_file.txt +1 -0
  29. data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
  30. data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
  31. data/spec/test_files/seqs.fa.gz +0 -0
  32. data/spec/test_files/seqs.fq +8 -0
  33. data/spec/test_files/seqs.fq.gz +0 -0
  34. metadata +49 -24
  35. data/lib/parse_fasta/fasta_file.rb +0 -232
  36. data/lib/parse_fasta/fastq_file.rb +0 -160
  37. data/lib/parse_fasta/quality.rb +0 -54
  38. data/lib/parse_fasta/sequence.rb +0 -174
  39. data/spec/lib/fasta_file_spec.rb +0 -212
  40. data/spec/lib/fastq_file_spec.rb +0 -143
  41. data/spec/lib/quality_spec.rb +0 -51
  42. data/spec/lib/seq_file_spec.rb +0 -357
  43. data/spec/lib/sequence_spec.rb +0 -188
  44. data/test_files/benchmark.rb +0 -99
  45. data/test_files/bogus.txt +0 -2
  46. data/test_files/test.fa.gz +0 -0
  47. data/test_files/test.fq +0 -8
  48. data/test_files/test.fq.gz +0 -0
@@ -1,232 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'zlib'
20
-
21
- # Provides simple interface for parsing fasta format files. Gzipped
22
- # files are no problem.
23
- class FastaFile < File
24
-
25
- # Use it like IO::open
26
- #
27
- # @param fname [String] the name of the file to open
28
- #
29
- # @return [FastaFile] a FastaFile
30
- def self.open(fname, *args)
31
- begin
32
- handle = Zlib::GzipReader.open(fname)
33
- rescue Zlib::GzipFile::Error => e
34
- handle = File.open(fname)
35
- end
36
-
37
- unless handle.each_char.peek[0] == '>'
38
- raise ParseFasta::DataFormatError
39
- end
40
-
41
- handle.close
42
-
43
- super
44
- end
45
-
46
- # Returns the records in the fasta file as a hash map with the
47
- # headers as keys and the Sequences as values.
48
- #
49
- # @example Read a fastA into a hash table.
50
- # seqs = FastaFile.open('reads.fa').to_hash
51
- #
52
- # @return [Hash] A hash with headers as keys, sequences as the
53
- # values (Sequence objects)
54
- #
55
- # @raise [ParseFasta::SequenceFormatError] if sequence has a '>'
56
- def to_hash
57
- hash = {}
58
- self.each_record do |head, seq|
59
- hash[head] = seq
60
- end
61
-
62
- hash
63
- end
64
-
65
- # Analogous to IO#each_line, #each_record is used to go through a
66
- # fasta file record by record. It will accept gzipped files as well.
67
- #
68
- # @param separate_lines [Object] If truthy, separate lines of record
69
- # into an array of Sequences, but if falsy, yield a Sequence
70
- # object for the sequence instead.
71
- #
72
- # @example Parsing a fasta file (default behavior, gzip files are fine)
73
- # FastaFile.open('reads.fna.gz').each_record do |header, sequence|
74
- # puts [header, sequence.gc].join("\t")
75
- # end
76
- #
77
- # @example Parsing a fasta file (with truthy value param)
78
- # FastaFile.open('reads.fna').each_record(1) do |header, sequence|
79
- # # header => 'sequence_1'
80
- # # sequence => ['AACTG', 'AGTCGT', ... ]
81
- # end
82
- #
83
- # @yield The header and sequence for each record in the fasta
84
- # file to the block
85
- #
86
- # @yieldparam header [String] The header of the fasta record without
87
- # the leading '>'
88
- #
89
- # @yieldparam sequence [Sequence, Array<Sequence>] The sequence of the
90
- # fasta record. If `separate_lines` is falsy (the default
91
- # behavior), will be Sequence, but if truthy will be
92
- # Array<Sequence>.
93
- #
94
- # @raise [ParseFasta::SequenceFormatError] if sequence has a '>'
95
- def each_record(separate_lines=nil)
96
- begin
97
- f = Zlib::GzipReader.open(self)
98
- rescue Zlib::GzipFile::Error => e
99
- f = self
100
- end
101
-
102
- if separate_lines
103
- f.each("\n>") do |line|
104
- header, sequence = parse_line_separately(line)
105
- yield(header.strip, sequence)
106
- end
107
-
108
- # f.each_with_index(">") do |line, idx|
109
- # if idx.zero?
110
- # if line != ">"
111
- # raise ParseFasta::DataFormatError
112
- # end
113
- # else
114
- # header, sequence = parse_line_separately(line)
115
- # yield(header.strip, sequence)
116
- # end
117
- # end
118
- else
119
- header = ""
120
- sequence = ""
121
- f.each_line do |line|
122
- line.chomp!
123
- len = line.length
124
- if header.empty? && line.start_with?(">")
125
- header = line[1, len]
126
- elsif line.start_with?(">")
127
- yield(header.strip, Sequence.new(sequence || ""))
128
- header = line[1, len]
129
- sequence = ""
130
- else
131
- raise ParseFasta::SequenceFormatError if sequence.include? ">"
132
- sequence << line
133
- end
134
- end
135
- yield(header, Sequence.new(sequence || ""))
136
-
137
- # f.each("\n>") do |line|
138
- # header, sequence = parse_line(line)
139
- # yield(header.strip, Sequence.new(sequence || ""))
140
- # end
141
-
142
- # f.each_with_index(sep=/^>/) do |line, idx|
143
- # if idx.zero?
144
- # if line != ">"
145
- # raise ParseFasta::DataFormatError
146
- # end
147
- # else
148
- # header, sequence = parse_line(line)
149
- # yield(header.strip, Sequence.new(sequence || ""))
150
- # end
151
- # end
152
- end
153
-
154
- f.close if f.instance_of?(Zlib::GzipReader)
155
- return f
156
- end
157
-
158
- # Fast version of #each_record
159
- #
160
- # Yields the sequence as a String, not Sequence. No separate lines
161
- # option.
162
- #
163
- # @note If the fastA file has spaces in the sequence, they will be
164
- # retained. If this is a problem, use #each_record instead.
165
- #
166
- # @yield The header and sequence for each record in the fasta
167
- # file to the block
168
- #
169
- # @yieldparam header [String] The header of the fasta record without
170
- # the leading '>'
171
- #
172
- # @yieldparam sequence [String] The sequence of the fasta record
173
- #
174
- # @raise [ParseFasta::SequenceFormatError] if sequence has a '>'
175
- def each_record_fast
176
- begin
177
- f = Zlib::GzipReader.open(self)
178
- rescue Zlib::GzipFile::Error => e
179
- f = self
180
- end
181
-
182
- header = ""
183
- sequence = ""
184
- f.each_line do |line|
185
- line.chomp!
186
- len = line.length
187
- if header.empty? && line.start_with?(">")
188
- header = line[1, len]
189
- elsif line.start_with?(">")
190
- yield(header.strip, sequence)
191
- header = line[1, len]
192
- sequence = ""
193
- else
194
- raise ParseFasta::SequenceFormatError if sequence.include? ">"
195
- sequence << line
196
- end
197
- end
198
- yield(header, sequence)
199
-
200
- # f.each("\n>") do |line|
201
- # header, sequence = parse_line(line)
202
-
203
- # raise ParseFasta::SequenceFormatError if sequence.include? ">"
204
-
205
- # yield(header.strip, sequence)
206
- # end
207
-
208
- f.close if f.instance_of?(Zlib::GzipReader)
209
- return f
210
- end
211
-
212
- private
213
-
214
- def parse_line(line)
215
- line.split("\n", 2).map { |s| s.gsub(/\n|^>|>$/, '') }
216
- end
217
-
218
- def parse_line_separately(line)
219
- header, sequence =
220
- line.split("\n", 2).map { |s| s.gsub(/^>|>$/, '') }
221
-
222
- if sequence.nil?
223
- sequences = []
224
- else
225
- sequences = sequence.split("\n")
226
- .reject { |s| s.empty? }
227
- .map { |s| Sequence.new(s) }
228
- end
229
-
230
- [header, sequences]
231
- end
232
- end
@@ -1,160 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'zlib'
20
-
21
- # Provides simple interface for parsing four-line-per-record fastq
22
- # format files. Gzipped files are no problem.
23
- class FastqFile < File
24
-
25
- # Returns the records in the fastq file as a hash map with the
26
- # headers as keys pointing to a hash map like so
27
- # { "seq1" => { head: "seq1", seq: "ACTG", desc: "", qual: "II3*"} }
28
- #
29
- # @example Read a fastQ into a hash table.
30
- # seqs = FastqFile.open('reads.fq.gz').to_hash
31
- #
32
- # @return [Hash] A hash with headers as keys, and a hash map as the
33
- # value with keys :head, :seq, :desc, :qual, for header, sequence,
34
- # description, and quality.
35
- def to_hash
36
- hash = {}
37
- self.each_record do |head, seq, desc, qual|
38
- hash[head] = { head: head, seq: seq, desc: desc, qual: qual }
39
- end
40
-
41
- hash
42
- end
43
-
44
- # Analogous to IO#each_line, #each_record is used to go through a
45
- # fastq file record by record. It will accept gzipped files as well.
46
- #
47
- # @example Parsing a fastq file
48
- # FastqFile.open('reads.fq').each_record do |head, seq, desc, qual|
49
- # # do some fun stuff here!
50
- # end
51
- # @example Use the same syntax for gzipped files!
52
- # FastqFile.open('reads.fq.gz').each_record do |head, seq, desc, qual|
53
- # # do some fun stuff here!
54
- # end
55
- #
56
- # @yield The header, sequence, description and quality string for
57
- # each record in the fastq file to the block
58
- # @yieldparam header [String] The header of the fastq record without
59
- # the leading '@'
60
- # @yieldparam sequence [Sequence] The sequence of the fastq record
61
- # @yieldparam description [String] The description line of the fastq
62
- # record without the leading '+'
63
- # @yieldparam quality_string [Quality] The quality string of the
64
- # fastq record
65
- def each_record
66
- count = 0
67
- header = ''
68
- sequence = ''
69
- description = ''
70
- quality = ''
71
-
72
- begin
73
- f = Zlib::GzipReader.open(self)
74
- rescue Zlib::GzipFile::Error => e
75
- f = self
76
- end
77
-
78
- f.each_line do |line|
79
- line.chomp!
80
-
81
- case count
82
- when 0
83
- header = line[1..-1]
84
- when 1
85
- sequence = Sequence.new(line)
86
- when 2
87
- description = line[1..-1]
88
- when 3
89
- count = -1
90
- quality = Quality.new(line)
91
- yield(header, sequence, description, quality)
92
- end
93
-
94
- count += 1
95
- end
96
-
97
- f.close if f.instance_of?(Zlib::GzipReader)
98
- return f
99
- end
100
-
101
- # Fast version of #each_record
102
- #
103
- # @note If the fastQ file has spaces in the sequence, they will be
104
- # retained. If this is a problem, use #each_record instead.
105
- #
106
- # @example Parsing a fastq file
107
- # FastqFile.open('reads.fq').each_record_fast do |head, seq, desc, qual|
108
- # # do some fun stuff here!
109
- # end
110
- # @example Use the same syntax for gzipped files!
111
- # FastqFile.open('reads.fq.gz').each_record_fast do |head, seq, desc, qual|
112
- # # do some fun stuff here!
113
- # end
114
- #
115
- # @yield The header, sequence, description and quality string for
116
- # each record in the fastq file to the block
117
- #
118
- # @yieldparam header [String] The header of the fastq record without
119
- # the leading '@'
120
- # @yieldparam sequence [String] The sequence of the fastq record
121
- # @yieldparam description [String] The description line of the fastq
122
- # record without the leading '+'
123
- # @yieldparam quality_string [String] The quality string of the
124
- # fastq record
125
- def each_record_fast
126
- count = 0
127
- header = ''
128
- sequence = ''
129
- description = ''
130
- quality = ''
131
-
132
- begin
133
- f = Zlib::GzipReader.open(self)
134
- rescue Zlib::GzipFile::Error => e
135
- f = self
136
- end
137
-
138
- f.each_line do |line|
139
- line.chomp!
140
-
141
- case count
142
- when 0
143
- header = line[1..-1]
144
- when 1
145
- sequence = line
146
- when 2
147
- description = line[1..-1]
148
- when 3
149
- count = -1
150
- quality = line
151
- yield(header, sequence, description, quality)
152
- end
153
-
154
- count += 1
155
- end
156
-
157
- f.close if f.instance_of?(Zlib::GzipReader)
158
- return f
159
- end
160
- end
@@ -1,54 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- # Provide some methods for dealing with common tasks regarding
20
- # quality strings.
21
- class Quality < String
22
-
23
- # Strips whitespace from the str argument before calling super
24
- #
25
- # @return [Quality] A Quality string
26
- #
27
- # @example Removes whitespace
28
- # Quality.new "I I 2 ! " #=> "II2!"
29
- def initialize(str)
30
- super(str.gsub(/ +/, ""))
31
- end
32
-
33
- # Returns the mean quality for the record. This will be a good deal
34
- # faster than getting the average with `qual_scores` and reduce.
35
- #
36
- # @example Get mean quality score for a record
37
- # Quality.new("!+5?I").mean_qual #=> 20.0
38
- #
39
- # @return [Float] Mean quality score for record
40
- def mean_qual
41
- (self.sum - (self.length * 33)) / self.length.to_f
42
- end
43
-
44
- # Returns an array of illumina style quality scores. The quality
45
- # scores generated will be Phred+33 (i.e., new Illumina).
46
- #
47
- # @example Get quality score array of a Quality
48
- # Quality.new("!+5?I").qual_scores #=> [0, 10, 20, 30, 40]
49
- #
50
- # @return [Array<Fixnum>] the quality scores
51
- def qual_scores
52
- self.each_byte.map { |b| b - 33 }
53
- end
54
- end
@@ -1,174 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- # Provide some methods for dealing with common tasks regarding
20
- # nucleotide sequences.
21
- class Sequence < String
22
-
23
- # # Error raised if both T and U are present
24
- # #
25
- # # @note This is NOT checked on every call to Sequence.new
26
- # class AmbiguousSequenceError < StandardError
27
- # def message
28
- # "Sequence is ambiguous -- both T and U present"
29
- # end
30
- # end
31
-
32
- # Strips whitespace from the str argument before calling super
33
- #
34
- # @return [Sequence] A Sequence string
35
- #
36
- # @example Removes whitespace
37
- # Sequence.new "AA CC TT" #=> "AACCTT"
38
- #
39
- # @raise [ParseFasta::SequenceFormatError] if sequence has a '>'
40
- def initialize(str)
41
- if str.match(/>/)
42
- raise ParseFasta::SequenceFormatError
43
- end
44
-
45
- super(str.gsub(/ +/, ""))
46
- end
47
-
48
- # Calculates GC content
49
- #
50
- # Calculates GC content by dividing the count of G + C by the count
51
- # of G + C + T + A + U. If there are both T's and U's in the
52
- # Sequence, things will get weird, but then again, that wouldn't
53
- # happen, now would it! Ambiguous bases are ignored similar to
54
- # BioRuby.
55
- #
56
- # @example Get GC of a Sequence
57
- # Sequence.new('ACTg').gc #=> 0.5
58
- # @example Using with FastaFile#each_record
59
- # FastaFile.open('reads.fna', 'r').each_record do |header, sequence|
60
- # puts [header, sequence.gc].join("\t")
61
- # end
62
- #
63
- # @return [0] if the Sequence is empty or there are no A, C, T, G or U
64
- # present
65
- # @return [Float] if the GC content is defined for the Sequence
66
- def gc
67
- s = self.downcase
68
- c = s.count('c')
69
- g = s.count('g')
70
- t = s.count('t')
71
- a = s.count('a')
72
- u = s.count('u')
73
-
74
- return 0 if c + g + t + a + u == 0
75
- return (c + g) / (c + g + t + a + u).to_f
76
- end
77
-
78
- # Returns a map of base counts
79
- #
80
- # This method will check if the sequence is DNA or RNA and return a
81
- # count map appropriate for each. If a truthy argument is given, the
82
- # count of ambiguous bases will be returned as well.
83
- #
84
- # If a sequence has both T and U present, will warn the user and
85
- # keep going. Will return a map with counts of both, however.
86
- #
87
- # @example Get base counts of DNA sequence without ambiguous bases
88
- # Sequence.new('AcTGn').base_counts
89
- # #=> { a: 1, c: 1, t: 1, g: 1 }
90
- # @example Get base counts of DNA sequence with ambiguous bases
91
- # Sequence.new('AcTGn').base_counts(true)
92
- # #=> { a: 1, c: 1, t: 1, g: 1, n: 1 }
93
- # @example Get base counts of RNA sequence without ambiguous bases
94
- # Sequence.new('AcUGn').base_counts
95
- # #=> { a: 1, c: 1, u: 1, g: 1 }
96
- # @example Get base counts of RNA sequence with ambiguous bases
97
- # Sequence.new('AcUGn').base_counts(true)
98
- # #=> { a: 1, c: 1, u: 1, g: 1, n: 1 }
99
- #
100
- # @return [Hash] A hash with base as key, count as value
101
- def base_counts(count_ambiguous_bases=nil)
102
- s = self.downcase
103
- t = s.count('t')
104
- u = s.count('u')
105
- counts = { a: s.count('a'), c: s.count('c'), g: s.count('g') }
106
-
107
- if t > 0 && u == 0
108
- counts[:t] = t
109
- elsif t == 0 && u > 0
110
- counts[:u] = u
111
- elsif t > 0 && u > 0
112
- warn('ERROR: A sequence contains both T and U')
113
- counts[:t], counts[:u] = t, u
114
- end
115
-
116
- counts[:n] = s.count('n') if count_ambiguous_bases
117
-
118
- counts
119
- end
120
-
121
- # Returns a map of base frequencies
122
- #
123
- # Counts bases with the `base_counts` method, then divides each
124
- # count by the total bases counted to give frequency for each
125
- # base. If a truthy argument is given, ambiguous bases will be
126
- # included in the total and their frequency reported. Can discern
127
- # between DNA and RNA.
128
- #
129
- # If default or falsy argument is given, ambiguous bases will not be
130
- # counted in the total base count and their frequency will not be
131
- # given.
132
- #
133
- # @example Get base frequencies of DNA sequence without ambiguous bases
134
- # Sequence.new('AcTGn').base_frequencies
135
- # #=> { a: 0.25, c: 0.25, t: 0.25, g: 0.25 }
136
- # @example Get base frequencies of DNA sequence with ambiguous bases
137
- # Sequence.new('AcTGn').base_frequencies(true)
138
- # #=> { a: 0.2, c: 0.2, t: 0.2, g: 0.2, n: 0.2 }
139
- #
140
- # @return [Hash] A hash with base as key, frequency as value
141
- def base_frequencies(count_ambiguous_bases=nil)
142
- base_counts = self.base_counts(count_ambiguous_bases)
143
- total_bases = base_counts.values.reduce(:+).to_f
144
- base_freqs =
145
- base_counts.map { |base, count| [base, count/total_bases] }.flatten
146
- Hash[*base_freqs]
147
- end
148
-
149
- # Returns a reverse complement of self
150
- #
151
- # @return [Sequence] a Sequence that is the reverse complement of
152
- # self
153
- #
154
- # @example Handles any IUPAC character and capitalization properly
155
- # Sequence.new("gARKbdctymvhu").rev_comp #=> "adbkraghvMYTc"
156
- #
157
- # @example Leaves non IUPAC characters
158
- # Sequence.new("cccc--CCCcccga").rev_comp #=> "tcgggGGG--gggg"
159
- #
160
- # @note If Sequence contains non-IUPAC characters, these are not
161
- # complemented
162
- def rev_comp
163
- # if self.match(/T/i) && self.match(/U/i)
164
- # raise Sequence::AmbiguousSequenceError
165
- # end
166
-
167
- # if self.match(/[^ATUGCYRSWKMBDHVN]/i)
168
- # warn "WARNING: Sequence contains non IUPAC characters"
169
- # end
170
-
171
- self.reverse.tr("ATUGCYRSWKMBDHVNatugcyrswkmbdhvn",
172
- "TAACGRYSWMKVHDBNtaacgryswmkvhdbn")
173
- end
174
- end