parse_fasta 1.9.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +8 -8
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/README.md +42 -215
  6. data/Rakefile +2 -4
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/lib/parse_fasta/error.rb +39 -0
  10. data/lib/parse_fasta/record.rb +88 -0
  11. data/lib/parse_fasta/seq_file.rb +221 -114
  12. data/lib/parse_fasta/version.rb +2 -2
  13. data/lib/parse_fasta.rb +5 -20
  14. data/spec/parse_fasta/record_spec.rb +115 -0
  15. data/spec/parse_fasta/seq_file_spec.rb +238 -0
  16. data/spec/parse_fasta_spec.rb +25 -0
  17. data/spec/spec_helper.rb +2 -44
  18. data/spec/test_files/cr.fa +1 -0
  19. data/spec/test_files/cr.fa.gz +0 -0
  20. data/spec/test_files/cr.fq +3 -0
  21. data/spec/test_files/cr.fq.gz +0 -0
  22. data/spec/test_files/cr_nl.fa +4 -0
  23. data/spec/test_files/cr_nl.fa.gz +0 -0
  24. data/spec/test_files/cr_nl.fq +8 -0
  25. data/spec/test_files/cr_nl.fq.gz +0 -0
  26. data/spec/test_files/multi_blob.fa.gz +0 -0
  27. data/spec/test_files/multi_blob.fq.gz +0 -0
  28. data/spec/test_files/not_a_seq_file.txt +1 -0
  29. data/{test_files/bad.fa → spec/test_files/poorly_catted.fa} +0 -0
  30. data/{test_files/test.fa → spec/test_files/seqs.fa} +0 -0
  31. data/spec/test_files/seqs.fa.gz +0 -0
  32. data/spec/test_files/seqs.fq +8 -0
  33. data/spec/test_files/seqs.fq.gz +0 -0
  34. metadata +49 -24
  35. data/lib/parse_fasta/fasta_file.rb +0 -232
  36. data/lib/parse_fasta/fastq_file.rb +0 -160
  37. data/lib/parse_fasta/quality.rb +0 -54
  38. data/lib/parse_fasta/sequence.rb +0 -174
  39. data/spec/lib/fasta_file_spec.rb +0 -212
  40. data/spec/lib/fastq_file_spec.rb +0 -143
  41. data/spec/lib/quality_spec.rb +0 -51
  42. data/spec/lib/seq_file_spec.rb +0 -357
  43. data/spec/lib/sequence_spec.rb +0 -188
  44. data/test_files/benchmark.rb +0 -99
  45. data/test_files/bogus.txt +0 -2
  46. data/test_files/test.fa.gz +0 -0
  47. data/test_files/test.fq +0 -8
  48. data/test_files/test.fq.gz +0 -0
@@ -1,232 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'zlib'
20
-
21
- # Provides simple interface for parsing fasta format files. Gzipped
22
- # files are no problem.
23
- class FastaFile < File
24
-
25
- # Use it like IO::open
26
- #
27
- # @param fname [String] the name of the file to open
28
- #
29
- # @return [FastaFile] a FastaFile
30
- def self.open(fname, *args)
31
- begin
32
- handle = Zlib::GzipReader.open(fname)
33
- rescue Zlib::GzipFile::Error => e
34
- handle = File.open(fname)
35
- end
36
-
37
- unless handle.each_char.peek[0] == '>'
38
- raise ParseFasta::DataFormatError
39
- end
40
-
41
- handle.close
42
-
43
- super
44
- end
45
-
46
- # Returns the records in the fasta file as a hash map with the
47
- # headers as keys and the Sequences as values.
48
- #
49
- # @example Read a fastA into a hash table.
50
- # seqs = FastaFile.open('reads.fa').to_hash
51
- #
52
- # @return [Hash] A hash with headers as keys, sequences as the
53
- # values (Sequence objects)
54
- #
55
- # @raise [ParseFasta::SequenceFormatError] if sequence has a '>'
56
- def to_hash
57
- hash = {}
58
- self.each_record do |head, seq|
59
- hash[head] = seq
60
- end
61
-
62
- hash
63
- end
64
-
65
- # Analogous to IO#each_line, #each_record is used to go through a
66
- # fasta file record by record. It will accept gzipped files as well.
67
- #
68
- # @param separate_lines [Object] If truthy, separate lines of record
69
- # into an array of Sequences, but if falsy, yield a Sequence
70
- # object for the sequence instead.
71
- #
72
- # @example Parsing a fasta file (default behavior, gzip files are fine)
73
- # FastaFile.open('reads.fna.gz').each_record do |header, sequence|
74
- # puts [header, sequence.gc].join("\t")
75
- # end
76
- #
77
- # @example Parsing a fasta file (with truthy value param)
78
- # FastaFile.open('reads.fna').each_record(1) do |header, sequence|
79
- # # header => 'sequence_1'
80
- # # sequence => ['AACTG', 'AGTCGT', ... ]
81
- # end
82
- #
83
- # @yield The header and sequence for each record in the fasta
84
- # file to the block
85
- #
86
- # @yieldparam header [String] The header of the fasta record without
87
- # the leading '>'
88
- #
89
- # @yieldparam sequence [Sequence, Array<Sequence>] The sequence of the
90
- # fasta record. If `separate_lines` is falsy (the default
91
- # behavior), will be Sequence, but if truthy will be
92
- # Array<Sequence>.
93
- #
94
- # @raise [ParseFasta::SequenceFormatError] if sequence has a '>'
95
- def each_record(separate_lines=nil)
96
- begin
97
- f = Zlib::GzipReader.open(self)
98
- rescue Zlib::GzipFile::Error => e
99
- f = self
100
- end
101
-
102
- if separate_lines
103
- f.each("\n>") do |line|
104
- header, sequence = parse_line_separately(line)
105
- yield(header.strip, sequence)
106
- end
107
-
108
- # f.each_with_index(">") do |line, idx|
109
- # if idx.zero?
110
- # if line != ">"
111
- # raise ParseFasta::DataFormatError
112
- # end
113
- # else
114
- # header, sequence = parse_line_separately(line)
115
- # yield(header.strip, sequence)
116
- # end
117
- # end
118
- else
119
- header = ""
120
- sequence = ""
121
- f.each_line do |line|
122
- line.chomp!
123
- len = line.length
124
- if header.empty? && line.start_with?(">")
125
- header = line[1, len]
126
- elsif line.start_with?(">")
127
- yield(header.strip, Sequence.new(sequence || ""))
128
- header = line[1, len]
129
- sequence = ""
130
- else
131
- raise ParseFasta::SequenceFormatError if sequence.include? ">"
132
- sequence << line
133
- end
134
- end
135
- yield(header, Sequence.new(sequence || ""))
136
-
137
- # f.each("\n>") do |line|
138
- # header, sequence = parse_line(line)
139
- # yield(header.strip, Sequence.new(sequence || ""))
140
- # end
141
-
142
- # f.each_with_index(sep=/^>/) do |line, idx|
143
- # if idx.zero?
144
- # if line != ">"
145
- # raise ParseFasta::DataFormatError
146
- # end
147
- # else
148
- # header, sequence = parse_line(line)
149
- # yield(header.strip, Sequence.new(sequence || ""))
150
- # end
151
- # end
152
- end
153
-
154
- f.close if f.instance_of?(Zlib::GzipReader)
155
- return f
156
- end
157
-
158
- # Fast version of #each_record
159
- #
160
- # Yields the sequence as a String, not Sequence. No separate lines
161
- # option.
162
- #
163
- # @note If the fastA file has spaces in the sequence, they will be
164
- # retained. If this is a problem, use #each_record instead.
165
- #
166
- # @yield The header and sequence for each record in the fasta
167
- # file to the block
168
- #
169
- # @yieldparam header [String] The header of the fasta record without
170
- # the leading '>'
171
- #
172
- # @yieldparam sequence [String] The sequence of the fasta record
173
- #
174
- # @raise [ParseFasta::SequenceFormatError] if sequence has a '>'
175
- def each_record_fast
176
- begin
177
- f = Zlib::GzipReader.open(self)
178
- rescue Zlib::GzipFile::Error => e
179
- f = self
180
- end
181
-
182
- header = ""
183
- sequence = ""
184
- f.each_line do |line|
185
- line.chomp!
186
- len = line.length
187
- if header.empty? && line.start_with?(">")
188
- header = line[1, len]
189
- elsif line.start_with?(">")
190
- yield(header.strip, sequence)
191
- header = line[1, len]
192
- sequence = ""
193
- else
194
- raise ParseFasta::SequenceFormatError if sequence.include? ">"
195
- sequence << line
196
- end
197
- end
198
- yield(header, sequence)
199
-
200
- # f.each("\n>") do |line|
201
- # header, sequence = parse_line(line)
202
-
203
- # raise ParseFasta::SequenceFormatError if sequence.include? ">"
204
-
205
- # yield(header.strip, sequence)
206
- # end
207
-
208
- f.close if f.instance_of?(Zlib::GzipReader)
209
- return f
210
- end
211
-
212
- private
213
-
214
- def parse_line(line)
215
- line.split("\n", 2).map { |s| s.gsub(/\n|^>|>$/, '') }
216
- end
217
-
218
- def parse_line_separately(line)
219
- header, sequence =
220
- line.split("\n", 2).map { |s| s.gsub(/^>|>$/, '') }
221
-
222
- if sequence.nil?
223
- sequences = []
224
- else
225
- sequences = sequence.split("\n")
226
- .reject { |s| s.empty? }
227
- .map { |s| Sequence.new(s) }
228
- end
229
-
230
- [header, sequences]
231
- end
232
- end
@@ -1,160 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- require 'zlib'
20
-
21
- # Provides simple interface for parsing four-line-per-record fastq
22
- # format files. Gzipped files are no problem.
23
- class FastqFile < File
24
-
25
- # Returns the records in the fastq file as a hash map with the
26
- # headers as keys pointing to a hash map like so
27
- # { "seq1" => { head: "seq1", seq: "ACTG", desc: "", qual: "II3*"} }
28
- #
29
- # @example Read a fastQ into a hash table.
30
- # seqs = FastqFile.open('reads.fq.gz').to_hash
31
- #
32
- # @return [Hash] A hash with headers as keys, and a hash map as the
33
- # value with keys :head, :seq, :desc, :qual, for header, sequence,
34
- # description, and quality.
35
- def to_hash
36
- hash = {}
37
- self.each_record do |head, seq, desc, qual|
38
- hash[head] = { head: head, seq: seq, desc: desc, qual: qual }
39
- end
40
-
41
- hash
42
- end
43
-
44
- # Analogous to IO#each_line, #each_record is used to go through a
45
- # fastq file record by record. It will accept gzipped files as well.
46
- #
47
- # @example Parsing a fastq file
48
- # FastqFile.open('reads.fq').each_record do |head, seq, desc, qual|
49
- # # do some fun stuff here!
50
- # end
51
- # @example Use the same syntax for gzipped files!
52
- # FastqFile.open('reads.fq.gz').each_record do |head, seq, desc, qual|
53
- # # do some fun stuff here!
54
- # end
55
- #
56
- # @yield The header, sequence, description and quality string for
57
- # each record in the fastq file to the block
58
- # @yieldparam header [String] The header of the fastq record without
59
- # the leading '@'
60
- # @yieldparam sequence [Sequence] The sequence of the fastq record
61
- # @yieldparam description [String] The description line of the fastq
62
- # record without the leading '+'
63
- # @yieldparam quality_string [Quality] The quality string of the
64
- # fastq record
65
- def each_record
66
- count = 0
67
- header = ''
68
- sequence = ''
69
- description = ''
70
- quality = ''
71
-
72
- begin
73
- f = Zlib::GzipReader.open(self)
74
- rescue Zlib::GzipFile::Error => e
75
- f = self
76
- end
77
-
78
- f.each_line do |line|
79
- line.chomp!
80
-
81
- case count
82
- when 0
83
- header = line[1..-1]
84
- when 1
85
- sequence = Sequence.new(line)
86
- when 2
87
- description = line[1..-1]
88
- when 3
89
- count = -1
90
- quality = Quality.new(line)
91
- yield(header, sequence, description, quality)
92
- end
93
-
94
- count += 1
95
- end
96
-
97
- f.close if f.instance_of?(Zlib::GzipReader)
98
- return f
99
- end
100
-
101
- # Fast version of #each_record
102
- #
103
- # @note If the fastQ file has spaces in the sequence, they will be
104
- # retained. If this is a problem, use #each_record instead.
105
- #
106
- # @example Parsing a fastq file
107
- # FastqFile.open('reads.fq').each_record_fast do |head, seq, desc, qual|
108
- # # do some fun stuff here!
109
- # end
110
- # @example Use the same syntax for gzipped files!
111
- # FastqFile.open('reads.fq.gz').each_record_fast do |head, seq, desc, qual|
112
- # # do some fun stuff here!
113
- # end
114
- #
115
- # @yield The header, sequence, description and quality string for
116
- # each record in the fastq file to the block
117
- #
118
- # @yieldparam header [String] The header of the fastq record without
119
- # the leading '@'
120
- # @yieldparam sequence [String] The sequence of the fastq record
121
- # @yieldparam description [String] The description line of the fastq
122
- # record without the leading '+'
123
- # @yieldparam quality_string [String] The quality string of the
124
- # fastq record
125
- def each_record_fast
126
- count = 0
127
- header = ''
128
- sequence = ''
129
- description = ''
130
- quality = ''
131
-
132
- begin
133
- f = Zlib::GzipReader.open(self)
134
- rescue Zlib::GzipFile::Error => e
135
- f = self
136
- end
137
-
138
- f.each_line do |line|
139
- line.chomp!
140
-
141
- case count
142
- when 0
143
- header = line[1..-1]
144
- when 1
145
- sequence = line
146
- when 2
147
- description = line[1..-1]
148
- when 3
149
- count = -1
150
- quality = line
151
- yield(header, sequence, description, quality)
152
- end
153
-
154
- count += 1
155
- end
156
-
157
- f.close if f.instance_of?(Zlib::GzipReader)
158
- return f
159
- end
160
- end
@@ -1,54 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- # Provide some methods for dealing with common tasks regarding
20
- # quality strings.
21
- class Quality < String
22
-
23
- # Strips whitespace from the str argument before calling super
24
- #
25
- # @return [Quality] A Quality string
26
- #
27
- # @example Removes whitespace
28
- # Quality.new "I I 2 ! " #=> "II2!"
29
- def initialize(str)
30
- super(str.gsub(/ +/, ""))
31
- end
32
-
33
- # Returns the mean quality for the record. This will be a good deal
34
- # faster than getting the average with `qual_scores` and reduce.
35
- #
36
- # @example Get mean quality score for a record
37
- # Quality.new("!+5?I").mean_qual #=> 20.0
38
- #
39
- # @return [Float] Mean quality score for record
40
- def mean_qual
41
- (self.sum - (self.length * 33)) / self.length.to_f
42
- end
43
-
44
- # Returns an array of illumina style quality scores. The quality
45
- # scores generated will be Phred+33 (i.e., new Illumina).
46
- #
47
- # @example Get quality score array of a Quality
48
- # Quality.new("!+5?I").qual_scores #=> [0, 10, 20, 30, 40]
49
- #
50
- # @return [Array<Fixnum>] the quality scores
51
- def qual_scores
52
- self.each_byte.map { |b| b - 33 }
53
- end
54
- end
@@ -1,174 +0,0 @@
1
- # Copyright 2014, 2015 Ryan Moore
2
- # Contact: moorer@udel.edu
3
- #
4
- # This file is part of parse_fasta.
5
- #
6
- # parse_fasta is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # parse_fasta is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
-
19
- # Provide some methods for dealing with common tasks regarding
20
- # nucleotide sequences.
21
- class Sequence < String
22
-
23
- # # Error raised if both T and U are present
24
- # #
25
- # # @note This is NOT checked on every call to Sequence.new
26
- # class AmbiguousSequenceError < StandardError
27
- # def message
28
- # "Sequence is ambiguous -- both T and U present"
29
- # end
30
- # end
31
-
32
- # Strips whitespace from the str argument before calling super
33
- #
34
- # @return [Sequence] A Sequence string
35
- #
36
- # @example Removes whitespace
37
- # Sequence.new "AA CC TT" #=> "AACCTT"
38
- #
39
- # @raise [ParseFasta::SequenceFormatError] if sequence has a '>'
40
- def initialize(str)
41
- if str.match(/>/)
42
- raise ParseFasta::SequenceFormatError
43
- end
44
-
45
- super(str.gsub(/ +/, ""))
46
- end
47
-
48
- # Calculates GC content
49
- #
50
- # Calculates GC content as the count of G + C divided by the count
51
- # of G + C + T + A + U. If there are both T's and U's in the
52
- # Sequence, things will get weird, but then again, that wouldn't
53
- # happen, now would it! Ambiguous bases are ignored similar to
54
- # BioRuby.
55
- #
56
- # @example Get GC of a Sequence
57
- # Sequence.new('ACTg').gc #=> 0.5
58
- # @example Using with FastaFile#each_record
59
- # FastaFile.open('reads.fna', 'r').each_record do |header, sequence|
60
- # puts [header, sequence.gc].join("\t")
61
- # end
62
- #
63
- # @return [0] if the Sequence is empty or there are no A, C, T, G or U
64
- # present
65
- # @return [Float] if the GC content is defined for the Sequence
66
- def gc
67
- s = self.downcase
68
- c = s.count('c')
69
- g = s.count('g')
70
- t = s.count('t')
71
- a = s.count('a')
72
- u = s.count('u')
73
-
74
- return 0 if c + g + t + a + u == 0
75
- return (c + g) / (c + g + t + a + u).to_f
76
- end
77
-
78
- # Returns a map of base counts
79
- #
80
- # This method will check if the sequence is DNA or RNA and return a
81
- # count map appropriate for each. If a truthy argument is given, the
82
- # count of ambiguous bases will be returned as well.
83
- #
84
- # If a sequence has both T and U present, will warn the user and
85
- # keep going. Will return a map with counts of both, however.
86
- #
87
- # @example Get base counts of DNA sequence without ambiguous bases
88
- # Sequence.new('AcTGn').base_counts
89
- # #=> { a: 1, c: 1, t: 1, g: 1 }
90
- # @example Get base counts of DNA sequence with ambiguous bases
91
- # Sequence.new('AcTGn').base_counts(true)
92
- # #=> { a: 1, c: 1, t: 1, g: 1, n: 1 }
93
- # @example Get base counts of RNA sequence without ambiguous bases
94
- # Sequence.new('AcUGn').base_counts
95
- # #=> { a: 1, c: 1, u: 1, g: 1 }
96
- # @example Get base counts of RNA sequence with ambiguous bases
97
- # Sequence.new('AcUGn').base_counts(true)
98
- # #=> { a: 1, c: 1, u: 1, g: 1, n: 1 }
99
- #
100
- # @return [Hash] A hash with base as key, count as value
101
- def base_counts(count_ambiguous_bases=nil)
102
- s = self.downcase
103
- t = s.count('t')
104
- u = s.count('u')
105
- counts = { a: s.count('a'), c: s.count('c'), g: s.count('g') }
106
-
107
- if t > 0 && u == 0
108
- counts[:t] = t
109
- elsif t == 0 && u > 0
110
- counts[:u] = u
111
- elsif t > 0 && u > 0
112
- warn('ERROR: A sequence contains both T and U')
113
- counts[:t], counts[:u] = t, u
114
- end
115
-
116
- counts[:n] = s.count('n') if count_ambiguous_bases
117
-
118
- counts
119
- end
120
-
121
- # Returns a map of base frequencies
122
- #
123
- # Counts bases with the `base_counts` method, then divides each
124
- # count by the total bases counted to give frequency for each
125
- # base. If a truthy argument is given, ambiguous bases will be
126
- # included in the total and their frequency reported. Can discern
127
- # between DNA and RNA.
128
- #
129
- # If default or falsy argument is given, ambiguous bases will not be
130
- # counted in the total base count and their frequency will not be
131
- # given.
132
- #
133
- # @example Get base frequencies of DNA sequence without ambiguous bases
134
- # Sequence.new('AcTGn').base_frequencies
135
- # #=> { a: 0.25, c: 0.25, t: 0.25, g: 0.25 }
136
- # @example Get base frequencies of DNA sequence with ambiguous bases
137
- # Sequence.new('AcTGn').base_frequencies(true)
138
- # #=> { a: 0.2, c: 0.2, t: 0.2, g: 0.2, n: 0.2 }
139
- #
140
- # @return [Hash] A hash with base as key, frequency as value
141
- def base_frequencies(count_ambiguous_bases=nil)
142
- base_counts = self.base_counts(count_ambiguous_bases)
143
- total_bases = base_counts.values.reduce(:+).to_f
144
- base_freqs =
145
- base_counts.map { |base, count| [base, count/total_bases] }.flatten
146
- Hash[*base_freqs]
147
- end
148
-
149
- # Returns a reverse complement of self
150
- #
151
- # @return [Sequence] a Sequence that is the reverse complement of
152
- # self
153
- #
154
- # @example Handles any IUPAC character and capitalization properly
155
- # Sequence.new("gARKbdctymvhu").rev_comp #=> "adbkraghvMYTc"
156
- #
157
- # @example Leaves non IUPAC characters
158
- # Sequence.new("cccc--CCCcccga").rev_comp #=> "tcgggGGG--gggg"
159
- #
160
- # @note If Sequence contains non-IUPAC characters, these are not
161
- # complemented
162
- def rev_comp
163
- # if self.match(/T/i) && self.match(/U/i)
164
- # raise Sequence::AmbiguousSequenceError
165
- # end
166
-
167
- # if self.match(/[^ATUGCYRSWKMBDHVN]/i)
168
- # warn "WARNING: Sequence contains non IUPAC characters"
169
- # end
170
-
171
- self.reverse.tr("ATUGCYRSWKMBDHVNatugcyrswkmbdhvn",
172
- "TAACGRYSWMKVHDBNtaacgryswmkvhdbn")
173
- end
174
- end