bio-restriction_enzyme 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/.document +5 -0
  2. data/COPYING.txt +121 -0
  3. data/Gemfile +10 -0
  4. data/LICENSE.txt +7 -0
  5. data/README.rdoc +22 -0
  6. data/Rakefile +53 -0
  7. data/VERSION +1 -0
  8. data/bio-restriction_enzyme.gemspec +99 -0
  9. data/lib/bio-restriction_enzyme.rb +1 -0
  10. data/lib/bio/util/restriction_enzyme.rb +218 -0
  11. data/lib/bio/util/restriction_enzyme/analysis.rb +241 -0
  12. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +209 -0
  13. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +99 -0
  14. data/lib/bio/util/restriction_enzyme/double_stranded.rb +313 -0
  15. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +127 -0
  16. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +95 -0
  17. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +30 -0
  18. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +68 -0
  19. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +99 -0
  20. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +16 -0
  21. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +39 -0
  22. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +59 -0
  23. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +249 -0
  24. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +236 -0
  25. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +43 -0
  26. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +33 -0
  27. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +69 -0
  28. data/lib/bio/util/restriction_enzyme/single_strand.rb +193 -0
  29. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +127 -0
  30. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +15 -0
  31. data/lib/bio/util/restriction_enzyme/string_formatting.rb +103 -0
  32. data/test/bio-restriction_enzyme/analysis/test_calculated_cuts.rb +281 -0
  33. data/test/bio-restriction_enzyme/analysis/test_cut_ranges.rb +87 -0
  34. data/test/bio-restriction_enzyme/analysis/test_sequence_range.rb +223 -0
  35. data/test/bio-restriction_enzyme/double_stranded/test_aligned_strands.rb +84 -0
  36. data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair.rb +58 -0
  37. data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +56 -0
  38. data/test/bio-restriction_enzyme/double_stranded/test_cut_locations.rb +35 -0
  39. data/test/bio-restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +87 -0
  40. data/test/bio-restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +66 -0
  41. data/test/bio-restriction_enzyme/test_analysis.rb +228 -0
  42. data/test/bio-restriction_enzyme/test_cut_symbol.rb +27 -0
  43. data/test/bio-restriction_enzyme/test_double_stranded.rb +98 -0
  44. data/test/bio-restriction_enzyme/test_single_strand.rb +131 -0
  45. data/test/bio-restriction_enzyme/test_single_strand_complement.rb +131 -0
  46. data/test/bio-restriction_enzyme/test_string_formatting.rb +43 -0
  47. data/test/helper.rb +17 -0
  48. data/test/test_bio-restriction_enzyme.rb +21 -0
  49. metadata +153 -0
@@ -0,0 +1,241 @@
1
+ # bio/util/restriction_enzyme/analysis.rb - Does the work of fragmenting the DNA from the enzymes
2
+
3
+ require 'bio/util/restriction_enzyme'
4
+ require 'bio/util/restriction_enzyme/analysis_basic'
5
+
6
+ module Bio
7
+ class RestrictionEnzyme
8
+
9
+ class Analysis
10
+
11
+ # See cut instance method
12
def self.cut(sequence, *args)
  # Class-level shortcut: build a throwaway instance and forward every
  # argument unchanged to the instance-level +cut+.
  analysis = new
  analysis.cut(sequence, *args)
end
15
+
16
+ # See main documentation for Bio::RestrictionEnzyme
17
+ #
18
+ #
19
+ # +cut+ takes into account
20
+ # permutations of cut variations based on competitiveness of enzymes for an
21
+ # enzyme cutsite or enzyme bindsite on a sequence.
22
+ #
23
+ # Example:
24
+ #
25
+ # FIXME add output
26
+ #
27
+ # Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'EcoRI')
28
+ #
29
+ # _same as:_
30
+ #
31
+ # Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'g^aattc')
32
+ # ---
33
+ # *Arguments*
34
+ # * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
35
+ # * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
36
+ # *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments) or a +Symbol+ containing an error code
37
def cut(sequence, *args)
  # Whether the returned fragments should carry their range coordinates;
  # switched on by passing a Hash argument containing :view_ranges => true.
  show_ranges = false

  args.each do |arg|
    next unless arg.class == Hash

    arg.each do |key, value|
      next unless key == :view_ranges

      unless value.kind_of?(TrueClass) || value.kind_of?(FalseClass)
        raise ArgumentError, "view_ranges must be set to true or false, currently #{value.inspect}."
      end
      show_ranges = value
    end
  end

  permuted = cut_and_return_by_permutations(sequence, *args)

  # A Symbol is an error code and is handed back to the caller untouched;
  # anything else is formatted for the user.
  permuted.class == Symbol ? permuted : fragments_for_display(permuted, show_ranges)
end
56
+
57
+ #########
58
+ protected
59
+ #########
60
+
61
+ # See cut instance method
62
+ #
63
+ # ---
64
+ # *Arguments*
65
+ # * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
66
+ # * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
67
+ # May also supply a +Hash+ with the key ":max_permutations" to specify how many permutations are allowed - a value of 0 indicates no permutations are allowed.
68
+ # *Returns*:: +Hash+ Keys are a permutation ID, values are SequenceRange objects that have cuts applied.
69
+ # _also_ may return the +Symbol+ ':sequence_empty', ':no_cuts_found', or ':too_many_permutations'
70
def cut_and_return_by_permutations(sequence, *args)
  # Separates option hashes from the enzymes, validates the options, then
  # builds one SequenceRange per ordering of the competing enzyme actions.
  ranges_by_permutation = {}
  maximum_permutations = nil

  option_hashes, enzymes = args.partition { |arg| arg.class == Hash }

  option_hashes.each do |hsh|
    hsh.each do |key, value|
      case key
      when :max_permutations, 'max_permutations', :maximum_permutations, 'maximum_permutations'
        maximum_permutations = value.to_i if value != nil
      when :view_ranges
        # accepted here, but nothing in this method uses it
      else
        raise ArgumentError, "Received key #{key.inspect} in argument - I only know the key ':max_permutations' and ':view_ranges' currently. Hash passed: #{hsh.inspect}"
      end
    end
  end

  unless sequence.kind_of?(String) && !sequence.empty?
    logger.warn "The supplied sequence is empty." if defined?(logger)
    return :sequence_empty
  end
  strand = Bio::Sequence::NA.new(sequence)

  # First array: actions that compete with each other. Second array: actions
  # that always cut and are therefore applied in every permutation.
  enzyme_actions, uncontested = create_enzyme_actions(strand, *enzymes)

  if enzyme_actions.empty? && uncontested.empty?
    logger.warn "This enzyme does not make any cuts on this sequence." if defined?(logger)
    return :no_cuts_found
  end

  # The permutation count is treated as (number of competing actions - 1):
  # one competing action means none, two mean one, three mean two, and so on.
  if maximum_permutations && enzyme_actions.size > 1 &&
     (enzyme_actions.size - 1) > maximum_permutations.to_i
    logger.warn "More permutations than maximum, skipping. Found: #{enzyme_actions.size-1} Max: #{maximum_permutations.to_i}" if defined?(logger)
    return :too_many_permutations
  end

  # Primary and complement strands are both measured from 0 to size-1.
  last_index = strand.size - 1

  # Builds a range spanning the whole sequence with every uncontested cut
  # already applied.
  fresh_range = lambda do
    range = Bio::RestrictionEnzyme::Range::SequenceRange.new(0, 0, last_index, last_index)
    uncontested.each do |action|
      action.cut_ranges.each { |cut_range| range.add_cut_range(cut_range) }
    end
    range
  end

  # Gives the range the source bases it needs to produce fragments.
  attach_strands = lambda do |range|
    range.fragments.primary = strand
    range.fragments.complement = strand.forward_complement
    range
  end

  if enzyme_actions.size > 1
    permute(enzyme_actions.size).each do |permutation|
      applied_cut_ranges = []
      sequence_range = fresh_range.call

      permutation.each do |id|
        enzyme_action = enzyme_actions[id]

        # conflict stays false while this action may still cut; it turns true
        # when an earlier action in this ordering has already cut somewhere
        # this action needs an intact recognition site.
        conflict = false

        # Every earlier cut range is inspected, because this action may sit
        # in the middle of a previous one.
        applied_cut_ranges.each do |cut_range|
          # horizontal cuts are of no concern here
          next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange

          cut_left  = cut_range.range.first
          cut_right = cut_range.range.last

          # A cut location sits immediately to the right of the base at its
          # index (at^gc => index 1), whereas an enzyme action's location is
          # the base at the index itself.
          left_of_cut  = enzyme_action.right <= cut_left
          right_of_cut = enzyme_action.left > cut_right
          between_cuts = enzyme_action.left > cut_left && enzyme_action.right <= cut_right

          conflict = true unless left_of_cut || right_of_cut || between_cuts
        end

        next if conflict

        enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
        applied_cut_ranges += enzyme_action.cut_ranges
      end

      ranges_by_permutation[permutation] = attach_strands.call(sequence_range)
    end
  else
    # Nothing to permute: only the uncontested cuts are applied.
    ranges_by_permutation[0] = attach_strands.call(fresh_range.call)
  end

  ranges_by_permutation
end
185
+
186
+
187
+ # Returns permutation orders for a given number of elements.
188
+ #
189
+ # Examples:
190
+ # permute(0) # => [[0]]
191
+ # permute(1) # => [[0]]
192
+ # permute(2) # => [[1, 0], [0, 1]]
193
+ # permute(3) # => [[2, 1, 0], [2, 0, 1], [1, 2, 0], [0, 2, 1], [1, 0, 2], [0, 1, 2]]
194
+ # permute(4) # => [[3, 2, 1, 0],
195
+ # [3, 2, 0, 1],
196
+ # [3, 1, 2, 0],
197
+ # [3, 0, 2, 1],
198
+ # [3, 1, 0, 2],
199
+ # [3, 0, 1, 2],
200
+ # [2, 3, 1, 0],
201
+ # [2, 3, 0, 1],
202
+ # [1, 3, 2, 0],
203
+ # [0, 3, 2, 1],
204
+ # [1, 3, 0, 2],
205
+ # [0, 3, 1, 2],
206
+ # [2, 1, 3, 0],
207
+ # [2, 0, 3, 1],
208
+ # [1, 2, 3, 0],
209
+ # [0, 2, 3, 1],
210
+ # [1, 0, 3, 2],
211
+ # [0, 1, 3, 2],
212
+ # [2, 1, 0, 3],
213
+ # [2, 0, 1, 3],
214
+ # [1, 2, 0, 3],
215
+ # [0, 2, 1, 3],
216
+ # [1, 0, 2, 3],
217
+ # [0, 1, 2, 3]]
218
+ #
219
+ # ---
220
+ # *Arguments*
221
+ # * +count+: +Number+ of different elements to be permuted
222
+ # * +permutations+: ignore - for the recursive algorithm
223
+ # *Returns*:: +Array+ of +Array+ objects with different possible permutation orders. See examples.
224
def permute(count, permutations = [[0]])
  # Iterative form: each round inserts the next element (whose value equals
  # the current permutation length) at every possible position of every
  # existing ordering, position by position, until +count+ rounds are done.
  current = permutations
  remaining = count

  while remaining > 1
    next_value = current[0].size

    current = (0..next_value).flat_map do |position|
      current.map { |order| order.dup.insert(position, next_value) }
    end

    remaining -= 1
  end

  current
end
238
+
239
+ end # Analysis
240
+ end # RestrictionEnzyme
241
+ end # Bio
@@ -0,0 +1,209 @@
1
+ # bio/util/restriction_enzyme/analysis_basic.rb - Does the work of fragmenting the DNA from the enzymes
2
+
3
+ require 'set' # for method create_enzyme_actions
4
+ require 'bio/util/restriction_enzyme'
5
+
6
+ module Bio
7
+ class RestrictionEnzyme
8
+
9
+ class Analysis
10
+
11
+ # See cut_without_permutations instance method
12
def self.cut_without_permutations(sequence, *args)
  # Class-level shortcut: build a throwaway instance and forward every
  # argument unchanged to the instance-level +cut_without_permutations+.
  analysis = new
  analysis.cut_without_permutations(sequence, *args)
end
15
+
16
+ # See main documentation for Bio::RestrictionEnzyme
17
+ #
18
+ # Bio::RestrictionEnzyme.cut is preferred over this!
19
+ #
20
+ # USE AT YOUR OWN RISK
21
+ #
22
+ # This is a simpler version of method +cut+. +cut+ takes into account
23
+ # permutations of cut variations based on competitiveness of enzymes for an
24
+ # enzyme cutsite or enzyme bindsite on a sequence. This does not take into
25
+ # account those possibilities and is therefore faster, but less likely to be
26
+ # accurate.
27
+ #
28
+ # This code is mainly included as an academic example
29
+ # without having to wade through the extra layer of complexity added by the
30
+ # permutations.
31
+ #
32
+ # Example:
33
+ #
34
+ # FIXME add output
35
+ #
36
+ # Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'EcoRI')
37
+ #
38
+ # _same as:_
39
+ #
40
+ # Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'g^aattc')
41
+ # ---
42
+ # *Arguments*
43
+ # * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
44
+ # * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
45
+ # *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments)
46
def cut_without_permutations(sequence, *args)
  # Anything other than a non-empty String produces an empty result set.
  unless sequence.kind_of?(String) && !sequence.empty?
    return fragments_for_display({})
  end
  strand = Bio::Sequence::NA.new(sequence)

  # create_enzyme_actions hands back two separate arrays; the distinction
  # does not matter here, so they are merged into one list.
  actions = create_enzyme_actions(strand, *args).flatten
  return fragments_for_display({}) if actions.empty?

  # Primary and complement strands are both measured from 0 to size-1.
  last_index = strand.size - 1
  whole_sequence = Bio::RestrictionEnzyme::Range::SequenceRange.new(0, 0, last_index, last_index)

  # Apply every cut of every enzyme action, in order.
  actions.each do |action|
    action.cut_ranges.each { |cut_range| whole_sequence.add_cut_range(cut_range) }
  end

  # Give the range the source bases it needs to produce fragments.
  whole_sequence.fragments.primary = strand
  whole_sequence.fragments.complement = strand.forward_complement

  # Format the fragments for the user.
  fragments_for_display({0 => whole_sequence})
end
73
+
74
+ #########
75
+ protected
76
+ #########
77
+
78
+ # Take the fragments from SequenceRange objects generated from add_cut_range
79
+ # and return unique results as a Bio::RestrictionEnzyme::Analysis::Fragment object.
80
+ #
81
+ # ---
82
+ # *Arguments*
83
+ # * +hsh+: +Hash+ Keys are a permutation ID, if any. Values are SequenceRange objects that have cuts applied.
84
+ # *Returns*:: Bio::RestrictionEnzyme::Analysis::Fragments object populated with Bio::RestrictionEnzyme::Analysis::Fragment objects.
85
def fragments_for_display(hsh, view_ranges = false)
  collected = Fragments.new
  return collected unless hsh

  hsh.each do |_permutation_id, sequence_range|
    sequence_range.fragments.for_display.each do |fragment|
      # The strands are always passed; the four range coordinates are
      # appended only when the caller asked to see them.
      attributes = [fragment.primary, fragment.complement]
      if view_ranges
        attributes.push(fragment.p_left, fragment.p_right, fragment.c_left, fragment.c_right)
      end

      collected << Bio::RestrictionEnzyme::Fragment.new(*attributes)
    end
  end

  # Duplicates are removed only when ranges are not being shown.
  collected.uniq! unless view_ranges

  collected
end
103
+
104
+ # Creates an array of EnzymeActions based on the DNA sequence and supplied enzymes.
105
+ #
106
+ # ---
107
+ # *Arguments*
108
+ # * +sequence+: The string of DNA to match the enzyme recognition sites against
109
+ # * +args+:: The enzymes to use.
110
+ # *Returns*:: +Array+ with the first element being an array of EnzymeAction objects that +sometimes_cut+, and are subject to competition. The second is an array of EnzymeAction objects that +always_cut+ and are not subject to competition.
111
def create_enzyme_actions(sequence, *args)
  # Step 1: one EnzymeAction for every place an enzyme's recognition
  # pattern matches the sequence.
  actions = args.flat_map do |enzyme|
    enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded

    # The pattern is taken from the aligned primary strand so that it has
    # the proper size (see the internal documentation of
    # Bio::RestrictionEnzyme::DoubleStranded.create_action_at).
    aligned = Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands.align(
      enzyme.primary, enzyme.complement
    )
    pattern = Bio::Sequence::NA.new(aligned.primary).to_re

    find_match_locations(sequence, pattern).map { |offset| enzyme.create_action_at(offset) }
  end

  # FIXME VerticalCutRange should really be called VerticalAndHorizontalCutRange

  # Step 2: find the actions that compete with one another.
  #
  # A conflict occurs when one enzyme's bind site is compromised by another
  # enzyme's cut. Bind sites may overlap without competing, but neither
  # bind site may be part of the other enzyme's cut. Only vertical cuts are
  # used as boundaries, since trailing horizontal cuts would have no
  # influence on the bind site.
  #
  # If example Enzyme A makes this cut pattern (cut range 2..5):
  #
  #   0 1 2|3 4 5 6 7
  #        +-----+
  #   0 1 2 3 4 5|6 7
  #
  # then the bind site (and EnzymeAction range) for Enzyme B needs its right
  # side at index 2 or less, or its left side at 6 or greater.
  #
  # NOTE(review): only the earlier action's bind site is tested against the
  # later action's cuts, and an action already marked as competing is not
  # used as the "current" side again - confirm this is intended.
  competing = Set.new
  last_index = actions.size - 1

  (0...last_index).each do |i|
    current = actions[i]
    next if competing.include?(i)
    next if current.cut_ranges.empty? # no cuts, some enzymes are like this (ex. CjuI)

    ((i + 1)..last_index).each do |j|
      comparison = actions[j]
      next if competing.include?(j)
      next if comparison.cut_ranges.empty? # no cuts

      clear = current.right <= comparison.cut_ranges.min_vertical ||
              current.left > comparison.cut_ranges.max_vertical

      # record both indexes in the flat set
      competing.merge([i, j]) unless clear
    end
  end

  # Step 3: split into the competing actions and everything else.
  sometimes_cut = actions.values_at(*competing)
  always_cut = actions.reject { |action| sometimes_cut.include?(action) }

  [sometimes_cut, always_cut]
end
182
+
183
+ # Returns an +Array+ of the match indices of a +RegExp+ to a string.
184
+ #
185
+ # Example:
186
+ #
187
+ # find_match_locations('abccdefeg', /[ce]/) # => [2,3,5,7]
188
+ #
189
+ # ---
190
+ # *Arguments*
191
+ # * +string+: The string to scan
192
+ # * +re+: A RegExp to use
193
+ # *Returns*:: +Array+ with indices of match locations
194
def find_match_locations(string, re)
  # Collects the start index of every match of +re+ in +string+, including
  # overlapping matches (the search resumes one character after each hit).
  #
  # The whole string is searched from an advancing offset rather than
  # re-slicing the tail after every hit. That keeps look-behinds and anchors
  # relative to the original string, avoids copying the remainder for each
  # match, and cannot run off the end after a zero-width match at the last
  # position.
  #
  # A non-Regexp pattern is compiled first so it keeps being interpreted as
  # a regular expression, exactly as String#match would do.
  pattern = re.kind_of?(Regexp) ? re : Regexp.new(re)

  locations = []
  search_from = 0

  while (hit = string.index(pattern, search_from))
    locations << hit
    search_from = hit + 1
  end

  locations
end
206
+
207
+ end # Analysis
208
+ end # RestrictionEnzyme
209
+ end # Bio
@@ -0,0 +1,99 @@
1
+ # bio/util/restriction_enzyme/cut_symbol.rb - Defines the symbol used to mark a cut in an enzyme sequence
2
+
3
+ module Bio
4
+ class RestrictionEnzyme
5
+
6
+ # = Usage
7
+ #
8
+ # #require 'bio/util/restriction_enzyme/cut_symbol'
9
+ # require 'cut_symbol'
10
+ # include Bio::RestrictionEnzyme::CutSymbol
11
+ #
12
+ # cut_symbol # => "^"
13
+ # set_cut_symbol('|') # => "|"
14
+ # cut_symbol # => "|"
15
+ # escaped_cut_symbol # => "\\|"
16
+ # re_cut_symbol # => /\|/
17
+ # set_cut_symbol('^') # => "^"
18
+ # "abc^de" =~ re_cut_symbol # => 3
19
+ # "abc^de" =~ re_cut_symbol_adjacent # => nil
20
+ # "abc^^de" =~ re_cut_symbol_adjacent # => 3
21
+ # "a^bc^^de" =~ re_cut_symbol_adjacent # => 4
22
+ # "a^bc^de" =~ re_cut_symbol_adjacent # => nil
23
+ #
24
module CutSymbol

  # Set the token to be used as the cut symbol in a restriction enzyme sequence
  #
  # Starts as +^+ character
  #
  # ---
  # *Arguments*
  # * +glyph+: The single character to be used as the cut symbol in an enzyme sequence
  # *Returns*:: +glyph+
  # *Raises*:: +ArgumentError+ if +glyph+ is not a one-character +String+
  def set_cut_symbol(glyph)
    CutSymbol__.cut_symbol = glyph
  end

  # Get the token that's used as the cut symbol in a restriction enzyme sequence
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +glyph+
  def cut_symbol; CutSymbol__.cut_symbol; end

  # Get the cut symbol in a form that is safe to embed in a regular
  # expression: preceded by a back-slash, except when the symbol is an ASCII
  # letter or digit, where a back-slash would change its meaning.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +\glyph+ (or +glyph+ alone for an ASCII letter or digit)
  def escaped_cut_symbol; CutSymbol__.escaped_cut_symbol; end

  # Used to check if multiple cut symbols are next to each other.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +RegExp+
  def re_cut_symbol_adjacent
    %r"#{escaped_cut_symbol}{2}"
  end

  # A Regexp of the cut_symbol.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +RegExp+
  def re_cut_symbol
    %r"#{escaped_cut_symbol}"
  end

  #########
  #protected # NOTE this is a Module, can't hide CutSymbol__
  #########

  require 'singleton'

  # Class to keep state. The symbol is stored in a class-level instance
  # variable, so it is shared by everything that includes CutSymbol.
  class CutSymbol__
    include Singleton

    @cut_symbol = '^'

    def self.cut_symbol; @cut_symbol; end

    # Rejects anything that is not a one-character String up front, so a bad
    # value (nil, a Symbol, a longer string) raises ArgumentError here rather
    # than an unrelated error later.
    def self.cut_symbol=(glyph)
      unless glyph.kind_of?(String) && glyph.size == 1
        raise ArgumentError, "cut symbol must be a single character, got #{glyph.inspect}"
      end
      @cut_symbol = glyph
    end

    # A back-slash in front of punctuation makes it a literal in a Regexp,
    # but in front of an ASCII letter or digit it forms an escape sequence
    # (e.g. \d, \w) or an invalid one (e.g. \x), so those are returned bare.
    def self.escaped_cut_symbol
      glyph = cut_symbol
      glyph =~ /\A[A-Za-z0-9]\z/ ? glyph : "\\" + glyph
    end
  end

end # CutSymbol
98
+ end # RestrictionEnzyme
99
+ end # Bio