bio-restriction_enzyme 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/.document +5 -0
  2. data/COPYING.txt +121 -0
  3. data/Gemfile +10 -0
  4. data/LICENSE.txt +7 -0
  5. data/README.rdoc +22 -0
  6. data/Rakefile +53 -0
  7. data/VERSION +1 -0
  8. data/bio-restriction_enzyme.gemspec +99 -0
  9. data/lib/bio-restriction_enzyme.rb +1 -0
  10. data/lib/bio/util/restriction_enzyme.rb +218 -0
  11. data/lib/bio/util/restriction_enzyme/analysis.rb +241 -0
  12. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +209 -0
  13. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +99 -0
  14. data/lib/bio/util/restriction_enzyme/double_stranded.rb +313 -0
  15. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +127 -0
  16. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +95 -0
  17. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +30 -0
  18. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +68 -0
  19. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +99 -0
  20. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +16 -0
  21. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +39 -0
  22. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +59 -0
  23. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +249 -0
  24. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +236 -0
  25. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +43 -0
  26. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +33 -0
  27. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +69 -0
  28. data/lib/bio/util/restriction_enzyme/single_strand.rb +193 -0
  29. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +127 -0
  30. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +15 -0
  31. data/lib/bio/util/restriction_enzyme/string_formatting.rb +103 -0
  32. data/test/bio-restriction_enzyme/analysis/test_calculated_cuts.rb +281 -0
  33. data/test/bio-restriction_enzyme/analysis/test_cut_ranges.rb +87 -0
  34. data/test/bio-restriction_enzyme/analysis/test_sequence_range.rb +223 -0
  35. data/test/bio-restriction_enzyme/double_stranded/test_aligned_strands.rb +84 -0
  36. data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair.rb +58 -0
  37. data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +56 -0
  38. data/test/bio-restriction_enzyme/double_stranded/test_cut_locations.rb +35 -0
  39. data/test/bio-restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +87 -0
  40. data/test/bio-restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +66 -0
  41. data/test/bio-restriction_enzyme/test_analysis.rb +228 -0
  42. data/test/bio-restriction_enzyme/test_cut_symbol.rb +27 -0
  43. data/test/bio-restriction_enzyme/test_double_stranded.rb +98 -0
  44. data/test/bio-restriction_enzyme/test_single_strand.rb +131 -0
  45. data/test/bio-restriction_enzyme/test_single_strand_complement.rb +131 -0
  46. data/test/bio-restriction_enzyme/test_string_formatting.rb +43 -0
  47. data/test/helper.rb +17 -0
  48. data/test/test_bio-restriction_enzyme.rb +21 -0
  49. metadata +153 -0
@@ -0,0 +1,241 @@
1
+ # bio/util/restriction_enzyme/analysis.rb - Does the work of fragmenting the DNA from the enzymes
2
+
3
+ require 'bio/util/restriction_enzyme'
4
+ require 'bio/util/restriction_enzyme/analysis_basic'
5
+
6
+ module Bio
7
+ class RestrictionEnzyme
8
+
9
+ class Analysis
10
+
11
# Class-level convenience wrapper: creates a new instance and forwards all
# arguments to the +cut+ instance method. See that method for details.
#
# ---
# *Arguments*
# * +sequence+: handed to the instance method unchanged
# * +args+: handed to the instance method unchanged
# *Returns*:: whatever the +cut+ instance method returns
def self.cut(sequence, *args)
  new.cut(sequence, *args)
end
15
+
16
# See main documentation for Bio::RestrictionEnzyme
#
# +cut+ takes into account permutations of cut variations based on
# competitiveness of enzymes for an enzyme cutsite or enzyme bindsite on a
# sequence.
#
# Example:
#
# FIXME add output
#
#   Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'EcoRI')
#
# _same as:_
#
#   Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'g^aattc')
# ---
# *Arguments*
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
#   Any +Hash+ among them is read for the key +:view_ranges+, which must be
#   exactly +true+ or +false+ (otherwise +ArgumentError+ is raised).
# *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments) or a +Symbol+ containing an error code
def cut(sequence, *args)
  show_ranges = false

  # Scan every option Hash for :view_ranges; the last occurrence wins.
  args.each do |arg|
    next unless arg.instance_of?(Hash)

    arg.each do |key, value|
      next unless key == :view_ranges

      unless value.kind_of?(TrueClass) || value.kind_of?(FalseClass)
        raise ArgumentError, "view_ranges must be set to true or false, currently #{value.inspect}."
      end
      show_ranges = value
    end
  end

  permuted = cut_and_return_by_permutations(sequence, *args)
  # A Symbol is an error code; hand it back to the caller untouched.
  return permuted if permuted.instance_of?(Symbol)

  # Format the fragments for the user
  fragments_for_display(permuted, show_ranges)
end
56
+
57
+ #########
58
+ protected
59
+ #########
60
+
61
# See cut instance method
#
# Builds one SequenceRange per permutation of the competing enzyme actions.
# Non-competing actions are applied to every range; competing actions are
# applied in permutation order, skipping any action whose recognition site
# was already broken by an earlier vertical cut.
#
# ---
# *Arguments*
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
#   May also supply a +Hash+ with the key ":max_permutations" to specify how many permutations are allowed - a value of 0 indicates no permutations are allowed.
#   The key ":view_ranges" is accepted and ignored here; any other key raises +ArgumentError+.
# *Returns*:: +Hash+ Keys are a permutation ID (the permutation order +Array+, or +0+ when nothing competes), values are SequenceRange objects that have cuts applied.
#             _also_ may return the +Symbol+ ':sequence_empty', ':no_cuts_found', or ':too_many_permutations'
def cut_and_return_by_permutations(sequence, *args)
  permutation_limit = nil

  # Separate option hashes from the enzyme arguments.
  option_hashes, enzymes = args.partition { |arg| arg.instance_of?(Hash) }

  option_hashes.each do |options|
    options.each do |key, value|
      case key
      when :max_permutations, 'max_permutations', :maximum_permutations, 'maximum_permutations'
        permutation_limit = value.to_i unless value.nil?
      when :view_ranges
        # recognised option, nothing to do with it in this method
      else
        raise ArgumentError, "Received key #{key.inspect} in argument - I only know the key ':max_permutations' and ':view_ranges' currently. Hash passed: #{options.inspect}"
      end
    end
  end

  unless sequence.kind_of?(String) && !sequence.empty?
    logger.warn "The supplied sequence is empty." if defined?(logger)
    return :sequence_empty
  end
  sequence = Bio::Sequence::NA.new(sequence)

  enzyme_actions, initial_cuts = create_enzyme_actions(sequence, *enzymes)

  if enzyme_actions.empty? && initial_cuts.empty?
    logger.warn "This enzyme does not make any cuts on this sequence." if defined?(logger)
    return :no_cuts_found
  end

  # * When enzyme_actions.size is equal to '1' that means there are no permutations.
  # * If enzyme_actions.size is equal to '2' there is one
  #   permutation ("[0, 1]")
  # * If enzyme_actions.size is equal to '3' there are two
  #   permutations ("[0, 1, 2]")
  # * and so on..
  competitor_count = enzyme_actions.size
  if permutation_limit && competitor_count > 1 && (competitor_count - 1) > permutation_limit.to_i
    logger.warn "More permutations than maximum, skipping. Found: #{competitor_count - 1} Max: #{permutation_limit.to_i}" if defined?(logger)
    return :too_many_permutations
  end

  # Primary and complement strands are both measured from '0' to
  # 'sequence.size-1'. Every result starts from a fresh range that already
  # carries the cuts that have no competition.
  fresh_range = lambda do
    last_index = sequence.size - 1
    range = Bio::RestrictionEnzyme::Range::SequenceRange.new(0, 0, last_index, last_index)
    initial_cuts.each do |action|
      action.cut_ranges.each { |cut_range| range.add_cut_range(cut_range) }
    end
    range
  end

  # Fill in the source sequence for a range so it knows what bases to use.
  attach_strands = lambda do |range|
    range.fragments.primary = sequence
    range.fragments.complement = sequence.forward_complement
  end

  results = {}

  if competitor_count > 1
    permute(competitor_count).each do |order|
      applied_cut_ranges = []
      range = fresh_range.call

      order.each do |id|
        action = enzyme_actions[id]

        # An action is blocked when a previously applied vertical cut falls
        # where this action needs a whole recognition site. Horizontal cuts
        # are of no concern. Keep in mind:
        # * The cut location is to the immediate right of the base located
        #   at the index. ex: at^gc -- the cut location is at index 1
        # * The enzyme action location is located at the base of the index.
        #   ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
        blocked = applied_cut_ranges.any? do |earlier|
          next false unless earlier.instance_of?(Bio::RestrictionEnzyme::Range::VerticalCutRange)

          cut_left = earlier.range.first
          cut_right = earlier.range.last
          clear_of_cut = action.right <= cut_left ||
                         action.left > cut_right ||
                         (action.left > cut_left && action.right <= cut_right) # in between cuts
          !clear_of_cut
        end
        next if blocked

        action.cut_ranges.each { |cut_range| range.add_cut_range(cut_range) }
        applied_cut_ranges += action.cut_ranges
      end

      attach_strands.call(range)
      results[order] = range
    end
  else
    # Nothing to permute: only the non-competing cuts are applied.
    range = fresh_range.call
    attach_strands.call(range)
    results[0] = range
  end

  results
end
185
+
186
+
187
# Returns permutation orders for a given number of elements.
#
# Each round inserts the next index at every possible position of every
# permutation built so far, so the result order is deterministic.
#
# Examples:
#   permute(0) # => [[0]]
#   permute(1) # => [[0]]
#   permute(2) # => [[1, 0], [0, 1]]
#   permute(3) # => [[2, 1, 0], [2, 0, 1], [1, 2, 0], [0, 2, 1], [1, 0, 2], [0, 1, 2]]
#   permute(4) # => [[3, 2, 1, 0], [3, 2, 0, 1], [3, 1, 2, 0], [3, 0, 2, 1],
#                    [3, 1, 0, 2], [3, 0, 1, 2], [2, 3, 1, 0], [2, 3, 0, 1],
#                    [1, 3, 2, 0], [0, 3, 2, 1], [1, 3, 0, 2], [0, 3, 1, 2],
#                    [2, 1, 3, 0], [2, 0, 3, 1], [1, 2, 3, 0], [0, 2, 3, 1],
#                    [1, 0, 3, 2], [0, 1, 3, 2], [2, 1, 0, 3], [2, 0, 1, 3],
#                    [1, 2, 0, 3], [0, 2, 1, 3], [1, 0, 2, 3], [0, 1, 2, 3]]
#
# ---
# *Arguments*
# * +count+: +Number+ of different elements to be permuted
# * +permutations+: ignore - seed used while the permutations are grown
# *Returns*:: +Array+ of +Array+ objects with different possible permutation orders. See examples.
def permute(count, permutations = [[0]])
  remaining = count
  current = permutations

  until remaining <= 1
    # Every existing permutation has the same length, which is also the
    # value of the next index to insert.
    next_index = current[0].size
    current = (0..next_index).flat_map do |slot|
      current.map { |order| order.dup.insert(slot, next_index) }
    end
    remaining -= 1
  end

  current
end
238
+
239
+ end # Analysis
240
+ end # RestrictionEnzyme
241
+ end # Bio
@@ -0,0 +1,209 @@
1
+ # bio/util/restriction_enzyme/analysis_basic.rb - Does the work of fragmenting the DNA from the enzymes
2
+
3
+ require 'set' # for method create_enzyme_actions
4
+ require 'bio/util/restriction_enzyme'
5
+
6
+ module Bio
7
+ class RestrictionEnzyme
8
+
9
+ class Analysis
10
+
11
# Class-level convenience wrapper: creates a new instance and forwards all
# arguments to the +cut_without_permutations+ instance method. See that
# method for details.
#
# ---
# *Arguments*
# * +sequence+: handed to the instance method unchanged
# * +args+: handed to the instance method unchanged
# *Returns*:: whatever the +cut_without_permutations+ instance method returns
def self.cut_without_permutations(sequence, *args)
  new.cut_without_permutations(sequence, *args)
end
15
+
16
# See main documentation for Bio::RestrictionEnzyme
#
# Bio::RestrictionEnzyme.cut is preferred over this!
#
# USE AT YOUR OWN RISK
#
# This is a simpler version of method +cut+. +cut+ takes into account
# permutations of cut variations based on competitiveness of enzymes for an
# enzyme cutsite or enzyme bindsite on a sequence. This does not take into
# account those possibilities and is therefore faster, but less likely to be
# accurate.
#
# This code is mainly included as an academic example
# without having to wade through the extra layer of complexity added by the
# permutations.
#
# Example:
#
# FIXME add output
#
#   Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'EcoRI')
#
# _same as:_
#
#   Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'g^aattc')
# ---
# *Arguments*
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
# *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments)
def cut_without_permutations(sequence, *args)
  # A missing or empty sequence yields an empty fragment collection.
  return fragments_for_display({}) unless sequence.kind_of?(String) && !sequence.empty?

  sequence = Bio::Sequence::NA.new(sequence)

  # create_enzyme_actions returns two separate arrays; the distinction is
  # not needed here, so they are merged into one flat list.
  enzyme_actions = create_enzyme_actions(sequence, *args).flatten
  return fragments_for_display({}) if enzyme_actions.empty?

  # Primary and complement strands are both measured from '0' to 'sequence.size-1' here
  last_index = sequence.size - 1
  sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new(0, 0, last_index, last_index)

  # Apply every cut of every enzyme action, with no regard for competition.
  enzyme_actions.each do |action|
    action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
  end

  # Fill in the source sequence for sequence_range so it knows what bases
  # to use
  fragments = sequence_range.fragments
  fragments.primary = sequence
  sequence_range.fragments.complement = sequence.forward_complement

  # Format the fragments for the user
  fragments_for_display({ 0 => sequence_range })
end
73
+
74
+ #########
75
+ protected
76
+ #########
77
+
78
# Take the fragments from SequenceRange objects generated from add_cut_range
# and collect them as Bio::RestrictionEnzyme::Fragment objects.
#
# Duplicates are removed unless +view_ranges+ is set.
#
# ---
# *Arguments*
# * +hsh+: +Hash+ Keys are a permutation ID, if any. Values are SequenceRange objects that have cuts applied. A false/nil value yields an empty collection.
# * +view_ranges+: when true, each Fragment is also given the fragment's +p_left+, +p_right+, +c_left+ and +c_right+ values.
# *Returns*:: +Fragments+ object populated with Bio::RestrictionEnzyme::Fragment objects.
def fragments_for_display(hsh, view_ranges = false)
  collected = Fragments.new
  return collected unless hsh

  hsh.each do |_permutation_id, sequence_range|
    sequence_range.fragments.for_display.each do |fragment|
      fields = [fragment.primary, fragment.complement]
      if view_ranges
        fields += [fragment.p_left, fragment.p_right, fragment.c_left, fragment.c_right]
      end
      collected << Bio::RestrictionEnzyme::Fragment.new(*fields)
    end
  end

  collected.uniq! unless view_ranges

  collected
end
103
+
104
# Creates an array of EnzymeActions based on the DNA sequence and supplied enzymes.
#
# Every enzyme is matched against the sequence and an action is created at
# each match offset. The actions are then split into those that compete with
# another action and those that do not.
#
# FIXME VerticalCutRange should really be called VerticalAndHorizontalCutRange
#
# A conflict occurs if one enzyme's bind site is compromised due to another
# enzyme's cut. Enzymes' bind sites may overlap and not be competitive,
# however neither bind site may be part of the other enzyme's cut or else
# they do become competitive.
#
# The current EnzymeAction's entire bind site is compared to the cut ranges
# of each later EnzymeAction. Only vertical cuts are used as boundaries
# since trailing horizontal cuts would have no influence on the bind site.
#
# If example Enzyme A makes this cut pattern (cut range 2..5):
#
#   0 1 2|3 4 5 6 7
#        +-----+
#   0 1 2 3 4 5|6 7
#
# Then the bind site (and EnzymeAction range) for Enzyme B would need its
# right side to be at index 2 or less, or its left side to be 6 or greater.
#
# ---
# *Arguments*
# * +sequence+: The string of DNA to match the enzyme recognition sites against
# * +args+: The enzymes to use.
# *Returns*:: +Array+ with the first element being an array of EnzymeAction objects that +sometimes_cut+, and are subject to competition. The second is an array of EnzymeAction objects that +always_cut+ and are not subject to competition.
def create_enzyme_actions(sequence, *args)
  actions = []

  args.each do |enzyme|
    enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.instance_of?(Bio::RestrictionEnzyme::DoubleStranded)

    # make sure pattern is the proper size
    # for more info see the internal documentation of
    # Bio::RestrictionEnzyme::DoubleStranded.create_action_at
    aligned = Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands.align(
      enzyme.primary, enzyme.complement
    )
    pattern = Bio::Sequence::NA.new(aligned.primary).to_re

    find_match_locations(sequence, pattern).each do |offset|
      actions << enzyme.create_action_at(offset)
    end
  end

  # Indexes (into +actions+) of every action found to be in competition.
  competing = Set.new
  total = actions.size

  (0...(total - 1)).each do |i|
    next if competing.include?(i)

    current = actions[i]
    next if current.cut_ranges.empty? # no cuts, some enzymes are like this (ex. CjuI)

    ((i + 1)...total).each do |j|
      next if competing.include?(j)

      other = actions[j]
      next if other.cut_ranges.empty? # no cuts

      clear = current.right <= other.cut_ranges.min_vertical ||
              current.left > other.cut_ranges.max_vertical
      competing << i << j unless clear
    end
  end

  sometimes_cut = actions.values_at(*competing)
  always_cut = actions.reject { |action| sometimes_cut.include?(action) }

  [sometimes_cut, always_cut]
end
182
+
183
# Returns an +Array+ of the match indices of a +RegExp+ to a string.
#
# Overlapping matches are reported: after each match the search resumes one
# character past the start of that match. The pattern is always evaluated
# against the whole string, so anchors and look-behind see the real context,
# and a pattern that can match the empty string at the very end no longer
# raises.
#
# Example:
#
#   find_match_locations('abccdefeg', /[ce]/) # => [2,3,5,7]
#   find_match_locations('aaa', /aa/)         # => [0,1]
#
# ---
# *Arguments*
# * +string+: The string to scan
# * +re+: A RegExp to use (a +String+ is treated as regexp source)
# *Returns*:: +Array+ with indices of match locations
def find_match_locations(string, re)
  # String#match used to compile String patterns; keep accepting them.
  re = Regexp.new(re) if re.kind_of?(String)

  locations = []
  position = 0
  # String#index returns nil once +position+ is past the end of the string
  # or no further match exists, which ends the loop.
  while (found = string.index(re, position))
    locations << found
    position = found + 1
  end
  locations
end
206
+
207
+ end # Analysis
208
+ end # RestrictionEnzyme
209
+ end # Bio
@@ -0,0 +1,99 @@
1
+ # bio/util/restriction_enzyme/cut_symbol.rb - Defines the symbol used to mark a cut in an enzyme sequence
2
+
3
+ module Bio
4
+ class RestrictionEnzyme
5
+
6
# = Usage
#
#   #require 'bio/util/restriction_enzyme/cut_symbol'
#   require 'cut_symbol'
#   include Bio::RestrictionEnzyme::CutSymbol
#
#   cut_symbol                            # => "^"
#   set_cut_symbol('|')                   # => "|"
#   cut_symbol                            # => "|"
#   escaped_cut_symbol                    # => "\\|"
#   re_cut_symbol                         # => /\|/
#   set_cut_symbol('^')                   # => "^"
#   "abc^de" =~ re_cut_symbol             # => 3
#   "abc^de" =~ re_cut_symbol_adjacent    # => nil
#   "abc^^de" =~ re_cut_symbol_adjacent   # => 3
#   "a^bc^^de" =~ re_cut_symbol_adjacent  # => 4
#   "a^bc^de" =~ re_cut_symbol_adjacent   # => nil
#
module CutSymbol

  # Set the token to be used as the cut symbol in a restriction enzyme sequence
  #
  # Starts as +^+ character
  #
  # ---
  # *Arguments*
  # * +glyph+: The single character to be used as the cut symbol in an enzyme sequence
  # *Returns*:: +glyph+
  # *Raises*:: +ArgumentError+ unless +glyph+ is a one-character +String+
  def set_cut_symbol(glyph)
    CutSymbol__.cut_symbol = glyph
  end

  # Get the token that's used as the cut symbol in a restriction enzyme sequence
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +glyph+
  def cut_symbol
    CutSymbol__.cut_symbol
  end

  # Get the token that's used as the cut symbol in a restriction enzyme sequence with
  # a back-slash preceding it.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +\glyph+
  def escaped_cut_symbol
    CutSymbol__.escaped_cut_symbol
  end

  # Used to check if multiple cut symbols are next to each other.
  #
  # The glyph is escaped with Regexp.escape, so it is always matched
  # literally (a blind back-slash prefix would turn a glyph such as 'd'
  # into the digit class).
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +RegExp+
  def re_cut_symbol_adjacent
    Regexp.new("#{Regexp.escape(cut_symbol)}{2}")
  end

  # A Regexp of the cut_symbol, matching the glyph literally.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +RegExp+
  def re_cut_symbol
    Regexp.new(Regexp.escape(cut_symbol))
  end

  #########
  #protected # NOTE this is a Module, can't hide CutSymbol__
  #########

  require 'singleton'

  # Class to keep state. The current glyph is stored in a class-level
  # instance variable, so it is shared by everything that includes CutSymbol.
  class CutSymbol__
    include Singleton

    @cut_symbol = '^'

    # Current cut symbol glyph.
    def self.cut_symbol
      @cut_symbol
    end

    # Replace the cut symbol glyph; only a one-character String is accepted.
    def self.cut_symbol=(glyph)
      unless glyph.kind_of?(String) && glyph.size == 1
        raise ArgumentError, "cut symbol must be a single-character String, got #{glyph.inspect}"
      end
      @cut_symbol = glyph
    end

    # Current glyph with a back-slash in front of it.
    def self.escaped_cut_symbol
      "\\" + cut_symbol
    end
  end

end # CutSymbol
98
+ end # RestrictionEnzyme
99
+ end # Bio