bio-restriction_enzyme 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/COPYING.txt +121 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +7 -0
- data/README.rdoc +22 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-restriction_enzyme.gemspec +99 -0
- data/lib/bio-restriction_enzyme.rb +1 -0
- data/lib/bio/util/restriction_enzyme.rb +218 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +241 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +209 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +99 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +313 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +127 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +95 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +30 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +68 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +99 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +16 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +39 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +59 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +249 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +236 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +43 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +33 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +69 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +193 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +127 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +15 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +103 -0
- data/test/bio-restriction_enzyme/analysis/test_calculated_cuts.rb +281 -0
- data/test/bio-restriction_enzyme/analysis/test_cut_ranges.rb +87 -0
- data/test/bio-restriction_enzyme/analysis/test_sequence_range.rb +223 -0
- data/test/bio-restriction_enzyme/double_stranded/test_aligned_strands.rb +84 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair.rb +58 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +56 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_locations.rb +35 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +87 -0
- data/test/bio-restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +66 -0
- data/test/bio-restriction_enzyme/test_analysis.rb +228 -0
- data/test/bio-restriction_enzyme/test_cut_symbol.rb +27 -0
- data/test/bio-restriction_enzyme/test_double_stranded.rb +98 -0
- data/test/bio-restriction_enzyme/test_single_strand.rb +131 -0
- data/test/bio-restriction_enzyme/test_single_strand_complement.rb +131 -0
- data/test/bio-restriction_enzyme/test_string_formatting.rb +43 -0
- data/test/helper.rb +17 -0
- data/test/test_bio-restriction_enzyme.rb +21 -0
- metadata +153 -0
@@ -0,0 +1,241 @@
|
|
1
|
+
# bio/util/restriction_enzyme/analysis.rb - Does the work of fragmenting the DNA from the enzymes
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
require 'bio/util/restriction_enzyme/analysis_basic'
|
5
|
+
|
6
|
+
module Bio
|
7
|
+
class RestrictionEnzyme
|
8
|
+
|
9
|
+
class Analysis
|
10
|
+
|
11
|
+
# See cut instance method
|
12
|
+
def self.cut( sequence, *args )
  # Class-level convenience: build a throwaway instance and forward
  # every argument to the instance method of the same name.
  analysis = new
  analysis.cut( sequence, *args )
end
|
15
|
+
|
16
|
+
# See main documentation for Bio::RestrictionEnzyme
|
17
|
+
#
|
18
|
+
#
|
19
|
+
# +cut+ takes into account
|
20
|
+
# permutations of cut variations based on competitiveness of enzymes for an
|
21
|
+
# enzyme cutsite or enzyme bindsite on a sequence.
|
22
|
+
#
|
23
|
+
# Example:
|
24
|
+
#
|
25
|
+
# FIXME add output
|
26
|
+
#
|
27
|
+
# Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'EcoRI')
|
28
|
+
#
|
29
|
+
# _same as:_
|
30
|
+
#
|
31
|
+
# Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'g^aattc')
|
32
|
+
# ---
|
33
|
+
# *Arguments*
|
34
|
+
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
|
35
|
+
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
|
36
|
+
# *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments) or a +Symbol+ containing an error code
|
37
|
+
def cut( sequence, *args )
  view_ranges = false

  # Scan every Hash argument for the :view_ranges option; other keys
  # are left for cut_and_return_by_permutations to interpret.
  args.each do |arg|
    next unless arg.instance_of?(Hash)

    arg.each do |key, value|
      next unless key == :view_ranges

      # Only the literal booleans are accepted.
      unless [true, false].include?(value)
        raise ArgumentError, "view_ranges must be set to true or false, currently #{value.inspect}."
      end
      view_ranges = value
    end
  end

  res = cut_and_return_by_permutations( sequence, *args )

  # A Symbol is an error code and is handed back untouched; anything
  # else is formatted into fragments for the user.
  if res.class == Symbol
    res
  else
    fragments_for_display( res, view_ranges )
  end
end
|
56
|
+
|
57
|
+
#########
|
58
|
+
protected
|
59
|
+
#########
|
60
|
+
|
61
|
+
# See cut instance method
|
62
|
+
#
|
63
|
+
# ---
|
64
|
+
# *Arguments*
|
65
|
+
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
|
66
|
+
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
|
67
|
+
# May also supply a +Hash+ with the key ":max_permutations" to specify how many permutations are allowed - a value of 0 indicates no permutations are allowed.
|
68
|
+
# *Returns*:: +Hash+ Keys are a permutation ID, values are SequenceRange objects that have cuts applied.
|
69
|
+
# _also_ may return the +Symbol+ ':sequence_empty', ':no_cuts_found', or ':too_many_permutations'
|
70
|
+
def cut_and_return_by_permutations( sequence, *args )
  results = {}
  maximum_permutations = nil

  # Split the option hashes away from the enzyme arguments.
  option_hashes, args = args.partition { |arg| arg.class == Hash }

  option_hashes.each do |hsh|
    hsh.each do |key, value|
      case key
      when :max_permutations, 'max_permutations', :maximum_permutations, 'maximum_permutations'
        maximum_permutations = value.to_i unless value == nil
      when :view_ranges
        # recognised so that it is not rejected below; not used in this method
      else
        raise ArgumentError, "Received key #{key.inspect} in argument - I only know the key ':max_permutations' and ':view_ranges' currently. Hash passed: #{hsh.inspect}"
      end
    end
  end

  unless sequence.kind_of?(String) && !sequence.empty?
    logger.warn "The supplied sequence is empty." if defined?(logger)
    return :sequence_empty
  end
  sequence = Bio::Sequence::NA.new( sequence )

  # enzyme_actions compete with each other; initial_cuts have no competition.
  enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )

  if enzyme_actions.empty? && initial_cuts.empty?
    logger.warn "This enzyme does not make any cuts on this sequence." if defined?(logger)
    return :no_cuts_found
  end

  # * enzyme_actions.size == 1 means there are no permutations.
  # * enzyme_actions.size == 2 means one permutation ("[0, 1]"),
  #   size == 3 means two ("[0, 1, 2]"), and so on.
  if maximum_permutations && enzyme_actions.size > 1 &&
     (enzyme_actions.size - 1) > maximum_permutations.to_i
    logger.warn "More permutations than maximum, skipping. Found: #{enzyme_actions.size-1} Max: #{maximum_permutations.to_i}" if defined?(logger)
    return :too_many_permutations
  end

  # Each entry is [result key, order in which competing actions are tried].
  # Without competition there is a single result stored under key 0.
  keyed_orders =
    if enzyme_actions.size > 1
      permute(enzyme_actions.size).map { |permutation| [permutation, permutation] }
    else
      [[0, []]]
    end

  keyed_orders.each do |key, order|
    applied_cut_ranges = []

    # Primary and complement strands are both measured from 0 to sequence.size-1.
    sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )

    # Cuts without competition are applied first, whatever the order.
    initial_cuts.each do |uncontested|
      uncontested.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
    end

    order.each do |id|
      enzyme_action = enzyme_actions[id]

      # blocked becomes true when an earlier action already cut where this
      # action needs a whole recognition site. Every earlier cut range is
      # examined because this action may sit in the middle of one.
      blocked = false
      applied_cut_ranges.each do |cut_range|
        # horizontal cuts are of no concern here
        next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange

        cut_left  = cut_range.range.first
        cut_right = cut_range.range.last

        # Keep in mind:
        # * a cut location is to the immediate right of the base at its index
        #   (at^gc -- cut location is index 1)
        # * an enzyme action location is the base at the index itself
        clear = (enzyme_action.right <= cut_left) ||
                (enzyme_action.left > cut_right) ||
                (enzyme_action.left > cut_left && enzyme_action.right <= cut_right) # in between cuts
        blocked = true unless clear
      end

      next if blocked

      enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
      applied_cut_ranges += enzyme_action.cut_ranges
    end

    # Give the range its source bases so fragments can be rendered.
    sequence_range.fragments.primary = sequence
    sequence_range.fragments.complement = sequence.forward_complement
    results[key] = sequence_range
  end

  results
end
|
185
|
+
|
186
|
+
|
187
|
+
# Returns permutation orders for a given number of elements.
|
188
|
+
#
|
189
|
+
# Examples:
|
190
|
+
# permute(0) # => [[0]]
|
191
|
+
# permute(1) # => [[0]]
|
192
|
+
# permute(2) # => [[1, 0], [0, 1]]
|
193
|
+
# permute(3) # => [[2, 1, 0], [2, 0, 1], [1, 2, 0], [0, 2, 1], [1, 0, 2], [0, 1, 2]]
|
194
|
+
# permute(4) # => [[3, 2, 1, 0],
|
195
|
+
# [3, 2, 0, 1],
|
196
|
+
# [3, 1, 2, 0],
|
197
|
+
# [3, 0, 2, 1],
|
198
|
+
# [3, 1, 0, 2],
|
199
|
+
# [3, 0, 1, 2],
|
200
|
+
# [2, 3, 1, 0],
|
201
|
+
# [2, 3, 0, 1],
|
202
|
+
# [1, 3, 2, 0],
|
203
|
+
# [0, 3, 2, 1],
|
204
|
+
# [1, 3, 0, 2],
|
205
|
+
# [0, 3, 1, 2],
|
206
|
+
# [2, 1, 3, 0],
|
207
|
+
# [2, 0, 3, 1],
|
208
|
+
# [1, 2, 3, 0],
|
209
|
+
# [0, 2, 3, 1],
|
210
|
+
# [1, 0, 3, 2],
|
211
|
+
# [0, 1, 3, 2],
|
212
|
+
# [2, 1, 0, 3],
|
213
|
+
# [2, 0, 1, 3],
|
214
|
+
# [1, 2, 0, 3],
|
215
|
+
# [0, 2, 1, 3],
|
216
|
+
# [1, 0, 2, 3],
|
217
|
+
# [0, 1, 2, 3]]
|
218
|
+
#
|
219
|
+
# ---
|
220
|
+
# *Arguments*
|
221
|
+
# * +count+: +Number+ of different elements to be permuted
|
222
|
+
# * +permutations+: ignore - for the recursive algorithm
|
223
|
+
# *Returns*:: +Array+ of +Array+ objects with different possible permutation orders. See examples.
|
224
|
+
def permute(count, permutations = [[0]])
  orders = permutations
  remaining = count

  # Each pass introduces the next element (its value equals the current
  # length of an order) at every possible slot of every existing order.
  while remaining > 1
    next_value = orders[0].size
    grown = []
    0.upto(next_value) do |slot|
      orders.each { |order| grown << order.dup.insert(slot, next_value) }
    end
    orders = grown
    remaining -= 1
  end

  orders
end
|
238
|
+
|
239
|
+
end # Analysis
|
240
|
+
end # RestrictionEnzyme
|
241
|
+
end # Bio
|
@@ -0,0 +1,209 @@
|
|
1
|
+
# bio/util/restriction_enzyme/analysis_basic.rb - Does the work of fragmenting the DNA from the enzymes
|
2
|
+
|
3
|
+
require 'set' # for method create_enzyme_actions
|
4
|
+
require 'bio/util/restriction_enzyme'
|
5
|
+
|
6
|
+
module Bio
|
7
|
+
class RestrictionEnzyme
|
8
|
+
|
9
|
+
class Analysis
|
10
|
+
|
11
|
+
# See cut_without_permutations instance method
|
12
|
+
def self.cut_without_permutations( sequence, *args )
  # Class-level convenience: build a throwaway instance and forward
  # every argument to the instance method of the same name.
  analysis = new
  analysis.cut_without_permutations( sequence, *args )
end
|
15
|
+
|
16
|
+
# See main documentation for Bio::RestrictionEnzyme
|
17
|
+
#
|
18
|
+
# Bio::RestrictionEnzyme.cut is preferred over this!
|
19
|
+
#
|
20
|
+
# USE AT YOUR OWN RISK
|
21
|
+
#
|
22
|
+
# This is a simpler version of method +cut+. +cut+ takes into account
|
23
|
+
# permutations of cut variations based on competitiveness of enzymes for an
|
24
|
+
# enzyme cutsite or enzyme bindsite on a sequence. This does not take into
|
25
|
+
# account those possibilities and is therefore faster, but less likely to be
|
26
|
+
# accurate.
|
27
|
+
#
|
28
|
+
# This code is mainly included as an academic example
|
29
|
+
# without having to wade through the extra layer of complexity added by the
|
30
|
+
# permutations.
|
31
|
+
#
|
32
|
+
# Example:
|
33
|
+
#
|
34
|
+
# FIXME add output
|
35
|
+
#
|
36
|
+
# Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'EcoRI')
|
37
|
+
#
|
38
|
+
# _same as:_
|
39
|
+
#
|
40
|
+
# Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'g^aattc')
|
41
|
+
# ---
|
42
|
+
# *Arguments*
|
43
|
+
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
|
44
|
+
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
|
45
|
+
# *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments)
|
46
|
+
def cut_without_permutations( sequence, *args )
  # A missing, non-String or empty sequence yields an empty result set.
  unless sequence.kind_of?(String) && !sequence.empty?
    return fragments_for_display( {} )
  end
  sequence = Bio::Sequence::NA.new( sequence )

  # create_enzyme_actions returns two separate arrays; the distinction is
  # not needed here, so they are merged into one flat list.
  grouped_actions = create_enzyme_actions( sequence, *args )
  enzyme_actions = grouped_actions.flatten
  return fragments_for_display( {} ) if enzyme_actions.empty?

  # Primary and complement strands are both measured from 0 to sequence.size-1.
  sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )

  # Apply every cut of every enzyme action to the range.
  enzyme_actions.each do |enzyme_action|
    enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
  end

  # Give the range its source bases so fragments can be rendered.
  sequence_range.fragments.primary = sequence
  sequence_range.fragments.complement = sequence.forward_complement

  # Format the fragments for the user (single result, stored under key 0).
  fragments_for_display( {0 => sequence_range} )
end
|
73
|
+
|
74
|
+
#########
|
75
|
+
protected
|
76
|
+
#########
|
77
|
+
|
78
|
+
# Take the fragments from SequenceRange objects generated from add_cut_range
|
79
|
+
# and return unique results as a Bio::RestrictionEnzyme::Analysis::Fragment object.
|
80
|
+
#
|
81
|
+
# ---
|
82
|
+
# *Arguments*
|
83
|
+
# * +hsh+: +Hash+ Keys are a permutation ID, if any. Values are SequenceRange objects that have cuts applied.
|
84
|
+
# *Returns*:: Bio::RestrictionEnzyme::Analysis::Fragments object populated with Bio::RestrictionEnzyme::Analysis::Fragment objects.
|
85
|
+
def fragments_for_display( hsh, view_ranges=false )
  collected = Fragments.new
  return collected unless hsh

  hsh.each do |_permutation_id, sequence_range|
    sequence_range.fragments.for_display.each do |fragment|
      fields = [fragment.primary, fragment.complement]
      # Positional information is only carried along when it was requested.
      if view_ranges
        fields += [fragment.p_left, fragment.p_right, fragment.c_left, fragment.c_right]
      end
      collected << Bio::RestrictionEnzyme::Fragment.new(*fields)
    end
  end

  # Duplicates are dropped only when ranges are not being shown.
  collected.uniq! unless view_ranges

  collected
end
|
103
|
+
|
104
|
+
# Creates an array of EnzymeActions based on the DNA sequence and supplied enzymes.
|
105
|
+
#
|
106
|
+
# ---
|
107
|
+
# *Arguments*
|
108
|
+
# * +sequence+: The string of DNA to match the enzyme recognition sites against
|
109
|
+
# * +args+:: The enzymes to use.
|
110
|
+
# *Returns*:: +Array+ with the first element being an array of EnzymeAction objects that +sometimes_cut+, and are subject to competition. The second is an array of EnzymeAction objects that +always_cut+ and are not subject to competition.
|
111
|
+
def create_enzyme_actions( sequence, *args )
  actions = []

  args.each do |enzyme|
    unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
      enzyme = Bio::RestrictionEnzyme.new(enzyme)
    end

    # Build the search pattern from the aligned primary strand so that it
    # has the proper size; see the internal documentation of
    # Bio::RestrictionEnzyme::DoubleStranded.create_action_at
    aligned = Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands.align(
      enzyme.primary, enzyme.complement
    )
    pattern = Bio::Sequence::NA.new( aligned.primary ).to_re

    find_match_locations( sequence, pattern ).each do |offset|
      actions << enzyme.create_action_at( offset )
    end
  end

  # FIXME VerticalCutRange should really be called VerticalAndHorizontalCutRange

  # actions now holds an EnzymeAction for every match across the sequence.
  # Each pair is examined for competition: the earlier action's range
  # (left..right) is compared against the vertical cut extent of the later
  # one. Only vertical cuts are used as boundaries.
  #
  # If example Enzyme A makes this cut pattern (cut range 2..5):
  #
  #   0 1 2|3 4 5 6 7
  #        +-----+
  #   0 1 2 3 4 5|6 7
  #
  # then the range for Enzyme B needs its right side at index 2 or less,
  # or its left side at 6 or greater, to be free of competition.

  competition_indexes = Set.new

  actions[0..-2].each_with_index do |current, i|
    next if competition_indexes.include?(i)
    next if current.cut_ranges.empty? # no cuts, some enzymes are like this (ex. CjuI)

    actions[i+1..-1].each_with_index do |other, relative_index|
      j = relative_index + i + 1
      next if competition_indexes.include?(j)
      next if other.cut_ranges.empty? # no cuts

      independent = (current.right <= other.cut_ranges.min_vertical) ||
                    (current.left > other.cut_ranges.max_vertical)
      next if independent

      # record both participants of the conflict
      competition_indexes << i << j
    end
  end

  sometimes_cut = actions.values_at( *competition_indexes )
  always_cut = actions.reject { |action| sometimes_cut.include?(action) }

  [sometimes_cut, always_cut]
end
|
182
|
+
|
183
|
+
# Returns an +Array+ of the match indices of a +Regexp+ to a string.
|
184
|
+
#
|
185
|
+
# Example:
|
186
|
+
#
|
187
|
+
# find_match_locations('abccdefeg', /[ce]/) # => [2,3,5,7]
|
188
|
+
#
|
189
|
+
# ---
|
190
|
+
# *Arguments*
|
191
|
+
# * +string+: The string to scan
|
192
|
+
# * +re+: A RegExp to use
|
193
|
+
# *Returns*:: +Array+ with indices of match locations
|
194
|
+
def find_match_locations( string, re )
  locations = []

  # Search the original string from a moving start offset instead of
  # re-matching against freshly sliced substrings. Anchors and look-behind
  # in +re+ then see the real string, no substring is allocated per match,
  # and the search simply ends once the offset runs past the string.
  position = string.index( re )
  while position
    locations << position
    # Resume one character after the last hit so overlapping matches
    # (e.g. /aa/ in 'aaa') are still reported.
    position = string.index( re, position + 1 )
  end

  locations
end
|
206
|
+
|
207
|
+
end # Analysis
|
208
|
+
end # RestrictionEnzyme
|
209
|
+
end # Bio
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# bio/util/restriction_enzyme/cut_symbol.rb - Defines the symbol used to mark a cut in an enzyme sequence
|
2
|
+
|
3
|
+
module Bio
|
4
|
+
class RestrictionEnzyme
|
5
|
+
|
6
|
+
# = Usage
|
7
|
+
#
|
8
|
+
# #require 'bio/util/restriction_enzyme/cut_symbol'
|
9
|
+
# require 'cut_symbol'
|
10
|
+
# include Bio::RestrictionEnzyme::CutSymbol
|
11
|
+
#
|
12
|
+
# cut_symbol # => "^"
|
13
|
+
# set_cut_symbol('|') # => "|"
|
14
|
+
# cut_symbol # => "|"
|
15
|
+
# escaped_cut_symbol # => "\\|"
|
16
|
+
# re_cut_symbol # => /\|/
|
17
|
+
# set_cut_symbol('^') # => "^"
|
18
|
+
# "abc^de" =~ re_cut_symbol # => 3
|
19
|
+
# "abc^de" =~ re_cut_symbol_adjacent # => nil
|
20
|
+
# "abc^^de" =~ re_cut_symbol_adjacent # => 3
|
21
|
+
# "a^bc^^de" =~ re_cut_symbol_adjacent # => 4
|
22
|
+
# "a^bc^de" =~ re_cut_symbol_adjacent # => nil
|
23
|
+
#
|
24
|
+
module CutSymbol

  # Set the token to be used as the cut symbol in a restriction enzyme sequence
  #
  # Starts as +^+ character
  #
  # ---
  # *Arguments*
  # * +glyph+: The single character to be used as the cut symbol in an enzyme sequence
  # *Returns*:: +glyph+
  # *Raises*:: +ArgumentError+ if +glyph+ is not a one-character +String+
  def set_cut_symbol(glyph)
    CutSymbol__.cut_symbol = glyph
  end

  # Get the token that's used as the cut symbol in a restriction enzyme sequence
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +glyph+
  def cut_symbol; CutSymbol__.cut_symbol; end

  # Get the token that's used as the cut symbol in a restriction enzyme sequence with
  # a back-slash preceding it.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +\glyph+
  def escaped_cut_symbol; CutSymbol__.escaped_cut_symbol; end

  # Used to check if multiple cut symbols are next to each other.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +RegExp+
  def re_cut_symbol_adjacent
    # Regexp.escape rather than a blind back-slash prefix: prefixing turns
    # glyphs such as 'd' or '1' into the class \d or a back-reference.
    %r"#{Regexp.escape(cut_symbol)}{2}"
  end

  # A Regexp of the cut_symbol.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +RegExp+
  def re_cut_symbol
    # See re_cut_symbol_adjacent for why Regexp.escape is used.
    %r"#{Regexp.escape(cut_symbol)}"
  end

  #########
  #protected # NOTE this is a Module, can't hide CutSymbol__
  #########

  require 'singleton'

  # Class to keep state
  #
  # The current glyph is held in a class-level instance variable, so it is
  # shared by everything that includes CutSymbol.
  class CutSymbol__
    include Singleton

    @cut_symbol = '^'

    # Current cut symbol.
    def self.cut_symbol; @cut_symbol; end

    # Replace the cut symbol.
    #
    # Raises +ArgumentError+ unless +glyph+ is a +String+ of exactly one
    # character.
    def self.cut_symbol=(glyph)
      unless glyph.kind_of?(String) && glyph.size == 1
        raise ArgumentError, "cut symbol must be a single character String, got #{glyph.inspect}"
      end
      @cut_symbol = glyph
    end

    # Current cut symbol with a back-slash in front of it.
    def self.escaped_cut_symbol; "\\" + self.cut_symbol; end
  end

end # CutSymbol
|
98
|
+
end # RestrictionEnzyme
|
99
|
+
end # Bio
|