bio-restriction_enzyme 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/COPYING.txt +121 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +7 -0
- data/README.rdoc +22 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-restriction_enzyme.gemspec +99 -0
- data/lib/bio-restriction_enzyme.rb +1 -0
- data/lib/bio/util/restriction_enzyme.rb +218 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +241 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +209 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +99 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +313 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +127 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +95 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +30 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +68 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +99 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +16 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +39 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +59 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +249 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +236 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +43 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +33 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +69 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +193 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +127 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +15 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +103 -0
- data/test/bio-restriction_enzyme/analysis/test_calculated_cuts.rb +281 -0
- data/test/bio-restriction_enzyme/analysis/test_cut_ranges.rb +87 -0
- data/test/bio-restriction_enzyme/analysis/test_sequence_range.rb +223 -0
- data/test/bio-restriction_enzyme/double_stranded/test_aligned_strands.rb +84 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair.rb +58 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +56 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_locations.rb +35 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +87 -0
- data/test/bio-restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +66 -0
- data/test/bio-restriction_enzyme/test_analysis.rb +228 -0
- data/test/bio-restriction_enzyme/test_cut_symbol.rb +27 -0
- data/test/bio-restriction_enzyme/test_double_stranded.rb +98 -0
- data/test/bio-restriction_enzyme/test_single_strand.rb +131 -0
- data/test/bio-restriction_enzyme/test_single_strand_complement.rb +131 -0
- data/test/bio-restriction_enzyme/test_string_formatting.rb +43 -0
- data/test/helper.rb +17 -0
- data/test/test_bio-restriction_enzyme.rb +21 -0
- metadata +153 -0
@@ -0,0 +1,241 @@
|
|
1
|
+
# bio/util/restriction_enzyme/analysis.rb - Does the work of fragmenting the DNA from the enzymes
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
require 'bio/util/restriction_enzyme/analysis_basic'
|
5
|
+
|
6
|
+
module Bio
  class RestrictionEnzyme

    class Analysis

      # Class-level convenience wrapper: builds a new Analysis and delegates to
      # the +cut+ instance method.
      def self.cut( sequence, *args )
        new.cut( sequence, *args )
      end

      # See main documentation for Bio::RestrictionEnzyme
      #
      # +cut+ takes into account permutations of cut variations based on
      # competitiveness of enzymes for an enzyme cutsite or enzyme bindsite on
      # a sequence.
      #
      # Example:
      #
      # FIXME add output
      #
      #   Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'EcoRI')
      #
      # _same as:_
      #
      #   Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'g^aattc')
      # ---
      # *Arguments*
      # * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
      # * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
      #   A +Hash+ with the key +:view_ranges+ (value must be +true+ or +false+)
      #   may also be supplied; it is forwarded to +fragments_for_display+.
      # *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments) or a +Symbol+ containing an error code
      # *Raises*:: +ArgumentError+ when +:view_ranges+ is given a non-boolean value.
      def cut( sequence, *args )
        view_ranges = false

        args.select { |i| i.class == Hash }.each do |hsh|
          hsh.each do |key, value|
            next unless key == :view_ranges
            unless value.kind_of?(TrueClass) || value.kind_of?(FalseClass)
              raise ArgumentError, "view_ranges must be set to true or false, currently #{value.inspect}."
            end
            view_ranges = value
          end
        end

        res = cut_and_return_by_permutations( sequence, *args )
        # A Symbol is an error code (see cut_and_return_by_permutations); hand it back untouched.
        return res if res.class == Symbol

        # Format the fragments for the user
        fragments_for_display( res, view_ranges )
      end

      #########
      protected
      #########

      # See cut instance method
      #
      # ---
      # *Arguments*
      # * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
      # * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
      #   May also supply a +Hash+ with the key ":max_permutations" to specify how many permutations are allowed - a value of 0 indicates no permutations are allowed.
      # *Returns*:: +Hash+ Keys are a permutation ID, values are SequenceRange objects that have cuts applied.
      # _also_ may return the +Symbol+ ':sequence_empty', ':no_cuts_found', or ':too_many_permutations'
      # *Raises*:: +ArgumentError+ when a +Hash+ argument contains an unknown key.
      def cut_and_return_by_permutations( sequence, *args )
        my_hash = {}
        maximum_permutations = nil

        # Option hashes are separated from the enzyme arguments so that only
        # enzymes reach create_enzyme_actions.
        hashes_in_args, enzymes = args.partition { |i| i.class == Hash }
        hashes_in_args.each do |hsh|
          hsh.each do |key, value|
            case key
            when :max_permutations, 'max_permutations', :maximum_permutations, 'maximum_permutations'
              maximum_permutations = value.to_i unless value.nil?
            when :view_ranges
              # handled by the cut instance method; accepted here so it is not rejected
            else
              raise ArgumentError, "Received key #{key.inspect} in argument - I only know the key ':max_permutations' and ':view_ranges' currently. Hash passed: #{hsh.inspect}"
            end
          end
        end

        if !sequence.kind_of?(String) || sequence.empty?
          logger.warn "The supplied sequence is empty." if defined?(logger)
          return :sequence_empty
        end
        sequence = Bio::Sequence::NA.new( sequence )

        enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *enzymes )

        if enzyme_actions.empty? && initial_cuts.empty?
          logger.warn "This enzyme does not make any cuts on this sequence." if defined?(logger)
          return :no_cuts_found
        end

        # * When enzyme_actions.size is equal to '1' that means there are no permutations.
        # * If enzyme_actions.size is equal to '2' there is one
        #   permutation ("[0, 1]")
        # * If enzyme_actions.size is equal to '3' there are two
        #   permutations ("[0, 1, 2]")
        # * and so on..
        if maximum_permutations && enzyme_actions.size > 1 &&
           (enzyme_actions.size - 1) > maximum_permutations
          logger.warn "More permutations than maximum, skipping. Found: #{enzyme_actions.size-1} Max: #{maximum_permutations.to_i}" if defined?(logger)
          return :too_many_permutations
        end

        if enzyme_actions.size > 1
          permute(enzyme_actions.size).each do |permutation|
            previous_cut_ranges = []
            sequence_range = build_sequence_range( sequence, initial_cuts )

            permutation.each do |id|
              enzyme_action = enzyme_actions[id]

              # Skip this enzyme action when an earlier action in this
              # permutation already cut inside the site it needs intact.
              next if conflicts_with_previous_cuts?( enzyme_action, previous_cut_ranges )

              enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
              previous_cut_ranges += enzyme_action.cut_ranges
            end

            assign_source_sequence( sequence_range, sequence )
            my_hash[permutation] = sequence_range
          end
        else
          # no permutations, just do it
          sequence_range = build_sequence_range( sequence, initial_cuts )
          assign_source_sequence( sequence_range, sequence )
          my_hash[0] = sequence_range
        end

        my_hash
      end

      # Creates a SequenceRange spanning the whole sequence and applies the
      # cuts of every enzyme action in +initial_cuts+. These are the cuts that
      # have no competition so are not subject to permutations.
      #
      # Primary and complement strands are both measured from '0' to
      # 'sequence.size-1' here.
      def build_sequence_range( sequence, initial_cuts )
        sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
        initial_cuts.each do |enzyme_action|
          enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
        end
        sequence_range
      end

      # Fills in the source sequence for +sequence_range+ so it knows what
      # bases to use. Called after all cuts have been added, as in the
      # original statement order.
      def assign_source_sequence( sequence_range, sequence )
        sequence_range.fragments.primary = sequence
        sequence_range.fragments.complement = sequence.forward_complement
      end

      # Returns false if +enzyme_action+ may cut in its range, true if it
      # cannot due to a previous enzyme action making a cut where this enzyme
      # action needs a whole recognition site.
      #
      # Note that the enzyme action may fall in the middle of a previous enzyme
      # action, so every previous vertical cut range is checked.
      #
      # Keep in mind:
      # * The cut location is to the immediate right of the base located at the index.
      #   ex: at^gc -- the cut location is at index 1
      # * The enzyme action location is located at the base of the index.
      #   ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
      # * method create_enzyme_actions has similar commentary if interested
      def conflicts_with_previous_cuts?( enzyme_action, previous_cut_ranges )
        previous_cut_ranges.any? do |cut_range|
          # we aren't concerned with horizontal cuts
          next false unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange

          previous_cut_left  = cut_range.range.first
          previous_cut_right = cut_range.range.last

          no_conflict =
            (enzyme_action.right <= previous_cut_left) ||
            (enzyme_action.left > previous_cut_right) ||
            (enzyme_action.left > previous_cut_left && enzyme_action.right <= previous_cut_right) # in between cuts

          !no_conflict
        end
      end

      # Returns permutation orders for a given number of elements.
      #
      # Examples:
      #   permute(0) # => [[0]]
      #   permute(1) # => [[0]]
      #   permute(2) # => [[1, 0], [0, 1]]
      #   permute(3) # => [[2, 1, 0], [2, 0, 1], [1, 2, 0], [0, 2, 1], [1, 0, 2], [0, 1, 2]]
      #   permute(4) # => [[3, 2, 1, 0], [3, 2, 0, 1], [3, 1, 2, 0], [3, 0, 2, 1],
      #                    [3, 1, 0, 2], [3, 0, 1, 2], [2, 3, 1, 0], [2, 3, 0, 1],
      #                    [1, 3, 2, 0], [0, 3, 2, 1], [1, 3, 0, 2], [0, 3, 1, 2],
      #                    [2, 1, 3, 0], [2, 0, 3, 1], [1, 2, 3, 0], [0, 2, 3, 1],
      #                    [1, 0, 3, 2], [0, 1, 3, 2], [2, 1, 0, 3], [2, 0, 1, 3],
      #                    [1, 2, 0, 3], [0, 2, 1, 3], [1, 0, 2, 3], [0, 1, 2, 3]]
      #
      # ---
      # *Arguments*
      # * +count+: +Number+ of different elements to be permuted
      # * +permutations+: ignore - seed used while building up the result
      # *Returns*:: +Array+ of +Array+ objects with different possible permutation orders. See examples.
      def permute(count, permutations = [[0]])
        orders = permutations
        remaining = count

        until remaining <= 1
          next_element = orders[0].size
          # Insert the next element at every possible slot of every existing
          # order; slot-major ordering reproduces the documented sequence.
          orders = (0..next_element).flat_map do |slot|
            orders.map { |order| order.dup.insert(slot, next_element) }
          end
          remaining -= 1
        end

        orders
      end

    end # Analysis
  end # RestrictionEnzyme
end # Bio
|
@@ -0,0 +1,209 @@
|
|
1
|
+
# bio/util/restriction_enzyme/analysis_basic.rb - Does the work of fragmenting the DNA from the enzymes
|
2
|
+
|
3
|
+
require 'set' # for method create_enzyme_actions
|
4
|
+
require 'bio/util/restriction_enzyme'
|
5
|
+
|
6
|
+
module Bio
  class RestrictionEnzyme

    class Analysis

      # Class-level convenience wrapper: builds a new Analysis and delegates to
      # the +cut_without_permutations+ instance method.
      def self.cut_without_permutations( sequence, *args )
        new.cut_without_permutations( sequence, *args )
      end

      # See main documentation for Bio::RestrictionEnzyme
      #
      # Bio::RestrictionEnzyme.cut is preferred over this!
      #
      # USE AT YOUR OWN RISK
      #
      # This is a simpler version of method +cut+. +cut+ takes into account
      # permutations of cut variations based on competitiveness of enzymes for an
      # enzyme cutsite or enzyme bindsite on a sequence. This does not take into
      # account those possibilities and is therefore faster, but less likely to be
      # accurate.
      #
      # This code is mainly included as an academic example
      # without having to wade through the extra layer of complexity added by the
      # permutations.
      #
      # Example:
      #
      # FIXME add output
      #
      #   Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'EcoRI')
      #
      # _same as:_
      #
      #   Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'g^aattc')
      # ---
      # *Arguments*
      # * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
      # * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
      # *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments)
      def cut_without_permutations( sequence, *args )
        return fragments_for_display( {} ) if !sequence.kind_of?(String) || sequence.empty?
        sequence = Bio::Sequence::NA.new( sequence )

        # create_enzyme_actions returns two separate array elements, they're not
        # needed separated here so we put them into one array
        enzyme_actions = create_enzyme_actions( sequence, *args ).flatten
        return fragments_for_display( {} ) if enzyme_actions.empty?

        # Primary and complement strands are both measured from '0' to 'sequence.size-1' here
        sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )

        # Add the cuts to the sequence_range from each enzyme_action
        enzyme_actions.each do |enzyme_action|
          enzyme_action.cut_ranges.each do |cut_range|
            sequence_range.add_cut_range(cut_range)
          end
        end

        # Fill in the source sequence for sequence_range so it knows what bases
        # to use
        sequence_range.fragments.primary = sequence
        sequence_range.fragments.complement = sequence.forward_complement

        # Format the fragments for the user
        fragments_for_display( {0 => sequence_range} )
      end

      #########
      protected
      #########

      # Take the fragments from SequenceRange objects generated from add_cut_range
      # and return unique results as a Bio::RestrictionEnzyme::Analysis::Fragment object.
      #
      # ---
      # *Arguments*
      # * +hsh+: +Hash+ Keys are a permutation ID, if any. Values are SequenceRange objects that have cuts applied.
      # * +view_ranges+: when true, each Fragment is also given the
      #   p_left/p_right/c_left/c_right values of the source fragment and
      #   duplicates are kept; when false (default) duplicates are removed.
      # *Returns*:: Bio::RestrictionEnzyme::Analysis::Fragments object populated with Bio::RestrictionEnzyme::Analysis::Fragment objects.
      def fragments_for_display( hsh, view_ranges=false )
        ary = Fragments.new
        return ary unless hsh

        hsh.each do |permutation_id, sequence_range|
          sequence_range.fragments.for_display.each do |fragment|
            if view_ranges
              ary << Bio::RestrictionEnzyme::Fragment.new(fragment.primary, fragment.complement, fragment.p_left, fragment.p_right, fragment.c_left, fragment.c_right)
            else
              ary << Bio::RestrictionEnzyme::Fragment.new(fragment.primary, fragment.complement)
            end
          end
        end

        ary.uniq! unless view_ranges

        ary
      end

      # Creates an array of EnzymeActions based on the DNA sequence and supplied enzymes.
      #
      # ---
      # *Arguments*
      # * +sequence+: The string of DNA to match the enzyme recognition sites against
      # * +args+:: The enzymes to use.
      # *Returns*:: +Array+ with the first element being an array of EnzymeAction objects that +sometimes_cut+, and are subject to competition. The second is an array of EnzymeAction objects that +always_cut+ and are not subject to competition.
      def create_enzyme_actions( sequence, *args )
        all_enzyme_actions = []

        args.each do |enzyme|
          enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded

          # make sure pattern is the proper size
          # for more info see the internal documentation of
          # Bio::RestrictionEnzyme::DoubleStranded.create_action_at
          pattern = Bio::Sequence::NA.new(
            Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands.align(
              enzyme.primary, enzyme.complement
            ).primary
          ).to_re

          find_match_locations( sequence, pattern ).each do |offset|
            all_enzyme_actions << enzyme.create_action_at( offset )
          end
        end

        # FIXME VerticalCutRange should really be called VerticalAndHorizontalCutRange

        # * all_enzyme_actions is now full of EnzymeActions at specific locations across
        #   the sequence.
        # * all_enzyme_actions will now be examined to see if any EnzymeActions may
        #   conflict with one another, and if they do they'll be made note of in
        #   competition_indexes. They are then split out of the always-cut list below.
        # * a conflict occurs if another enzyme's bind site is compromised due
        #   to another enzyme's cut. Enzyme's bind sites may overlap and not be
        #   competitive, however neither bind site may be part of the other
        #   enzyme's cut or else they do become competitive.
        #
        # Take current EnzymeAction's entire bind site and compare it to all other
        # EnzymeAction's cut ranges. Only look for vertical cuts as boundaries
        # since trailing horizontal cuts would have no influence on the bind site.
        #
        # If example Enzyme A makes this cut pattern (cut range 2..5):
        #
        #   0 1 2|3 4 5 6 7
        #        +-----+
        #   0 1 2 3 4 5|6 7
        #
        # Then the bind site (and EnzymeAction range) for Enzyme B would need its
        # right side to be at index 2 or less, or its left side to be 6 or greater.

        competition_indexes = Set.new

        all_enzyme_actions[0..-2].each_with_index do |current_enzyme_action, i|
          next if competition_indexes.include? i
          next if current_enzyme_action.cut_ranges.empty? # no cuts, some enzymes are like this (ex. CjuI)

          all_enzyme_actions[i+1..-1].each_with_index do |comparison_enzyme_action, j|
            j += (i + 1) # translate the slice-relative index back to all_enzyme_actions
            next if competition_indexes.include? j
            next if comparison_enzyme_action.cut_ranges.empty? # no cuts

            if (current_enzyme_action.right <= comparison_enzyme_action.cut_ranges.min_vertical) ||
               (current_enzyme_action.left > comparison_enzyme_action.cut_ranges.max_vertical)
              # no conflict
            else
              competition_indexes += [i, j] # merge both indexes into the flat set
            end
          end
        end

        sometimes_cut = all_enzyme_actions.values_at( *competition_indexes )
        always_cut = all_enzyme_actions.reject { |x| sometimes_cut.include? x }

        [sometimes_cut, always_cut]
      end

      # Returns an +Array+ of the match indices of a +RegExp+ to a string.
      # Overlapping matches are reported: after each hit the search resumes one
      # character to the right of where that hit started.
      #
      # The search always runs against the full string (String#index with a
      # start position), so anchors and look-behind in +re+ are evaluated
      # relative to the original string, and a pattern that can match the
      # empty string terminates cleanly at the end of the string.
      #
      # Example:
      #
      #   find_match_locations('abccdefeg', /[ce]/) # => [2,3,5,7]
      #
      # ---
      # *Arguments*
      # * +string+: The string to scan
      # * +re+: A RegExp to use (a +String+ is converted with Regexp.new)
      # *Returns*:: +Array+ with indices of match locations
      def find_match_locations( string, re )
        pattern = re.is_a?(Regexp) ? re : Regexp.new(re)
        locations = []

        position = string.index( pattern )
        while position
          locations << position
          # find the next match, starting one character past this one
          position = string.index( pattern, position + 1 )
        end

        locations
      end

    end # Analysis
  end # RestrictionEnzyme
end # Bio
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# bio/util/restriction_enzyme/cut_symbol.rb - Defines the symbol used to mark a cut in an enzyme sequence
|
2
|
+
|
3
|
+
module Bio
  class RestrictionEnzyme

    # = Usage
    #
    #   #require 'bio/util/restriction_enzyme/cut_symbol'
    #   require 'cut_symbol'
    #   include Bio::RestrictionEnzyme::CutSymbol
    #
    #   cut_symbol                            # => "^"
    #   set_cut_symbol('|')                   # => "|"
    #   cut_symbol                            # => "|"
    #   escaped_cut_symbol                    # => "\\|"
    #   re_cut_symbol                         # => /\|/
    #   set_cut_symbol('^')                   # => "^"
    #   "abc^de" =~ re_cut_symbol             # => 3
    #   "abc^de" =~ re_cut_symbol_adjacent    # => nil
    #   "abc^^de" =~ re_cut_symbol_adjacent   # => 3
    #   "a^bc^^de" =~ re_cut_symbol_adjacent  # => 4
    #   "a^bc^de" =~ re_cut_symbol_adjacent   # => nil
    #
    module CutSymbol

      # Set the token to be used as the cut symbol in a restriction enzyme sequence
      #
      # Starts as +^+ character
      #
      # ---
      # *Arguments*
      # * +glyph+: The single character to be used as the cut symbol in an enzyme sequence
      # *Returns*:: +glyph+
      # *Raises*:: +ArgumentError+ unless +glyph+ is a one-character +String+
      def set_cut_symbol(glyph)
        CutSymbol__.cut_symbol = glyph
      end

      # Get the token that's used as the cut symbol in a restriction enzyme sequence
      #
      # ---
      # *Arguments*
      # * _none_
      # *Returns*:: +glyph+
      def cut_symbol; CutSymbol__.cut_symbol; end

      # Get the token that's used as the cut symbol in a restriction enzyme sequence with
      # a back-slash preceding it.
      #
      # NOTE: for alphanumeric glyphs the result is not a literal match when
      # interpolated into a Regexp ("\\d" means "any digit"); use
      # +re_cut_symbol+ / +re_cut_symbol_adjacent+ to match the glyph itself.
      #
      # ---
      # *Arguments*
      # * _none_
      # *Returns*:: +\glyph+
      def escaped_cut_symbol; CutSymbol__.escaped_cut_symbol; end

      # Used to check if multiple cut symbols are next to each other.
      #
      # ---
      # *Arguments*
      # * _none_
      # *Returns*:: +RegExp+ matching two consecutive cut symbols
      def re_cut_symbol_adjacent
        # Regexp.escape keeps the glyph literal whatever character it is.
        %r"#{Regexp.escape(cut_symbol)}{2}"
      end

      # A Regexp of the cut_symbol.
      #
      # ---
      # *Arguments*
      # * _none_
      # *Returns*:: +RegExp+ matching a single literal cut symbol
      def re_cut_symbol
        %r"#{Regexp.escape(cut_symbol)}"
      end

      #########
      #protected # NOTE this is a Module, can't hide CutSymbol__
      #########

      require 'singleton'

      # Class to keep state. The glyph is held in a class-level instance
      # variable, so it is shared by everything that includes CutSymbol.
      class CutSymbol__
        include Singleton

        @cut_symbol = '^'

        # Current cut symbol glyph.
        def self.cut_symbol; @cut_symbol; end

        # Replace the cut symbol glyph; it must be a one-character String.
        def self.cut_symbol=(glyph)
          unless glyph.is_a?(String) && glyph.size == 1
            raise ArgumentError, "cut symbol must be a single character String, currently #{glyph.inspect}."
          end
          @cut_symbol = glyph
        end

        # The glyph with a back-slash in front of it.
        def self.escaped_cut_symbol; "\\" + self.cut_symbol; end
      end

    end # CutSymbol
  end # RestrictionEnzyme
end # Bio
|