bio-restriction_enzyme 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/COPYING.txt +121 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +7 -0
- data/README.rdoc +22 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-restriction_enzyme.gemspec +99 -0
- data/lib/bio-restriction_enzyme.rb +1 -0
- data/lib/bio/util/restriction_enzyme.rb +218 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +241 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +209 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +99 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +313 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +127 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +95 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +30 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +68 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +99 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +16 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +39 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +59 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +249 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +236 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +43 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +33 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +69 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +193 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +127 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +15 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +103 -0
- data/test/bio-restriction_enzyme/analysis/test_calculated_cuts.rb +281 -0
- data/test/bio-restriction_enzyme/analysis/test_cut_ranges.rb +87 -0
- data/test/bio-restriction_enzyme/analysis/test_sequence_range.rb +223 -0
- data/test/bio-restriction_enzyme/double_stranded/test_aligned_strands.rb +84 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair.rb +58 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +56 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_locations.rb +35 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +87 -0
- data/test/bio-restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +66 -0
- data/test/bio-restriction_enzyme/test_analysis.rb +228 -0
- data/test/bio-restriction_enzyme/test_cut_symbol.rb +27 -0
- data/test/bio-restriction_enzyme/test_double_stranded.rb +98 -0
- data/test/bio-restriction_enzyme/test_single_strand.rb +131 -0
- data/test/bio-restriction_enzyme/test_single_strand_complement.rb +131 -0
- data/test/bio-restriction_enzyme/test_string_formatting.rb +43 -0
- data/test/helper.rb +17 -0
- data/test/test_bio-restriction_enzyme.rb +21 -0
- metadata +153 -0
@@ -0,0 +1,236 @@
|
|
1
|
+
# bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb -
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class Range
|
8
|
+
class SequenceRange
|
9
|
+
|
10
|
+
# cc = CalculatedCuts.new(@size)
|
11
|
+
# cc.add_cuts_from_cut_ranges(@cut_ranges)
|
12
|
+
# cc.remove_incomplete_cuts
|
13
|
+
#
|
14
|
+
# 1 2 3 4 5 6 7
|
15
|
+
# G A|T T A C A
|
16
|
+
# +-----+
|
17
|
+
# C T A A T|G T
|
18
|
+
# 1 2 3 4 5 6 7
|
19
|
+
#
|
20
|
+
# Primary cut = 2
|
21
|
+
# Complement cut = 5
|
22
|
+
# Horizontal cuts = 3, 4, 5
|
23
|
+
#
|
24
|
+
class CalculatedCuts
|
25
|
+
include CutSymbol
|
26
|
+
include StringFormatting
|
27
|
+
|
28
|
+
# +Array+ of vertical cuts on the primary strand in 0-based index notation
|
29
|
+
attr_reader :vc_primary
|
30
|
+
|
31
|
+
# +Array+ of vertical cuts on the complementary strand in 0-based index notation
|
32
|
+
attr_reader :vc_complement
|
33
|
+
|
34
|
+
# +Array+ of horizontal cuts between strands in 0-based index notation
|
35
|
+
attr_reader :hc_between_strands
|
36
|
+
|
37
|
+
# Set to +true+ if the fragment CalculatedCuts is working on is circular
|
38
|
+
attr_accessor :circular
|
39
|
+
|
40
|
+
#--
|
41
|
+
## An +Array+ with the primary strand with vertical cuts, the horizontal cuts, and the complementary strand with vertical cuts.
|
42
|
+
#attr_reader :strands_for_display
|
43
|
+
#++
|
44
|
+
|
45
|
+
# If +false+ the strands_for_display method needs to be called to update the contents
|
46
|
+
# of @strands_for_display. Becomes out of date whenever add_cuts_from_cut_ranges is called.
|
47
|
+
attr_reader :strands_for_display_current
|
48
|
+
|
49
|
+
# Size of the sequence being digested.
|
50
|
+
attr_reader :size
|
51
|
+
|
52
|
+
def initialize(size=nil, circular=false)
  # A new object holds no cuts yet; each list gets its own empty Array.
  @vc_primary, @vc_complement, @hc_between_strands = [], [], []

  # Length and topology of the sequence being digested, as given by the caller.
  @circular = circular
  @size = size
end
|
59
|
+
|
60
|
+
# Accepts an +Array+ of CutRange type objects and applies them to
|
61
|
+
# @vc_complement, @vc_primary, and @hc_between_strands.
|
62
|
+
#
|
63
|
+
# ---
|
64
|
+
# *Arguments*
|
65
|
+
# * +cut_ranges+: An +Array+ of HorizontalCutRange or VerticalCutRange objects
|
66
|
+
# *Returns*:: nothing
|
67
|
+
def add_cuts_from_cut_ranges(cut_ranges)
  # The cached display strings no longer describe the cuts once new ones arrive.
  @strands_for_display_current = false

  cut_ranges.each do |cut_range|
    # Every range contributes its (possibly nil) vertical cuts on both strands;
    # nil entries are dropped by clean_all at the end.
    @vc_primary = @vc_primary + [cut_range.p_cut_left, cut_range.p_cut_right]
    @vc_complement = @vc_complement + [cut_range.c_cut_left, cut_range.c_cut_right]

    # Horizontal cuts either span the gap between the outermost vertical cuts
    # of a VerticalCutRange or are spelled out by a HorizontalCutRange.
    if cut_range.instance_of?(VerticalCutRange)
      if cut_range.min < cut_range.max
        (cut_range.min + 1).upto(cut_range.max) { |index| @hc_between_strands << index }
      end
    elsif cut_range.instance_of?(HorizontalCutRange)
      horizontal = cut_range.hcuts
      horizontal.first.upto(horizontal.last) { |index| @hc_between_strands << index }
    end
  end

  # Strip nils and duplicates and sort all three lists.
  clean_all
end
|
85
|
+
|
86
|
+
# There may be incomplete cuts made, this method removes the cuts that don't
|
87
|
+
# create sub-sequences for easier processing.
|
88
|
+
#
|
89
|
+
# For example, stray horizontal cuts that do not end with a left
|
90
|
+
# and right separation:
|
91
|
+
#
|
92
|
+
# G A T T A C A
|
93
|
+
# +-- ---
|
94
|
+
# C T|A A T G T
|
95
|
+
#
|
96
|
+
# Or stray vertical cuts:
|
97
|
+
#
|
98
|
+
# G A T T A C A
|
99
|
+
# +-- +
|
100
|
+
# C T|A A T|G T
|
101
|
+
#
|
102
|
+
# However note that for non-circular sequences this would be a successful
|
103
|
+
# cut which would result in a floating 'GT' sub-sequence:
|
104
|
+
#
|
105
|
+
# G A T T A C A
|
106
|
+
# +---
|
107
|
+
# C T A A T|G T
|
108
|
+
#
|
109
|
+
# Blunt cuts are also complete cuts.
|
110
|
+
# ---
|
111
|
+
# *Arguments*
|
112
|
+
# * +size+: (_optional_) Size of the sequence being digested. Defined here or during initialization of CalculatedCuts.
|
113
|
+
# *Returns*:: nothing
|
114
|
+
def remove_incomplete_cuts(size=nil)
  # Discards cuts that cannot separate a fragment: horizontal runs that are
  # not closed by a vertical cut on both ends, and vertical cuts that touch
  # neither a surviving horizontal cut nor a cut at the same index on the
  # opposite strand (a blunt cut). Updates @vc_primary, @vc_complement and
  # @hc_between_strands; raises IndexError when no usable size is known for a
  # linear sequence or when a horizontal cut lies outside -1..(size - 1).
  @strands_for_display_current = false
  @size = size if size

  # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
  # in 3.2, where referencing it raises NameError. Integer is its superclass
  # on older interpreters, so previously accepted sizes are still accepted.
  unless @size.kind_of?(Integer) || @circular
    raise IndexError, "Size of the strand must be provided here or during initalization."
  end

  vcuts = (@vc_primary + @vc_complement).uniq.sort
  hcuts = @hc_between_strands
  # NOTE(review): a circular sequence without a size passes the guard above
  # and then fails on this subtraction -- confirm the intended behaviour.
  last_index = @size - 1
  good_hcuts = []
  potential_hcuts = []

  if @circular
    # NOTE
    # if it's circular we should start at the beginning of a cut for orientation,
    # scan for it, hack off the first set of hcuts and move them to the back
  else
    # The two ends of a linear sequence behave like vertical cuts.
    vcuts.unshift(-1) unless vcuts.include?(-1)
    vcuts.push(last_index) unless vcuts.include?(last_index)
  end

  hcuts.each do |hcut|
    raise IndexError if hcut < -1 || hcut > last_index

    # A gap in the run (skipped nucleotide) invalidates the pending candidates.
    potential_hcuts.clear if !potential_hcuts.empty? && (hcut - potential_hcuts.last).abs > 1

    if potential_hcuts.empty?
      if vcuts.include?(hcut) && vcuts.include?(hcut - 1)
        # Closed on both sides by vertical cuts: complete on its own.
        good_hcuts << hcut
      elsif vcuts.include?(hcut - 1)
        # Opened by a vertical cut on the left; wait for the closing one.
        potential_hcuts << hcut
      end
    elsif vcuts.include?(hcut)
      # The pending run is closed on the right, so the whole run is complete.
      good_hcuts.concat(potential_hcuts) << hcut
      potential_hcuts.clear
    else
      potential_hcuts << hcut
    end
  end

  # opposing_vcuts is only consulted to recognise blunt cuts.
  keep_complete = lambda do |vertical_cuts, opposing_vcuts|
    vertical_cuts.select do |vc|
      good_hcuts.include?(vc) || good_hcuts.include?(vc + 1) || opposing_vcuts.include?(vc)
    end
  end

  # The complement is checked against the already filtered primary cuts,
  # matching the original evaluation order.
  @vc_primary = keep_complete.call(@vc_primary, @vc_complement)
  @vc_complement = keep_complete.call(@vc_complement, @vc_primary)
  @hc_between_strands = good_hcuts

  clean_all
end
|
169
|
+
|
170
|
+
# Sets @strands_for_display_current to +true+ and populates @strands_for_display.
|
171
|
+
#
|
172
|
+
# ---
|
173
|
+
# *Arguments*
|
174
|
+
# * +str1+: (_optional_) For displaying a primary strand. If +nil+ a numbered sequence will be used in place.
|
175
|
+
# * +str2+: (_optional_) For displaying a complementary strand. If +nil+ a numbered sequence will be used in place.
|
176
|
+
# * +vcp+: (_optional_) An array of vertical cut locations on the primary strand. If +nil+ the contents of @vc_primary is used.
|
177
|
+
# * +vcc+: (_optional_) An array of vertical cut locations on the complementary strand. If +nil+ the contents of @vc_complement is used.
|
178
|
+
# * +hc+: (_optional_) An array of horizontal cut locations between strands. If +nil+ the contents of @hc_between_strands is used.
|
179
|
+
# *Returns*:: +Array+ An array with the primary strand with vertical cuts, the horizontal cuts, and the complementary strand with vertical cuts.
|
180
|
+
#
|
181
|
+
def strands_for_display(str1 = nil, str2 = nil, vcp=nil, vcc=nil, hc=nil)
  # Builds a three-element Array for display: the primary strand with its
  # vertical cuts, the row of horizontal cuts drawn between the strands, and
  # the complementary strand with its vertical cuts. Omitted strands are
  # replaced by a repeating '0123456789' ruler of length @size; omitted cut
  # lists default to the cuts stored on this object.
  #
  # The cached result is reused only for a call with the same arguments as
  # the call that produced it. Previously any call made while
  # @strands_for_display_current was set returned the cached Array, even when
  # different strands or cut lists were passed in.
  request = [str1, str2, vcp, vcc, hc].map { |arg| arg.nil? ? nil : arg.dup }
  if @strands_for_display_current && request == @strands_for_display_request
    return @strands_for_display
  end

  vcs  = '|'   # Vertical cut symbol
  hcs  = '-'   # Horizontal cut symbol
  vhcs = '+'   # Intersection of vertical and horizontal cut symbol

  ruler = lambda { digits = '0123456789'; (digits * ( @size / digits.size.to_f ).ceil)[0..@size-1] }
  a = str1.nil? ? ruler.call : str1.dup
  b = str2.nil? ? ruler.call : str2.dup

  vcp = @vc_primary if vcp.nil?
  vcc = @vc_complement if vcc.nil?
  hc  = @hc_between_strands if hc.nil?

  # Insert from the right-most cut so earlier insertions do not shift the
  # positions of the cuts still to be placed.
  vcp.reverse_each { |c| a.insert(c + 1, vcs) }
  vcc.reverse_each { |c| b.insert(c + 1, vcs) }

  between = ' ' * @size
  hc.each { |hcut| between[hcut, 1] = hcs }

  s_a = add_spacing(a, vcs)
  s_b = add_spacing(b, vcs)
  s_bet = add_spacing(between)

  # NOTE watch this for circular
  # Walk the spaced strings left to right: mark every column holding a
  # vertical cut with '+', and bridge the blank between two horizontal marks.
  # s_bet is updated in place, so later columns see earlier changes.
  s_a.size.times do |i|
    if s_a[i, 1] == vcs || s_b[i, 1] == vcs
      s_bet[i] = vhcs
    elsif i != 0 && s_bet[i - 1, 1] == hcs && s_bet[i + 1, 1] == hcs
      s_bet[i] = hcs
    end
  end

  @strands_for_display_request = request
  @strands_for_display_current = true
  @strands_for_display = [s_a, s_bet, s_b]
end
|
221
|
+
|
222
|
+
#########
|
223
|
+
protected
|
224
|
+
#########
|
225
|
+
|
226
|
+
# remove nil values, remove duplicate values, and
|
227
|
+
# sort @vc_primary, @vc_complement, and @hc_between_strands
|
228
|
+
def clean_all
  # Normalises the three cut lists in place: nil entries are removed,
  # duplicates collapsed and the remaining values sorted ascending.
  [@vc_primary, @vc_complement, @hc_between_strands].each do |cuts|
    cuts.delete(nil)
    cuts.uniq!
    cuts.sort!
  end
end
|
231
|
+
|
232
|
+
end # CalculatedCuts
|
233
|
+
end # SequenceRange
|
234
|
+
end # Range
|
235
|
+
end # RestrictionEnzyme
|
236
|
+
end # Bio
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# bio/util/restriction_enzyme/range/sequence_range/fragment.rb -
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class Range
|
8
|
+
class SequenceRange
|
9
|
+
|
10
|
+
# One fragment of a digested sequence, described by the indices it occupies
# on the primary strand and on the complementary strand.
class Fragment

  # Declared for readers of the fragment size; nothing in this class assigns it.
  attr_reader :size

  # Display form of a fragment: the two aligned strand strings plus the
  # first and last index held in each bin.
  DisplayFragment = Struct.new(:primary, :complement, :p_left, :p_right, :c_left, :c_right)

  # * +primary_bin+: indices covered on the primary strand
  # * +complement_bin+: indices covered on the complementary strand
  def initialize( primary_bin, complement_bin )
    @primary_bin, @complement_bin = primary_bin, complement_bin
  end

  # Builds a DisplayFragment by walking every index found in either bin in
  # ascending order. An index present in a bin contributes the element of the
  # matching string at that index; an absent one contributes a single space,
  # which keeps the two strands aligned.
  #
  # Both strings are indexed directly, so although they default to +nil+ a
  # caller is expected to supply them.
  def for_display(p_str=nil, c_str=nil)
    primary_text = ''
    complement_text = ''

    (@primary_bin + @complement_bin).sort.uniq.each do |index|
      primary_text << (@primary_bin.include?(index) ? p_str[index] : ' ')
      complement_text << (@complement_bin.include?(index) ? c_str[index] : ' ')
    end

    DisplayFragment.new(
      primary_text,
      complement_text,
      @primary_bin.first,
      @primary_bin.last,
      @complement_bin.first,
      @complement_bin.last
    )
  end
end # Fragment
|
40
|
+
end # SequenceRange
|
41
|
+
end # Range
|
42
|
+
end # RestrictionEnzyme
|
43
|
+
end # Bio
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# bio/util/restriction_enzyme/range/sequence_range/fragments.rb -
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class Range
|
8
|
+
class SequenceRange
|
9
|
+
|
10
|
+
# An Array of fragment objects that also remembers the primary and
# complementary strands the fragments were cut from.
class Fragments < Array

  attr_accessor :primary
  attr_accessor :complement

  DisplayFragment = Struct.new(:primary, :complement)

  # * +primary+: the primary strand
  # * +complement+: the complementary strand
  def initialize(primary, complement)
    @primary = primary
    @complement = complement
  end

  # Returns a plain Array holding the result of +for_display+ for every
  # contained fragment. Strands that are not supplied fall back to the ones
  # stored on this collection.
  def for_display(p_str=nil, c_str=nil)
    primary_text = p_str || @primary
    complement_text = c_str || @complement
    map { |fragment| fragment.for_display(primary_text, complement_text) }
  end
end # Fragments
|
30
|
+
end # SequenceRange
|
31
|
+
end # Range
|
32
|
+
end # RestrictionEnzyme
|
33
|
+
end # Bio
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# bio/util/restriction_enzyme/range/vertical_cut_range.rb -
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class Range
|
8
|
+
|
9
|
+
# FIXME docs are kind of out of date. Change this to VerticalAndHorizontalCutRange
|
10
|
+
class VerticalCutRange < CutRange
|
11
|
+
attr_reader :p_cut_left, :p_cut_right
|
12
|
+
attr_reader :c_cut_left, :c_cut_right
|
13
|
+
attr_reader :min, :max
|
14
|
+
attr_reader :range
|
15
|
+
|
16
|
+
# VerticalCutRange provides an extremely raw, yet precise, method of
|
17
|
+
# defining the location of cuts on primary and complementary sequences.
|
18
|
+
#
|
19
|
+
# Many VerticalCutRange objects are used with HorizontalCutRange objects
|
20
|
+
# to be contained in CutRanges to define the cut pattern that a
|
21
|
+
# specific enzyme may make.
|
22
|
+
#
|
23
|
+
# VerticalCutRange takes up to four possible cuts, two on the primary
|
24
|
+
# strand and two on the complementary strand. In typical usage
|
25
|
+
# you will want to make a single cut on the primary strand and a single
|
26
|
+
# cut on the complementary strand.
|
27
|
+
#
|
28
|
+
# However, you can construct it with whatever cuts you desire to accommodate
|
29
|
+
# the most eccentric of imaginary restriction enzymes.
|
30
|
+
#
|
31
|
+
# ---
|
32
|
+
# *Arguments*
|
33
|
+
# * +p_cut_left+: (_optional_) Left-most cut on the primary strand. +nil+ to skip
|
34
|
+
# * +p_cut_right+: (_optional_) Right-most cut on the primary strand. +nil+ to skip
|
35
|
+
# * +c_cut_left+: (_optional_) Left-most cut on the complementary strand. +nil+ to skip
|
36
|
+
# * +c_cut_right+: (_optional_) Right-most cut on the complementary strand. +nil+ to skip
|
37
|
+
# *Returns*:: nothing
|
38
|
+
def initialize( p_cut_left=nil, p_cut_right=nil, c_cut_left=nil, c_cut_right=nil )
  # Keep the four cut positions exactly as supplied (nil means "no cut").
  @p_cut_left, @p_cut_right = p_cut_left, p_cut_right
  @c_cut_left, @c_cut_right = c_cut_left, c_cut_right

  # The extremes are taken over whichever cuts were actually given.
  supplied = [@p_cut_left, @c_cut_left, @p_cut_right, @c_cut_right].compact.sort
  @min = supplied.first
  @max = supplied.last

  # Without any cut there is nothing to span, so the range stays nil.
  @range = (@min.nil? || @max.nil?) ? nil : (@min..@max)
  return
end
|
54
|
+
|
55
|
+
# Check if a location falls within the minimum or maximum values of this
|
56
|
+
# range.
|
57
|
+
#
|
58
|
+
# ---
|
59
|
+
# *Arguments*
|
60
|
+
# * +i+: Location to check if it is included in the range
|
61
|
+
# *Returns*:: +true+ _or_ +false+
|
62
|
+
def include?(i)
  # A range built without any cuts is nil and therefore contains nothing.
  @range.nil? ? false : @range.include?(i)
end
|
66
|
+
end # VerticalCutRange
|
67
|
+
end # Range
|
68
|
+
end # RestrictionEnzyme
|
69
|
+
end # Bio
|
@@ -0,0 +1,193 @@
|
|
1
|
+
# bio/util/restriction_enzyme/single_strand.rb - Single strand of a restriction enzyme sequence
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
require 'bio/sequence'
|
5
|
+
|
6
|
+
module Bio
|
7
|
+
class RestrictionEnzyme
|
8
|
+
|
9
|
+
# A single strand of restriction enzyme sequence pattern with a 5' to 3'
|
10
|
+
# orientation.
|
11
|
+
#
|
12
|
+
# DoubleStranded puts the SingleStrand and SingleStrandComplement together to
|
13
|
+
# create the sequence pattern with cuts on both strands.
|
14
|
+
#
|
15
|
+
class SingleStrand < Bio::Sequence::NA
|
16
|
+
|
17
|
+
autoload :CutLocationsInEnzymeNotation, 'bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation'
|
18
|
+
|
19
|
+
include CutSymbol
|
20
|
+
include StringFormatting
|
21
|
+
|
22
|
+
# The cut locations in enzyme notation. Contains a
|
23
|
+
# CutLocationsInEnzymeNotation object set when the SingleStrand
|
24
|
+
# object is initialized.
|
25
|
+
attr_reader :cut_locations_in_enzyme_notation
|
26
|
+
|
27
|
+
# The cut locations transformed from enzyme index notation to 0-based
|
28
|
+
# array index notation. Contains an Array.
|
29
|
+
attr_reader :cut_locations
|
30
|
+
|
31
|
+
# Orientation of the strand, 5' to 3'
|
32
|
+
def orientation
  # Read as 5' to 3'.
  [5, 3]
end
|
33
|
+
|
34
|
+
# Constructor for a Bio::RestrictionEnzyme::SingleStrand object.
|
35
|
+
#
|
36
|
+
# A single strand of restriction enzyme sequence pattern with a 5' to 3' orientation.
|
37
|
+
#
|
38
|
+
# ---
|
39
|
+
# *Arguments*
|
40
|
+
# * +sequence+: (_required_) The enzyme sequence.
|
41
|
+
# * +c+: (_optional_) Cut locations in enzyme notation.
|
42
|
+
# See Bio::RestrictionEnzyme::SingleStrand::CutLocationsInEnzymeNotation
|
43
|
+
#
|
44
|
+
# *Constraints*
|
45
|
+
# * +sequence+ cannot contain immediately adjacent cut symbols (ex. atg^^c).
|
46
|
+
# * +c+ is in enzyme index notation and therefore cannot contain a 0.
|
47
|
+
# * If +c+ is omitted, +sequence+ must contain a cut symbol.
|
48
|
+
# * You cannot provide both a sequence with cut symbols and provide cut locations - ambiguous.
|
49
|
+
#
|
50
|
+
# +sequence+ must be a kind of:
|
51
|
+
# * String
|
52
|
+
# * Bio::Sequence::NA
|
53
|
+
# * Bio::RestrictionEnzyme::SingleStrand
|
54
|
+
#
|
55
|
+
# +c+ must be a kind of:
|
56
|
+
# * Bio::RestrictionEnzyme::SingleStrand::CutLocationsInEnzymeNotation
|
57
|
+
# * Integer, one or more
|
58
|
+
# * Array
|
59
|
+
#
|
60
|
+
# *Returns*:: nothing
|
61
|
+
def initialize( sequence, *c )
  # Cut locations may arrive as separate arguments or as a single Array.
  c.flatten!
  # NOTE t| 2009-09-19 commented out for library efficiency
  # validate_args(sequence, c)
  sequence = sequence.downcase

  # Cut symbols embedded in the sequence take precedence over the explicit
  # cut locations.
  cut_source = (sequence =~ re_cut_symbol) ? strip_padding(sequence) : c
  @cut_locations_in_enzyme_notation = CutLocationsInEnzymeNotation.new( cut_source )

  # @stripped and the cut locations must be set before +pattern+ is called,
  # because +pattern+ reads both.
  @stripped = Bio::Sequence::NA.new( strip_cuts_and_padding( sequence ) )
  super( pattern )
  @cut_locations = @cut_locations_in_enzyme_notation.to_array_index
  return
end
|
78
|
+
|
79
|
+
# Returns true if this enzyme is palindromic with its reverse complement.
|
80
|
+
# Does not report if the +cut_locations+ are palindromic or not.
|
81
|
+
#
|
82
|
+
# Examples:
|
83
|
+
# * This would be palindromic:
|
84
|
+
# 5' - ATGCAT - 3'
|
85
|
+
# TACGTA
|
86
|
+
#
|
87
|
+
# * This would not be palindromic:
|
88
|
+
# 5' - ATGCGTA - 3'
|
89
|
+
# TACGCAT
|
90
|
+
#
|
91
|
+
# ---
|
92
|
+
# *Arguments*
|
93
|
+
# * _none_
|
94
|
+
# *Returns*:: +true+ _or_ +false+
|
95
|
+
def palindromic?
  # Only the bare recognition sequence is compared; cut locations are ignored.
  bare = @stripped
  bare.reverse_complement == bare
end
|
98
|
+
|
99
|
+
# Sequence pattern with no cut symbols and no 'n' padding.
|
100
|
+
# * <code>SingleStrand.new('garraxt', [-2, 1, 7]).stripped # => "garraxt"</code>
|
101
|
+
attr_reader :stripped
|
102
|
+
|
103
|
+
# The sequence with 'n' padding and cut symbols.
|
104
|
+
# * <code>SingleStrand.new('garraxt', [-2, 1, 7]).with_cut_symbols # => "n^ng^arraxt^n"</code>
|
105
|
+
#
|
106
|
+
# ---
|
107
|
+
# *Arguments*
|
108
|
+
# * _none_
|
109
|
+
# *Returns*:: The sequence with 'n' padding and cut symbols.
|
110
|
+
def with_cut_symbols
  # Returns the padded pattern with a cut symbol inserted after every cut
  # location, e.g. "n^ng^arraxt^n".
  #
  # The symbols are inserted into a copy. +pattern+ is wrapped by +once+, so
  # it hands back the same String object on every call; inserting into that
  # object directly would leave cut symbols in every later +pattern+ result.
  s = pattern.dup

  # Right-most cut first, so the remaining insertion points do not shift.
  @cut_locations_in_enzyme_notation.to_array_index.sort.reverse_each do |c|
    s.insert(c + 1, cut_symbol)
  end
  s
end
|
115
|
+
|
116
|
+
# The sequence with 'n' padding on the left and right for cuts larger than the sequence.
|
117
|
+
# * <code>SingleStrand.new('garraxt', [-2, 1, 7]).pattern # => "nngarraxtn"</code>
|
118
|
+
#
|
119
|
+
# ---
|
120
|
+
# *Arguments*
|
121
|
+
# * _none_
|
122
|
+
# *Returns*:: The sequence with 'n' padding on the left and right for cuts larger than the sequence.
|
123
|
+
def pattern
  cuts = @cut_locations_in_enzyme_notation
  lowest = cuts.min
  # Without any cut location there is nothing to pad.
  return stripped if lowest.nil?

  # A negative cut location lies that many positions before the sequence.
  left_pad = lowest < 0 ? 'n' * lowest.abs : ''

  # Add one more 'n' if a cut is at the last position
  overhang = cuts.max - @stripped.length
  right_pad = overhang >= 0 ? 'n' * (overhang + 1) : ''

  [left_pad, stripped, right_pad].join('')
end
|
131
|
+
|
132
|
+
# The sequence with 'n' pads, cut symbols, and spacing for alignment.
|
133
|
+
# * <code>SingleStrand.new('garraxt', [-2, 1, 7]).with_spaces # => "n^n g^a r r a x t^n"</code>
|
134
|
+
#
|
135
|
+
# ---
|
136
|
+
# *Arguments*
|
137
|
+
# * _none_
|
138
|
+
# *Returns*:: The sequence with 'n' pads, cut symbols, and spacing for alignment.
|
139
|
+
def with_spaces
  # Space out the cut-symbol form of the sequence for alignment.
  marked = with_cut_symbols
  add_spacing(marked)
end
|
142
|
+
|
143
|
+
#########
|
144
|
+
protected
|
145
|
+
#########
|
146
|
+
|
147
|
+
def validate_args( input_pattern, input_cut_locations )
  # Raises ArgumentError when the pattern is not a String, when cuts are
  # given both as symbols in the pattern and as explicit locations, or when
  # the pattern contains a character that is neither a key of
  # Bio::NucleicAcid::NAMES, an 'x', nor the cut symbol.
  unless input_pattern.kind_of?(String)
    raise ArgumentError, [
      "input_pattern is not a String, Bio::Sequence::NA, or Bio::RestrictionEnzyme::SingleStrand object\n",
      "pattern: #{input_pattern}\n",
      "class: #{input_pattern.class}"
    ].join
  end

  if (input_pattern =~ re_cut_symbol) && !input_cut_locations.empty?
    raise ArgumentError, [
      "Cut symbol found in sequence, but cut locations were also supplied.  Ambiguous.\n",
      "pattern: #{input_pattern}\n",
      "symbol: #{cut_symbol}\n",
      "locations: #{input_cut_locations.inspect}"
    ].join
  end

  input_pattern.each_byte do |byte|
    # The character is lower-cased first, so only the lower-case 'x' needs
    # to be checked.
    char = byte.chr.downcase
    next if Bio::NucleicAcid::NAMES.has_key?(char) || char == 'x' || char == cut_symbol

    raise ArgumentError, [
      "Invalid character in pattern.\n",
      "Not a nucleotide or representation of possible nucleotides.  See Bio::NucleicAcid::NAMES for more information.\n",
      "char: #{char}\n",
      "input_pattern: #{input_pattern}"
    ].join
  end
end
|
174
|
+
|
175
|
+
# Tadayoshi Funaba's method as discussed in Programming Ruby 2ed, p390
|
176
|
+
def self.once(*ids)
  # For every named method: keep the original under a private alias and
  # redefine the public name so that the first call's result is stored in an
  # instance variable and returned by all later calls. The result is wrapped
  # in a one-element Array so that nil and false are cached as well.
  ids.each do |id|
    hidden = "__#{id.__id__}__"
    module_eval <<-"end;"
      alias_method :#{hidden}, :#{id.to_s}
      private :#{hidden}
      def #{id.to_s}(*args, &block)
        (@#{hidden} ||= [#{hidden}(*args, &block)])[0]
      end
    end;
  end
end
|
187
|
+
private_class_method :once
|
188
|
+
|
189
|
+
once :pattern, :with_cut_symbols, :with_spaces, :to_re
|
190
|
+
|
191
|
+
end # SingleStrand
|
192
|
+
end # RestrictionEnzyme
|
193
|
+
end # Bio
|