bio-restriction_enzyme 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/COPYING.txt +121 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +7 -0
- data/README.rdoc +22 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-restriction_enzyme.gemspec +99 -0
- data/lib/bio-restriction_enzyme.rb +1 -0
- data/lib/bio/util/restriction_enzyme.rb +218 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +241 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +209 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +99 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +313 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +127 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +95 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +30 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +68 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +99 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +16 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +39 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +59 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +249 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +236 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +43 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +33 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +69 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +193 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +127 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +15 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +103 -0
- data/test/bio-restriction_enzyme/analysis/test_calculated_cuts.rb +281 -0
- data/test/bio-restriction_enzyme/analysis/test_cut_ranges.rb +87 -0
- data/test/bio-restriction_enzyme/analysis/test_sequence_range.rb +223 -0
- data/test/bio-restriction_enzyme/double_stranded/test_aligned_strands.rb +84 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair.rb +58 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +56 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_locations.rb +35 -0
- data/test/bio-restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +87 -0
- data/test/bio-restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +66 -0
- data/test/bio-restriction_enzyme/test_analysis.rb +228 -0
- data/test/bio-restriction_enzyme/test_cut_symbol.rb +27 -0
- data/test/bio-restriction_enzyme/test_double_stranded.rb +98 -0
- data/test/bio-restriction_enzyme/test_single_strand.rb +131 -0
- data/test/bio-restriction_enzyme/test_single_strand_complement.rb +131 -0
- data/test/bio-restriction_enzyme/test_string_formatting.rb +43 -0
- data/test/helper.rb +17 -0
- data/test/test_bio-restriction_enzyme.rb +21 -0
- metadata +153 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
# bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb - Inherits from DoubleStranded::CutLocationPair
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class DoubleStranded
|
8
|
+
|
9
|
+
# Inherits from DoubleStranded::CutLocationPair , stores the cut location pair in
|
10
|
+
# enzyme notation instead of 0-based.
|
11
|
+
#
|
12
|
+
class CutLocationPairInEnzymeNotation < CutLocationPair
|
13
|
+
|
14
|
+
#########
|
15
|
+
protected
|
16
|
+
#########
|
17
|
+
|
18
|
+
# Validates a pair of cut locations expressed in enzyme index notation.
#
# Enzyme notation has no position 0, so a 0 in either slot is rejected.
# A pair in which both slots are +nil+ describes no cut at all and is
# rejected as ambiguous.
#
# ---
# *Arguments*
# * +a+: first cut location of the pair, or +nil+
# * +b+: second cut location of the pair, or +nil+
# *Returns*:: +nil+ when the pair is acceptable
# *Raises*:: +ArgumentError+ if either value is 0 or both values are +nil+
def validate_2(a, b)
  if a == 0 || b == 0
    raise ArgumentError, "Enzyme index notation only. 0 values are illegal."
  end

  if a.nil? && b.nil?
    raise ArgumentError, "Neither strand has a cut. Ambiguous."
  end
end
|
27
|
+
end # CutLocationPair
|
28
|
+
end # DoubleStranded
|
29
|
+
end # RestrictionEnzyme
|
30
|
+
end # Bio
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# bio/util/restriction_enzyme/double_stranded/cut_locations.rb - Contains an Array of CutLocationPair objects
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class DoubleStranded
|
8
|
+
|
9
|
+
# Contains an +Array+ of CutLocationPair objects.
|
10
|
+
#
|
11
|
+
class CutLocations < Array

  # CutLocations constructor.
  #
  # Stores the supplied CutLocationPair objects as the elements of this
  # +Array+ after checking each one with validate_args.
  #
  # Example:
  #   clp1 = CutLocationPair.new(3,2)
  #   clp2 = CutLocationPair.new(7,9)
  #   pairs = CutLocations.new(clp1, clp2)
  #
  # ---
  # *Arguments*
  # * +args+: Any number of +CutLocationPair+ objects
  # *Returns*:: nothing
  # *Raises*:: +ArgumentError+ if an argument is not a CutLocationPair
  def initialize(*args)
    validate_args(args)
    super(args)
  end

  # Returns an +Array+ holding element 0 of every stored pair, i.e. the
  # locations of cuts on the primary strand.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +Array+ of locations of cuts on the primary strand
  def primary
    map { |pair| pair[0] }
  end

  # Returns an +Array+ holding element 1 of every stored pair, i.e. the
  # locations of cuts on the complementary strand.
  #
  # ---
  # *Arguments*
  # * _none_
  # *Returns*:: +Array+ of locations of cuts on the complementary strand
  def complement
    map { |pair| pair[1] }
  end

  #########
  protected
  #########

  # Raises +ArgumentError+ unless the class of every element of +args+ is
  # exactly CutLocationPair (subclasses are not accepted here).
  def validate_args(args)
    args.each do |candidate|
      next if candidate.class == Bio::RestrictionEnzyme::DoubleStranded::CutLocationPair

      raise ArgumentError,
            "Not a CutLocationPair\n" \
            "class: #{candidate.class}\n" \
            "inspect: #{candidate.inspect}"
    end
  end
end # CutLocations
|
66
|
+
end # DoubleStranded
|
67
|
+
end # RestrictionEnzyme
|
68
|
+
end # Bio
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb - Inherits from DoubleStranded::CutLocations
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class DoubleStranded
|
8
|
+
|
9
|
+
# Inherits from DoubleStranded::CutLocations. Contains CutLocationPairInEnzymeNotation objects.
|
10
|
+
# Adds helper methods to convert from enzyme index notation to 0-based array index notation.
|
11
|
+
#
|
12
|
+
class CutLocationsInEnzymeNotation < CutLocations
|
13
|
+
|
14
|
+
# Returns +Array+ of locations of cuts on the primary
|
15
|
+
# strand in 0-based array index notation.
|
16
|
+
#
|
17
|
+
# ---
|
18
|
+
# *Arguments*
|
19
|
+
# * _none_
|
20
|
+
# *Returns*:: +Array+ of locations of cuts on the primary strand in 0-based array index notation.
|
21
|
+
# Converts the primary-strand cut locations (see +primary+) with
# helper_for_to_array_index and returns the converted +Array+.
def primary_to_array_index
  enzyme_positions = primary
  helper_for_to_array_index(enzyme_positions)
end
|
24
|
+
|
25
|
+
# Returns +Array+ of locations of cuts on the complementary
|
26
|
+
# strand in 0-based array index notation.
|
27
|
+
#
|
28
|
+
# ---
|
29
|
+
# *Arguments*
|
30
|
+
# * _none_
|
31
|
+
# *Returns*:: +Array+ of locations of cuts on the complementary strand in 0-based array index notation.
|
32
|
+
# Converts the complementary-strand cut locations (see +complement+) with
# helper_for_to_array_index and returns the converted +Array+.
def complement_to_array_index
  enzyme_positions = complement
  helper_for_to_array_index(enzyme_positions)
end
|
35
|
+
|
36
|
+
# Returns the contents of the present CutLocationsInEnzymeNotation object as
|
37
|
+
# a CutLocations object with the contents converted from enzyme notation
|
38
|
+
# to 0-based array index notation.
|
39
|
+
#
|
40
|
+
# ---
|
41
|
+
# *Arguments*
|
42
|
+
# * _none_
|
43
|
+
# *Returns*:: +CutLocations+
|
44
|
+
# Returns the contents of this object as a CutLocations object whose pairs
# are expressed in 0-based array index notation instead of enzyme notation.
#
# Each strand is converted exactly once; the conversion rescans every cut
# location, so the results are kept in locals rather than recomputed.
#
# ---
# *Arguments*
# * _none_
# *Returns*:: +CutLocations+
# *Raises*:: +IndexError+ if the two converted strands differ in length
def to_array_index
  primary_cuts    = primary_to_array_index
  complement_cuts = complement_to_array_index

  unless primary_cuts.size == complement_cuts.size
    err = "Primary and complement strand cut locations are not available in equal numbers.\n" \
          "primary: #{primary_cuts.inspect}\n" \
          "primary.size: #{primary_cuts.size}\n" \
          "complement: #{complement_cuts.inspect}\n" \
          "complement.size: #{complement_cuts.size}"
    raise IndexError, err
  end

  pairs = primary_cuts.zip(complement_cuts).map { |cl| CutLocationPair.new(cl) }
  CutLocations.new(*pairs)
end
|
56
|
+
|
57
|
+
#########
|
58
|
+
protected
|
59
|
+
#########
|
60
|
+
|
61
|
+
# Converts the enzyme-notation locations in +a+ to 0-based array indexes.
#
# The smallest non-nil location across BOTH strands of this object
# decides the conversion:
# * smallest location is negative: positive locations are first lowered
#   by one (enzyme notation has no 0), then every location is shifted up
#   by the absolute value of that smallest location.
# * otherwise: every location is simply lowered by one.
# +nil+ entries (no cut) are passed through unchanged.
#
# ---
# *Arguments*
# * +a+: +Array+ of locations to convert
# *Returns*:: +Array+ of converted locations, or +[]+ when this object
#             holds no non-nil location at all
def helper_for_to_array_index(a)
  minimum = (primary + complement).flatten.compact.min
  return [] if minimum.nil? # no elements

  if minimum < 0
    offset = minimum.abs
    a.map do |n|
      next nil if n.nil?

      (n < 0 ? n : n - 1) + offset
    end
  else
    a.map { |n| n.nil? ? nil : n - 1 }
  end
end
|
85
|
+
|
86
|
+
# Checks that the class of every element of +args+ is exactly
# CutLocationPairInEnzymeNotation.
#
# ---
# *Arguments*
# * +args+: +Array+ of objects to check
# *Returns*:: +args+
# *Raises*:: +TypeError+ naming the class and +inspect+ output of the
#            first offending element
def validate_args(args)
  args.each do |candidate|
    next if candidate.class == Bio::RestrictionEnzyme::DoubleStranded::CutLocationPairInEnzymeNotation

    raise TypeError,
          "Not a CutLocationPairInEnzymeNotation\n" \
          "class: #{candidate.class}\n" \
          "inspect: #{candidate.inspect}"
  end
end
|
96
|
+
end # CutLocationsInEnzymeNotation
|
97
|
+
end # DoubleStranded
|
98
|
+
end # RestrictionEnzyme
|
99
|
+
end # Bio
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# bio/util/restriction_enzyme/range/cut_range.rb - Abstract base class for HorizontalCutRange and VerticalCutRange
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class Range
|
8
|
+
|
9
|
+
# Abstract base class for HorizontalCutRange and VerticalCutRange
|
10
|
+
#
|
11
|
+
# Common superclass of HorizontalCutRange and VerticalCutRange; it defines
# no behaviour of its own.
class CutRange; end # CutRange
|
13
|
+
|
14
|
+
end # Range
|
15
|
+
end # RestrictionEnzyme
|
16
|
+
end # Bio
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# bio/util/restriction_enzyme/range/cut_ranges.rb - Container for many CutRange objects or CutRange child objects.
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class Range
|
8
|
+
|
9
|
+
# Container for many CutRange objects or CutRange child objects. Inherits from array.
|
10
|
+
#
|
11
|
+
class CutRanges < Array
  # Smallest value among the +min+ of every contained cut range, or +nil+
  # when the container is empty.
  def min
    map { |cut_range| cut_range.min }.flatten.min
  end

  # Largest value among the +max+ of every contained cut range, or +nil+
  # when the container is empty.
  def max
    map { |cut_range| cut_range.max }.flatten.max
  end

  # +true+ if any contained cut range reports that it includes +i+.
  # Stops at the first match.
  def include?(i)
    any? { |cut_range| cut_range.include?(i) }
  end

  # Like +min+, but only VerticalCutRange elements are considered.
  def min_vertical
    vertical_min_max_helper( :min )
  end

  # Like +max+, but only VerticalCutRange elements are considered.
  def max_vertical
    vertical_min_max_helper( :max )
  end

  protected

  # Sends +sym_which+ (+:min+ or +:max+) to every element whose class is
  # exactly VerticalCutRange and reduces the results: the largest value
  # for +:max+, the smallest value for anything else.  Returns +nil+ when
  # there is no such element.
  def vertical_min_max_helper( sym_which )
    values = select { |cut_range| cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange }
    values = values.map { |cut_range| cut_range.send( sym_which ) }.flatten
    sym_which == :max ? values.max : values.min
  end

end # CutRanges
|
37
|
+
end # Range
|
38
|
+
end # RestrictionEnzyme
|
39
|
+
end # Bio
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# bio/util/restriction_enzyme/range/horizontal_cut_range.rb -
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class Range
|
8
|
+
|
9
|
+
class HorizontalCutRange < CutRange
|
10
|
+
attr_reader :p_cut_left, :p_cut_right
|
11
|
+
attr_reader :c_cut_left, :c_cut_right
|
12
|
+
attr_reader :min, :max
|
13
|
+
attr_reader :hcuts
|
14
|
+
|
15
|
+
# Builds a horizontal cut range covering +left+..+right+.
#
# ---
# *Arguments*
# * +left+: Left-most horizontal cut
# * +right+: (_optional_) Right-most horizontal cut, defaults to +left+
# *Raises*:: +RuntimeError+ ("left > right") if +left+ is greater than +right+
def initialize( left, right=left )
  raise "left > right" if left > right

  # The 'range' here is actually off by one on the left
  # side in relation to a normal CutRange, so using the normal
  # variables from CutRange would result in bad behavior.
  #
  # See below - the first horizontal cut is the primary cut plus one.
  #
  #    1 2 3 4 5 6 7
  #    G A|T T A C A
  #       +-----+
  #    C T A A T|G T
  #    1 2 3 4 5 6 7
  #
  # Primary cut = 2
  # Complement cut = 5
  # Horizontal cuts = 3, 4, 5

  # The per-strand cut attributes are not used by a horizontal range.
  @p_cut_left = @p_cut_right = nil
  @c_cut_left = @c_cut_right = nil

  # NOTE: @min, @max and @range used to be 'nil'; make sure all tests work.
  @min = left
  @max = right
  @range = (@min..@max) unless @min.nil? || @max.nil?

  @hcuts = (left..right)
end
|
45
|
+
|
46
|
+
# Check if a location falls within the minimum or maximum values of this
|
47
|
+
# range.
|
48
|
+
#
|
49
|
+
# ---
|
50
|
+
# *Arguments*
|
51
|
+
# * +i+: Location to check if it is included in the range
|
52
|
+
# *Returns*:: +true+ _or_ +false+
|
53
|
+
# Reports whether location +i+ lies inside the range built by the
# constructor from the left and right horizontal cuts.
#
# ---
# *Arguments*
# * +i+: Location to test
# *Returns*:: +true+ _or_ +false+
def include?(i)
  @range.member?(i)
end
|
56
|
+
end # HorizontalCutRange
|
57
|
+
end # Range
|
58
|
+
end # RestrictionEnzyme
|
59
|
+
end # Bio
|
@@ -0,0 +1,249 @@
|
|
1
|
+
# bio/util/restriction_enzyme/range/sequence_range.rb - A defined range over a nucleotide sequence
|
2
|
+
|
3
|
+
require 'bio/util/restriction_enzyme'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
class RestrictionEnzyme
|
7
|
+
class Range
|
8
|
+
|
9
|
+
autoload :CutRange, 'bio/util/restriction_enzyme/range/cut_range'
|
10
|
+
autoload :CutRanges, 'bio/util/restriction_enzyme/range/cut_ranges'
|
11
|
+
autoload :HorizontalCutRange, 'bio/util/restriction_enzyme/range/horizontal_cut_range'
|
12
|
+
autoload :VerticalCutRange, 'bio/util/restriction_enzyme/range/vertical_cut_range'
|
13
|
+
|
14
|
+
# A defined range over a nucleotide sequence.
|
15
|
+
#
|
16
|
+
# This class accommodates having cuts defined on a sequence and returning the
|
17
|
+
# fragments made by those cuts.
|
18
|
+
class SequenceRange
|
19
|
+
|
20
|
+
autoload :Fragment, 'bio/util/restriction_enzyme/range/sequence_range/fragment'
|
21
|
+
autoload :Fragments, 'bio/util/restriction_enzyme/range/sequence_range/fragments'
|
22
|
+
autoload :CalculatedCuts, 'bio/util/restriction_enzyme/range/sequence_range/calculated_cuts'
|
23
|
+
|
24
|
+
# Left-most index of primary strand
|
25
|
+
attr_reader :p_left
|
26
|
+
|
27
|
+
# Right-most index of primary strand
|
28
|
+
attr_reader :p_right
|
29
|
+
|
30
|
+
# Left-most index of complementary strand
|
31
|
+
attr_reader :c_left
|
32
|
+
|
33
|
+
# Right-most index of complementary strand
|
34
|
+
attr_reader :c_right
|
35
|
+
|
36
|
+
# Left-most index of DNA sequence
|
37
|
+
attr_reader :left
|
38
|
+
|
39
|
+
# Right-most index of DNA sequence
|
40
|
+
attr_reader :right
|
41
|
+
|
42
|
+
# Size of DNA sequence
|
43
|
+
attr_reader :size
|
44
|
+
|
45
|
+
# CutRanges in this SequenceRange
|
46
|
+
attr_reader :cut_ranges
|
47
|
+
|
48
|
+
# Defines the range covered on each strand.  At least one left index and
# at least one right index must be given; a strand with both indexes must
# have left <= right.
#
# ---
# *Arguments*
# * +p_left+: (_optional_) Left-most index of primary strand
# * +p_right+: (_optional_) Right-most index of primary strand
# * +c_left+: (_optional_) Left-most index of complementary strand
# * +c_right+: (_optional_) Right-most index of complementary strand
# *Raises*:: +ArgumentError+ if a required index is missing or a strand
#            has its left index greater than its right index
def initialize( p_left = nil, p_right = nil, c_left = nil, c_right = nil )
  if p_left.nil? && c_left.nil?
    raise ArgumentError, "a left index is required on at least one strand"
  end
  if p_right.nil? && c_right.nil?
    raise ArgumentError, "a right index is required on at least one strand"
  end
  unless p_left.nil? || p_right.nil? || p_left <= p_right
    raise ArgumentError, "p_left (#{p_left}) must not exceed p_right (#{p_right})"
  end
  unless c_left.nil? || c_right.nil? || c_left <= c_right
    raise ArgumentError, "c_left (#{c_left}) must not exceed c_right (#{c_right})"
  end

  @p_left, @p_right, @c_left, @c_right = p_left, p_right, c_left, c_right

  # Overall extent: smallest left index and largest right index supplied.
  @left  = [p_left, c_left].compact.min
  @right = [p_right, c_right].compact.max
  @size  = (@right - @left) + 1 unless @left.nil? || @right.nil?

  @cut_ranges = CutRanges.new
  @__fragments_current = false # cached fragments (if any) are not valid yet
end
|
61
|
+
|
62
|
+
|
63
|
+
# If the first object is HorizontalCutRange or VerticalCutRange, that is
|
64
|
+
# added to the SequenceRange. Otherwise this method
|
65
|
+
# builds a VerticalCutRange object and adds it to the SequenceRange.
|
66
|
+
#
|
67
|
+
# Note:
|
68
|
+
# Cut occurs immediately after the index supplied.
|
69
|
+
# For example, a cut at '0' would mean a cut occurs between bases 0 and 1.
|
70
|
+
#
|
71
|
+
# ---
|
72
|
+
# *Arguments*
|
73
|
+
# * +p_cut_left+: (_optional_) Left-most cut on the primary strand *or* a CutRange object. +nil+ to skip
|
74
|
+
# * +p_cut_right+: (_optional_) Right-most cut on the primary strand. +nil+ to skip
|
75
|
+
# * +c_cut_left+: (_optional_) Left-most cut on the complementary strand. +nil+ to skip
|
76
|
+
# * +c_cut_right+: (_optional_) Right-most cut on the complementary strand. +nil+ to skip
|
77
|
+
# *Returns*:: nothing
|
78
|
+
# If the first argument is a CutRange (HorizontalCutRange or
# VerticalCutRange) it is added to the SequenceRange as-is.  Otherwise a
# VerticalCutRange is built from the four arguments and added.
#
# Note:
# Cut occurs immediately after the index supplied.
# For example, a cut at '0' would mean a cut occurs between bases 0 and 1.
#
# ---
# *Arguments*
# * +p_cut_left+: (_optional_) Left-most cut on the primary strand *or* a CutRange object. +nil+ to skip
# * +p_cut_right+: (_optional_) Right-most cut on the primary strand. +nil+ to skip
# * +c_cut_left+: (_optional_) Left-most cut on the complementary strand. +nil+ to skip
# * +c_cut_right+: (_optional_) Right-most cut on the complementary strand. +nil+ to skip
# *Returns*:: nothing
# *Raises*:: +IndexError+ if a non-nil cut lies outside +left+..+right+
def add_cut_range( p_cut_left=nil, p_cut_right=nil, c_cut_left=nil, c_cut_right=nil )
  @__fragments_current = false # any cached fragments are now stale

  return @cut_ranges << p_cut_left if p_cut_left.kind_of?(CutRange) # shortcut

  cuts = [p_cut_left, p_cut_right, c_cut_left, c_cut_right]
  cuts.each do |n|
    next if n.nil?
    unless n >= @left && n <= @right
      raise IndexError, "cut location #{n} is outside the sequence range #{@left}..#{@right}"
    end
  end

  @cut_ranges << VerticalCutRange.new( *cuts )
end
|
87
|
+
|
88
|
+
# Add a series of CutRange objects (HorizontalCutRange or VerticalCutRange).
|
89
|
+
#
|
90
|
+
# ---
|
91
|
+
# *Arguments*
|
92
|
+
# * +cut_ranges+: A series of CutRange objects
|
93
|
+
# *Returns*:: nothing
|
94
|
+
# Adds every given CutRange (HorizontalCutRange or VerticalCutRange) via
# add_cut_range.  Nested arrays are flattened first.  Elements are checked
# and added one at a time, so elements preceding an invalid one have
# already been added when the error is raised.
#
# ---
# *Arguments*
# * +cut_ranges+: A series of CutRange objects
# *Returns*:: nothing
# *Raises*:: +TypeError+ if an element is not a CutRange
def add_cut_ranges(*cut_ranges)
  cut_ranges.flatten.each do |candidate|
    unless candidate.kind_of?(CutRange)
      raise TypeError, "Not of type CutRange"
    end

    add_cut_range( candidate )
  end
end
|
100
|
+
|
101
|
+
# Builds a HorizontalCutRange object and adds it to the SequenceRange.
|
102
|
+
#
|
103
|
+
# ---
|
104
|
+
# *Arguments*
|
105
|
+
# * +left+: Left-most cut
|
106
|
+
# * +right+: (_optional_) Right side - by default this equals the left side, default is recommended.
|
107
|
+
# *Returns*:: nothing
|
108
|
+
# Creates a HorizontalCutRange from +left+ and +right+, appends it to the
# cut ranges of this SequenceRange and marks cached fragments as stale.
#
# ---
# *Arguments*
# * +left+: Left-most cut
# * +right+: (_optional_) Right side - by default this equals the left side, default is recommended.
# *Returns*:: nothing
def add_horizontal_cut_range( left, right=left )
  @__fragments_current = false
  horizontal = HorizontalCutRange.new( left, right )
  @cut_ranges.push( horizontal )
end
|
112
|
+
|
113
|
+
# A Bio::RestrictionEnzyme::Range::SequenceRange::Bin holds an +Array+ of
|
114
|
+
# indexes for the primary and complement strands (+p+ and +c+ accessors).
|
115
|
+
#
|
116
|
+
# Example hash with Bin values:
|
117
|
+
# {0=>#<struct Bio::RestrictionEnzyme::Range::SequenceRange::Bin c=[0, 1], p=[0]>,
|
118
|
+
# 2=>#<struct Bio::RestrictionEnzyme::Range::SequenceRange::Bin c=[], p=[1, 2]>,
|
119
|
+
# 3=>#<struct Bio::RestrictionEnzyme::Range::SequenceRange::Bin c=[2, 3], p=[]>,
|
120
|
+
# 4=>#<struct Bio::RestrictionEnzyme::Range::SequenceRange::Bin c=[4, 5], p=[3, 4, 5]>}
|
121
|
+
#
|
122
|
+
# Note that the bin cannot be easily stored as a range since there may be
|
123
|
+
# nucleotides excised in the middle of a range.
|
124
|
+
#
|
125
|
+
# TODO: Perhaps store the bins as one-or-many ranges since missing
|
126
|
+
# nucleotides due to enzyme cutting is a special case.
|
127
|
+
Bin = Struct.new(:c, :p)
|
128
|
+
|
129
|
+
# Calculates the fragments over this sequence range as defined after using
|
130
|
+
# the methods add_cut_range, add_cut_ranges, and/or add_horizontal_cut_range
|
131
|
+
#
|
132
|
+
# Example return value:
|
133
|
+
# [#<Bio::RestrictionEnzyme::Range::SequenceRange::Fragment:0x277bdc
|
134
|
+
# @complement_bin=[0, 1],
|
135
|
+
# @primary_bin=[0]>,
|
136
|
+
# #<Bio::RestrictionEnzyme::Range::SequenceRange::Fragment:0x277bc8
|
137
|
+
# @complement_bin=[],
|
138
|
+
# @primary_bin=[1, 2]>,
|
139
|
+
# #<Bio::RestrictionEnzyme::Range::SequenceRange::Fragment:0x277bb4
|
140
|
+
# @complement_bin=[2, 3],
|
141
|
+
# @primary_bin=[]>,
|
142
|
+
# #<Bio::RestrictionEnzyme::Range::SequenceRange::Fragment:0x277ba0
|
143
|
+
# @complement_bin=[4, 5],
|
144
|
+
# @primary_bin=[3, 4, 5]>]
|
145
|
+
#
|
146
|
+
# ---
|
147
|
+
# *Arguments*
|
148
|
+
# * _none_
|
149
|
+
# *Returns*:: Bio::RestrictionEnzyme::Range::SequenceRange::Fragments
|
150
|
+
# Calculates the fragments over this sequence range as defined after using
# the methods add_cut_range, add_cut_ranges, and/or add_horizontal_cut_range.
#
# The result is cached until another cut range is added.  The cache is
# marked valid only after the calculation has finished, so a calculation
# that raises is retried on the next call instead of leaving a stale or
# +nil+ result behind.
#
# ---
# *Arguments*
# * _none_
# *Returns*:: Bio::RestrictionEnzyme::Range::SequenceRange::Fragments
def fragments
  return @__fragments if @__fragments_current

  # A repeating '0123456789' string of length @size is handed to
  # Fragments.new for both of its arguments.
  # NOTE(review): presumably placeholder primary/complement sequences - confirm against Fragments.
  num_txt = '0123456789'
  num_txt_repeat = (num_txt * ( @size / num_txt.size.to_f ).ceil)[0..@size-1]
  result = Fragments.new(num_txt_repeat, num_txt_repeat)

  cc = Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts.new(@size)
  cc.add_cuts_from_cut_ranges(@cut_ranges)
  cc.remove_incomplete_cuts

  # One Fragment per bin, in ascending order of bin id.
  create_bins(cc).sort.each { |_bin_id, bin| result << Fragment.new( bin.p, bin.c ) }

  @__fragments = result
  @__fragments_current = true
  result
end
|
166
|
+
|
167
|
+
#########
|
168
|
+
protected
|
169
|
+
#########
|
170
|
+
|
171
|
+
# Example:
|
172
|
+
# cc = Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts.new(@size)
|
173
|
+
# cc.add_cuts_from_cut_ranges(@cut_ranges)
|
174
|
+
# cc.remove_incomplete_cuts
|
175
|
+
# bins = create_bins(cc)
|
176
|
+
#
|
177
|
+
# Example return value:
|
178
|
+
# {0=>#<struct Bio::RestrictionEnzyme::Range::SequenceRange::Bin c=[0, 1], p=[0]>,
|
179
|
+
# 2=>#<struct Bio::RestrictionEnzyme::Range::SequenceRange::Bin c=[], p=[1, 2]>,
|
180
|
+
# 3=>#<struct Bio::RestrictionEnzyme::Range::SequenceRange::Bin c=[2, 3], p=[]>,
|
181
|
+
# 4=>#<struct Bio::RestrictionEnzyme::Range::SequenceRange::Bin c=[4, 5], p=[3, 4, 5]>}
|
182
|
+
#
|
183
|
+
# ---
|
184
|
+
# *Arguments*
|
185
|
+
# * +cc+: Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts
|
186
|
+
# *Returns*:: +Hash+ Keys are unique, values are Bio::RestrictionEnzyme::Range::SequenceRange::Bin objects filled with indexes of the sequence locations they represent.
|
187
|
+
# Walks the sequence from index -1 to @size-1 and sorts every index into
# bins (future fragments), separately for the primary and the complement
# strand.
#
# A vertical cut after an index opens a new bin for that strand.  While
# the two strands are in different bins and there is no horizontal cut at
# the current index, the strands are still attached: the newer bin is
# discarded and both strands continue in the older one.
#
# The arrays returned by +cc+ are copied before the start marker is
# added, so +cc+ itself is left unmodified.
#
# ---
# *Arguments*
# * +cc+: Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts
# *Returns*:: +Hash+ Keys are unique, values are Bio::RestrictionEnzyme::Range::SequenceRange::Bin objects filled with indexes of the sequence locations they represent.
def create_bins(cc)
  p_cut = cc.vc_primary.dup
  c_cut = cc.vc_complement.dup
  h_cut = cc.hc_between_strands

  if @circular
    # NOTE
    # if it's circular we should start at the beginning of a cut for orientation
    # scan for it, hack off the first set of hcuts and move them to the back

    unique_id = 0
  else
    # Pretend there is a cut before the first base so that index -1 gets
    # a throw-away bin of its own.
    p_cut.unshift(-1) unless p_cut.include?(-1)
    c_cut.unshift(-1) unless c_cut.include?(-1)
    unique_id = -1
  end

  p_bin_id = c_bin_id = unique_id
  bins = {}
  setup_new_bin(bins, unique_id)

  -1.upto(@size-1) do |idx| # NOTE - circular, for the future - should '-1' be replaced with 'unique_id'?

    # if bin_ids are out of sync but the strands are attached
    if (p_bin_id != c_bin_id) && !h_cut.include?(idx)
      min_id, max_id = [p_bin_id, c_bin_id].sort
      bins.delete(max_id)
      p_bin_id = c_bin_id = min_id
    end

    bins[ p_bin_id ].p << idx
    bins[ c_bin_id ].c << idx

    # A cut after idx: the following indexes of that strand go to a new bin.
    if p_cut.include?(idx)
      p_bin_id = (unique_id += 1)
      setup_new_bin(bins, p_bin_id)
    end

    if c_cut.include?(idx)
      c_bin_id = (unique_id += 1)
      setup_new_bin(bins, c_bin_id)
    end

  end

  # Bin "-1" is an easy way to indicate the start of a strand just in case
  # there is a horizontal cut at position 0
  bins.delete(-1) unless @circular
  bins
end
|
237
|
+
|
238
|
+
# Modifies bins in place by creating a new element with key bin_id and
|
239
|
+
# initializing the bin.
|
240
|
+
# Stores a fresh Bin under +bin_id+ in +bins+ (replacing any previous
# entry) and gives it separate empty +p+ and +c+ arrays.  +bins+ is
# modified in place.
def setup_new_bin(bins, bin_id)
  fresh = Bin.new
  bins[ bin_id ] = fresh
  fresh.p = []
  fresh.c = []
end
|
245
|
+
|
246
|
+
end # SequenceRange
|
247
|
+
end # Range
|
248
|
+
end # RestrictionEnzyme
|
249
|
+
end # Bio
|