wrnap 0.12.2 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,43 @@
module Wrnap
  class Rna
    # Array-like container for a collection of RNA objects. Iteration and the
    # basic indexing/appending operations are forwarded to the wrapped array.
    class Box
      extend Forwardable
      include Enumerable
      include Wrnap::Global::Yaml

      attr_reader :rnas

      def_delegators :@rnas, :size, :length, :[], :[]=, :<<

      class << self
        # Builds a Box from every file matched by +pattern+. When +pattern+ is
        # a directory, all "*.fa" files directly inside it are used. Each file
        # is loaded with RNA.from_fasta, and the optional block is forwarded.
        def load_all(pattern = "*.fa", &block)
          glob = File.directory?(pattern) ? pattern + "/*.fa" : pattern

          new(Dir[glob].map { |file| RNA.from_fasta(file, &block) })
        end
      end

      # Stores +rnas+ as-is when it reports itself as an Array, otherwise
      # wraps the single object in a one-element array.
      def initialize(rnas)
        @rnas =
          if rnas.kind_of?(Array)
            rnas
          else
            [rnas]
          end
      end

      # Calls #pp on every contained RNA and returns nil.
      def pp
        rnas.each(&:pp) && nil
      end

      # Returns a new Box holding this box's RNAs followed by the RNAs of
      # +arrayish+ (another Box, or anything Array#+ accepts).
      def +(arrayish)
        extra = arrayish.is_a?(Box) ? arrayish.rnas : arrayish

        self.class.new(rnas + extra)
      end

      # Enumerable hook: iterates over the wrapped RNAs.
      def each(&block)
        rnas.each(&block)
      end

      # Reports true for Array so that a Box passes Array kind checks (for
      # example the one in #initialize); defers to the default otherwise.
      def kind_of?(klass)
        return true if klass == Array

        super
      end

      def inspect
        format("#<Wrnap::Rna::Box with %d RNAs>", rnas.size)
      end
    end
  end
end
@@ -0,0 +1,119 @@
module Wrnap
  class Rna
    # Mixin providing a small DSL for building a folding-constraint mask
    # (one character per sequence position) for an RNA.
    module Constraints
      def self.included(base)
        base.send(:include, InstanceMethods)
      end

      module InstanceMethods
        # Returns whatever is stored under :constraint_mask in the hash exposed by +md+.
        def constraint_mask
          md[:constraint_mask]
        end

        # Evaluates +block+ against a fresh ConstraintBox for this RNA and
        # stores the box via a one-argument +constraint_mask+ call inside the
        # +meta_rna+ block.
        # NOTE(review): relies on meta_rna evaluating its block against the
        # metadata container so that the call acts as a setter; confirm against
        # the Metadata mixin.
        def build_constraints(&block)
          meta_rna do |metadata|
            constraint_mask(ConstraintBox.new(metadata.__rna__).tap { |box| box.instance_eval(&block) })
          end
        end
      end

      # Collects ConstraintData regions for one RNA and renders them as a mask string.
      class ConstraintBox
        # Matches the dynamic DSL methods handled by #method_missing:
        # prohibit, force, and their _left_stem / _right_stem variants.
        MASK_METHOD_REGEX = /\A(prohibit|force)(_(left|right)_stem)?\z/

        attr_reader :rna, :constraints

        def initialize(rna)
          @rna, @constraints = rna, []
        end

        # Loop spanning positions i..j.
        def between(i, j)
          Loop.new(i, j)
        end

        # Loop spanning the positions strictly between i and j.
        def inside(i, j)
          between(i + 1, j - 1)
        end

        # Masks a Helix or a Loop; any other object is ignored (returns nil).
        #
        # Options may be given as keywords or as a single trailing Hash
        # (keywords win on conflict). They are forwarded as real keyword
        # arguments, because a positional Hash is no longer converted to
        # keywords on Ruby 3. For a Loop only :symbol is used, defaulting to "x".
        def mask!(mask_object, *args, **options)
          options = args.first.merge(options) if args.first.is_a?(Hash)

          case mask_object
          when Helix then mask_helix!(mask_object, **options)
          when Loop then mask_loop!(mask_object, symbol: options[:symbol] || "x")
          end
        end

        # Masks one or both strands of +helix+ (as returned by helix.to_loops).
        # A multi-character +symbol+ supplies the left and right characters
        # (first and second character); a single character is used for both.
        def mask_helix!(helix, side: :both, symbol: "()")
          left_loop, right_loop = helix.to_loops

          if symbol.length > 1
            left_symbol, right_symbol = symbol.chars
          else
            left_symbol = right_symbol = symbol
          end

          mask_loop!(left_loop, symbol: left_symbol) if side == :left || side == :both
          mask_loop!(right_loop, symbol: right_symbol) if side == :right || side == :both
        end

        def mask_loop!(l00p, symbol: "x")
          mask_region!(l00p.i, l00p.j, symbol: symbol)
        end

        # Records that positions i..j should be rendered as +symbol+.
        #
        # Raises ArgumentError unless +symbol+ is exactly one character: a
        # longer symbol would overflow the region, and an empty one would
        # shorten the mask string produced by #to_s.
        def mask_region!(i, j, symbol: "x")
          unless symbol.length == 1
            raise ArgumentError, "Trying to apply symbol '%s' from %d to %d, all symbols must be 1 char long." % [symbol, i, j]
          end

          constraints << ConstraintData.new(i, j, symbol)
          prune!
        end

        # Drops duplicate constraints (same range and symbol), keeping the first of each.
        def prune!
          @constraints = constraints.uniq(&:name)
        end

        def inspect
          "#<Constraints: %s>" % constraints.map(&:name).join(", ")
        end

        # Renders the mask: "." for every position of the RNA (rna.len of
        # them), overwritten by each constraint's symbol in insertion order.
        def to_s
          ("." * rna.len).tap do |string|
            constraints.each { |constraint| string[constraint.from..constraint.to] = constraint.symbol * constraint.length }
          end
        end

        alias :mask :to_s

        # Dynamic DSL:
        # * names matching TreeStem::STEM_NOTATION_REGEX are sent to rna.trunk;
        # * prohibit / force (optionally _left_stem / _right_stem) mask the
        #   first argument, using the second argument as the symbol or the
        #   default "x" (prohibit) / "()" (force).
        def method_missing(name, *args, &block)
          method_name = name.to_s

          if method_name =~ TreeStem::STEM_NOTATION_REGEX
            rna.trunk.send(method_name)
          elsif (mask_type = method_name.match(MASK_METHOD_REGEX))
            side_symbol    = mask_type[3] ? mask_type[3].to_sym : :both
            default_symbol = mask_type[1] == "prohibit" ? "x" : "()"

            mask!(args[0], side: side_symbol, symbol: args[1] || default_symbol)
          else
            super
          end
        end

        # Keeps respond_to? consistent with the names #method_missing handles.
        def respond_to_missing?(name, include_private = false)
          method_name = name.to_s

          !!(method_name =~ TreeStem::STEM_NOTATION_REGEX) || !!(method_name =~ MASK_METHOD_REGEX) || super
        end
      end

      # One masked region: inclusive positions from..to rendered as +symbol+.
      class ConstraintData
        attr_reader :from, :to, :symbol

        def initialize(from, to, symbol)
          @from, @to, @symbol = from, to, symbol
        end

        # Identity string used by ConstraintBox#prune! to detect duplicates.
        def name
          "(%d to %d as '%s')" % [from, to, symbol]
        end

        # Number of positions covered (inclusive range).
        def length
          to - from + 1
        end

        def inspect
          "#<Constraint: %s>" % name
        end
      end
    end
  end
end
@@ -0,0 +1,139 @@
module Wrnap
  class Rna
    # An RNA located by accession and from/to coordinates. The sequence is
    # either supplied directly or fetched through Wrnap::Global::Entrez, and
    # the fetched window may be extended via coord_options.
    class Context < Rna
      attr_reader :accession, :from, :to, :coord_options

      class << self
        # Builds a Context whose sequence will be fetched from Entrez.
        def init_from_entrez(accession, from, to, options = {}, &block)
          new(
            accession: accession,
            from:      from,
            to:        to,
            options:   options,
            &block
          )
        end

        # Builds a Context around an already-known sequence.
        def init_from_string(sequence, accession, from, to, options = {}, &block)
          new(
            sequence:  sequence,
            accession: accession,
            from:      from,
            to:        to,
            options:   options,
            &block
          )
        end
      end

      # options[:coords] is stored as coord_options (see #validate_coord_options);
      # options[:rna] may carry :structure/:str_1/:str, :second_structure/:str_2
      # and :comment/:name, which are passed on to the superclass initializer.
      #
      # Raises ArgumentError when the coord options are invalid.
      def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {}, &block)
        options = { coords: {}, rna: {} }.merge(options)

        @accession, @from, @to, @coord_options = accession, from, to, options[:coords]

        validate_coord_options

        if sequence
          @raw_sequence = (sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence).upcase
        end

        # self.sequence triggers the Entrez fetch when no sequence was given.
        super(
          sequence:         self.sequence,
          structure:        options[:rna][:structure] || options[:rna][:str_1] || options[:rna][:str],
          second_structure: options[:rna][:second_structure] || options[:rna][:str_2],
          comment:          options[:rna][:comment] || options[:rna][:name] || identifier,
          &block
        )

        # NOTE(review): presumably the superclass stores @sequence; it is
        # dropped here because #sequence serves @raw_sequence instead.
        remove_instance_variable(:@sequence)
      end

      # Checks coord_options. An empty hash is valid; otherwise it must have
      # exactly the keys :direction and :length, with a positive Integer
      # length and a direction among :up, :down, :both, 5, 3.
      #
      # Raises ArgumentError describing the offending value. Values are
      # rendered with #inspect so nil, non-numeric, or array values are
      # reported verbatim rather than breaking the message formatting.
      def validate_coord_options
        return if coord_options.empty?

        unless Set.new(coord_options.keys) == Set.new(%i|direction length|)
          raise ArgumentError, "coord_options keys must contain only [:direction, :length], found: %s" % coord_options.keys.inspect
        end

        length = coord_options[:length]
        unless length.is_a?(Integer) && length > 0
          raise ArgumentError, "coord_options length must be greater than 0, found: %s" % length.inspect
        end

        direction = coord_options[:direction]
        unless [:up, :down, :both, 5, 3].include?(direction)
          raise ArgumentError, "coord_options directions is not a valid key, found: %s" % direction.inspect
        end
      end

      # Smaller of the two coordinates.
      def up_coord
        [from, to].min
      end

      # Larger of the two coordinates.
      def down_coord
        [from, to].max
      end

      # First coordinate of the (possibly extended) window.
      def seq_from
        up_coord + coord_window.min
      end

      # Last coordinate of the (possibly extended) window.
      def seq_to
        up_coord + coord_window.max
      end

      def strand
        plus_strand? ? :plus : :minus
      end

      # Plus strand when +to+ is strictly greater than +from+.
      def plus_strand?
        to > from
      end

      def minus_strand?
        !plus_strand?
      end

      # Returns the upcased sequence, fetching it from Entrez on first use
      # when none was supplied. Minus-strand fetches go through #complement.
      def sequence
        if @raw_sequence
          @raw_sequence
        else
          entrez_sequence = Wrnap::Global::Entrez.rna_sequence_from_entrez(accession, up_coord, coord_window)
          @raw_sequence   = (minus_strand? ? entrez_sequence.complement : entrez_sequence).upcase
        end
      end

      alias :seq :sequence

      # Returns a NEW Entrez-backed Context for the same accession and
      # coordinates using the given coord options; the receiver is unchanged.
      def extend!(coord_options = {})
        self.class.init_from_entrez(accession, from, to, coords: coord_options)
      end

      # Offsets, relative to up_coord, of the window to retrieve.
      #
      # Without :length and :direction this is 0..(down_coord - up_coord).
      # direction :both widens both ends by :length; 3/:down and 5/:up widen
      # a single end, chosen according to the strand.
      # Options from coord_options ex: { length: 300, direction: 3 }, { length: 250, direction: :both }, { length: 200, direction: :down }
      def coord_window
        range     = 0..(down_coord - up_coord)
        length    = coord_options[:length]
        direction = coord_options[:direction]

        return range unless length && direction
        return Range.new(range.min - length, range.max + length) if direction == :both

        case [direction, strand]
        when [3, :plus], [:down, :plus], [5, :minus], [:up, :minus] then Range.new(range.min, range.max + length)
        when [5, :plus], [:up, :plus], [3, :minus], [:down, :minus] then Range.new(range.min - length, range.max)
        # NOTE(review): only reachable if coord_options bypassed validation;
        # the result of Wrnap.debugger is returned instead of a Range here.
        else Wrnap.debugger { "WARNING: value for :direction key in sequence retreival needs to be one of 5, 3, :both - found (%s)" % coord_options[:direction].inspect }
        end
      end

      # "<accession> <from> <+|-> <to>"
      def identifier
        "%s %d %s %d" % [accession, from, plus_strand? ? ?+ : ?-, to]
      end

      # The superclass inspect string with the identifier inserted before the trailing class name.
      def inspect
        super.gsub(/((\w(::)?)+)>$/) { |_| "%s %s>" % [identifier, $1] }
      end
    end
  end
end
@@ -0,0 +1,102 @@
module Wrnap
  class Rna
    # Helper methods mixed into an RNA class: sequence utilities plus
    # dot-bracket structure helpers that are available both at class level
    # (taking explicit structures) and at instance level (using the
    # instance's own structure(s)).
    module Extensions
      # Includes InstanceMethods, extends +base+ with the class-level helper
      # modules, and defines an instance-level wrapper for every public
      # structure-based method: one-structure wrappers prepend +structure+,
      # two-structure wrappers prepend +str_1+ and +str_2+.
      def self.included(base)
        base.send(:include, InstanceMethods)
        base.extend(ClassMethods)
        base.extend(OneStructureBasedMethods)
        base.extend(TwoStructureBasedMethods)

        base.class_eval do
          OneStructureBasedMethods.public_instance_methods.each do |class_method|
            define_method(class_method) do |*args|
              self.class.send(class_method, structure, *args)
            end
          end

          TwoStructureBasedMethods.public_instance_methods.each do |class_method|
            define_method(class_method) do |*args|
              self.class.send(class_method, str_1, str_2, *args)
            end
          end
        end
      end

      module ClassMethods
        # Random sequence of +sequence_length+ nucleotides, each drawn
        # independently and uniformly from A, U, C, G (0th order Markov chain).
        def generate_sequence(sequence_length)
          Rna.init_from_string(sequence_length.times.map { %w[A U C G][rand(4)] }.join)
        end

        # Delegates to Shuffle with the given token length.
        def shuffle(sequence, token_length = 2)
          Shuffle.new(sequence).shuffle(token_length)
        end

        # Dot-bracket string of +length+ with "(" at the smaller and ")" at
        # the larger index of every pair in +base_pairs+.
        def structure_from_bp_list(length, base_pairs)
          base_pairs.to_a.map(&:to_a).map(&:sort).each_with_object("." * length) do |(i, j), structure|
            structure[i] = "("
            structure[j] = ")"
          end
        end
      end

      module InstanceMethods
        # Shuffle of this sequence with token length 2.
        def dishuffle
          self.class.shuffle(sequence, 2)
        end

        # Fraction of G/C characters (case-insensitive) in the sequence.
        def gc_content
          seq.scan(/[GC]/i).size.to_f / seq.size
        end

        # exp(-E_structure / RT) / exp(-E_ensemble / RT), evaluated as a single
        # exponential of the energy difference. Exponentiating the two terms
        # separately overflows to Infinity for large-magnitude energies and
        # turns the ratio into NaN.
        def boltzmann_probability(dangle: 2)
          structure_energy = run(:eval, d: dangle).mfe
          ensemble_energy  = run(:fold, d: dangle, p: 0).ensemble_energy

          Math.exp((ensemble_energy - structure_energy) / Wrnap::RT)
        end
      end

      module OneStructureBasedMethods
        # Number of base pairs in +structure+ plus floor((length - 3) / 2).
        def max_bp_distance(structure)
          base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
        end

        # Set of two-element Sets {i, j}, one per base pair in +structure+.
        def base_pairs(structure)
          get_pairings(structure).each_with_index.each_with_object(Set.new) do |(j, i), pairs|
            pairs << Set[i, j] if j >= 0
          end
        end

        # Array with, for each position, the index of its pairing partner or
        # -1 when unpaired. Only "(" and ")" are interpreted.
        #
        # Raises RuntimeError when the parentheses are unbalanced.
        def get_pairings(structure)
          stack    = []
          pairings = Array.new(structure.length, -1)

          structure.each_char.with_index do |symbol, index|
            case symbol
            when "("
              stack.push(index)
            when ")"
              raise "Too many ')' in '#{structure}'" if stack.empty?

              opening           = stack.pop
              pairings[opening] = index
              pairings[index]   = opening
            end
          end

          raise "Too many '(' in '#{structure}'" unless stack.empty?

          pairings
        end
      end

      module TwoStructureBasedMethods
        # Base pair distance: size of the symmetric difference between the
        # base pair sets of the two structures.
        #
        # Raises RuntimeError when the structures differ in length.
        def bp_distance(structure_1, structure_2)
          raise "The two structures are not the same length" unless structure_1.length == structure_2.length

          (base_pairs(structure_1) ^ base_pairs(structure_2)).size
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
module Wrnap
  class Rna
    # Mixin giving an object a free-form metadata container, reachable
    # through #meta / #meta_rna and, as a raw hash, through #md.
    module Metadata
      def self.included(base)
        base.send(:include, InstanceMethods)
      end

      module InstanceMethods
        # Defines #md on the including class as a delegator to @metadata.__data__.
        # The including class must provide def_delegator (e.g. via Forwardable).
        def self.included(base)
          base.send(:def_delegator, :@metadata, :__data__, :md)
        end

        # Returns the metadata container; when a block is given it is first
        # evaluated with the container as self.
        def meta(&block)
          container = metadata
          metadata.instance_eval(&block) if block
          container
        end

        # Same as #meta, but returns the RNA the container belongs to.
        def meta_rna(&block)
          owner = metadata.__rna__
          meta(&block)
          owner
        end
      end

      # Hash-backed bag of attributes: unknown zero-argument calls read a
      # key, unknown one-argument calls (with or without a trailing "=")
      # write it.
      class Container
        attr_reader :__rna__, :__data__

        def initialize(rna)
          @__rna__  = rna
          @__data__ = {}
        end

        def inspect
          format("#<Metadata: %s>", __data__.inspect)
        end

        alias_method :to_s, :inspect

        # No arguments: read __data__[name]. One argument: store it under
        # +name+ with any trailing "=" removed. Anything else falls through
        # to the default behavior.
        def method_missing(name, *args, &block)
          return __data__[name] if args.empty?
          return super unless args.size == 1

          key = name.to_s.gsub(/=$/, "").to_sym
          __data__[key] = args.first
        end
      end
    end
  end
end