wrnap 0.12.2 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ module Wrnap
2
+ class Rna
3
# Array-like wrapper around a list of RNA objects.
#
# Enumeration goes through #each; size, length, [], []= and << are forwarded
# straight to the wrapped array.
class Box
  extend Forwardable
  include Enumerable
  include Wrnap::Global::Yaml

  attr_reader :rnas

  def_delegators :@rnas, :size, :length, :[], :[]=, :<<

  # Builds a Box from every file matched by +pattern+. When +pattern+ names a
  # directory, the glob used is "<pattern>/*.fa". Each file is read with
  # RNA.from_fasta, which also receives the optional block.
  def self.load_all(pattern = "*.fa", &block)
    glob = File.directory?(pattern) ? pattern + "/*.fa" : pattern
    loaded = Dir[glob].map { |file| RNA.from_fasta(file, &block) }

    new(loaded)
  end

  # Accepts either a collection that answers true to kind_of?(Array) (stored
  # as-is) or a single object (wrapped in a one-element array).
  def initialize(rnas)
    @rnas = if rnas.kind_of?(Array)
      rnas
    else
      [rnas]
    end
  end

  # Calls #pp on every wrapped RNA; always evaluates to nil.
  def pp
    rnas.each(&:pp)
    nil
  end

  # Returns a new Box holding this box's RNAs followed by those of +arrayish+
  # (another Box, or anything Array#+ accepts).
  def +(arrayish)
    appended = arrayish.is_a?(Box) ? arrayish.rnas : arrayish

    self.class.new(rnas + appended)
  end

  # Iterates over the wrapped RNAs.
  def each(&block)
    rnas.each(&block)
  end

  # Answers true for Array in addition to the regular ancestry, so
  # kind_of?(Array) checks (such as the one in #initialize) accept a Box.
  def kind_of?(klass)
    klass == Array || super
  end

  def inspect
    format("#<Wrnap::Rna::Box with %d RNAs>", rnas.size)
  end
end
42
+ end
43
+ end
@@ -0,0 +1,119 @@
1
+ module Wrnap
2
+ class Rna
3
+ module Constraints
4
# Inclusion hook: mixes the sibling InstanceMethods module into +base+.
def self.included(base)
  base.class_eval { include InstanceMethods }
end
7
+
8
# Instance-level helpers for reading and building the constraint mask that is
# kept under the :constraint_mask key of +md+.
module InstanceMethods
  # Reads the :constraint_mask entry from +md+.
  def constraint_mask
    md[:constraint_mask]
  end

  # Builds a ConstraintBox for the RNA exposed by the metadata object,
  # evaluates +block+ in the context of that box, then hands the box to
  # constraint_mask(...) from inside the meta_rna block.
  #
  # NOTE(review): meta_rna is defined elsewhere; this presumably relies on it
  # evaluating the block against the metadata container so that the
  # one-argument constraint_mask call stores the box — confirm.
  def build_constraints(&block)
    meta_rna do |metadata|
      box = ConstraintBox.new(metadata.__rna__)
      box.instance_eval(&block)
      constraint_mask(box)
    end
  end
end
19
+
20
# Collects masking constraints for one RNA and renders them as a mask string
# with one character per position ("." where nothing is constrained).
#
# Besides the explicit mask_* methods, method_missing provides:
# * names matching TreeStem::STEM_NOTATION_REGEX, forwarded to rna.trunk
# * prohibit / force, optionally suffixed with _left_stem or _right_stem,
#   which call mask! with a default symbol of "x" (prohibit) or "()" (force)
class ConstraintBox
  # Names handled by method_missing as masking shortcuts.
  MASK_METHOD_REGEX = /^(prohibit|force)(_(left|right)_stem)?$/

  attr_reader :rna, :constraints

  def initialize(rna)
    @rna, @constraints = rna, []
  end

  # Loop spanning positions i..j.
  def between(i, j)
    Loop.new(i, j)
  end

  # Loop spanning the positions strictly between i and j.
  def inside(i, j)
    between(i + 1, j - 1)
  end

  # Masks a Helix or a Loop; any other object is ignored (returns nil).
  #
  # Options may be given as keywords or as a single trailing Hash. Accepting
  # them as real keywords (rather than re-splatting a positional Hash into
  # keyword-only methods) keeps this working under Ruby 3 keyword separation.
  #
  # Helix options: side: (:left, :right or :both) and symbol:.
  # Loop options:  symbol: only; when it is absent mask_loop!'s default is
  # used instead of failing on a missing options hash.
  def mask!(mask_object, *args, **options)
    options = args.first.merge(options) if args.first.is_a?(Hash)

    case mask_object
    when Helix
      mask_helix!(mask_object, **options)
    when Loop
      symbol = options[:symbol]
      symbol ? mask_loop!(mask_object, symbol: symbol) : mask_loop!(mask_object)
    end
  end

  # Masks one or both sides of +helix+. A multi-character +symbol+ is split
  # so that its first character marks the left side and its second the right;
  # a single character is used for both sides.
  def mask_helix!(helix, side: :both, symbol: "()")
    left_loop, right_loop = helix.to_loops

    if symbol.length > 1
      left_symbol, right_symbol = symbol.split(//)
    else
      left_symbol = right_symbol = symbol
    end

    mask_loop!(left_loop, symbol: left_symbol) if side == :left || side == :both
    mask_loop!(right_loop, symbol: right_symbol) if side == :right || side == :both
  end

  # Masks the region covered by the loop's i..j.
  def mask_loop!(l00p, symbol: "x")
    mask_region!(l00p.i, l00p.j, symbol: symbol)
  end

  # Records a constraint for positions i..j.
  #
  # Raises ArgumentError unless +symbol+ is exactly one character: to_s
  # repeats the symbol once per position, so an empty or longer symbol would
  # change the length of the rendered mask.
  def mask_region!(i, j, symbol: "x")
    unless symbol.length == 1
      raise ArgumentError, "Trying to apply symbol '%s' from %d to %d, all symbols must be 1 char long." % [symbol, i, j]
    end

    constraints << ConstraintData.new(i, j, symbol)
    prune!
  end

  # Drops duplicate constraints (same name), keeping the first occurrence.
  def prune!
    @constraints = constraints.uniq(&:name)
  end

  def inspect
    "#<Constraints: %s>" % constraints.map(&:name).join(", ")
  end

  # Renders the mask: rna.len dots, with each constraint's from..to range
  # overwritten by its symbol. Later constraints overwrite earlier ones.
  def to_s
    (?. * rna.len).tap do |string|
      constraints.each { |constraint| string[constraint.from..constraint.to] = constraint.symbol * constraint.length }
    end
  end

  alias :mask :to_s

  def method_missing(name, *args, &block)
    method_name = name.to_s

    if method_name =~ TreeStem::STEM_NOTATION_REGEX
      rna.trunk.send(method_name)
    elsif (mask_type = method_name.match(MASK_METHOD_REGEX))
      side_symbol = mask_type[3] ? mask_type[3].to_sym : :both
      default_symbol = mask_type[1] == "prohibit" ? "x" : "()"

      mask!(args[0], side: side_symbol, symbol: args[1] || default_symbol)
    else
      super
    end
  end

  # Keeps respond_to? / method in agreement with method_missing.
  def respond_to_missing?(name, include_private = false)
    method_name = name.to_s
    return true if method_name =~ TreeStem::STEM_NOTATION_REGEX || method_name =~ MASK_METHOD_REGEX

    super
  end
end
97
+
98
# One masked region: the symbol applied to positions from..to (inclusive).
class ConstraintData
  attr_reader :from, :to, :symbol

  def initialize(from, to, symbol)
    @from = from
    @to = to
    @symbol = symbol
  end

  # Human-readable label; ConstraintBox#prune! uses it to detect duplicates.
  def name
    format("(%d to %d as '%s')", from, to, symbol)
  end

  # Number of positions covered, counting both endpoints.
  def length
    (to - from) + 1
  end

  def inspect
    "#<Constraint: #{name}>"
  end
end
117
+ end
118
+ end
119
+ end
@@ -0,0 +1,139 @@
1
+ module Wrnap
2
+ class Rna
3
# An Rna tied to a stretch of an accession-identified sequence.
#
# +from+ and +to+ give the coordinates (to > from means plus strand, anything
# else minus strand). +coord_options+ may widen the retrieved window, e.g.
#   { length: 300, direction: 3 }
#   { length: 250, direction: :both }
#   { length: 200, direction: :down }
# The sequence is either supplied up front or fetched through
# Wrnap::Global::Entrez on first use.
class Context < Rna
  attr_reader :accession, :from, :to, :coord_options

  class << self
    # Builds a Context whose sequence will be fetched by accession and coordinates.
    # +options+ may hold :coords (see coord_options) and :rna (structure/comment data).
    def init_from_entrez(accession, from, to, options = {}, &block)
      new(accession: accession, from: from, to: to, options: options, &block)
    end

    # Builds a Context around an already-known +sequence+.
    def init_from_string(sequence, accession, from, to, options = {}, &block)
      new(sequence: sequence, accession: accession, from: from, to: to, options: options, &block)
    end
  end

  # Raises ArgumentError when options[:coords] is non-empty but malformed
  # (see validate_coord_options).
  def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {}, &block)
    options = { coords: {}, rna: {} }.merge(options)
    rna_options = options[:rna]

    @accession, @from, @to, @coord_options = accession, from, to, options[:coords]

    validate_coord_options

    if sequence
      @raw_sequence = (sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence).upcase
    end

    super(
      sequence:         self.sequence,
      structure:        rna_options[:structure] || rna_options[:str_1] || rna_options[:str],
      second_structure: rna_options[:second_structure] || rna_options[:str_2],
      comment:          rna_options[:comment] || rna_options[:name] || identifier,
      &block
    )

    # This class serves the sequence from @raw_sequence (see #sequence), so the
    # copy presumably stored by the superclass is dropped. Guarded so a
    # superclass that never sets @sequence does not raise NameError here.
    remove_instance_variable(:@sequence) if instance_variable_defined?(:@sequence)
  end

  # Checks a non-empty coord_options hash: its keys must be exactly
  # :direction and :length, length must be a positive Integer, and direction
  # one of :up, :down, :both, 5, 3.
  #
  # Offending values are rendered with #inspect. Formatting them with "%d", or
  # passing an Array straight to String#% (which treats it as the argument
  # list), either raised an unrelated error or printed a misleading value.
  def validate_coord_options
    return if coord_options.empty?

    unless Set.new(coord_options.keys) == Set.new(%i|direction length|)
      raise ArgumentError, "coord_options keys must contain only [:direction, :length], found: %s" % coord_options.keys.inspect
    end

    unless (length = coord_options[:length]).is_a?(Integer) && length > 0
      raise ArgumentError, "coord_options length must be greater than 0, found: %s" % length.inspect
    end

    unless [:up, :down, :both, 5, 3].include?(direction = coord_options[:direction])
      raise ArgumentError, "coord_options directions is not a valid key, found: %s" % direction.inspect
    end
  end

  # Smaller of the two coordinates.
  def up_coord
    [from, to].min
  end

  # Larger of the two coordinates.
  def down_coord
    [from, to].max
  end

  # First coordinate of the (possibly widened) window.
  def seq_from
    up_coord + coord_window.min
  end

  # Last coordinate of the (possibly widened) window.
  def seq_to
    up_coord + coord_window.max
  end

  def strand
    plus_strand? ? :plus : :minus
  end

  def plus_strand?
    to > from
  end

  def minus_strand?
    !plus_strand?
  end

  # Upcased sequence: the one given at construction, otherwise fetched once
  # through Wrnap::Global::Entrez (complemented for the minus strand) and cached.
  def sequence
    return @raw_sequence if @raw_sequence

    entrez_sequence = Wrnap::Global::Entrez.rna_sequence_from_entrez(accession, up_coord, coord_window)
    @raw_sequence = (minus_strand? ? entrez_sequence.complement : entrez_sequence).upcase
  end

  alias :seq :sequence

  # Returns a NEW entrez-backed Context for the same accession and coordinates
  # using the given coord_options; the receiver is not modified.
  def extend!(coord_options = {})
    self.class.init_from_entrez(accession, from, to, coords: coord_options)
  end

  # Offsets, relative to up_coord, of the window to retrieve.
  #
  # Without both :length and :direction in coord_options this is
  # 0..(down_coord - up_coord). Otherwise the window grows by :length on both
  # ends (:both), or on one end chosen from the direction and strand.
  # Options from coord_options ex: { length: 300, direction: 3 }, { length: 250, direction: :both }, { length: 200, direction: :down }
  def coord_window
    range = 0..(down_coord - up_coord)
    length, direction = coord_options[:length], coord_options[:direction]

    return range unless length && direction

    if direction == :both
      Range.new(range.min - length, range.max + length)
    else
      case [direction, strand]
      when [3, :plus], [:down, :plus], [5, :minus], [:up, :minus] then Range.new(range.min, range.max + length)
      when [5, :plus], [:up, :plus], [3, :minus], [:down, :minus] then Range.new(range.min - length, range.max)
      else Wrnap.debugger { "WARNING: value for :direction key in sequence retreival needs to be one of 5, 3, :both - found (%s)" % direction.inspect }
      end
    end
  end

  # "<accession> <from> <+|-> <to>"
  def identifier
    "%s %d %s %d" % [accession, from, plus_strand? ? ?+ : ?-, to]
  end

  # The superclass's inspect string with the identifier inserted before its
  # trailing "Name>" segment.
  def inspect
    super.gsub(/((\w(::)?)+)>$/) { |_| "%s %s>" % [identifier, $1] }
  end
end
138
+ end
139
+ end
@@ -0,0 +1,102 @@
1
+ module Wrnap
2
+ class Rna
3
+ module Extensions
4
# Inclusion hook.
#
# * mixes InstanceMethods into +base+ (once; the original repeated this call
#   at the end, which was a redundant no-op)
# * extends +base+ with ClassMethods and both structure-based modules
# * for every public method of OneStructureBasedMethods, defines an instance
#   method on +base+ that calls the class-level method with the instance's
#   +structure+ as first argument
# * likewise for TwoStructureBasedMethods, passing +str_1+ and +str_2+
def self.included(base)
  base.send(:include, InstanceMethods)
  base.extend(ClassMethods)
  base.extend(OneStructureBasedMethods)
  base.extend(TwoStructureBasedMethods)

  base.class_eval do
    OneStructureBasedMethods.public_instance_methods.each do |class_method|
      define_method(class_method) do |*args|
        self.class.send(class_method, structure, *args)
      end
    end

    TwoStructureBasedMethods.public_instance_methods.each do |class_method|
      define_method(class_method) do |*args|
        self.class.send(class_method, str_1, str_2, *args)
      end
    end
  end
end
25
+
26
# Class-level helpers for generating, shuffling and describing sequences.
module ClassMethods
  # Builds an Rna (via Rna.init_from_string) from +sequence_length+ letters,
  # each drawn independently and uniformly from A, U, C, G
  # (0th order Markov chain w/ uniform probability distribution).
  def generate_sequence(sequence_length)
    letters = sequence_length.times.map { %w[A U C G][rand(4)] }

    Rna.init_from_string(letters.join)
  end

  # Delegates to Shuffle, shuffling +sequence+ with the given token length.
  def shuffle(sequence, token_length = 2)
    shuffler = Shuffle.new(sequence)
    shuffler.shuffle(token_length)
  end

  # Renders a string of +length+ dots in which, for each pair in
  # +base_pairs+, the smaller index is marked "(" and the larger ")".
  def structure_from_bp_list(length, base_pairs)
    base_pairs.to_a.each_with_object("." * length) do |pair, structure|
      i, j = pair.to_a.sort
      structure[i] = "("
      structure[j] = ")"
    end
  end
end
40
+
41
# Per-instance sequence helpers.
module InstanceMethods
  # Shuffles this instance's sequence through the class-level shuffle with a
  # token length of 2.
  def dishuffle
    token_length = 2

    self.class.shuffle(sequence, token_length)
  end

  # Fraction of characters in +seq+ that are G or C (case-insensitive).
  def gc_content
    gc_total = seq.split(//).count { |base| base =~ /[GC]/i }

    gc_total.to_f / seq.size
  end

  # exp(-mfe / RT) from an :eval run divided by exp(-ensemble_energy / RT)
  # from a :fold run, both using the given dangle setting.
  # NOTE(review): presumably the Boltzmann probability of the evaluated
  # structure within the folding ensemble — confirm against the run wrappers.
  def boltzmann_probability(dangle: 2)
    numerator = Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT)
    denominator = Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)

    numerator / denominator
  end
end
54
+
55
# Helpers that work on a single dot-bracket structure string.
module OneStructureBasedMethods
  # Number of base pairs in +structure+ plus floor((length - 3) / 2).
  def max_bp_distance(structure)
    pair_count = base_pairs(structure).count

    pair_count + ((structure.length - 3) / 2.0).floor
  end

  # Set of base pairs, each itself a Set of the two paired indices.
  def base_pairs(structure)
    pairs = Set.new

    get_pairings(structure).each_with_index do |partner, index|
      pairs << Set[index, partner] if partner >= 0
    end

    pairs
  end

  # Array with one entry per position: the index of the paired position, or
  # -1 when the position is unpaired. Characters other than "(" and ")" are
  # treated as unpaired.
  #
  # Raises RuntimeError when the parentheses are unbalanced.
  def get_pairings(structure)
    open_positions = []
    pairings = Array.new(structure.length, -1)

    structure.each_char.with_index do |symbol, index|
      if symbol == "("
        open_positions.push(index)
      elsif symbol == ")"
        raise "Too many ')' in '#{structure}'" if open_positions.empty?

        opening = open_positions.pop
        pairings[opening] = index
        pairings[index] = opening
      end
    end

    raise "Too many '(' in '#{structure}'" unless open_positions.empty?

    pairings
  end
end
89
+
90
# Helpers that compare two dot-bracket structure strings.
module TwoStructureBasedMethods
  # Base pair distance: the number of base pairs present in exactly one of
  # the two structures, i.e. |symmetric difference(bp_in_a, bp_in_b)|.
  #
  # Raises RuntimeError when the structures differ in length.
  def bp_distance(structure_1, structure_2)
    same_length = structure_1.length == structure_2.length
    raise "The two structures are not the same length" unless same_length

    bp_set_1 = base_pairs(structure_1)
    bp_set_2 = base_pairs(structure_2)

    only_in_first = bp_set_1 - bp_set_2
    only_in_second = bp_set_2 - bp_set_1

    (only_in_first + only_in_second).count
  end
end
100
+ end
101
+ end
102
+ end
@@ -0,0 +1,46 @@
1
+ module Wrnap
2
+ class Rna
3
+ module Metadata
4
# Inclusion hook: mixes the sibling InstanceMethods module into +base+.
def self.included(base)
  base.class_eval { include InstanceMethods }
end
7
+
8
# Instance-level access to the metadata container returned by +metadata+.
module InstanceMethods
  # Inclusion hook: defines +md+ on +base+ as a delegator to
  # @metadata.__data__. +base+ must already provide def_delegator
  # (i.e. extend Forwardable).
  def self.included(base)
    base.send(:def_delegator, :@metadata, :__data__, :md)
  end

  # Returns the metadata container; when a block is given it is first
  # evaluated in the context of that container.
  def meta(&block)
    container = metadata
    container.instance_eval(&block) if block
    container
  end

  # Same as #meta, but returns the container's __rna__ instead of the
  # container itself.
  def meta_rna(&block)
    rna = metadata.__rna__
    meta(&block)
    rna
  end
end
23
+
24
# Free-form key/value store attached to an RNA.
#
# Unknown methods are treated as data access:
# * no arguments   -> reads the value stored under the method name
# * one argument   -> stores it under the method name (a trailing "=" is
#                     stripped, so both `foo = 1` and `foo(1)` write :foo)
# * more arguments -> falls through to the default method_missing
class Container
  attr_reader :__rna__, :__data__

  def initialize(rna)
    @__rna__ = rna
    @__data__ = {}
  end

  def inspect
    format("#<Metadata: %s>", __data__.inspect)
  end

  alias :to_s :inspect

  def method_missing(name, *args, &block)
    return __data__[name] if args.empty?
    return super unless args.size == 1

    key = name.to_s.gsub(/=$/, "").to_sym
    __data__[key] = args.first
  end
end
44
+ end
45
+ end
46
+ end