wrnap 0.12.2 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/wrnap.rb +34 -35
- data/lib/wrnap/etl/infernal.rb +9 -8
- data/lib/wrnap/etl/stockholm.rb +6 -6
- data/lib/wrnap/global/yaml.rb +14 -0
- data/lib/wrnap/graphing/r.rb +8 -4
- data/lib/wrnap/package/base.rb +4 -7
- data/lib/wrnap/package/fold.rb +1 -1
- data/lib/wrnap/package/fold_constrained.rb +24 -0
- data/lib/wrnap/package/mfpt.rb +1 -1
- data/lib/wrnap/package/rnabor.rb +2 -2
- data/lib/wrnap/package/varna.rb +4 -3
- data/lib/wrnap/package/xbor.rb +2 -2
- data/lib/wrnap/rna.rb +197 -0
- data/lib/wrnap/rna/box.rb +43 -0
- data/lib/wrnap/rna/constraints.rb +119 -0
- data/lib/wrnap/rna/context.rb +139 -0
- data/lib/wrnap/rna/extensions.rb +102 -0
- data/lib/wrnap/rna/metadata.rb +46 -0
- data/lib/wrnap/rna/motifs.rb +72 -0
- data/lib/wrnap/rna/tree.rb +136 -0
- data/lib/wrnap/rna/wrapper.rb +9 -0
- data/lib/wrnap/version.rb +1 -1
- metadata +14 -9
- data/lib/wrnap/global/rna.rb +0 -190
- data/lib/wrnap/global/rna/context.rb +0 -141
- data/lib/wrnap/global/rna/extensions.rb +0 -104
- data/lib/wrnap/global/rna/helix.rb +0 -36
- data/lib/wrnap/global/rna/metadata.rb +0 -48
- data/lib/wrnap/global/rna/tree.rb +0 -87
@@ -0,0 +1,43 @@
|
|
1
|
+
module Wrnap
|
2
|
+
class Rna
|
3
|
+
# Enumerable collection that wraps an Array of RNA objects and forwards a
# handful of Array accessors to it.
class Box
  extend Forwardable
  include Enumerable
  include Wrnap::Global::Yaml

  attr_reader :rnas

  # Array-style accessors are answered directly by the wrapped array.
  def_delegators :@rnas, :size, :length, :[], :[]=, :<<

  # Builds a Box from FASTA files.
  #
  # +pattern+ is a glob; when it names a directory, "/*.fa" is appended to it.
  # Every matching file is handed to RNA.from_fasta together with the
  # optional block.
  def self.load_all(pattern = "*.fa", &block)
    glob = File.directory?(pattern) ? pattern + "/*.fa" : pattern
    loaded = Dir[glob].map { |file| RNA.from_fasta(file, &block) }

    new(loaded)
  end

  # Stores +rnas+ as-is when it reports itself as an Array, otherwise wraps
  # it in a one-element array. The check goes through #kind_of? on purpose:
  # Box overrides that method, so a Box is accepted as-is too.
  def initialize(rnas)
    @rnas = if rnas.kind_of?(Array)
      rnas
    else
      [rnas]
    end
  end

  # Calls #pp on every wrapped RNA; always returns nil.
  def pp
    rnas.each { |rna| rna.pp }
    nil
  end

  # Returns a new box (of the receiver's class) holding this box's RNAs
  # followed by those of +arrayish+, which may be a Box or anything
  # Array#+ accepts.
  def +(arrayish)
    addition = arrayish.is_a?(Box) ? arrayish.rnas : arrayish

    self.class.new(rnas + addition)
  end

  # Required by Enumerable; iteration is delegated to the wrapped array.
  def each(&block)
    rnas.each(&block)
  end

  # Claims to be an Array when asked explicitly; every other class check
  # falls through to the default implementation.
  def kind_of?(klass)
    return true if klass == Array

    super
  end

  def inspect
    format("#<Wrnap::Rna::Box with %d RNAs>", rnas.size)
  end
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module Wrnap
|
2
|
+
class Rna
|
3
|
+
# Mixin that lets an RNA build and store a constraint mask: a string with one
# character per nucleotide, "." by default, overwritten region by region.
module Constraints
  def self.included(base)
    base.send(:include, InstanceMethods)
  end

  module InstanceMethods
    # Reads the value stored under :constraint_mask in the metadata hash (+md+).
    def constraint_mask
      md[:constraint_mask]
    end

    # Evaluates +block+ against a fresh ConstraintBox for this RNA and hands
    # the box to +constraint_mask(...)+ inside the +meta_rna+ block.
    #
    # NOTE(review): the one-argument +constraint_mask+ call presumably resolves
    # on the metadata container (the block looks to be instance_eval'ed there),
    # not on the zero-argument reader above -- confirm against +meta_rna+.
    def build_constraints(&block)
      meta_rna do |metadata|
        constraint_mask(ConstraintBox.new(metadata.__rna__).tap { |box| box.instance_eval(&block) })
      end
    end
  end

  # Collects ConstraintData entries for one RNA and renders them as a mask.
  class ConstraintBox
    # Dynamic mask helpers answered by #method_missing: prohibit, force and
    # their _left_stem / _right_stem variants.
    MASK_METHOD_REGEX = /^(prohibit|force)(_(left|right)_stem)?$/

    attr_reader :rna, :constraints

    def initialize(rna)
      @rna, @constraints = rna, []
    end

    # Loop covering positions i..j.
    def between(i, j)
      Loop.new(i, j)
    end

    # Loop covering the positions strictly between i and j.
    def inside(i, j)
      between(i + 1, j - 1)
    end

    # Masks a Helix or a Loop; any other object is ignored (returns nil).
    #
    # An optional trailing options hash may carry :side and :symbol. For a
    # Helix both are forwarded to #mask_helix!; for a Loop only :symbol is
    # used, and #mask_loop!'s default applies when it is absent.
    def mask!(mask_object, *args)
      # Keywords passed by the caller arrive here as one positional Hash. It
      # has to be re-expanded with ** because a positional Hash is no longer
      # converted to keywords implicitly on Ruby 3.
      options = args.first || {}

      case mask_object
      when Helix
        mask_helix!(mask_object, **options)
      when Loop
        mask_loop!(mask_object, **options.select { |key, _| key == :symbol })
      end
    end

    # Masks one or both strands of +helix+.
    #
    # side   - :left, :right or :both.
    # symbol - a one-character string used for both strands, or a longer
    #          string whose first two characters are used for the left and
    #          right strand respectively.
    def mask_helix!(helix, side: :both, symbol: "()")
      left_loop, right_loop = helix.to_loops
      left_symbol, right_symbol = symbol.length > 1 ? symbol.split(//) : [symbol, symbol]

      mask_loop!(left_loop, symbol: left_symbol) if side == :left || side == :both
      mask_loop!(right_loop, symbol: right_symbol) if side == :right || side == :both
    end

    def mask_loop!(l00p, symbol: "x")
      mask_region!(l00p.i, l00p.j, symbol: symbol)
    end

    # Records a constraint for positions i..j and drops duplicates.
    #
    # Raises ArgumentError unless +symbol+ is exactly one character long; an
    # empty symbol would otherwise shorten the rendered mask in #to_s.
    def mask_region!(i, j, symbol: "x")
      unless symbol.length == 1
        raise ArgumentError.new("Trying to apply symbol '%s' from %d to %d, all symbols must be 1 char long." % [symbol, i, j])
      end

      constraints << ConstraintData.new(i, j, symbol)
      prune!
    end

    # Keeps the first constraint seen for every distinct #name.
    def prune!
      @constraints = constraints.uniq(&:name)
    end

    def inspect
      "#<Constraints: %s>" % constraints.map(&:name).join(", ")
    end

    # Renders the mask: rna.len dots, with every constraint written over its
    # region in insertion order, so later constraints win on overlap.
    def to_s
      (?. * rna.len).tap do |string|
        constraints.each { |constraint| string[constraint.from..constraint.to] = constraint.symbol * constraint.length }
      end
    end

    alias :mask :to_s

    # Two families of dynamic methods are supported:
    # * names matching TreeStem::STEM_NOTATION_REGEX are sent to rna.trunk;
    # * prohibit / force (optionally _left_stem / _right_stem) call #mask!
    #   with args[0] as the object and args[1] as an optional symbol, which
    #   defaults to "x" for prohibit and "()" for force.
    def method_missing(name, *args, &block)
      method_name = name.to_s

      if method_name =~ TreeStem::STEM_NOTATION_REGEX
        rna.trunk.send(method_name)
      elsif (mask_type = method_name.match(MASK_METHOD_REGEX))
        side_symbol = mask_type[3] ? mask_type[3].to_sym : :both
        default_symbol = mask_type[1] == "prohibit" ? "x" : "()"

        mask!(args[0], side: side_symbol, symbol: args[1] || default_symbol)
      else
        super
      end
    end

    # Keeps respond_to? in agreement with #method_missing.
    def respond_to_missing?(name, include_private = false)
      method_name = name.to_s

      return true if method_name =~ MASK_METHOD_REGEX
      return true if method_name =~ TreeStem::STEM_NOTATION_REGEX

      super
    end
  end

  # One masked region: positions from..to (inclusive) rendered as +symbol+.
  class ConstraintData
    attr_reader :from, :to, :symbol

    def initialize(from, to, symbol)
      @from, @to, @symbol = from, to, symbol
    end

    # Identity string used for de-duplication and display.
    def name
      "(%d to %d as '%s')" % [from, to, symbol]
    end

    # Number of positions covered.
    def length
      to - from + 1
    end

    def inspect
      "#<Constraint: %s>" % name
    end
  end
end
|
118
|
+
end
|
119
|
+
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
module Wrnap
|
2
|
+
class Rna
|
3
|
+
class Context < Rna
|
4
|
+
attr_reader :accession, :from, :to, :coord_options
|
5
|
+
|
6
|
+
# Builds a context whose sequence is not supplied up front; only the
# accession and the from/to coordinates are passed to the constructor.
def self.init_from_entrez(accession, from, to, options = {}, &block)
  attributes = { accession: accession, from: from, to: to, options: options }

  new(**attributes, &block)
end

# Builds a context from an already-known +sequence+, keeping the accession
# and from/to coordinates alongside it.
def self.init_from_string(sequence, accession, from, to, options = {}, &block)
  attributes = { sequence: sequence, accession: accession, from: from, to: to, options: options }

  new(**attributes, &block)
end
|
28
|
+
|
29
|
+
# sequence  - optional String or sequence object; Strings are wrapped in
#             Bio::Sequence::NA. The upcased result is kept in @raw_sequence.
# accession - accession identifier, stored as given.
# from, to  - coordinates, stored as given.
# options   - Hash with optional :coords (validated by
#             #validate_coord_options) and :rna sub-hashes.
#
# The parent initializer receives #sequence plus the structures and comment
# taken from options[:rna]; #identifier is the fallback comment.
def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {}, &block)
  options = { coords: {}, rna: {} }.merge(options)
  rna_options = options[:rna]

  @accession = accession
  @from = from
  @to = to
  @coord_options = options[:coords]

  validate_coord_options

  if sequence
    nucleic_acid = sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence
    @raw_sequence = nucleic_acid.upcase
  end

  super(
    sequence: self.sequence,
    structure: rna_options[:structure] || rna_options[:str_1] || rna_options[:str],
    second_structure: rna_options[:second_structure] || rna_options[:str_2],
    comment: rna_options[:comment] || rna_options[:name] || identifier,
    &block
  )

  # This class answers #sequence from @raw_sequence, so the @sequence ivar is
  # dropped once the parent initializer has run.
  remove_instance_variable(:@sequence)
end
|
50
|
+
|
51
|
+
# Checks +coord_options+; an empty hash is always accepted.
#
# A non-empty hash must have exactly the keys :direction and :length, a
# positive Integer :length, and a :direction of :up, :down, :both, 5 or 3.
#
# Returns nil. Raises ArgumentError describing the first violated rule.
def validate_coord_options
  return if coord_options.empty?

  unless Set.new(coord_options.keys) == Set.new(%i|direction length|)
    # inspect is needed here: String#% would treat the bare Array as its
    # argument list and print only the first key.
    raise ArgumentError, "coord_options keys must contain only [:direction, :length], found: %s" % coord_options.keys.inspect
  end

  length = coord_options[:length]
  unless length.is_a?(Integer) && length > 0
    # %s with inspect instead of %d, so a nil or non-numeric length reports
    # this ArgumentError rather than failing inside the format call.
    raise ArgumentError, "coord_options length must be greater than 0, found: %s" % length.inspect
  end

  direction = coord_options[:direction]
  unless [:up, :down, :both, 5, 3].include?(direction)
    raise ArgumentError, "coord_options directions is not a valid key, found: %s" % direction.inspect
  end
end
|
66
|
+
|
67
|
+
# The smaller of the +from+ / +to+ coordinates.
def up_coord
  lowest, _highest = [from, to].minmax
  lowest
end
|
70
|
+
|
71
|
+
# The larger of the +from+ / +to+ coordinates.
def down_coord
  _lowest, highest = [from, to].minmax
  highest
end
|
74
|
+
|
75
|
+
# First coordinate of the sequence window: +up_coord+ shifted by the lower
# bound of +coord_window+.
def seq_from
  anchor = up_coord
  anchor + coord_window.min
end
|
78
|
+
|
79
|
+
# Last coordinate of the sequence window: +up_coord+ shifted by the upper
# bound of +coord_window+.
def seq_to
  anchor = up_coord
  anchor + coord_window.max
end
|
82
|
+
|
83
|
+
# :plus when #plus_strand? holds, :minus otherwise.
def strand
  return :plus if plus_strand?

  :minus
end
|
86
|
+
|
87
|
+
# True when +to+ is strictly greater than +from+; equal coordinates count as
# not-plus.
def plus_strand?
  end_coord, start_coord = to, from
  end_coord > start_coord
end
|
90
|
+
|
91
|
+
# Logical negation of #plus_strand?.
def minus_strand?
  plus_strand? ? false : true
end
|
94
|
+
|
95
|
+
# Returns the cached @raw_sequence when present. Otherwise asks
# Wrnap::Global::Entrez.rna_sequence_from_entrez for the region described by
# accession / up_coord / coord_window, calls #complement on the result for
# minus-strand contexts, upcases it and caches it.
def sequence
  return @raw_sequence if @raw_sequence

  fetched = Wrnap::Global::Entrez.rna_sequence_from_entrez(accession, up_coord, coord_window)
  oriented = minus_strand? ? fetched.complement : fetched

  @raw_sequence = oriented.upcase
end
|
103
|
+
|
104
|
+
alias :seq :sequence
|
105
|
+
|
106
|
+
# Returns a new context of the same class for the same accession and
# coordinates, built through init_from_entrez with +coord_options+ as its
# :coords option. The receiver itself is not modified here.
def extend!(coord_options = {})
  context_class = self.class

  context_class.init_from_entrez(accession, from, to, coords: coord_options)
end
|
109
|
+
|
110
|
+
# Range of offsets, relative to +up_coord+, that the sequence covers.
#
# Without both :length and :direction in +coord_options+ this is
# 0..(down_coord - up_coord). Otherwise the range grows by :length:
# * :both                  - on both ends;
# * 3 / :down on the plus strand, 5 / :up on the minus strand - at the upper end;
# * 5 / :up on the plus strand, 3 / :down on the minus strand - at the lower end.
#
# Example options: { length: 300, direction: 3 },
# { length: 250, direction: :both }, { length: 200, direction: :down }.
#
# Any other direction emits a warning through Wrnap.debugger and returns the
# unextended range, so callers always receive a Range.
def coord_window
  base = 0..(down_coord - up_coord)
  length, direction = coord_options[:length], coord_options[:direction]

  return base unless length && direction
  return Range.new(base.min - length, base.max + length) if direction == :both

  case [direction, strand]
  when [3, :plus], [:down, :plus], [5, :minus], [:up, :minus]
    Range.new(base.min, base.max + length)
  when [5, :plus], [:up, :plus], [3, :minus], [:down, :minus]
    Range.new(base.min - length, base.max)
  else
    Wrnap.debugger { "WARNING: value for :direction key in sequence retrieval needs to be one of 5, 3, :up, :down, :both - found (%s)" % direction.inspect }
    base
  end
end
|
129
|
+
|
130
|
+
# "<accession> <from> <sign> <to>", where the sign is "+" when
# #plus_strand? holds and "-" otherwise.
def identifier
  strand_sign = plus_strand? ? "+" : "-"

  format("%s %d %s %d", accession, from, strand_sign, to)
end
|
133
|
+
|
134
|
+
# Takes the parent's inspect string and inserts #identifier in front of the
# trailing word (optionally "::"-separated) that precedes the closing ">".
def inspect
  super.gsub(/((\w(::)?)+)>$/) do
    "%s %s>" % [identifier, Regexp.last_match(1)]
  end
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module Wrnap
|
2
|
+
class Rna
|
3
|
+
module Extensions
|
4
|
+
# Hook run when Extensions is mixed into +base+.
#
# * InstanceMethods is included into +base+ (once).
# * ClassMethods, OneStructureBasedMethods and TwoStructureBasedMethods are
#   added as class-level methods. They are extended one at a time because
#   the order of the extend calls determines method lookup order.
# * Every public method of the two structure-based modules also gets an
#   instance-level wrapper that calls the class-level method with the
#   instance's +structure+ (or +str_1+ and +str_2+) prepended to the
#   caller's arguments.
def self.included(base)
  base.send(:include, InstanceMethods)
  base.extend(ClassMethods)
  base.extend(OneStructureBasedMethods)
  base.extend(TwoStructureBasedMethods)

  base.class_eval do
    OneStructureBasedMethods.public_instance_methods.each do |class_method|
      define_method(class_method) do |*args|
        self.class.send(class_method, structure, *args)
      end
    end

    TwoStructureBasedMethods.public_instance_methods.each do |class_method|
      define_method(class_method) do |*args|
        self.class.send(class_method, str_1, str_2, *args)
      end
    end
  end
end
|
25
|
+
|
26
|
+
# Class-level helpers for building sequences and structures.
module ClassMethods
  # Builds an Rna (via Rna.init_from_string) from +sequence_length+
  # nucleotides drawn independently and uniformly from A, U, C and G.
  def generate_sequence(sequence_length)
    nucleotides = %w[A U C G]
    random_sequence = sequence_length.times.map { nucleotides[rand(4)] }.join

    Rna.init_from_string(random_sequence)
  end

  # Delegates to Shuffle: Shuffle.new(sequence).shuffle(token_length).
  def shuffle(sequence, token_length = 2)
    shuffler = Shuffle.new(sequence)
    shuffler.shuffle(token_length)
  end

  # Renders a dot-bracket string of +length+ characters in which, for every
  # pair in +base_pairs+, the smaller index holds "(" and the larger ")".
  def structure_from_bp_list(length, base_pairs)
    sorted_pairs = base_pairs.to_a.map { |pair| pair.to_a.sort }
    dot_bracket = "." * length

    sorted_pairs.each do |opening, closing|
      dot_bracket[opening] = "("
      dot_bracket[closing] = ")"
    end

    dot_bracket
  end
end
|
40
|
+
|
41
|
+
# Instance-level helpers; they rely on the host providing +sequence+, +seq+
# and +run+.
module InstanceMethods
  # Shuffles this RNA's sequence through the class-level +shuffle+ with a
  # token length of 2.
  def dishuffle
    self.class.shuffle(sequence, 2)
  end

  # Fraction of characters in +seq+ that are G or C (case-insensitive).
  def gc_content
    gc_total = seq.each_char.count { |base| base =~ /[GC]/i }

    gc_total.to_f / seq.size
  end

  # exp(-mfe / RT) of the :eval run divided by exp(-ensemble_energy / RT) of
  # the :fold run, with RT taken from Wrnap::RT. +dangle+ is passed to both
  # runs as the d: flag.
  def boltzmann_probability(dangle: 2)
    structure_weight = Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT)
    ensemble_weight = Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)

    structure_weight / ensemble_weight
  end
end
|
54
|
+
|
55
|
+
# Helpers that operate on a single dot-bracket structure string.
module OneStructureBasedMethods
  # Number of base pairs in +structure+ plus floor((length - 3) / 2).
  def max_bp_distance(structure)
    pair_count = base_pairs(structure).count

    pair_count + ((structure.length - 3) / 2.0).floor
  end

  # Set of two-element Sets, one per base pair of +structure+.
  def base_pairs(structure)
    pairs = Set.new

    get_pairings(structure).each_with_index do |partner, position|
      # Each pair is met from both ends; the Set keeps a single copy.
      pairs << Set[position, partner] if partner >= 0
    end

    pairs
  end

  # Array with one entry per character: the index of the partner for paired
  # positions, -1 for everything else.
  #
  # Raises RuntimeError when the parentheses are unbalanced.
  def get_pairings(structure)
    pairings = Array.new(structure.length, -1)
    open_positions = []

    structure.each_char.with_index do |symbol, index|
      if symbol == "("
        open_positions.push(index)
      elsif symbol == ")"
        raise "Too many ')' in '#{structure}'" if open_positions.empty?

        opening = open_positions.pop
        pairings[opening] = index
        pairings[index] = opening
      end
    end

    raise "Too many '(' in '#{structure}'" unless open_positions.empty?

    pairings
  end
end
|
89
|
+
|
90
|
+
# Helpers that compare two dot-bracket structure strings.
module TwoStructureBasedMethods
  # Base-pair distance: the number of pairs found in exactly one of the two
  # structures (size of the symmetric difference of their base-pair sets).
  #
  # Raises RuntimeError when the structures differ in length.
  def bp_distance(structure_1, structure_2)
    unless structure_1.length == structure_2.length
      raise "The two structures are not the same length"
    end

    pairs_1 = base_pairs(structure_1)
    pairs_2 = base_pairs(structure_2)

    only_in_first = pairs_1 - pairs_2
    only_in_second = pairs_2 - pairs_1

    (only_in_first + only_in_second).count
  end
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Wrnap
|
2
|
+
class Rna
|
3
|
+
# Mixin that attaches a free-form metadata container to an RNA.
module Metadata
  def self.included(base)
    base.send(:include, InstanceMethods)
  end

  module InstanceMethods
    # Defines +md+ on the including class as a delegator to
    # @metadata.__data__. The class must already provide def_delegator.
    def self.included(base)
      base.send(:def_delegator, :@metadata, :__data__, :md)
    end

    # Returns the metadata container. When a block is given it is first
    # instance_eval'ed on the container, so bare calls inside the block read
    # and write metadata entries.
    def meta(&block)
      container = metadata
      container.instance_eval(&block) if block_given?
      container
    end

    # Same as #meta, but returns the RNA stored in the container instead of
    # the container itself.
    def meta_rna(&block)
      owner = metadata.__rna__
      meta(&block)
      owner
    end
  end

  # Hash-backed store that remembers the RNA it belongs to.
  class Container
    attr_reader :__rna__, :__data__

    def initialize(rna)
      @__rna__ = rna
      @__data__ = {}
    end

    def inspect
      format("#<Metadata: %s>", __data__.inspect)
    end

    alias :to_s :inspect

    # Unknown methods become hash access:
    # * no arguments  - read the entry stored under the method name;
    # * one argument  - store it under the method name, minus a trailing "=";
    # * anything else - regular NoMethodError handling.
    def method_missing(name, *args, &block)
      return __data__[name] if args.empty?
      return super unless args.size == 1

      key = name.to_s.gsub(/=$/, "").to_sym
      __data__[key] = args.first
    end
  end
end
|
45
|
+
end
|
46
|
+
end
|