wrnap 0.12.2 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wrnap.rb +34 -35
- data/lib/wrnap/etl/infernal.rb +9 -8
- data/lib/wrnap/etl/stockholm.rb +6 -6
- data/lib/wrnap/global/yaml.rb +14 -0
- data/lib/wrnap/graphing/r.rb +8 -4
- data/lib/wrnap/package/base.rb +4 -7
- data/lib/wrnap/package/fold.rb +1 -1
- data/lib/wrnap/package/fold_constrained.rb +24 -0
- data/lib/wrnap/package/mfpt.rb +1 -1
- data/lib/wrnap/package/rnabor.rb +2 -2
- data/lib/wrnap/package/varna.rb +4 -3
- data/lib/wrnap/package/xbor.rb +2 -2
- data/lib/wrnap/rna.rb +197 -0
- data/lib/wrnap/rna/box.rb +43 -0
- data/lib/wrnap/rna/constraints.rb +119 -0
- data/lib/wrnap/rna/context.rb +139 -0
- data/lib/wrnap/rna/extensions.rb +102 -0
- data/lib/wrnap/rna/metadata.rb +46 -0
- data/lib/wrnap/rna/motifs.rb +72 -0
- data/lib/wrnap/rna/tree.rb +136 -0
- data/lib/wrnap/rna/wrapper.rb +9 -0
- data/lib/wrnap/version.rb +1 -1
- metadata +14 -9
- data/lib/wrnap/global/rna.rb +0 -190
- data/lib/wrnap/global/rna/context.rb +0 -141
- data/lib/wrnap/global/rna/extensions.rb +0 -104
- data/lib/wrnap/global/rna/helix.rb +0 -36
- data/lib/wrnap/global/rna/metadata.rb +0 -48
- data/lib/wrnap/global/rna/tree.rb +0 -87
@@ -0,0 +1,43 @@
|
|
1
|
+
module Wrnap
  class Rna
    # A thin collection wrapper around an Array of RNA objects.
    #
    # The wrapped array is exposed through +rnas+; a handful of Array methods
    # (size, length, [], []=, <<) are delegated to it and Enumerable is mixed
    # in on top of +each+.
    class Box
      extend Forwardable
      include Enumerable
      include Wrnap::Global::Yaml

      attr_reader :rnas

      # Builds a Box from every file matched by +pattern+.
      #
      # When +pattern+ names a directory, the "*.fa" files directly inside it
      # are used; otherwise +pattern+ is handed to Dir[] unchanged. Each file
      # is passed to RNA.from_fasta together with the optional block.
      def self.load_all(pattern = "*.fa", &block)
        glob = File.directory?(pattern) ? pattern + "/*.fa" : pattern
        loaded = Dir[glob].map { |file| RNA.from_fasta(file, &block) }

        new(loaded)
      end

      # Wraps +rnas+; anything that does not report itself as an Array is
      # placed in a one-element array.
      def initialize(rnas)
        @rnas =
          if rnas.kind_of?(Array)
            rnas
          else
            [rnas]
          end
      end

      # Calls +pp+ on each contained RNA; always returns nil.
      def pp
        rnas.each(&:pp)
        nil
      end

      # Returns a new Box holding this box's RNAs followed by those of
      # +arrayish+ (another Box, or something Array#+ accepts).
      def +(arrayish)
        addition = arrayish.is_a?(Box) ? arrayish.rnas : arrayish

        self.class.new(rnas + addition)
      end

      def_delegators :@rnas, :size, :length, :[], :[]=, :<<

      # Iterates over the contained RNAs (backs Enumerable).
      def each(&block)
        rnas.each(&block)
      end

      # Reports true for Array so a Box passes kind_of?(Array) checks (such as
      # the one in #initialize); every other class gets the default answer.
      def kind_of?(klass)
        return true if klass == Array

        super
      end

      def inspect
        "#<Wrnap::Rna::Box with %d RNAs>" % rnas.size
      end
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module Wrnap
  class Rna
    # Mixin that lets an RNA build and read a constraint mask: a string with
    # one character per position, "." by default, overwritten by the symbols
    # of the recorded constraints.
    module Constraints
      def self.included(base)
        base.send(:include, InstanceMethods)
      end

      module InstanceMethods
        # Returns whatever is stored under :constraint_mask in +md+.
        def constraint_mask
          md[:constraint_mask]
        end

        # Evaluates +block+ against a fresh ConstraintBox for this RNA and
        # stores the box through the one-argument constraint_mask call made
        # inside the meta_rna block.
        # NOTE(review): relies on meta_rna evaluating the block with the
        # metadata container as self, so that call is a metadata write rather
        # than the zero-argument reader above -- confirm against Metadata.
        def build_constraints(&block)
          meta_rna do |metadata|
            constraint_mask(ConstraintBox.new(metadata.__rna__).tap { |box| box.instance_eval(&block) })
          end
        end
      end

      # Accumulates ConstraintData entries for one RNA and renders them as a
      # mask string via #to_s / #mask.
      class ConstraintBox
        attr_reader :rna, :constraints

        def initialize(rna)
          @rna, @constraints = rna, []
        end

        # Loop spanning positions i..j.
        def between(i, j)
          Loop.new(i, j)
        end

        # Loop spanning the positions strictly between i and j.
        def inside(i, j)
          between(i + 1, j - 1)
        end

        # Dispatches on the type of +mask_object+.
        #
        # mask_object - a Helix or a Loop; any other object is ignored and nil
        #               is returned.
        # args        - an optional trailing options hash (:side, :symbol).
        #
        # The options are re-expanded as keywords explicitly: passing the
        # collected hash on positionally is rejected by keyword-only methods
        # under Ruby 3. For a Loop only :symbol is honoured, and when it is
        # absent mask_loop!'s own default applies instead of nil.
        def mask!(mask_object, *args)
          options = args.first || {}

          case mask_object
          when Helix
            mask_helix!(mask_object, **options)
          when Loop
            if options.key?(:symbol)
              mask_loop!(mask_object, symbol: options[:symbol])
            else
              mask_loop!(mask_object)
            end
          end
        end

        # Masks one or both strands of +helix+ (as returned by helix.to_loops).
        #
        # side   - :left, :right or :both.
        # symbol - a single character used for both strands, or a longer
        #          string whose first two characters are used for the left and
        #          right strand respectively.
        def mask_helix!(helix, side: :both, symbol: "()")
          left_loop, right_loop = helix.to_loops

          if symbol.length > 1
            left_symbol, right_symbol = symbol.chars
          else
            left_symbol = right_symbol = symbol
          end

          mask_loop!(left_loop, symbol: left_symbol) if side == :left || side == :both
          mask_loop!(right_loop, symbol: right_symbol) if side == :right || side == :both
        end

        # Masks positions l00p.i..l00p.j with +symbol+.
        def mask_loop!(l00p, symbol: "x")
          mask_region!(l00p.i, l00p.j, symbol: symbol)
        end

        # Records a constraint over positions i..j.
        #
        # Raises ArgumentError unless +symbol+ is exactly one character: an
        # empty symbol would delete characters from the mask in #to_s, and a
        # longer one would lengthen it.
        def mask_region!(i, j, symbol: "x")
          unless symbol.length == 1
            raise ArgumentError, "Trying to apply symbol '%s' from %d to %d, all symbols must be 1 char long." % [symbol, i, j]
          end

          constraints << ConstraintData.new(i, j, symbol)
          prune!
        end

        # Drops repeated constraints, keeping the first one recorded under
        # each name, in insertion order.
        def prune!
          @constraints = constraints.uniq(&:name)
        end

        def inspect
          "#<Constraints: %s>" % constraints.map(&:name).join(", ")
        end

        # Renders the mask: rna.len dots, with each constraint's range
        # overwritten by its symbol, applied in insertion order.
        def to_s
          (?. * rna.len).tap do |string|
            constraints.each { |constraint| string[constraint.from..constraint.to] = constraint.symbol * constraint.length }
          end
        end

        alias :mask :to_s

        # Two families of dynamic methods are supported:
        #   * names matching TreeStem::STEM_NOTATION_REGEX are forwarded to
        #     rna.trunk;
        #   * prohibit / force, optionally suffixed _left_stem or _right_stem,
        #     call mask!(args[0], ...) with default symbol "x" (prohibit) or
        #     "()" (force), overridable through args[1].
        # Anything else falls through to the default method_missing.
        def method_missing(name, *args, &block)
          method_name = name.to_s

          if method_name =~ TreeStem::STEM_NOTATION_REGEX
            rna.trunk.send(method_name)
          elsif mask_type = method_name.match(/^(prohibit|force)(_(left|right)_stem)?$/)
            side_symbol = mask_type[3] ? mask_type[3].to_sym : :both

            case mask_type[1]
            when "prohibit" then mask!(args[0], side: side_symbol, symbol: args[1] || ?x)
            when "force"    then mask!(args[0], side: side_symbol, symbol: args[1] || "()")
            end
          else
            super
          end
        end
      end

      # One masked region: an inclusive from..to range and the symbol applied
      # to it.
      class ConstraintData
        attr_reader :from, :to, :symbol

        def initialize(from, to, symbol)
          @from, @to, @symbol = from, to, symbol
        end

        # Identity string used by ConstraintBox#prune! to detect duplicates.
        def name
          "(%d to %d as '%s')" % [from, to, symbol]
        end

        # Number of positions covered (inclusive range).
        def length
          to - from + 1
        end

        def inspect
          "#<Constraint: %s>" % name
        end
      end
    end
  end
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
module Wrnap
  class Rna
    # An Rna that additionally carries an accession and from/to coordinates,
    # with an optional coordinate window (coord_options) extending the region
    # upstream and/or downstream.
    class Context < Rna
      attr_reader :accession, :from, :to, :coord_options

      class << self
        # Builds a Context without a sequence; #sequence then obtains it from
        # Wrnap::Global::Entrez on first access.
        def init_from_entrez(accession, from, to, options = {}, &block)
          new(
            accession: accession,
            from:      from,
            to:        to,
            options:   options,
            &block
          )
        end

        # Builds a Context around an already-known +sequence+.
        def init_from_string(sequence, accession, from, to, options = {}, &block)
          new(
            sequence:  sequence,
            accession: accession,
            from:      from,
            to:        to,
            options:   options,
            &block
          )
        end
      end

      # options may carry two sub-hashes:
      #   :coords - the coordinate window, validated by #validate_coord_options
      #   :rna    - values forwarded to Rna#initialize (:structure / :str_1 /
      #             :str, :second_structure / :str_2, :comment / :name)
      def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {}, &block)
        options = { coords: {}, rna: {} }.merge(options)

        @accession, @from, @to, @coord_options = accession, from, to, options[:coords]

        validate_coord_options

        if sequence
          @raw_sequence = (sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence).upcase
        end

        super(
          sequence:         self.sequence,
          structure:        options[:rna][:structure] || options[:rna][:str_1] || options[:rna][:str],
          second_structure: options[:rna][:second_structure] || options[:rna][:str_2],
          comment:          options[:rna][:comment] || options[:rna][:name] || identifier,
          &block
        )

        # NOTE(review): presumably the parent initializer stores @sequence;
        # this class serves the sequence from @raw_sequence instead -- confirm.
        remove_instance_variable(:@sequence)
      end

      # Checks coord_options. An empty hash is accepted as "no window";
      # otherwise the keys must be exactly :direction and :length, the length
      # a positive Integer and the direction one of :up, :down, :both, 5, 3.
      #
      # Raises ArgumentError on any violation. Offending values are rendered
      # with #inspect and wrapped in an array, so that String#% neither
      # unpacks a key list (showing only its first element) nor raises
      # TypeError on a nil or non-numeric length.
      def validate_coord_options
        return if coord_options.empty?

        unless coord_options.size == 2 && coord_options.key?(:direction) && coord_options.key?(:length)
          raise ArgumentError, "coord_options keys must contain only [:direction, :length], found: %s" % [coord_options.keys.inspect]
        end

        length = coord_options[:length]
        unless length.is_a?(Integer) && length > 0
          raise ArgumentError, "coord_options length must be greater than 0, found: %s" % [length.inspect]
        end

        direction = coord_options[:direction]
        unless [:up, :down, :both, 5, 3].include?(direction)
          raise ArgumentError, "coord_options directions is not a valid key, found: %s" % [direction.inspect]
        end
      end

      # Smaller of the two coordinates.
      def up_coord
        [from, to].min
      end

      # Larger of the two coordinates.
      def down_coord
        [from, to].max
      end

      # First coordinate of the (possibly extended) window.
      def seq_from
        up_coord + coord_window.min
      end

      # Last coordinate of the (possibly extended) window.
      def seq_to
        up_coord + coord_window.max
      end

      def strand
        plus_strand? ? :plus : :minus
      end

      def plus_strand?
        to > from
      end

      def minus_strand?
        !plus_strand?
      end

      # Returns the upcased sequence. When none was supplied it is obtained
      # once from Wrnap::Global::Entrez.rna_sequence_from_entrez (and
      # complemented on the minus strand), then cached in @raw_sequence.
      def sequence
        return @raw_sequence if @raw_sequence

        entrez_sequence = Wrnap::Global::Entrez.rna_sequence_from_entrez(accession, up_coord, coord_window)
        @raw_sequence   = (minus_strand? ? entrez_sequence.complement : entrez_sequence).upcase
      end

      alias :seq :sequence

      # Returns a NEW Context for the same accession and coordinates, built
      # with init_from_entrez and the given window options.
      def extend!(coord_options = {})
        self.class.init_from_entrez(accession, from, to, coords: coord_options)
      end

      # Offsets, relative to up_coord, of the region to retrieve.
      #
      # Without coord_options this is 0..(down_coord - up_coord). With
      # { length: n, direction: d } the range grows by n on both ends
      # (:both), on the high end (3 / :down on the plus strand, 5 / :up on
      # the minus strand) or on the low end (the remaining combinations).
      # Examples: { length: 300, direction: 3 },
      #           { length: 250, direction: :both },
      #           { length: 200, direction: :down }
      #
      # An unrecognised direction emits a warning through Wrnap.debugger and
      # yields the unextended range, so callers always receive a Range.
      def coord_window
        range     = 0..(down_coord - up_coord)
        length    = coord_options[:length]
        direction = coord_options[:direction]

        return range unless length && direction
        return Range.new(range.min - length, range.max + length) if direction == :both

        case [direction, strand]
        when [3, :plus], [:down, :plus], [5, :minus], [:up, :minus]
          Range.new(range.min, range.max + length)
        when [5, :plus], [:up, :plus], [3, :minus], [:down, :minus]
          Range.new(range.min - length, range.max)
        else
          Wrnap.debugger { "WARNING: value for :direction key in sequence retrieval needs to be one of 5, 3, :up, :down, :both - found (%s)" % direction.inspect }
          range
        end
      end

      # "<accession> <from> <+|-> <to>"
      def identifier
        "%s %d %s %d" % [accession, from, plus_strand? ? ?+ : ?-, to]
      end

      # Parent's inspect output with the identifier inserted before the
      # trailing class name.
      def inspect
        super.gsub(/((\w(::)?)+)>$/) { |_| "%s %s>" % [identifier, $1] }
      end
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module Wrnap
  class Rna
    # Utility methods mixed into an RNA class. Structure-based helpers are
    # defined once as class methods taking explicit structure strings and are
    # mirrored as instance methods that pass the instance's own structure(s).
    module Extensions
      # Wires the sub-modules into +base+:
      #   * InstanceMethods is included (once);
      #   * ClassMethods and both structure-based modules are extended;
      #   * for every public method of OneStructureBasedMethods an instance
      #     method is defined that calls the class method with +structure+
      #     prepended, and likewise for TwoStructureBasedMethods with +str_1+
      #     and +str_2+.
      def self.included(base)
        base.send(:include, InstanceMethods)
        base.extend(ClassMethods)
        base.extend(OneStructureBasedMethods)
        base.extend(TwoStructureBasedMethods)

        base.class_eval do
          OneStructureBasedMethods.public_instance_methods.each do |class_method|
            define_method(class_method) do |*args|
              self.class.send(class_method, structure, *args)
            end
          end

          TwoStructureBasedMethods.public_instance_methods.each do |class_method|
            define_method(class_method) do |*args|
              self.class.send(class_method, str_1, str_2, *args)
            end
          end
        end
      end

      module ClassMethods
        # Builds an Rna (via Rna.init_from_string) from +sequence_length+
        # bases, each drawn independently and uniformly from A, U, C, G
        # (0th order Markov chain w/ uniform probability distribution).
        # The bases are collected and joined once rather than concatenated
        # one by one.
        def generate_sequence(sequence_length)
          Rna.init_from_string(sequence_length.times.map { %w[A U C G][rand(4)] }.join)
        end

        # Delegates to Shuffle.new(sequence).shuffle(token_length).
        def shuffle(sequence, token_length = 2)
          Shuffle.new(sequence).shuffle(token_length)
        end

        # Returns a dot-bracket string of +length+ characters in which, for
        # each pair in +base_pairs+, the smaller index holds "(" and the
        # larger holds ")".
        def structure_from_bp_list(length, base_pairs)
          base_pairs.to_a.each_with_object("." * length) do |pair, structure|
            i, j = pair.to_a.sort
            structure[i] = ?(
            structure[j] = ?)
          end
        end
      end

      module InstanceMethods
        # Shuffles this RNA's sequence with a token length of 2.
        def dishuffle
          self.class.shuffle(sequence, 2)
        end

        # Fraction of characters in +seq+ that are G or C (case-insensitive).
        def gc_content
          seq.count("GCgc").to_f / seq.size
        end

        # exp(-E_mfe / RT) / exp(-E_ensemble / RT), with both energies taken
        # from run(:eval) and run(:fold) using the given dangle setting.
        def boltzmann_probability(dangle: 2)
          Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
        end
      end

      module OneStructureBasedMethods
        # Number of base pairs in +structure+ plus floor((length - 3) / 2).
        def max_bp_distance(structure)
          base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
        end

        # Set of base pairs, each pair itself a two-element Set of indices.
        def base_pairs(structure)
          get_pairings(structure).each_with_index.each_with_object(Set.new) do |(j, i), set|
            set << Set[i, j] if j >= 0
          end
        end

        # Array with one entry per position holding the index of its pairing
        # partner, or -1 when the position is unpaired.
        #
        # Raises RuntimeError when the parentheses are unbalanced.
        def get_pairings(structure)
          open_positions = []
          pairings       = Array.new(structure.length, -1)

          structure.each_char.with_index do |symbol, index|
            case symbol
            when "("
              open_positions.push(index)
            when ")"
              raise "Too many ')' in '#{structure}'" if open_positions.empty?

              opening           = open_positions.pop
              pairings[opening] = index
              pairings[index]   = opening
            end
          end

          raise "Too many '(' in '#{structure}'" unless open_positions.empty?

          pairings
        end
      end

      module TwoStructureBasedMethods
        # Takes two structures and calculates the distance between them by
        # |symmetric difference(bp_in_a, bp_in_b)|.
        #
        # Raises RuntimeError when the structures differ in length.
        def bp_distance(structure_1, structure_2)
          raise "The two structures are not the same length" unless structure_1.length == structure_2.length

          (base_pairs(structure_1) ^ base_pairs(structure_2)).count
        end
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Wrnap
  class Rna
    # Mixin that attaches a free-form metadata container to an RNA.
    module Metadata
      def self.included(base)
        base.send(:include, InstanceMethods)
      end

      module InstanceMethods
        # Adds +md+ to the including class as a delegator to
        # @metadata.__data__. The including class must respond to
        # def_delegator.
        def self.included(base)
          base.send(:def_delegator, :@metadata, :__data__, :md)
        end

        # Returns the metadata container; when a block is supplied it is
        # first evaluated with the container as self.
        def meta(&block)
          container = metadata
          metadata.instance_eval(&block) if block
          container
        end

        # Same as #meta, but returns the container's __rna__ instead of the
        # container itself.
        def meta_rna(&block)
          owner = metadata.__rna__
          meta(&block)
          owner
        end
      end

      # Hash-backed store. Unknown zero-argument calls read a key, unknown
      # one-argument calls (with or without a trailing "=") write one.
      class Container
        attr_reader :__rna__, :__data__

        def initialize(rna)
          @__rna__  = rna
          @__data__ = {}
        end

        def inspect
          "#<Metadata: %s>" % __data__.inspect
        end

        alias :to_s :inspect

        # name()      -> __data__[:name]
        # name(value) -> __data__[:name] = value   (also reached by name=)
        # Calls with two or more arguments fall through to the default
        # method_missing.
        def method_missing(name, *args, &block)
          if args.size == 0
            __data__[name]
          elsif args.size == 1
            key = name.to_s.gsub(/=$/, "").to_sym
            __data__[key] = args.first
          else
            super
          end
        end
      end
    end
  end
end
|