wrnap 0.12.2 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wrnap.rb +34 -35
- data/lib/wrnap/etl/infernal.rb +9 -8
- data/lib/wrnap/etl/stockholm.rb +6 -6
- data/lib/wrnap/global/yaml.rb +14 -0
- data/lib/wrnap/graphing/r.rb +8 -4
- data/lib/wrnap/package/base.rb +4 -7
- data/lib/wrnap/package/fold.rb +1 -1
- data/lib/wrnap/package/fold_constrained.rb +24 -0
- data/lib/wrnap/package/mfpt.rb +1 -1
- data/lib/wrnap/package/rnabor.rb +2 -2
- data/lib/wrnap/package/varna.rb +4 -3
- data/lib/wrnap/package/xbor.rb +2 -2
- data/lib/wrnap/rna.rb +197 -0
- data/lib/wrnap/rna/box.rb +43 -0
- data/lib/wrnap/rna/constraints.rb +119 -0
- data/lib/wrnap/rna/context.rb +139 -0
- data/lib/wrnap/rna/extensions.rb +102 -0
- data/lib/wrnap/rna/metadata.rb +46 -0
- data/lib/wrnap/rna/motifs.rb +72 -0
- data/lib/wrnap/rna/tree.rb +136 -0
- data/lib/wrnap/rna/wrapper.rb +9 -0
- data/lib/wrnap/version.rb +1 -1
- metadata +14 -9
- data/lib/wrnap/global/rna.rb +0 -190
- data/lib/wrnap/global/rna/context.rb +0 -141
- data/lib/wrnap/global/rna/extensions.rb +0 -104
- data/lib/wrnap/global/rna/helix.rb +0 -36
- data/lib/wrnap/global/rna/metadata.rb +0 -48
- data/lib/wrnap/global/rna/tree.rb +0 -87
@@ -1,141 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # An Rna bound to a coordinate window (+from+ .. +to+) on an accession.
      #
      # The nucleotide sequence is either handed in directly or fetched on first
      # use through Entrez.rna_sequence_from_entrez. The fetched window can be
      # widened through +coord_options+, e.g. { length: 300, direction: 3 },
      # { length: 250, direction: :both } or { length: 200, direction: :down }.
      class Context < Rna
        attr_reader :accession, :from, :to, :coord_options

        class << self
          # Builds a Context without a sequence; #sequence fetches it lazily.
          # +options+ may carry :coords (window extension) and :rna (structure / comment data).
          def init_from_entrez(accession, from, to, options = {}, &block)
            new(
              accession: accession,
              from:      from,
              to:        to,
              options:   options,
              &block
            )
          end

          # Builds a Context around an already-known sequence (String or sequence object).
          def init_from_string(sequence, accession, from, to, options = {}, &block)
            new(
              sequence:  sequence,
              accession: accession,
              from:      from,
              to:        to,
              options:   options,
              &block
            )
          end
        end

        # @param sequence  [String, #upcase, nil] raw sequence; Strings are wrapped in Bio::Sequence::NA
        # @param accession [Object] accession handed to Entrez and used in #identifier
        # @param from      [Integer] first coordinate
        # @param to        [Integer] second coordinate (to > from means plus strand)
        # @param options   [Hash] { coords: {...}, rna: {...} }; missing keys default to {}
        # @raise [ArgumentError] when options[:coords] is non-empty and malformed
        def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {}, &block)
          options = { coords: {}, rna: {} }.merge(options)

          @accession, @from, @to, @coord_options = accession, from, to, options[:coords]

          validate_coord_options

          if sequence
            @raw_sequence = (sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence).upcase
          end

          super(
            sequence:         self.sequence,
            structure:        options[:rna][:structure] || options[:rna][:str_1] || options[:rna][:str],
            second_structure: options[:rna][:second_structure] || options[:rna][:str_2],
            comment:          options[:rna][:comment] || options[:rna][:name] || identifier,
            &block
          )

          # This class serves the sequence from @raw_sequence via #sequence, so the
          # @sequence ivar (presumably assigned by Rna#initialize, which is not
          # visible in this file) is discarded.
          remove_instance_variable(:@sequence)
        end

        # Checks +coord_options+; an empty hash is always accepted.
        #
        # Values are reported with #inspect. The previous "%s" % keys treated the
        # key Array as the format's argument list (so only the first key showed
        # up), and "%d" % length raised TypeError for nil instead of the intended
        # ArgumentError.
        #
        # @raise [ArgumentError] on unexpected keys, a non-positive / non-Integer
        #   :length, or a :direction outside :up, :down, :both, 5, 3
        def validate_coord_options
          return if coord_options.empty?

          unless Set.new(coord_options.keys) == Set.new(%i|direction length|)
            raise ArgumentError, "coord_options keys must contain only [:direction, :length], found: %s" % coord_options.keys.inspect
          end

          length = coord_options[:length]
          unless length.is_a?(Integer) && length > 0
            raise ArgumentError, "coord_options length must be greater than 0, found: %s" % length.inspect
          end

          direction = coord_options[:direction]
          unless [:up, :down, :both, 5, 3].include?(direction)
            raise ArgumentError, "coord_options directions is not a valid key, found: %s" % direction.inspect
          end
        end

        # Smaller of the two coordinates.
        def up_coord
          [from, to].min
        end

        # Larger of the two coordinates.
        def down_coord
          [from, to].max
        end

        # Lowest coordinate covered once the window extension is applied.
        def seq_from
          up_coord + coord_window.min
        end

        # Highest coordinate covered once the window extension is applied.
        def seq_to
          up_coord + coord_window.max
        end

        # :plus or :minus, see #plus_strand?.
        def strand
          plus_strand? ? :plus : :minus
        end

        # True when +to+ is strictly greater than +from+ (equal coordinates count as minus).
        def plus_strand?
          to > from
        end

        def minus_strand?
          !plus_strand?
        end

        # The upcased sequence. When none was supplied it is fetched once through
        # Entrez.rna_sequence_from_entrez (complemented on the minus strand) and memoized.
        def sequence
          return @raw_sequence if @raw_sequence

          entrez_sequence = Entrez.rna_sequence_from_entrez(accession, up_coord, coord_window)
          @raw_sequence   = (minus_strand? ? entrez_sequence.complement : entrez_sequence).upcase
        end

        alias :seq :sequence

        # Returns a NEW Context for the same accession / coordinates with the given
        # coord options. The receiver is left untouched and no :rna options are passed along.
        def extend!(coord_options = {})
          self.class.init_from_entrez(accession, from, to, coords: coord_options)
        end

        # Offsets (relative to #up_coord) of the window to retrieve, as a Range.
        #
        # Without :length and :direction this is 0..(down_coord - up_coord).
        # :both widens both ends; 3 / :down and 5 / :up widen the end that lies in
        # that direction on the current strand.
        def coord_window
          range             = 0..(down_coord - up_coord)
          length, direction = coord_options[:length], coord_options[:direction]

          return range unless length && direction
          return Range.new(range.min - length, range.max + length) if direction == :both

          case [direction, strand]
          when [3, :plus], [:down, :plus], [5, :minus], [:up, :minus] then Range.new(range.min, range.max + length)
          when [5, :plus], [:up, :plus], [3, :minus], [:down, :minus] then Range.new(range.min - length, range.max)
          else
            Wrnap.debugger { "WARNING: value for :direction key in sequence retreival needs to be one of 5, 3, :both - found (%s)" % direction.inspect }
            # Fall back to the unextended window so callers (#seq_from, #seq_to,
            # #sequence) still receive a Range after the warning.
            range
          end
        end

        # "<accession> <from> <+|-> <to>"
        def identifier
          "%s %d %s %d" % [accession, from, plus_strand? ? ?+ : ?-, to]
        end

        # The inherited inspect string with #identifier spliced in before the trailing class name.
        def inspect
          super.gsub(/((\w(::)?)+)>$/) { |_| "%s %s>" % [identifier, $1] }
        end
      end
    end
  end
end
|
@@ -1,104 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # Sequence / structure helpers mixed into an RNA class.
      #
      # Including this module
      # * includes InstanceMethods,
      # * extends the class with ClassMethods, OneStructureBasedMethods and
      #   TwoStructureBasedMethods, and
      # * defines an instance-level wrapper for every structure-based method that
      #   calls the class-level version with the instance's own +structure+
      #   (one-structure methods) or +str_1+ and +str_2+ (two-structure methods).
      module Extensions
        def self.included(base)
          # InstanceMethods used to be included a second time at the end of this
          # hook; the repeat was a no-op and has been dropped.
          base.send(:include, InstanceMethods)
          base.extend(ClassMethods)
          base.extend(OneStructureBasedMethods)
          base.extend(TwoStructureBasedMethods)

          base.class_eval do
            OneStructureBasedMethods.public_instance_methods.each do |class_method|
              define_method(class_method) do |*args|
                self.class.send(class_method, structure, *args)
              end
            end

            TwoStructureBasedMethods.public_instance_methods.each do |class_method|
              define_method(class_method) do |*args|
                self.class.send(class_method, str_1, str_2, *args)
              end
            end
          end
        end

        module ClassMethods
          # Random sequence of +sequence_length+ nucleotides, each drawn
          # independently and uniformly from A, U, C, G (0th order Markov chain),
          # wrapped via Rna.init_from_string.
          def generate_sequence(sequence_length)
            # Collect the letters and join once instead of growing a String with +.
            Rna.init_from_string(sequence_length.times.map { %w[A U C G][rand(4)] }.join)
          end

          # Delegates to Shuffle (defined elsewhere), shuffling in tokens of +token_length+.
          def shuffle(sequence, token_length = 2)
            Shuffle.new(sequence).shuffle(token_length)
          end

          # Dot-bracket string of +length+ characters with "(" at the smaller and
          # ")" at the larger index of every pair in +base_pairs+.
          def structure_from_bp_list(length, base_pairs)
            base_pairs.to_a.map(&:to_a).map(&:sort).each_with_object("." * length) do |(i, j), structure|
              structure[i] = ?(
              structure[j] = ?)
            end
          end
        end

        module InstanceMethods
          # Shuffle of this sequence using tokens of length 2.
          def dishuffle
            self.class.shuffle(sequence, 2)
          end

          # Fraction of G / C characters (case-insensitive) in +seq+.
          # An empty sequence yields 0.0 rather than the NaN that 0.0 / 0 produces.
          def gc_content
            return 0.0 if seq.size.zero?

            seq.split(//).count { |nucleotide| nucleotide =~ /[GC]/i }.to_f / seq.size
          end

          # exp(-E / RT) of the :eval run's mfe divided by exp(-E / RT) of the
          # :fold run's ensemble_energy, both run with the given dangle setting.
          def boltzmann_probability(dangle: 2)
            Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
          end
        end

        module OneStructureBasedMethods
          # Number of base pairs in +structure+ plus floor((length - 3) / 2).
          def max_bp_distance(structure)
            base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
          end

          # Set of two-element Sets, one per base pair. Each inner Set is built as
          # Set[index, partner] the first time the pair is met, i.e. with the
          # opening position inserted first.
          def base_pairs(structure)
            get_pairings(structure).each_with_index.each_with_object(Set.new) do |(partner, index), pairs|
              pairs << Set[index, partner] if partner >= 0
            end
          end

          # Array with one entry per position: the index of the pairing partner,
          # or -1 for an unpaired position.
          #
          # @raise [RuntimeError] when the parentheses are unbalanced
          def get_pairings(structure)
            open_positions = []
            pairings       = Array.new(structure.length, -1)

            structure.each_char.each_with_index do |symbol, index|
              case symbol
              when "("
                open_positions.push(index)
              when ")"
                raise "Too many ')' in '#{structure}'" if open_positions.empty?

                opening           = open_positions.pop
                pairings[opening] = index
                pairings[index]   = opening
              end
            end

            raise "Too many '(' in '#{structure}'" unless open_positions.empty?

            pairings
          end
        end

        module TwoStructureBasedMethods
          # Base pair distance: size of the symmetric difference of the two base pair sets.
          #
          # @raise [RuntimeError] when the structures differ in length
          def bp_distance(structure_1, structure_2)
            raise "The two structures are not the same length" unless structure_1.length == structure_2.length

            (base_pairs(structure_1) ^ base_pairs(structure_2)).size
          end
        end
      end
    end
  end
end
|
@@ -1,36 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # Groups the host object's +base_pairs+ into helices (runs of stacked pairs).
      module HelixFunctions
        # Array of helices; each helix is an array of [i, j] pairs, outermost
        # first. A pair joins the current helix when the pair just before it (in
        # order of first index) is exactly [i - 1, j + 1]; otherwise it starts a
        # new helix. Returns [] when there are no base pairs.
        def helices
          ordered_pairs = base_pairs.sort_by(&:first).map(&:to_a)

          ordered_pairs.each_with_object([]) do |(i, j), stacks|
            if !stacks.empty? && stacks.last.last == [i - 1, j + 1]
              stacks.last << [i, j]
            else
              stacks << [[i, j]]
            end
          end
        end

        # One Helix per entry of #helices, anchored at the outermost pair and
        # carrying the number of stacked pairs as its length.
        def collapsed_helices
          helices.map do |stack|
            outer_i, outer_j = stack.first
            Helix.new(outer_i, outer_j, stack.length)
          end
        end
      end

      # A helix described by its outermost base pair (i, j) and its length in base pairs.
      class Helix
        attr_reader :i, :j
        attr_accessor :length

        def initialize(i, j, length)
          @i      = i
          @j      = j
          @length = length
        end

        # Label of the form "(i, j)".
        def name
          format("(%d, %d)", i, j)
        end
      end
    end
  end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # Free-form metadata support for an RNA object.
      #
      # Including Metadata pulls in InstanceMethods, which in turn asks the host
      # class for a +md+ delegator onto @metadata.__data__. The host therefore has
      # to provide +def_delegator+ (e.g. by extending Forwardable) and a
      # +metadata+ reader; neither is defined in this file.
      module Metadata
        def self.included(base)
          base.send(:include, InstanceMethods)
        end

        module InstanceMethods
          def self.included(base)
            base.class_eval do
              def_delegator :@metadata, :__data__, :md
            end
          end

          # Returns the metadata container. With a block, the block is first
          # instance_eval'd against the container, so bare calls such as
          # +name "foo"+ store values in it.
          def meta(&block)
            metadata.tap { metadata.instance_eval(&block) if block_given? }
          end

          # Same as #meta, but returns the RNA the container was created for.
          def meta_rna(&block)
            metadata.__rna__.tap { meta(&block) }
          end
        end

        # Hash-backed bag of metadata with method-style access:
        #
        #   container.label            # => __data__[:label]
        #   container.label = "x"      # stores under :label
        #   container.label("x")       # same as the setter form
        class Container
          attr_reader :__rna__, :__data__

          def initialize(rna)
            @__rna__, @__data__ = rna, {}
          end

          def inspect
            "#<Metadata: %s>" % __data__.inspect
          end

          alias :to_s :inspect

          # Zero arguments reads a key (nil when absent); one argument writes it,
          # with a trailing "=" stripped from the method name. Anything else is
          # handed to the default NoMethodError handling.
          def method_missing(name, *args, &block)
            case args.size
            when 0 then __data__[name]
            when 1 then __data__[name.to_s.gsub(/=$/, "").to_sym] = args.first
            else
              super
            end
          end

          # Keeps respond_to? / method in step with #method_missing for the cases
          # that can be decided from the name alone: setter-style names and keys
          # that have already been stored. Reads of unknown keys still go through
          # #method_missing and return nil; they are just not advertised.
          def respond_to_missing?(name, include_private = false)
            name.to_s.end_with?("=") || __data__.key?(name) || super
          end
        end
      end
    end
  end
end
|
@@ -1,87 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # Adds a lazily built helix tree to an RNA object. Relies on +meta_rna+ and
      # +md+ from the host (neither is defined in this file).
      module TreeFunctions
        # Stores a freshly planted TreePlanter under the :tree metadata key and
        # returns whatever +meta_rna+ returns.
        def with_tree
          meta_rna { |metadata| tree(TreePlanter.new(metadata.__rna__)) }
        end

        # The stored tree, planting it first when md[:tree] is not set yet.
        def trunk
          md[:tree] || with_tree.trunk
        end
      end

      # Builds and manipulates a tree of an RNA's collapsed helices.
      # Nodes are Tree::TreeNode instances (from a library this file does not require itself).
      class TreePlanter
        attr_reader :rna, :root

        # @param rna  the RNA the tree describes
        # @param tree an existing root node to adopt; when falsy the tree is built from +rna+
        def initialize(rna, tree = false)
          @rna  = rna
          @root = tree || build_tree
        end

        # Folds rna.collapsed_helices into a tree under a :root node whose content
        # is the RNA, and returns that root.
        #
        # The accumulator is the most recently added node; every branch below
        # evaluates to the new node (this presumes TreeNode#<< returns the added
        # child - confirm against the tree library).
        #
        # NOTE(review): when neither comparison matches, the block evaluates to
        # nil and the following step fails on nil.
        def build_tree
          rna.collapsed_helices.inject(Tree::TreeNode.new(:root, rna)) do |tree, helix|
            node = Tree::TreeNode.new(helix.name, helix)

            if tree.is_root?
              tree << node
            elsif helix.i > tree.content.j
              # It's a sibling, pop up until we're at its parent node.
              tree = tree.parent until tree.is_root? || tree.content.j > helix.i
              node.tap { tree << node }
            elsif helix.j < tree.content.j
              # Going deeper.
              tree << node
            end
          end.root
        end

        # A new planter over root.dup with interior loops merged.
        def coalesce
          self.class.new(rna, root.dup).tap { |tree| tree.merge_interior_loops! }
        end

        # Merges interior loops in place and returns self.
        def coalesce!
          tap { merge_interior_loops! }
        end

        # Visits every node bottom-up; each non-root node with exactly one child
        # is taken out of the tree and its child is attached to the node's parent
        # instead. Returns the root.
        def merge_interior_loops!
          root.tap do
            self.class.postorder_traversal(root) do |node|
              if node.children.count == 1 && !node.is_root?
                child = node.children.first
                node.parent.add(child)
                node.remove_from_parent!
              end
            end
          end
        end

        # node_depth of every node, in the order the root enumerates them.
        def depth_signature
          root.map(&:node_depth)
        end

        # Prints the tree via the root's print_tree.
        def pp
          root.print_tree and nil
        end

        def inspect
          "#<TreePlanter: %s>" % depth_signature.inspect
        end

        alias :to_s :inspect

        class << self
          # Yields +node+ first, then walks each child subtree the same way.
          # Returns the block's value for +node+.
          #
          # This used to be a copy of postorder_traversal (children were yielded
          # before the node); it now visits in actual pre-order.
          def preorder_traversal(node, &block)
            visited = yield node
            # Iterate a snapshot so a block that re-parents nodes cannot disturb the walk.
            node.children.dup.each { |child| preorder_traversal(child, &block) }
            visited
          end

          # Walks each child subtree, then yields +node+. Returns the block's
          # value for +node+.
          def postorder_traversal(node, &block)
            # Iterate a snapshot: merge_interior_loops! adds to and removes from
            # the parent's children while this walk is still in progress.
            node.children.dup.each { |child| postorder_traversal(child, &block) }
            yield node
          end
        end
      end
    end
  end
end
|