wrnap 0.12.2 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/wrnap.rb +34 -35
- data/lib/wrnap/etl/infernal.rb +9 -8
- data/lib/wrnap/etl/stockholm.rb +6 -6
- data/lib/wrnap/global/yaml.rb +14 -0
- data/lib/wrnap/graphing/r.rb +8 -4
- data/lib/wrnap/package/base.rb +4 -7
- data/lib/wrnap/package/fold.rb +1 -1
- data/lib/wrnap/package/fold_constrained.rb +24 -0
- data/lib/wrnap/package/mfpt.rb +1 -1
- data/lib/wrnap/package/rnabor.rb +2 -2
- data/lib/wrnap/package/varna.rb +4 -3
- data/lib/wrnap/package/xbor.rb +2 -2
- data/lib/wrnap/rna.rb +197 -0
- data/lib/wrnap/rna/box.rb +43 -0
- data/lib/wrnap/rna/constraints.rb +119 -0
- data/lib/wrnap/rna/context.rb +139 -0
- data/lib/wrnap/rna/extensions.rb +102 -0
- data/lib/wrnap/rna/metadata.rb +46 -0
- data/lib/wrnap/rna/motifs.rb +72 -0
- data/lib/wrnap/rna/tree.rb +136 -0
- data/lib/wrnap/rna/wrapper.rb +9 -0
- data/lib/wrnap/version.rb +1 -1
- metadata +14 -9
- data/lib/wrnap/global/rna.rb +0 -190
- data/lib/wrnap/global/rna/context.rb +0 -141
- data/lib/wrnap/global/rna/extensions.rb +0 -104
- data/lib/wrnap/global/rna/helix.rb +0 -36
- data/lib/wrnap/global/rna/metadata.rb +0 -48
- data/lib/wrnap/global/rna/tree.rb +0 -87
@@ -1,141 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # An Rna tied to a location: an accession plus +from+ / +to+ coordinates,
      # optionally widened by +coord_options+ (+:length+ and +:direction+).
      # When no sequence is supplied, it is fetched through
      # Entrez.rna_sequence_from_entrez the first time #sequence is called
      # (which happens during #initialize).
      class Context < Rna
        attr_reader :accession, :from, :to, :coord_options

        class << self
          # Builds a Context whose sequence will be retrieved from Entrez.
          # +options+ may carry :coords (window extension) and :rna (structure,
          # comment, ...) sub-hashes; see #initialize.
          def init_from_entrez(accession, from, to, options = {}, &block)
            new(accession: accession, from: from, to: to, options: options, &block)
          end

          # Builds a Context around an already-known +sequence+ (a String or a
          # sequence object responding to #upcase).
          def init_from_string(sequence, accession, from, to, options = {}, &block)
            new(sequence: sequence, accession: accession, from: from, to: to, options: options, &block)
          end
        end

        # options[:coords] - {} or { length: Integer > 0, direction: :up/:down/:both/5/3 }
        # options[:rna]    - structure / second_structure / comment values
        #                    (with their :str_1, :str, :str_2, :name aliases)
        #                    forwarded to Rna#initialize.
        #
        # Raises ArgumentError when options[:coords] is malformed.
        def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {}, &block)
          options = { coords: {}, rna: {} }.merge(options)

          @accession, @from, @to, @coord_options = accession, from, to, options[:coords]

          validate_coord_options

          if sequence
            @raw_sequence = (sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence).upcase
          end

          rna_options = options[:rna]

          super(
            sequence:         self.sequence,
            structure:        rna_options[:structure] || rna_options[:str_1] || rna_options[:str],
            second_structure: rna_options[:second_structure] || rna_options[:str_2],
            comment:          rna_options[:comment] || rna_options[:name] || identifier,
            &block
          )

          # NOTE(review): presumably Rna#initialize (not visible here) stores
          # @sequence; it is dropped so that #sequence below, backed by
          # @raw_sequence, is the only copy. Confirm against Rna.
          remove_instance_variable(:@sequence)
        end

        # Checks that coord_options is either empty or exactly
        # { length: <Integer > 0>, direction: <:up, :down, :both, 5 or 3> }.
        #
        # Raises ArgumentError otherwise. Offending values are rendered with
        # #inspect so that nil, arrays and non-numeric lengths are reported
        # verbatim instead of breaking the format string.
        def validate_coord_options
          return if coord_options.empty?

          unless Set.new(coord_options.keys) == Set.new(%i|direction length|)
            raise ArgumentError.new("coord_options keys must contain only [:direction, :length], found: %s" % coord_options.keys.inspect)
          end

          unless (length = coord_options[:length]).is_a?(Integer) && length > 0
            raise ArgumentError.new("coord_options length must be greater than 0, found: %s" % length.inspect)
          end

          unless [:up, :down, :both, 5, 3].include?(direction = coord_options[:direction])
            raise ArgumentError.new("coord_options directions is not a valid key, found: %s" % direction.inspect)
          end
        end

        # Smaller of the two coordinates, regardless of strand.
        def up_coord
          [from, to].min
        end

        # Larger of the two coordinates, regardless of strand.
        def down_coord
          [from, to].max
        end

        # First coordinate actually covered once coord_window is applied.
        def seq_from
          up_coord + coord_window.min
        end

        # Last coordinate actually covered once coord_window is applied.
        def seq_to
          up_coord + coord_window.max
        end

        # :plus when to > from, :minus otherwise (including to == from).
        def strand
          plus_strand? ? :plus : :minus
        end

        def plus_strand?
          to > from
        end

        def minus_strand?
          !plus_strand?
        end

        # The upcased sequence. Uses the sequence given at construction when
        # there was one; otherwise asks Entrez for the coord_window around
        # up_coord, complements it on the minus strand, and memoizes the result.
        def sequence
          return @raw_sequence if @raw_sequence

          entrez_sequence = Entrez.rna_sequence_from_entrez(accession, up_coord, coord_window)
          @raw_sequence   = (minus_strand? ? entrez_sequence.complement : entrez_sequence).upcase
        end

        alias :seq :sequence

        # Returns a NEW Context for the same accession/from/to with the given
        # coord_options, re-fetched from Entrez. Despite the bang, the receiver
        # is not modified.
        def extend!(coord_options = {})
          self.class.init_from_entrez(accession, from, to, coords: coord_options)
        end

        # Range of offsets, relative to up_coord, that the sequence covers.
        #
        # Without coord_options this is 0..(down_coord - up_coord). With them,
        # the range grows by :length on one side (chosen from :direction and
        # the strand) or on both sides for direction :both.
        # Example options: { length: 300, direction: 3 },
        # { length: 250, direction: :both }, { length: 200, direction: :down }
        def coord_window
          range             = 0..(down_coord - up_coord)
          length, direction = coord_options[:length], coord_options[:direction]

          return range unless length && direction
          return Range.new(range.min - length, range.max + length) if direction == :both

          case [direction, strand]
          when [3, :plus], [:down, :plus], [5, :minus], [:up, :minus] then Range.new(range.min, range.max + length)
          when [5, :plus], [:up, :plus], [3, :minus], [:down, :minus] then Range.new(range.min - length, range.max)
          else
            # Should be unreachable for options that passed validate_coord_options.
            Wrnap.debugger { "WARNING: value for :direction key in sequence retreival needs to be one of 5, 3, :both - found (%s)" % coord_options[:direction].inspect }
          end
        end

        # "<accession> <from> <+|-> <to>", used as the default comment.
        def identifier
          "%s %d %s %d" % [accession, from, plus_strand? ? "+" : "-", to]
        end

        # Same as the inherited #inspect, with the identifier inserted before
        # the trailing class name.
        def inspect
          super.gsub(/((\w(::)?)+)>$/) { |_| "%s %s>" % [identifier, $1] }
        end
      end
    end
  end
end
|
@@ -1,104 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # Mixin that gives a class sequence/structure helpers.
      #
      # Including it:
      # * mixes InstanceMethods into the class,
      # * extends the class with ClassMethods, OneStructureBasedMethods and
      #   TwoStructureBasedMethods, and
      # * defines an instance-level wrapper for every structure-based class
      #   method, which calls the class method with the instance's own
      #   +structure+ (or +str_1+ and +str_2+) prepended to the arguments.
      module Extensions
        def self.included(base)
          base.send(:include, InstanceMethods)
          base.extend(ClassMethods)
          base.extend(OneStructureBasedMethods)
          base.extend(TwoStructureBasedMethods)

          base.class_eval do
            OneStructureBasedMethods.public_instance_methods.each do |class_method|
              define_method(class_method) do |*args|
                self.class.send(class_method, structure, *args)
              end
            end

            TwoStructureBasedMethods.public_instance_methods.each do |class_method|
              define_method(class_method) do |*args|
                self.class.send(class_method, str_1, str_2, *args)
              end
            end
          end
        end

        module ClassMethods
          # Random sequence of +sequence_length+ nucleotides.
          # 0th order Markov chain w/ uniform probability distribution
          def generate_sequence(sequence_length)
            Rna.init_from_string(Array.new(sequence_length) { %w[A U C G][rand(4)] }.join)
          end

          # Delegates to Shuffle, shuffling +sequence+ in tokens of +token_length+.
          def shuffle(sequence, token_length = 2)
            Shuffle.new(sequence).shuffle(token_length)
          end

          # Dot-bracket string of the given +length+ with "(" at the smaller and
          # ")" at the larger index of every pair in +base_pairs+.
          def structure_from_bp_list(length, base_pairs)
            base_pairs.to_a.map(&:to_a).map(&:sort).each_with_object("." * length) do |(i, j), structure|
              structure[i] = "("
              structure[j] = ")"
            end
          end
        end

        module InstanceMethods
          # Shuffle of this sequence with a token length of 2.
          def dishuffle
            self.class.shuffle(sequence, 2)
          end

          # Fraction of G/C characters (case-insensitive) in the sequence.
          # Returns 0.0 for an empty sequence rather than NaN.
          def gc_content
            return 0.0 if seq.size.zero?

            seq.split(//).count { |base| base =~ /[GC]/i }.to_f / seq.size
          end

          # exp(-E / RT) for the energy reported by run(:eval), divided by the
          # same term for the ensemble energy reported by run(:fold, p: 0).
          def boltzmann_probability(dangle: 2)
            Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
          end
        end

        module OneStructureBasedMethods
          # Number of base pairs in +structure+ plus floor((length - 3) / 2).
          def max_bp_distance(structure)
            base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
          end

          # Set of two-element Sets, one Set[i, j] per base pair in +structure+.
          def base_pairs(structure)
            get_pairings(structure).each_with_index.each_with_object(Set.new) do |(partner, index), pairs|
              pairs << Set[index, partner] if partner >= 0
            end
          end

          # Array as long as +structure+ holding, for each position, the index
          # of its pairing partner, or -1 when the position is unpaired.
          #
          # Raises RuntimeError when the parentheses are unbalanced.
          def get_pairings(structure)
            stack    = []
            pairings = Array.new(structure.length, -1)

            structure.each_char.with_index do |symbol, index|
              case symbol
              when "(" then stack.push(index)
              when ")"
                raise "Too many ')' in '#{structure}'" if stack.empty?

                opening           = stack.pop
                pairings[opening] = index
                pairings[index]   = opening
              end
            end

            raise "Too many '(' in '#{structure}'" unless stack.empty?

            pairings
          end
        end

        module TwoStructureBasedMethods
          # Takes two structures and calculates the distance between them by |symmetric difference(bp_in_a, bp_in_b)|
          #
          # Raises RuntimeError when the structures differ in length.
          def bp_distance(structure_1, structure_2)
            raise "The two structures are not the same length" unless structure_1.length == structure_2.length

            (base_pairs(structure_1) ^ base_pairs(structure_2)).count
          end
        end
      end
    end
  end
end
|
@@ -1,36 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # Helix detection for any object that responds to +base_pairs+ with a
      # collection of two-element index pairs.
      module HelixFunctions
        # Groups the base pairs into runs of directly stacked pairs.
        #
        # Returns an Array of helices, each an Array of [i, j] pairs (i < j)
        # ordered by i, where every pair after the first is exactly
        # [i + 1, j - 1] of the one before it. Returns [] when there are no
        # base pairs.
        def helices
          # Normalise every pair to [smaller, larger] instead of trusting the
          # element order of the incoming pair objects.
          pairs = base_pairs.map { |pair| pair.to_a.sort }.sort_by(&:first)
          return [] if pairs.empty?

          pairs.drop(1).each_with_object([[pairs.first]]) do |(i, j), bins|
            if bins.last.last == [i - 1, j + 1]
              bins.last << [i, j]
            else
              bins << [[i, j]]
            end
          end
        end

        # One Helix per entry of #helices: its outermost pair plus the number
        # of stacked pairs.
        def collapsed_helices
          helices.map { |stack| Helix.new(stack.first[0], stack.first[1], stack.length) }
        end
      end

      # A helix identified by its outermost pair (i, j) and its length in
      # base pairs. Only +length+ is writable.
      class Helix
        attr_reader :i, :j
        attr_accessor :length

        def initialize(i, j, length)
          @i, @j, @length = i, j, length
        end

        # "(i, j)" label for the helix.
        def name
          "(%d, %d)" % [i, j]
        end
      end
    end
  end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # Mixin adding a free-form metadata store to a class. The including class
      # is expected to expose a +metadata+ reader returning a Container and to
      # provide +def_delegator+ (e.g. by extending Forwardable) — neither is
      # set up in this file.
      module Metadata
        def self.included(base)
          base.send(:include, InstanceMethods)
        end

        module InstanceMethods
          # Defines +md+ on the including class as a delegator to
          # @metadata.__data__.
          def self.included(base)
            base.class_eval do
              def_delegator :@metadata, :__data__, :md
            end
          end

          # Returns the metadata container. When a block is given it is first
          # instance_eval'd against the container, so bare calls inside the
          # block (e.g. +name "x"+) read or write entries.
          def meta(&block)
            metadata.tap { |container| container.instance_eval(&block) if block_given? }
          end

          # Same as #meta, but returns the object stored as the container's
          # __rna__ instead of the container.
          def meta_rna(&block)
            metadata.__rna__.tap { meta(&block) }
          end
        end

        # Hash-backed bag of attributes. Any unknown message with no argument
        # reads an entry; with one argument it writes one (a trailing "=" on
        # the message name is ignored, so both +c.foo = 1+ and +c.foo(1)+ work).
        class Container
          attr_reader :__rna__, :__data__

          def initialize(rna)
            @__rna__, @__data__ = rna, {}
          end

          def inspect
            "#<Metadata: %s>" % __data__.inspect
          end

          alias :to_s :inspect

          # Raises NoMethodError (via super) for calls with two or more arguments.
          def method_missing(name, *args, &block)
            case args.size
            when 0 then __data__[name]
            when 1 then __data__[name.to_s.chomp("=").to_sym] = args.first
            else super
            end
          end

          # Reports setters and already-stored keys as supported. Deliberately
          # NOT true for every name: although method_missing would answer any
          # zero-argument call with nil, claiming to respond to everything would
          # make libraries that probe with respond_to? believe this object
          # implements their hooks.
          def respond_to_missing?(name, include_private = false)
            name.to_s.end_with?("=") || __data__.key?(name) || super
          end
        end
      end
    end
  end
end
|
@@ -1,87 +0,0 @@
|
|
1
|
-
module Wrnap
  module Global
    class Rna
      # Instance helpers that attach a helix tree to an RNA's metadata. They
      # rely on +meta_rna+ and +md+ being available on the host object.
      module TreeFunctions
        # Builds a TreePlanter for this RNA, stores it under the :tree metadata
        # entry and returns whatever meta_rna returns.
        def with_tree
          meta_rna { |metadata| tree(TreePlanter.new(metadata.__rna__)) }
        end

        # The stored :tree metadata entry, building it first when absent.
        def trunk
          md[:tree] || with_tree.trunk
        end
      end

      # Wraps the tree of collapsed helices of an RNA. Nodes are
      # Tree::TreeNode objects whose content is a Helix (the root's content is
      # the RNA itself).
      class TreePlanter
        attr_reader :rna, :root

        # rna  - object responding to +collapsed_helices+.
        # tree - an existing root node to adopt; when falsy the tree is built
        #        from the RNA.
        def initialize(rna, tree = false)
          @rna  = rna
          @root = tree || build_tree
        end

        # Folds the collapsed helices, in order, into a tree and returns its
        # root. The accumulator is the most recently inserted node.
        #
        # NOTE(review): relies on Tree::TreeNode#<< returning the child that
        # was just added — confirm against the Tree library version in use.
        def build_tree
          rna.collapsed_helices.inject(Tree::TreeNode.new(:root, rna)) do |tree, helix|
            node = Tree::TreeNode.new(helix.name, helix)

            if tree.is_root?
              tree << node
            elsif helix.i > tree.content.j
              # It's a sibling, pop up until we're at its parent node.
              tree = tree.parent until tree.is_root? || tree.content.j > helix.i
              node.tap { tree << node }
            elsif helix.j < tree.content.j
              # Going deeper.
              tree << node
            end
          end.root
        end

        # New TreePlanter over a dup of the root with interior loops merged.
        def coalesce
          self.class.new(rna, root.dup).tap { |tree| tree.merge_interior_loops! }
        end

        # Merges interior loops in place and returns self.
        def coalesce!
          tap { merge_interior_loops! }
        end

        # Splices out every non-root node that has exactly one child: the child
        # is re-attached to the node's parent and the node is removed. Nodes
        # are visited children-first. Returns the root.
        def merge_interior_loops!
          root.tap do
            self.class.postorder_traversal(root) do |node|
              if node.children.count == 1 && !node.is_root?
                child = node.children.first
                node.parent.add(child)
                node.remove_from_parent!
              end
            end
          end
        end

        # node_depth of every node yielded by iterating the root.
        def depth_signature
          root.map(&:node_depth)
        end

        # Prints the tree via the root's print_tree; a truthy result is
        # replaced by nil.
        def pp
          root.print_tree && nil
        end

        def inspect
          "#<TreePlanter: %s>" % depth_signature.inspect
        end

        alias :to_s :inspect

        class << self
          # Yields +node+ first, then each of its descendants (parents before
          # children). Returns the block's value for +node+.
          def preorder_traversal(node, &block)
            yield(node).tap do
              node.children.each { |child| preorder_traversal(child, &block) }
            end
          end

          # Yields every descendant of +node+ before +node+ itself (children
          # before parents). Returns the block's value for +node+.
          def postorder_traversal(node, &block)
            node.children.each { |child| postorder_traversal(child, &block) }
            yield node
          end
        end
      end
    end
  end
end
|