vienna_rna 0.8.6 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/vienna_rna/global/parser.rb +19 -0
- data/lib/vienna_rna/global/rna.rb +115 -0
- data/lib/vienna_rna/global/rna_extensions.rb +109 -0
- data/lib/vienna_rna/{modules/graphing.rb → graphing/r.rb} +49 -111
- data/lib/vienna_rna/package/base.rb +117 -0
- data/lib/vienna_rna/package/energy_grid_2d.rb +69 -0
- data/lib/vienna_rna/package/eval.rb +11 -0
- data/lib/vienna_rna/package/fftbor.rb +19 -0
- data/lib/vienna_rna/package/fftbor2d.rb +25 -0
- data/lib/vienna_rna/package/fold.rb +31 -0
- data/lib/vienna_rna/package/heat.rb +15 -0
- data/lib/vienna_rna/package/rna2dfold.rb +27 -0
- data/lib/vienna_rna/package/rnabor.rb +32 -0
- data/lib/vienna_rna/package/subopt.rb +19 -0
- data/lib/vienna_rna/package/xbor.rb +63 -0
- data/lib/vienna_rna.rb +27 -14
- metadata +22 -38
- data/lib/vienna_rna/modules/base.rb +0 -124
- data/lib/vienna_rna/modules/batch.rb +0 -26
- data/lib/vienna_rna/modules/energy_grid_2d.rb +0 -63
- data/lib/vienna_rna/modules/eval.rb +0 -9
- data/lib/vienna_rna/modules/fftbor.rb +0 -21
- data/lib/vienna_rna/modules/fftbor2d.rb +0 -23
- data/lib/vienna_rna/modules/ffthairpin.rb +0 -4
- data/lib/vienna_rna/modules/fftmultiloop.rb +0 -4
- data/lib/vienna_rna/modules/fold.rb +0 -29
- data/lib/vienna_rna/modules/heat.rb +0 -13
- data/lib/vienna_rna/modules/parser.rb +0 -17
- data/lib/vienna_rna/modules/rna.rb +0 -113
- data/lib/vienna_rna/modules/rna2dfold.rb +0 -25
- data/lib/vienna_rna/modules/rna_extensions.rb +0 -101
- data/lib/vienna_rna/modules/rnabor.rb +0 -33
- data/lib/vienna_rna/modules/subopt.rb +0 -17
- data/lib/vienna_rna/modules/utils.rb +0 -34
- data/lib/vienna_rna/modules/xbor.rb +0 -64
@@ -1,17 +0,0 @@
|
|
1
|
-
module ViennaRna
  # Helpers that extract values from the raw text printed by RNAfold.
  #
  # Both public helpers read the SECOND line of the response, i.e. the line
  # that starts with the structure and ends with the energy in parentheses.
  module Parser
    # Patterns applied to RNAfold output. +:mfe+ captures the number inside the
    # trailing parenthesised group, e.g. the "-1.20" in "(((...))) ( -1.20)".
    REGEXP = {
      mfe: / \(\s*(-?\d*\.\d*)\)$/
    }.freeze

    class << self
      # Returns the first whitespace-delimited token of the second response line.
      #
      # @param response [String] raw RNAfold output
      # @return [String, nil] the structure token (nil when the line is blank)
      # @raise [ArgumentError] if +response+ has no second line
      def rnafold_mfe_structure(response)
        structure_line(response).split(/\s+/).first
      end

      # Returns the energy printed at the end of the second response line.
      #
      # @param response [String] raw RNAfold output
      # @return [Float]
      # @raise [ArgumentError] if +response+ has no second line, or that line
      #   does not end with a parenthesised number
      def rnafold_mfe(response)
        line  = structure_line(response)
        match = line.match(REGEXP[:mfe])
        raise ArgumentError, "No minimum free energy found in #{line.inspect}" unless match

        match[1].to_f
      end

      private

      # Second line of +response+ with its line terminator removed. Using
      # +chomp+ (rather than splitting on "\n") also strips a trailing "\r",
      # so CRLF output still matches the end-anchored :mfe pattern.
      def structure_line(response)
        line = response.each_line.map(&:chomp)[1]
        raise ArgumentError, "Expected at least two lines of RNAfold output, got #{response.inspect}" unless line

        line
      end
    end
  end
end
|
@@ -1,113 +0,0 @@
|
|
1
|
-
module ViennaRna
  # Value object holding an RNA sequence and up to two structures, plus the
  # raw data it was built from.
  class Rna
    include ViennaRna::RnaExtensions

    attr_reader :sequence, :structure, :second_structure, :raw_data

    class << self
      # Builds an Rna from positional strings.
      #
      # @param sequence [String]
      # @param structure [String, Symbol, nil] see #initialize for accepted symbols
      # @param second_structure [String, Symbol, nil]
      # @return [Rna]
      def init_from_string(sequence, structure = nil, second_structure = nil)
        new(
          sequence:         sequence,
          structure:        structure,
          second_structure: second_structure
        )
      end

      # Builds an Rna from a hash, accepting the long keys (:sequence,
      # :structure, :second_structure) and the short ones (:seq, :str / :str_1,
      # :str_2). The hash itself is kept as #raw_data.
      #
      # @param hash [Hash]
      # @return [Rna]
      def init_from_hash(hash)
        new(
          sequence:         hash[:sequence] || hash[:seq],
          structure:        hash[:structure] || hash[:str_1] || hash[:str],
          second_structure: hash[:second_structure] || hash[:str_2],
          raw_data:         hash
        )
      end

      # Builds an Rna from [sequence, structure, second_structure].
      #
      # @param array [Array]
      # @return [Rna]
      def init_from_array(array)
        init_from_string(*array)
      end

      # Builds an Rna from FASTA-like text. If +string+ names an existing file
      # its content is read instead. Lines starting with ">" are skipped and the
      # first three remaining lines are used as sequence, structure and second
      # structure.
      #
      # @param string [String] FASTA text or a path to a file containing it
      # @return [Rna]
      def init_from_fasta(string)
        string = File.read(string).chomp if File.exist?(string)
        init_from_string(*string.split(/\n/).reject { |line| line.start_with?(">") }[0, 3])
      end

      # Copies another Rna.
      #
      # This happens when you call a ViennaRna library function with the output
      # of something like ViennaRna::Fold.run(...).mfe
      #
      # @param rna [Rna]
      # @return [Rna]
      def init_from_self(rna)
        # The keywords were previously misspelled ("strucutre"), which made this
        # method raise ArgumentError (unknown keywords) on every call.
        new(
          sequence:         rna.sequence,
          structure:        rna.structure,
          second_structure: rna.second_structure,
          raw_data:         rna.raw_data
        )
      end

      alias_method :placeholder, :new
    end

    # @param sequence [String]
    # @param structure [String, Symbol, nil] a structure string, +:empty+ for an
    #   all-dots structure of the sequence's length, or +:mfe+ to take the
    #   structure from RNA(sequence).run(:fold).mfe_rna; anything else is stored as nil
    # @param second_structure [String, Symbol, nil] same conventions as +structure+
    # @param raw_data [Hash]
    def initialize(sequence: "", structure: "", second_structure: "", raw_data: {})
      @sequence, @raw_data = sequence, raw_data

      @structure        = resolve_structure(structure)
      @second_structure = resolve_structure(second_structure)

      # Length mismatches are only reported through ViennaRna.debugger; they do not raise.
      if str && seq.length != str.length
        ViennaRna.debugger { "The sequence length (%d) doesn't match the structure length (%d)" % [seq, str].map(&:length) }
      end

      if str_2 && str_1.length != str_2.length
        ViennaRna.debugger { "The first structure length (%d) doesn't match the second structure length (%d)" % [str_1, str_2].map(&:length) }
      end
    end

    alias :seq   :sequence
    alias :str   :structure
    alias :str_1 :structure
    alias :str_2 :second_structure

    # @return [String] one "." per sequence position
    def empty_structure
      "." * seq.length
    end

    alias :empty_str :empty_structure

    # Writes the optional comment line, the sequence and whichever structures
    # are present to +filename+, one per line.
    #
    # @param filename [String, #to_path] destination
    # @param comment [String, nil] written as "> comment" when given
    # @return [Object] +filename+, unchanged
    def write_fa!(filename, comment = nil)
      File.open(filename, "w") do |file|
        file.write("> %s\n" % comment) if comment
        file.write("%s\n" % seq)       if seq
        file.write("%s\n" % str_1)     if str_1
        file.write("%s\n" % str_2)     if str_2
      end

      filename
    end

    # Writes this Rna to a fresh Tempfile and returns its path.
    #
    # NOTE(review): only the path String is returned, so nothing keeps the
    # Tempfile object alive; Ruby may delete the file once that object is
    # garbage collected. Confirm callers consume the path immediately.
    #
    # @return [String]
    def temp_fa_file!
      write_fa!(Tempfile.new("")).path
    end

    # Resolves +module_name+ to a constant under ViennaRna and runs it on self.
    #
    # @param module_name [Symbol, String] e.g. :fold
    # @param options [Hash] forwarded to the resolved module's +run+
    # @raise [ArgumentError] if the name resolves to a falsy value
    def run(module_name, options = {})
      rna_module = ViennaRna.const_missing(module_name.to_s.camelize)
      raise ArgumentError, "#{module_name} can't be resolved as an executable" unless rna_module

      rna_module.run(self, options)
    end

    # @return [String] class name followed by the sequence and structures, each
    #   cut to 20 characters
    def inspect
      "#<%s>" % [
        "#{self.class.name}",
        ("#{seq[0, 20] + (seq.length > 20 ? '...' : '')}" if seq && !seq.empty?),
        ("#{str_1[0, 20] + (str_1.length > 20 ? ' [truncated]' : '')}" if str_1 && !str_1.empty?),
        # Guard on str_2 itself; the old check tested str_1.empty? here.
        ("#{str_2[0, 20] + (str_2.length > 20 ? ' [truncated]' : '')}" if str_2 && !str_2.empty?),
      ].compact.join(" ")
    end

    private

    # Maps a constructor argument to the structure string to store (replaces
    # the former eval-based lookup of the keyword argument by name).
    def resolve_structure(value)
      case value
      when :empty then empty_structure
      when :mfe   then RNA(sequence).run(:fold).mfe_rna.structure
      when String then value
      end
    end
  end
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
module ViennaRna
  # Runner for the RNA2Dfold executable, built on EnergyGrid2d.
  class Rna2dfold < EnergyGrid2d
    # Flags always passed to the executable; a subclass FLAGS constant and the
    # per-call flags are merged on top of these, in that order.
    BASE_FLAGS = {
      d:        0,
      p:        :empty,
      "-noBT" => :empty
    }

    self.executable_name = "RNA2Dfold"

    # Builds the shell command that pipes the temporary FASTA file for +data+
    # into the executable.
    #
    # @param flags [Hash] per-call flags, merged last
    # @return [String]
    def run_command(flags = {})
      ViennaRna.debugger { "Running RNA2Dfold on #{data.inspect}" }

      class_flags  = self.class.const_defined?(:FLAGS) ? self.class.const_get(:FLAGS) : {}
      merged_flags = BASE_FLAGS.merge(class_flags).merge(flags)

      "cat %s | %s %s" % [data.temp_fa_file!, exec_name, stringify_flags(merged_flags)]
    end

    # Skips the first six lines of the response, then keeps tab-separated
    # fields 0, 1, 2 and 6 of every remaining line.
    #
    # @return [Array<Array>]
    def distribution
      data_rows = response.split(/\n/)[6..-1]

      data_rows.map do |row|
        row.split(/\t/).at_indexes([0, 1, 2, 6])
      end
    end
  end
end
|
@@ -1,101 +0,0 @@
|
|
1
|
-
require "set"

module ViennaRna
  # Mixin that gives a host class sequence helpers (class and instance level)
  # plus structure helpers that are callable both on the class, with explicit
  # structures, and on instances, where the instance's own +structure+ is
  # passed as the first argument.
  module RnaExtensions
    def self.included(base)
      base.extend(ClassMethods)
      base.extend(StructureBasedClassAndInstanceMethods)
      base.send(:include, InstanceMethods)

      # All the methods in here are also copied in as instance methods, where
      # the first argument is the ViennaRna::Rna#structure
      base.class_eval do
        StructureBasedClassAndInstanceMethods.public_instance_methods.each do |class_method|
          define_method(class_method) do |*args|
            self.class.send(class_method, structure, *args)
          end
        end
      end
    end

    module ClassMethods
      # Random sequence over A/U/C/G.
      # 0th order Markov chain w/ uniform probability distribution
      #
      # @param sequence_length [Integer]
      # @return [Rna]
      def generate_sequence(sequence_length)
        # Collect the letters and join once instead of growing a string with +.
        letters = sequence_length.times.map { %w[A U C G][rand(4)] }
        Rna.init_from_string(letters.join)
      end

      # Delegates to Shuffle.new(sequence).shuffle(token_length).
      def shuffle(sequence, token_length = 2)
        Shuffle.new(sequence).shuffle(token_length)
      end
    end

    module InstanceMethods
      # Fraction of positions in +seq+ that are G or C (case-insensitive).
      #
      # @return [Float] 0.0 for an empty sequence (previously NaN from 0.0 / 0)
      def gc_content
        return 0.0 if seq.empty?

        seq.count("GCgc").to_f / seq.size
      end

      # Shuffles this instance's sequence with a token length of 2.
      def dishuffle
        self.class.shuffle(sequence, 2)
      end
    end

    module StructureBasedClassAndInstanceMethods
      # Takes two structures and calculates the distance between them by
      # |symmetric difference(bp_in_a, bp_in_b)|
      #
      # @raise [RuntimeError] if the structures differ in length
      # @return [Integer]
      def bp_distance(structure_1, structure_2)
        raise "The two structures are not the same length" unless structure_1.length == structure_2.length

        (base_pairs(structure_1) ^ base_pairs(structure_2)).size
      end

      # Takes two structures and calculates the distance between them by:
      # sum { ((x_j - x_i) - (y_j - y_i)).abs }
      #
      # @raise [RuntimeError] if the structures differ in length
      # @return [Integer]
      def symmetric_bp_distance(structure_1, structure_2)
        raise "The two structures are not the same length" unless structure_1.length == structure_2.length

        # Signed offset to the partner of position i; 0 for unpaired positions.
        offset = ->(pairings, i) { pairings[i] == -1 ? 0 : pairings[i] - i }

        pairings_1 = get_pairings(structure_1)
        pairings_2 = get_pairings(structure_2)

        structure_1.length.times.inject(0) do |distance, i|
          distance + (offset[pairings_1, i] - offset[pairings_2, i]).abs
        end
      end

      # @return [Integer] base pair count plus floor((length - 3) / 2)
      def max_bp_distance(structure)
        base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
      end

      # @return [Set<Set<Integer>>] one two-element set of indices per base pair
      def base_pairs(structure)
        get_pairings(structure).each_with_index.each_with_object(Set.new) do |(j, i), pairs|
          pairs << Set[i, j] if j >= 0
        end
      end

      # Maps every position of a dot-bracket string to the index of its
      # partner, or -1 when the position is not "(" or ")".
      #
      # @param structure [String]
      # @return [Array<Integer>]
      # @raise [RuntimeError] on unbalanced parentheses
      def get_pairings(structure)
        open_positions = []
        pairings       = Array.new(structure.length, -1)

        structure.each_char.with_index do |symbol, index|
          case symbol
          when "("
            open_positions.push(index)
          when ")"
            raise "Too many ')' in '#{structure}'" if open_positions.empty?

            opening           = open_positions.pop
            pairings[opening] = index
            pairings[index]   = opening
          end
        end

        raise "Too many '(' in '#{structure}'" unless open_positions.empty?

        pairings
      end
    end
  end
end
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require "tempfile"
|
2
|
-
require "bigdecimal"
|
3
|
-
|
4
|
-
module ViennaRna
  # Runner for RNAbor, built on Xbor. Rows come from self.class.parse(response);
  # this class reads field 1 of each row as a shell value and field 2 as a count.
  class Rnabor < Xbor
    FLAGS = {
      nodangle: :empty
    }

    # @return [BigDecimal, nil] sum of all parsed shell values (nil when no rows parsed)
    def partition
      non_zero_shells.inject(&:+)
    end

    # @return [Integer] sum of #counts
    def total_count
      counts.inject(&:+)
    end

    # Parsed counts, padded with zeros up to data.seq.length + 1 entries.
    #
    # @return [Array<Integer>]
    def counts
      non_zero_counts = self.class.parse(response).map { |row| BigDecimal(row[2]).to_i }

      non_zero_counts + [0] * zero_padding(non_zero_counts.length)
    end

    # Shell values divided by the partition sum, padded with 0.0 up to
    # data.seq.length + 1 entries.
    #
    # @param options [Hash] +:precision+ (default 4) is the number of decimal
    #   places values are truncated to; 0 leaves values untouched
    # @return [Array<Numeric>]
    def distribution(options = {})
      options   = { precision: 4 }.merge(options)
      precision = options[:precision]

      # Compute the partition sum once; it used to be recomputed (re-parsing
      # the whole response) for every single shell.
      total                 = partition
      non_zero_distribution = non_zero_shells.map { |shell| shell / total }
      padded                = non_zero_distribution + [0.0] * zero_padding(non_zero_distribution.length)

      return padded if precision.zero?

      padded.map { |value| (value * 10 ** precision).truncate / 10.0 ** precision }
    end

    # @return [Array<BigDecimal>] field 1 of every parsed row
    def non_zero_shells
      # Kernel#BigDecimal replaces BigDecimal.new, which bigdecimal 2.0 removed.
      self.class.parse(response).map { |row| BigDecimal(row[1]) }
    end

    private

    # Number of trailing zero entries needed to reach data.seq.length + 1,
    # never negative (Array#* raises on a negative count).
    def zero_padding(present)
      [data.seq.length - present + 1, 0].max
    end
  end
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module ViennaRna
  # Runner whose response is turned into one Rna per output line.
  class Subopt < Base
    attr_reader :structures

    # Builds #structures from every response line after the first, pairing
    # data.seq with the first whitespace-delimited token of each line.
    #
    # @return [Array<Rna>] empty when the response has no lines at all
    def post_process
      # [][1..-1] is nil, so fall back to an empty list for an empty response.
      output_lines = @response.split(/\n/)[1..-1] || []

      @structures = output_lines.map do |output|
        Rna.init_from_string(data.seq, output.split(/\s+/).first)
      end
    end

    # Runs with +p: count+ and tallies how often each distinct structure
    # string occurs in the result.
    #
    # Structures are grouped by their string value: the Rna objects built in
    # #post_process are distinct instances, so using them directly as tally
    # keys counted every one exactly once. The first Rna seen for each
    # structure is used as the key.
    #
    # @param count [Integer] value passed as the +p+ flag
    # @return [Hash{Rna => Integer}] missing keys default to 0
    def bin(count = 1)
      tally = Hash.new { |hash, key| hash[key] = 0 }

      run(p: count).structures.group_by(&:structure).each_value do |same_structure|
        tally[same_structure.first] = same_structure.size
      end

      tally
    end
  end
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
module ViennaRna
  # Stateless helpers for FASTA files and polynomial regression.
  module Utils
    class << self
      # Reads every entry Bio::FlatFile yields for +path+ into an Array.
      #
      # @param path [String]
      # @return [Array]
      def fastas_from_file(path)
        # Force it to not be lazy.
        Bio::FlatFile.auto(path).to_enum.map { |fasta| fasta }
      end

      # Writes +fastas+ in groups of +group_size+ to files named
      # "<base_name>_<index>.fa" inside +directory+. A file that already exists
      # is left alone and a warning is printed instead.
      #
      # @param fastas [Enumerable] items responding to +fasta+, which in turn
      #   responds to +definition+ and +seq+
      # @param directory [String]
      # @param base_name [String]
      # @param group_size [Integer]
      def write_fastas!(fastas, directory, base_name, group_size = 10)
        fastas.each_slice(group_size).each_with_index do |fasta_group, i|
          path = File.join(directory, "#{base_name}_#{i}.fa")

          # File.exist? replaces File.exists?, which Ruby 3.2 removed.
          if File.exist?(path)
            puts "Warning: file '#{path}' exists. Skipping."
            next
          end

          File.open(path, "w") do |file|
            fasta_group.each do |folding|
              file.write(">%s\n%s\n" % [folding.fasta.definition, folding.fasta.seq])
            end
          end
        end
      end

      # Least-squares polynomial fit computed as (XᵀX)⁻¹ Xᵀ y, where row i of X
      # holds x[i] raised to the powers 0..degree.
      #
      # @param x [Array<Numeric>]
      # @param y [Array<Numeric>]
      # @param degree [Integer]
      # @return [Array<Float>] coefficients ordered from power 0 up to +degree+
      def regress(x, y, degree)
        x_data   = x.map { |value| (0..degree).map { |power| value ** power.to_f } }
        x_matrix = Matrix[*x_data]
        y_matrix = Matrix.column_vector(y)

        ((x_matrix.transpose * x_matrix).inverse * x_matrix.transpose * y_matrix).transpose.to_a[0]
      end
    end
  end
end
|
@@ -1,64 +0,0 @@
|
|
1
|
-
require "tempfile"
|
2
|
-
require "bigdecimal"
|
3
|
-
|
4
|
-
module ViennaRna
  # Shared runner for the "*bor" executables; subclasses may add a FLAGS
  # constant that is merged over BASE_FLAGS.
  class Xbor < Base
    BASE_FLAGS = {
      E: "/usr/local/bin/energy.par"
    }

    # Derives the executable name from the class name by upcasing everything
    # before the trailing "bor" (e.g. "Rnabor" becomes "RNAbor").
    self.executable_name = -> { name.demodulize.gsub(/^([A-Z].*)bor$/) { $1.upcase + "bor" } }

    # Writes data.seq and data.str to a temporary file and builds the command
    # line "<executable> <flags> <file path>".
    #
    # NOTE(review): only the path String leaves this method, so nothing keeps
    # the Tempfile object alive; Ruby may delete the file once that object is
    # garbage collected. Confirm the command is executed right away.
    #
    # @param flags [Hash] per-call flags, merged last
    # @return [String]
    def run_command(flags = {})
      file = Tempfile.new("rna")
      file.write("%s\n" % data.seq)
      file.write("%s\n" % data.str)
      file.close

      # Report the executable actually being run; the message used to say
      # "FFTbor" for every subclass.
      ViennaRna.debugger { "Running #{exec_name} on #{data.inspect}" }

      class_flags = self.class.const_defined?(:FLAGS) ? self.class.const_get(:FLAGS) : {}

      "%s %s %s" % [
        exec_name,
        stringify_flags(BASE_FLAGS.merge(class_flags).merge(flags)),
        file.path
      ]
    end

    # Rebuilds a runner from a saved log. The second and third
    # whitespace-delimited tokens of the log's first line are taken as
    # sequence and structure.
    #
    # @param path [String] log file to read
    # @param klass [Class] class whose +bootstrap+ is called
    def self.bootstrap_from_file(path, klass = self)
      log = File.read(path)

      # Split the header once instead of once per field.
      _, sequence, structure = log.split(/\n/).first.split(/\s+/)

      klass.bootstrap(Rna.init_from_string(sequence, structure), log)
    end

    # Keeps the lines that start with digits, a tab and an optionally negative
    # number, split on tabs.
    #
    # @param response [String]
    # @return [Array<Array<String>>]
    def self.parse(response)
      response.split(/\n/).select { |line| line =~ /^\d+\t-?\d+/ }.map { |line| line.split(/\t/) }
    end

    # run.distribution padded with 0.0 up to data.seq.length + 1 entries; a
    # longer distribution is returned unpadded.
    #
    # @return [Array<Numeric>]
    def full_distribution
      distribution = run.distribution
      padding      = [data.seq.length - distribution.length + 1, 0].max

      distribution + [0.0] * padding
    end

    # @return [Array<Array>] [index, value] pairs for the first
    #   data.seq.length + 1 entries of #full_distribution
    def k_p_points
      full_distribution.each_with_index.map { |value, index| [index, value] }[0..data.seq.length]
    end

    # @return [Numeric] sum over #k_p_points of index * value
    def expected_k
      k_p_points.map { |point| point.inject(&:*) }.inject(&:+)
    end

    # Plots #k_p_points through ViennaRna::Graphing::Gnuplot.quick_plot.
    #
    # NOTE(review): the default title calls data.safe_structure, which is not
    # defined in the code visible here; confirm it exists on the data object.
    #
    # @param options [Hash] +:title+ overrides the default title; the whole hash
    #   is also forwarded to the plotter
    def quick_plot(options = {})
      ViennaRna::Graphing::Gnuplot.quick_plot(
        k_p_points,
        options[:title] || "%s\\n%s\\n%s" % [self.class.name, data.seq, data.safe_structure],
        options
      )
    end

    def inspect
      "#<#{self.class.name} #{data.inspect}>"
    end
  end
end
|