vienna_rna 0.8.6 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/lib/vienna_rna/global/parser.rb +19 -0
  3. data/lib/vienna_rna/global/rna.rb +115 -0
  4. data/lib/vienna_rna/global/rna_extensions.rb +109 -0
  5. data/lib/vienna_rna/{modules/graphing.rb → graphing/r.rb} +49 -111
  6. data/lib/vienna_rna/package/base.rb +117 -0
  7. data/lib/vienna_rna/package/energy_grid_2d.rb +69 -0
  8. data/lib/vienna_rna/package/eval.rb +11 -0
  9. data/lib/vienna_rna/package/fftbor.rb +19 -0
  10. data/lib/vienna_rna/package/fftbor2d.rb +25 -0
  11. data/lib/vienna_rna/package/fold.rb +31 -0
  12. data/lib/vienna_rna/package/heat.rb +15 -0
  13. data/lib/vienna_rna/package/rna2dfold.rb +27 -0
  14. data/lib/vienna_rna/package/rnabor.rb +32 -0
  15. data/lib/vienna_rna/package/subopt.rb +19 -0
  16. data/lib/vienna_rna/package/xbor.rb +63 -0
  17. data/lib/vienna_rna.rb +27 -14
  18. metadata +22 -38
  19. data/lib/vienna_rna/modules/base.rb +0 -124
  20. data/lib/vienna_rna/modules/batch.rb +0 -26
  21. data/lib/vienna_rna/modules/energy_grid_2d.rb +0 -63
  22. data/lib/vienna_rna/modules/eval.rb +0 -9
  23. data/lib/vienna_rna/modules/fftbor.rb +0 -21
  24. data/lib/vienna_rna/modules/fftbor2d.rb +0 -23
  25. data/lib/vienna_rna/modules/ffthairpin.rb +0 -4
  26. data/lib/vienna_rna/modules/fftmultiloop.rb +0 -4
  27. data/lib/vienna_rna/modules/fold.rb +0 -29
  28. data/lib/vienna_rna/modules/heat.rb +0 -13
  29. data/lib/vienna_rna/modules/parser.rb +0 -17
  30. data/lib/vienna_rna/modules/rna.rb +0 -113
  31. data/lib/vienna_rna/modules/rna2dfold.rb +0 -25
  32. data/lib/vienna_rna/modules/rna_extensions.rb +0 -101
  33. data/lib/vienna_rna/modules/rnabor.rb +0 -33
  34. data/lib/vienna_rna/modules/subopt.rb +0 -17
  35. data/lib/vienna_rna/modules/utils.rb +0 -34
  36. data/lib/vienna_rna/modules/xbor.rb +0 -64
@@ -1,17 +0,0 @@
1
module ViennaRna
  # Helpers for extracting the minimum free energy (MFE) structure and its
  # energy from the raw text printed by RNAfold.
  module Parser
    REGEXP = {
      # Matches the trailing " ( -1.20)" energy annotation of a structure line.
      mfe: / \(\s*(-?\d*\.\d*)\)$/
    }

    class << self
      # Returns the first whitespace-delimited token of the structure line,
      # i.e. the dot-bracket string.
      #
      # @param response [String] raw RNAfold output
      # @return [String] the MFE structure
      # @raise [ArgumentError] if no structure line can be located
      def rnafold_mfe_structure(response)
        line = structure_line(response)
        raise ArgumentError, "No structure line found in RNAfold output: #{response.inspect}" unless line

        line.split(/\s+/).first
      end

      # Returns the free energy captured from the structure line.
      #
      # @param response [String] raw RNAfold output
      # @return [Float] the minimum free energy
      # @raise [ArgumentError] if no line carries an energy annotation
      def rnafold_mfe(response)
        line  = structure_line(response)
        match = line && line.match(REGEXP[:mfe])
        raise ArgumentError, "No MFE value found in RNAfold output: #{response.inspect}" unless match

        match[1].to_f
      end

      private

      # Picks the line holding the structure and energy. The first non-header
      # line ending in an energy annotation wins, so output that starts with a
      # FASTA ">" header is handled too; otherwise fall back to the second line,
      # which is where the plain two-line output keeps the structure.
      def structure_line(response)
        lines = response.split(/\n/)
        lines.find { |line| !line.start_with?(">") && line =~ REGEXP[:mfe] } || lines[1]
      end
    end
  end
end
@@ -1,113 +0,0 @@
1
module ViennaRna
  # An RNA sequence together with up to two secondary structures (dot-bracket
  # strings) and the raw data it was built from.
  class Rna
    include ViennaRna::RnaExtensions

    attr_reader :sequence, :structure, :second_structure, :raw_data

    class << self
      # Builds an Rna from positional strings.
      def init_from_string(sequence, structure = nil, second_structure = nil)
        new(
          sequence:         sequence,
          structure:        structure,
          second_structure: second_structure
        )
      end

      # Builds an Rna from a hash, accepting both the long keys and the
      # :seq / :str / :str_1 / :str_2 short forms. The hash itself is kept as
      # raw_data.
      def init_from_hash(hash)
        new(
          sequence:         hash[:sequence]         || hash[:seq],
          structure:        hash[:structure]        || hash[:str_1] || hash[:str],
          second_structure: hash[:second_structure] || hash[:str_2],
          raw_data:         hash
        )
      end

      # Builds an Rna from [sequence, structure, second_structure].
      def init_from_array(array)
        init_from_string(*array)
      end

      # Builds an Rna from FASTA text, or from the file at that path when the
      # argument names an existing file. Lines starting with ">" are dropped
      # and the first three remaining lines become sequence and structures.
      def init_from_fasta(string)
        string = File.read(string).chomp if File.exist?(string)
        init_from_string(*string.split(/\n/).reject { |line| line.start_with?(">") }[0, 3])
      end

      # Copies another Rna.
      # This happens when you call a ViennaRna library function with the output
      # of something like ViennaRna::Fold.run(...).mfe
      def init_from_self(rna)
        # The keywords were previously misspelled ("strucutre"), which made
        # this constructor raise ArgumentError on every call.
        new(
          sequence:         rna.sequence,
          structure:        rna.structure,
          second_structure: rna.second_structure,
          raw_data:         rna.raw_data
        )
      end

      alias_method :placeholder, :new
    end

    # @param sequence [String] nucleotide sequence
    # @param structure [String, Symbol, nil] a dot-bracket string, :empty for
    #   an all-unpaired structure, or :mfe to fold the sequence; anything else
    #   is stored as nil
    # @param second_structure [String, Symbol, nil] same rules as structure
    # @param raw_data [Hash] data the instance was derived from
    def initialize(sequence: "", structure: "", second_structure: "", raw_data: {})
      @sequence, @raw_data = sequence, raw_data

      # Resolved in this order so that a fold triggered by :mfe for the first
      # structure happens before one for the second, as before.
      @structure        = resolve_structure(structure)
      @second_structure = resolve_structure(second_structure)

      # Length mismatches are only reported through the debugger, never raised.
      if seq && str && seq.length != str.length
        ViennaRna.debugger { "The sequence length (%d) doesn't match the structure length (%d)" % [seq, str].map(&:length) }
      end

      if str_1 && str_2 && str_1.length != str_2.length
        ViennaRna.debugger { "The first structure length (%d) doesn't match the second structure length (%d)" % [str_1, str_2].map(&:length) }
      end
    end

    alias :seq   :sequence
    alias :str   :structure
    alias :str_1 :structure
    alias :str_2 :second_structure

    # A structure of the same length as the sequence with every base unpaired.
    def empty_structure
      "." * seq.length
    end

    alias :empty_str :empty_structure

    # Writes the optional comment line, the sequence and whichever structures
    # are present to +filename+, one per line.
    #
    # @return the +filename+ argument, unchanged
    def write_fa!(filename, comment = nil)
      File.open(filename, "w") do |file|
        file.write("> %s\n" % comment) if comment
        file.write("%s\n" % seq)       if seq
        file.write("%s\n" % str_1)     if str_1
        file.write("%s\n" % str_2)     if str_2
      end

      filename
    end

    # Writes this RNA to a fresh Tempfile and returns its path.
    # NOTE(review): only the path is returned, so nothing keeps the Tempfile
    # object alive; Ruby may delete the file once that object is garbage
    # collected. Confirm callers consume the path immediately.
    def temp_fa_file!
      write_fa!(Tempfile.new("")).path
    end

    # Runs the named ViennaRna wrapper (e.g. :fold) against this RNA.
    #
    # @raise [ArgumentError] if the name does not resolve to a wrapper
    def run(module_name, options = {})
      if (rna_module = ViennaRna.const_missing("#{module_name}".camelize))
        rna_module.run(self, options)
      else
        raise ArgumentError, "#{module_name} can't be resolved as an executable"
      end
    end

    # Short description: class name plus the first 20 characters of the
    # sequence and of each non-empty structure.
    def inspect
      "#<%s>" % [
        "#{self.class.name}",
        ("#{seq[0, 20] + (seq.length > 20 ? '...' : '')}" if seq && !seq.empty?),
        ("#{str_1[0, 20] + (str_1.length > 20 ? ' [truncated]' : '')}" if str_1 && !str_1.empty?),
        # Guard on str_2 itself; the old check tested str_1 and so failed
        # whenever only a second structure was present.
        ("#{str_2[0, 20] + (str_2.length > 20 ? ' [truncated]' : '')}" if str_2 && !str_2.empty?),
      ].compact.join(" ")
    end

    private

    # Turns a constructor argument into the stored structure value without
    # resorting to eval.
    def resolve_structure(value)
      case value
      when :empty then empty_structure
      when :mfe   then RNA(sequence).run(:fold).mfe_rna.structure
      when String then value
      end
    end
  end
end
@@ -1,25 +0,0 @@
1
module ViennaRna
  # Wrapper for the RNA2Dfold executable, built on EnergyGrid2d.
  class Rna2dfold < EnergyGrid2d
    # Flags always passed; a FLAGS constant on the class and the caller's
    # flags are merged over them, in that order.
    BASE_FLAGS = {
      d: 0,
      p: :empty,
      "-noBT" => :empty
    }

    self.executable_name = "RNA2Dfold"

    # Builds the shell command that pipes a temporary FASTA file for +data+
    # into the executable.
    #
    # @param flags [Hash] flags taking precedence over the defaults
    # @return [String] the command line
    def run_command(flags = {})
      ViennaRna.debugger { "Running RNA2Dfold on #{data.inspect}" }

      class_flags =
        if self.class.const_defined?(:FLAGS)
          self.class.const_get(:FLAGS)
        else
          {}
        end

      format(
        "cat %s | %s %s",
        data.temp_fa_file!,
        exec_name,
        stringify_flags(BASE_FLAGS.merge(class_flags).merge(flags))
      )
    end

    # Splits every response line from the seventh onward on tabs and keeps
    # columns 0, 1, 2 and 6 of each.
    def distribution
      table_rows = response.split(/\n/)[6..-1]

      table_rows.map do |row|
        row.split(/\t/).at_indexes([0, 1, 2, 6])
      end
    end
  end
end
@@ -1,101 +0,0 @@
1
# Set is used by base_pairs; require it here so this file does not depend on
# another file having loaded it first.
require "set"

module ViennaRna
  # Mixin giving a class sequence helpers and dot-bracket structure utilities.
  # The structure utilities are available as class methods taking the
  # structure explicitly, and as instance methods that pass the receiver's
  # #structure as the first argument.
  module RnaExtensions
    def self.included(base)
      base.extend(ClassMethods)
      base.extend(StructureBasedClassAndInstanceMethods)
      base.send(:include, InstanceMethods)

      # All the methods in StructureBasedClassAndInstanceMethods are also
      # copied in as instance methods, where the first argument is the
      # receiver's #structure.
      base.class_eval do
        StructureBasedClassAndInstanceMethods.public_instance_methods.each do |class_method|
          define_method(class_method) do |*args|
            self.class.send(class_method, structure, *args)
          end
        end
      end
    end

    module ClassMethods
      # Random sequence of the given length; each position is drawn uniformly
      # from A, U, C, G (0th order Markov chain).
      def generate_sequence(sequence_length)
        Rna.init_from_string(Array.new(sequence_length) { %w[A U C G][rand(4)] }.join)
      end

      # Shuffles +sequence+ using tokens of +token_length+ characters.
      def shuffle(sequence, token_length = 2)
        Shuffle.new(sequence).shuffle(token_length)
      end
    end

    module InstanceMethods
      # Fraction of G/C characters (case-insensitive) in the sequence.
      # An empty sequence yields 0.0 rather than NaN from dividing by zero.
      def gc_content
        return 0.0 if seq.empty?

        seq.count("GCgc").to_f / seq.size
      end

      # Shuffles the receiver's sequence with a token length of 2.
      def dishuffle
        self.class.shuffle(sequence, 2)
      end
    end

    module StructureBasedClassAndInstanceMethods
      # Base pair distance: the size of the symmetric difference between the
      # base pair sets of the two structures.
      #
      # @raise [RuntimeError] if the structures differ in length
      def bp_distance(structure_1, structure_2)
        raise "The two structures are not the same length" unless structure_1.length == structure_2.length

        (base_pairs(structure_1) ^ base_pairs(structure_2)).size
      end

      # Distance computed as sum { ((x_j - x_i) - (y_j - y_i)).abs } over all
      # positions, where an unpaired position contributes an offset of 0.
      #
      # @raise [RuntimeError] if the structures differ in length
      def symmetric_bp_distance(structure_1, structure_2)
        raise "The two structures are not the same length" unless structure_1.length == structure_2.length

        offset = ->(pairings, i) { pairings[i] == -1 ? 0 : pairings[i] - i }

        structure_1_pairings = get_pairings(structure_1)
        structure_2_pairings = get_pairings(structure_2)

        structure_1.length.times.inject(0) do |distance, i|
          distance + (offset[structure_1_pairings, i] - offset[structure_2_pairings, i]).abs
        end
      end

      # Number of base pairs in +structure+ plus floor((length - 3) / 2).
      def max_bp_distance(structure)
        base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
      end

      # Set of base pairs, each an unordered Set[i, j] of zero-based indices.
      def base_pairs(structure)
        get_pairings(structure).each_with_index.each_with_object(Set.new) do |(j, i), pairs|
          pairs << Set[i, j] if j >= 0
        end
      end

      # Array with one entry per position: the index of its pairing partner,
      # or -1 when unpaired.
      #
      # @raise [RuntimeError] on unbalanced parentheses
      def get_pairings(structure)
        stack    = []
        pairings = Array.new(structure.length, -1)

        structure.each_char.with_index do |symbol, index|
          case symbol
          when "("
            stack.push(index)
          when ")"
            raise "Too many ')' in '#{structure}'" if stack.empty?

            opening           = stack.pop
            pairings[opening] = index
            pairings[index]   = opening
          end
        end

        raise "Too many '(' in '#{structure}'" unless stack.empty?

        pairings
      end
    end
  end
end
@@ -1,33 +0,0 @@
1
- require "tempfile"
2
- require "bigdecimal"
3
-
4
module ViennaRna
  # Wrapper for the RNAbor executable; reads per-shell partition values and
  # structure counts out of the parsed response rows.
  class Rnabor < Xbor
    FLAGS = {
      nodangle: :empty
    }

    # Sum of all non-zero shell values.
    def partition
      non_zero_shells.inject(&:+)
    end

    # Sum of all structure counts.
    def total_count
      counts.inject(&:+)
    end

    # Integer counts taken from column 2 of each parsed row, padded with zeros
    # up to sequence length + 1 entries.
    def counts
      # Kernel#BigDecimal replaces BigDecimal.new, which newer bigdecimal
      # releases no longer provide.
      # NOTE(review): Kernel#BigDecimal raises on malformed strings where the
      # old constructor may have been lenient; confirm the column is always a
      # clean number.
      non_zero_counts = self.class.parse(response).map { |row| BigDecimal(row[2]).to_i }

      non_zero_counts + [0] * zero_padding(non_zero_counts)
    end

    # Shell values divided by the partition, padded with 0.0 up to sequence
    # length + 1 entries and truncated to options[:precision] decimal places
    # (default 4; 0 disables truncation).
    #
    # @param options [Hash] :precision => Integer
    # @return [Array<Numeric>]
    def distribution(options = {})
      options   = { precision: 4 }.merge(options)
      precision = options[:precision]

      # Computed once: calling #partition per shell re-parsed the whole
      # response for every element.
      total = partition

      non_zero_distribution         = non_zero_shells.map { |shell| shell / total }
      distribution_before_precision = non_zero_distribution + [0.0] * zero_padding(non_zero_distribution)

      distribution_before_precision.map do |value|
        precision.zero? ? value : (value * 10 ** precision).truncate / 10.0 ** precision
      end
    end

    # BigDecimal values taken from column 1 of each parsed row.
    def non_zero_shells
      self.class.parse(response).map { |row| BigDecimal(row[1]) }
    end

    private

    # Number of trailing zero entries needed to reach sequence length + 1,
    # never negative so that an over-long response cannot raise.
    def zero_padding(values)
      [data.seq.length - values.length + 1, 0].max
    end
  end
end
@@ -1,17 +0,0 @@
1
module ViennaRna
  # Wrapper whose response lists one structure per line after a header line.
  class Subopt < Base
    attr_reader :structures

    # Converts every response line after the first into an Rna that pairs the
    # input sequence with the first whitespace-delimited token of that line.
    def post_process
      structure_lines = @response.split(/\n/)[1..-1]

      @structures = structure_lines.map do |line|
        dot_bracket = line.split(/\s+/).first
        Rna.init_from_string(data.seq, dot_bracket)
      end
    end

    # Runs with the p flag set to +count+ and tallies the returned structures.
    # NOTE(review): the tally is keyed by the Rna objects themselves; unless
    # Rna defines #hash / #eql? somewhere, equal structures will not share a
    # bucket. Confirm that is intended.
    #
    # @return [Hash] structure => number of occurrences
    def bin(count = 1)
      sampled = run(p: count).structures
      tally   = Hash.new { |hash, key| hash[key] = 0 }

      sampled.each_with_object(tally) do |structure, occurrences|
        occurrences[structure] += 1
      end
    end
  end
end
@@ -1,34 +0,0 @@
1
module ViennaRna
  # Stateless helpers for FASTA file handling and polynomial regression.
  module Utils
    class << self
      # Reads every entry of the file at +path+ through Bio::FlatFile and
      # returns them as an array.
      def fastas_from_file(path)
        # Force it to not be lazy.
        Bio::FlatFile.auto(path).to_enum.map { |fasta| fasta }
      end

      # Writes +fastas+ in groups of +group_size+ to
      # "<directory>/<base_name>_<i>.fa". Each element must respond to
      # #fasta, which in turn provides #definition and #seq. A file that
      # already exists is left untouched and a warning is printed instead.
      def write_fastas!(fastas, directory, base_name, group_size = 10)
        fastas.each_slice(group_size).each_with_index do |fasta_group, i|
          path = File.join(directory, base_name + "_#{i}.fa")

          # File.exist? replaces File.exists?, which Ruby 3.2 removed.
          if File.exist?(path)
            puts "Warning: file '#{path}' exists. Skipping."
          else
            File.open(path, "w") do |file|
              fasta_group.each do |folding|
                file.write(">%s\n%s\n" % [folding.fasta.definition, folding.fasta.seq])
              end
            end
          end
        end
      end

      # Least-squares polynomial fit of the given degree via the normal
      # equations (X'X)^-1 X'y.
      # NOTE(review): relies on Matrix being loaded elsewhere (require
      # "matrix"); nothing in this file loads it.
      #
      # @return [Array<Float>] coefficients ordered from power 0 to +degree+
      def regress(x, y, degree)
        x_data   = x.map { |i| (0..degree).map { |power| i ** power.to_f } }
        x_matrix = Matrix[*x_data]
        y_matrix = Matrix.column_vector(y)

        ((x_matrix.transpose * x_matrix).inverse * x_matrix.transpose * y_matrix).transpose.to_a[0]
      end
    end
  end
end
@@ -1,64 +0,0 @@
1
- require "tempfile"
2
- require "bigdecimal"
3
-
4
module ViennaRna
  # Shared behaviour for the *bor wrappers: command construction, response
  # parsing and distribution helpers.
  class Xbor < Base
    BASE_FLAGS = {
      E: "/usr/local/bin/energy.par"
    }

    # Derives the executable name from the class name by upper-casing
    # everything in front of the trailing "bor".
    self.executable_name = lambda do
      name.demodulize.gsub(/^([A-Z].*)bor$/) { |_match| "#{$1.upcase}bor" }
    end

    # Writes sequence and structure to a temp file and returns the command
    # line "<executable> <flags> <temp file path>".
    #
    # NOTE(review): only the path leaves this method, so the Tempfile object
    # can be garbage collected (and its file removed) before the command is
    # executed. Confirm the caller runs the command immediately.
    #
    # @param flags [Hash] flags taking precedence over BASE_FLAGS and FLAGS
    # @return [String]
    def run_command(flags = {})
      input = Tempfile.new("rna")
      input.write("%s\n" % data.seq)
      input.write("%s\n" % data.str)
      input.close

      ViennaRna.debugger { "Running FFTbor on #{data.inspect}" }

      class_flags = self.class.const_defined?(:FLAGS) ? self.class.const_get(:FLAGS) : {}

      format(
        "%s %s %s",
        exec_name,
        stringify_flags(BASE_FLAGS.merge(class_flags).merge(flags)),
        input.path
      )
    end

    # Rebuilds an instance from a saved log whose first line carries the
    # sequence and the structure as its second and third whitespace-separated
    # fields.
    def self.bootstrap_from_file(path, klass = self)
      log           = File.read(path)
      header_fields = log.split(/\n/).first.split(/\s+/)

      klass.bootstrap(Rna.init_from_string(header_fields[1], header_fields[2]), log)
    end

    # Keeps the lines that start with "<digits><TAB><optionally signed digits>"
    # and splits each of them on tabs.
    def self.parse(response)
      response.split(/\n/).each_with_object([]) do |line, rows|
        rows << line.split(/\t/) if line =~ /^\d+\t-?\d+/
      end
    end

    # The run's distribution padded with 0.0 up to sequence length + 1
    # entries; longer distributions are returned unpadded.
    def full_distribution
      distribution = run.distribution
      shortfall    = data.seq.length - distribution.length + 1

      distribution + ([0.0] * (shortfall < 0 ? 0 : shortfall))
    end

    # [k, p] pairs for k in 0..sequence length.
    def k_p_points
      indexed = full_distribution.each_with_index.map { |probability, k| [k, probability] }

      indexed[0..data.seq.length]
    end

    # Sum of k * p over all k_p_points.
    def expected_k
      k_p_points.map { |k, probability| k * probability }.inject(&:+)
    end

    # Plots k_p_points through ViennaRna::Graphing::Gnuplot, using
    # options[:title] or a default title made of class name, sequence and
    # structure.
    def quick_plot(options = {})
      title = options[:title] || format("%s\\n%s\\n%s", self.class.name, data.seq, data.safe_structure)

      ViennaRna::Graphing::Gnuplot.quick_plot(k_p_points, title, options)
    end

    def inspect
      format("#<%s %s>", self.class.name, data.inspect)
    end
  end
end