wrnap 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
1
+ module Wrnap
2
+ module Graphing
3
+ module R
4
+ class << self
5
# Opens a new RinRuby session, yields it to the block and always closes it
# afterwards, whether the block succeeds or raises.
#
# Returns the block's value. A RuntimeError whose message is exactly
# "Unsupported data type on R's end" is swallowed (the method then returns
# nil); every other exception propagates to the caller.
def graph(&block)
  r_instance = RinRuby.new

  begin
    yield r_instance
  rescue RuntimeError => e
    raise unless e.message == "Unsupported data type on R's end"
  ensure
    # Close in ensure so a failing block does not leave the session open.
    r_instance.close
  end
end
12
+
13
# Draws one or more data series on a shared set of axes through an R session.
#
# data     - Array of hashes shaped like
#            { data: [[x_0, y_0], ..., [x_n, y_n]], legend: "Line 1" }.
#            :legend is optional and defaults to "Line <position>".
# title    - Main title; "Line Graph" is used when nil.
# type     - Value interpolated into the `type` argument of R's lines().
# x_label, y_label - Axis labels, quoted through expressionify.
# legend   - Position keyword handed to R's legend(); false/nil skips it.
# filename - When truthy, the plot goes to a pdf() device opened on the name
#            returned by writing_file? (".pdf" appended when missing) and the
#            device is closed at the end. Otherwise a quartz() device titled
#            "Graph" is opened.
#
# The axis limits are the floor/ceil of the extreme x and y values.
def overlay(data, title: nil, type: ?l, x_label: "Independent", y_label: "Dependent", legend: "topleft", filename: false)
  x_points = data.map { |series| series[:data].map(&:first) }
  y_points = data.map { |series| series[:data].map(&:last) }
  x_min, x_max = x_points.map(&:min).min.floor, x_points.map(&:max).max.ceil
  y_min, y_max = y_points.map(&:min).min.floor, y_points.map(&:max).max.ceil

  # writing_file? returns the normalised PDF name (or a falsy value), so use
  # its result as the path instead of the raw keyword argument.
  pdf_path = writing_file?(filename)

  graph do |r|
    r.eval("%s('%s', 6, 6)" % (pdf_path ? ["pdf", pdf_path] : ["quartz", "Graph"]))

    r.assign("legend.titles", data.each_with_index.map { |series, index| series[:legend] || "Line #{index + 1}" })
    r.eval("line.colors <- rainbow(%d)" % data.size)
    r.eval("plot(0, 0, type = 'n', cex = .75, cex.axis = .9, xlab = '', ylab = '', xlim = c(%d, %d), ylim = c(%d, %d))" % [
      x_min, x_max, y_min, y_max
    ])

    data.each_index do |index|
      r.assign("line_graph.x.%d" % index, x_points[index])
      r.assign("line_graph.y.%d" % index, y_points[index])

      r.eval <<-STR
        lines(
          line_graph.x.#{index},
          line_graph.y.#{index},
          col = line.colors[#{index + 1}],
          type = "#{type}",
          pch = #{index}
        )
      STR
    end

    r.eval <<-STR
      title(
        xlab = #{expressionify(x_label)},
        ylab = #{expressionify(y_label)},
        main = #{expressionify(title || "Line Graph")},
        cex.main = .9,
        cex.lab = .9
      )
    STR

    if legend
      # Series are drawn with pch 0..(n - 1), so the legend uses that range.
      r.eval <<-STR
        legend(
          "#{legend}",
          legend.titles,
          bty = "o",
          bg = rgb(1, 1, 1, .5, 1),
          col = line.colors,
          lty = rep(1, #{data.size}),
          pch = 0:#{data.size - 1},
          cex = .6
        )
      STR
    end

    r.eval("dev.off()") if pdf_path
  end
end
76
+
77
# Plots a single series by wrapping it as a one-element overlay with the
# legend switched off. All keyword arguments are forwarded to overlay.
def line_graph(data, title: nil, type: ?l, x_label: "Independent", y_label: "Dependent", filename: false)
  single_series = [{ data: data }]

  overlay(
    single_series,
    title: title,
    type: type,
    x_label: x_label,
    y_label: y_label,
    legend: false,
    filename: filename
  )
end
80
+
81
# Plots a single series as points (type "p") by delegating to line_graph.
# The title falls back to "Scatterplot" when none is given.
def scatterplot(data, title: nil, x_label: "Independent", y_label: "Dependent", filename: false)
  heading = title || "Scatterplot"

  line_graph(
    data,
    title: heading,
    type: ?p,
    x_label: x_label,
    y_label: y_label,
    filename: filename
  )
end
84
+
85
# Plots a single ROC curve with its AUC appended to the title.
#
# data     - Passed to ROC.auc and ROC.curve_points, e.g.
#            [[-0.894, 1.0], [-0.950, 1.0], [0.516, -1.0], ..., [0.740, -1.0]]
# title    - Optional prefix; the title becomes "<title> (AUC: x.xxxx)" or
#            just "AUC: x.xxxx".
# baseline - When true (the default) the diagonal from (0, 0) to (1, 1) is
#            drawn as a second series; pass false to omit it.
# filename - Forwarded to overlay.
def roc(data, title: nil, baseline: true, filename: false)
  auc = ROC.auc(data)
  title_with_auc = title ? "%s (AUC: %.4f)" % [title, auc] : "AUC: %.4f" % auc

  curves = [{ data: ROC.curve_points(data) }]
  # Honour the keyword; previously the diagonal was drawn unconditionally.
  curves << { data: [[0, 0], [1, 1]] } if baseline

  overlay(
    curves,
    title: title_with_auc,
    x_label: "False positive rate",
    y_label: "True positive rate",
    legend: false,
    filename: filename
  )
end
98
+
99
# Overlays several ROC curves on one plot with the legend at "bottomright".
#
# data          - [{ data: [[score, label], ...], legend: "ROC 1" }, ...];
#                 each :data value is handed to ROC.curve_points (and to
#                 ROC.auc when auc_in_legend is set).
# auc_in_legend - When true each legend entry becomes "<legend> (AUC: x.xxxx)",
#                 or "AUC: x.xxxx" if the series has no :legend.
# The input hashes are not modified; merged copies are plotted.
def roc_overlay(data, title: nil, auc_in_legend: true, filename: false)
  curves = data.map do |series|
    replacement = { data: ROC.curve_points(series[:data]) }

    if auc_in_legend
      area = ROC.auc(series[:data])
      label = series[:legend]
      replacement[:legend] = label ? "%s (AUC: %.4f)" % [label, area] : "AUC: %.4f" % area
    end

    series.merge(replacement)
  end

  overlay(
    curves,
    title: title,
    x_label: "False positive rate",
    y_label: "True positive rate",
    legend: "bottomright",
    filename: filename
  )
end
124
+
125
# Draws a histogram of `data` through an R session.
#
# data     - Collection of numbers (needs min/max and must be assignable
#            into R).
# title    - Main title; "Histogram" when nil.
# x_label  - X axis label, quoted through expressionify.
# num_bins - When truthy it is passed to hist() as `breaks` unchanged.
# bin_size - Otherwise breaks are generated every bin_size units, offset by
#            half a bin from the floor/ceil of the padded data range.
# x_arrow  - When truthy a dashed vertical line is drawn at this x value.
# relative - true sets `freq = F` in hist(), false sets `freq = T`.
# filename - When truthy, the plot goes to a pdf() device opened on the name
#            returned by writing_file? (".pdf" appended when missing) and the
#            device is closed at the end. Otherwise a quartz() device titled
#            "Histogram" is opened.
def histogram(data, title: nil, x_label: "Bins", num_bins: false, bin_size: 1, x_arrow: false, relative: false, filename: false)
  half   = bin_size / 2.0
  lower  = (data.min - half).floor
  upper  = (data.max + half).ceil
  breaks = num_bins || (lower + half).step(upper + half, bin_size).to_a

  # writing_file? returns the normalised PDF name (or a falsy value), so use
  # its result as the path instead of the raw keyword argument.
  pdf_path = writing_file?(filename)

  graph do |r|
    r.assign("histogram.data", data)
    r.assign("histogram.breaks", breaks)

    r.eval("%s('%s', 6, 6)" % (pdf_path ? ["pdf", pdf_path] : ["quartz", "Histogram"]))

    r.eval <<-STR
      hist(
        histogram.data,
        breaks = histogram.breaks,
        xlab = #{expressionify(x_label)},
        main = #{expressionify(title || "Histogram")},
        freq = #{relative ? 'F' : 'T'},
        cex.main = 0.9,
        cex.lab = 0.9,
        cex.axis = 0.9
      )
    STR

    r.eval("abline(v = #{x_arrow}, lty = 'dashed')") if x_arrow

    r.eval("dev.off()") if pdf_path
  end
end
157
+
158
# Renders a heatmap of a sparse matrix given as parallel coordinate arrays.
#
# x, y, z    - Parallel arrays passed to R's sparseMatrix as i, j and x with
#              index1 = F. The caller's arrays are never modified.
# title      - Main title; "Matrix Heatmap" when nil.
# x_label, y_label - Axis labels (" (1-indexed)" is appended to each).
# filename   - Read by generate_graph to choose between pdf() and quartz().
# num_colors - Number of heat.colors() used for the image.
#
# When the largest x and y indices differ, one extra zero-valued entry is
# added at (max, max) before the matrix is built.
# If the R "Matrix" package is not installed a message is printed and
# nothing is drawn.
def matrix_heatmap(x, y, z, title: nil, x_label: "Column index", y_label: "Row index", filename: false, num_colors: 64)
  # NOTE: generate_graph looks up the locals `r` and `filename` through the
  # binding of the block passed to it, so neither name may be renamed here.
  graph do |r|
    unless r.pull("ifelse('Matrix' %in% rownames(installed.packages()), 1, -1)") > 0
      puts "Please install the Matrix package for R before using this function."
      next
    end

    forced_square = x.max != y.max

    if forced_square
      corner = [x.max, y.max].max
      # Build new arrays rather than appending to the caller's arguments.
      x, y, z = x + [corner], y + [corner], z + [0]
    end

    r.assign("matrix.i", x)
    r.assign("matrix.j", y)
    r.assign("matrix.x", z)
    r.eval <<-STR
      require("Matrix")
      matrix.data <- sparseMatrix(
        i = matrix.i,
        j = matrix.j,
        x = matrix.x,
        index1 = F
      )
    STR

    generate_graph("Heatmap") do
      <<-STR
        filtered.values <- Filter(function(i) { is.finite(i) & i != 0 }, matrix.x)
        print(apply(as.matrix(matrix.data), 2, rev))
        print(c(sort(filtered.values)[2], max(filtered.values)))

        image(
          x = 1:max(c(dim(matrix.data)[[1]], dim(matrix.data)[[2]])),
          y = 1:max(c(dim(matrix.data)[[1]], dim(matrix.data)[[2]])),
          z = as.matrix(matrix.data),
          col = rev(heat.colors(#{num_colors})),
          zlim = #{forced_square ? "c(sort(filtered.values)[2], max(filtered.values))" : "c(min(filtered.values), max(filtered.values))"},
          xlab = "#{x_label} (1-indexed)",
          ylab = "#{y_label} (1-indexed)"
        )
        title(#{expressionify(title || "Matrix Heatmap")})
      STR
    end
  end
end
203
+
204
+ private
205
+
206
# Opens a graphics device, evaluates the R code returned by the block, and
# closes the device again when writing to a file.
#
# window_title - Title used for the quartz() window when no file is written.
# block        - Must return a String of R code. The R session and target
#                file are NOT passed in: they are looked up as the local
#                variables `r` and `filename` in the block's own binding, so
#                both names must exist where the block is defined.
#
# When `filename` is truthy, a pdf() device is opened on the name returned by
# writing_file? (".pdf" appended when missing) and closed with dev.off().
def generate_graph(window_title = "ViennaRNA Graph in R", &block)
  r, filename = block.binding.eval("[r, filename]")
  pdf_path = writing_file?(filename)

  r.eval("%s('%s', 6, 6)" % (pdf_path ? ["pdf", pdf_path] : ["quartz", window_title]))

  r.eval(yield)

  r.eval("dev.off()") if pdf_path
end
218
+
219
# Returns the PDF file name to write to, or the falsy input unchanged.
#
# A truthy filename is returned as-is when it already ends with ".pdf";
# otherwise a new string with ".pdf" appended is returned. The argument
# itself is never modified.
def writing_file?(filename)
  if filename
    filename.end_with?(".pdf") ? filename : filename + ".pdf"
  else
    filename
  end
end
222
+
223
# Prepares a label for interpolation into R source.
#
# Strings beginning with "expression" or "paste" are returned untouched so
# they are evaluated as R code; anything else is returned via String#inspect,
# i.e. as a double-quoted, escaped literal.
def expressionify(string)
  return string if string.start_with?("expression", "paste")

  string.inspect
end
226
+ end
227
+ end
228
+ end
229
+ end
@@ -0,0 +1,73 @@
1
+ module Wrnap
2
+ module Package
3
# Resolves a package name to whatever const_missing returns for its
# camelized form.
#
# Raises ArgumentError when const_missing returns a falsy value.
# NOTE(review): this calls const_missing directly, so the result depends on
# how const_missing behaves in the host environment - confirm.
def self.lookup(package_name)
  resolved = const_missing(package_name.to_s.camelize)
  return resolved if resolved

  raise ArgumentError, "#{package_name} can't be resolved as an executable"
end
6
+
7
# Shared foundation for the package wrappers in this namespace. Subclasses
# adjust the class attributes declared below; additional behaviour is mixed in
# from Wrnap::Global::RunExtensions and Wrnap::Global::ChainExtensions.
class Base
  include Wrnap::Global::RunExtensions
  include Wrnap::Global::ChainExtensions

  # Either a String or a callable; the default callable builds
  # "RNA" + the underscored, un-namespaced class name of the given context.
  class_attribute :executable_name
  self.executable_name = ->(context) { "RNA#{context.class.name.split('::').last.underscore}" }

  class_attribute :call_with
  self.call_with = [:seq]

  class_attribute :default_flags
  self.default_flags = {}

  class_attribute :quote_flag_params
  self.quote_flag_params = []

  class_attribute :chains_from
  self.chains_from = Object

  class << self
    # Builds an instance around output that was captured earlier.
    #
    # data   - Anything accepted by #initialize.
    # output - Path of an existing file (its chomped content becomes the
    #          response) or the raw response text itself.
    #
    # post_process is invoked when the instance defines it.
    def bootstrap(data: nil, output: "")
      new(data).tap do |package|
        raw_response = File.exist?(output) ? File.read(output).chomp : output
        package.instance_variable_set(:@response, raw_response)
        package.post_process if package.respond_to?(:post_process)
      end
    end
  end

  attr_reader :data, :flags, :response, :runtime

  # data     - An Rna object, one to three Strings, a Hash, an Array or nil;
  #            a non-Array value is wrapped in an Array first and the list of
  #            element classes decides how the RNA is built.
  # chaining - When true, data is handed to transform_for_chaining instead.
  #
  # Raises TypeError for any other combination of argument classes.
  def initialize(data, chaining: false)
    unless chaining
      data = [data] unless data.is_a?(Array)

      @data = case data.map(&:class)
      when [Wrnap::Global::Rna]             then data.first
      when *(1..3).map { |i| [String] * i } then RNA.from_string(*data)
      when [Hash]                           then RNA.from_hash(*data)
      when [Array]                          then RNA.from_array(*data)
      when [NilClass]                       then Wrnap::Global::Rna.placeholder
      else raise TypeError.new("Unsupported Wrnap::Global::Rna#initialize format: #{data}")
      end
    else
      @data = transform_for_chaining(data)
    end
  end

  # Returns the YAML dump of this object.
  def serialize
    YAML.dump(self)
  end

  # Forwards the block to the class-level debugger.
  def debugger(&block)
    self.class.debugger(&block)
  end

  # Summary of class, runtime, data, flags and any additional instance
  # variable names. Objects built through bootstrap have no runtime, so a
  # missing runtime is shown as 0.00 sec instead of raising.
  def inspect
    elapsed = runtime ? runtime.real : 0.0

    "#<%s (%.2f sec): data: %s, flags: %s, vars: %s>" % [
      self.class.name,
      elapsed,
      data,
      flags,
      (instance_variables - %i|@data @flags @response @runtime|).map(&:to_s).sort.join(", ")
    ]
  end
end
72
+ end
73
+ end
@@ -0,0 +1,81 @@
1
+ module Wrnap
2
+ module Package
3
+ class EnergyGrid2d < Base
4
+ include Enumerable
5
+
6
# Hook run for every new subclass: prepends EnergyGrid2dWrapper so the
# wrapper's #distribution runs in front of the subclass's own.
# NOTE(review): the hook does not call super - confirm no ancestor relies on
# its own `inherited` callback.
def self.inherited(subclass)
  subclass.send(:prepend, EnergyGrid2dWrapper)
end
9
+
10
# Prepended onto subclasses: turns the raw rows returned by the subclass's
# #distribution into Row2d objects, keeps only rows with p > 0 and returns
# them sorted.
module EnergyGrid2dWrapper
  def distribution
    positive_rows = super.each_with_object([]) do |fields, kept|
      row = Row2d.new(*fields)
      kept << row if row.p > 0
    end

    positive_rows.sort
  end
end
15
+
16
# One cell of a 2D energy grid: integer coordinates (i, j) plus two
# BigDecimal values, p and ensemble.
class Row2d
  attr_reader :i, :j, :p, :ensemble

  # i, j        - Converted with to_i.
  # p, ensemble - Converted through their to_s into BigDecimal. The
  #               Kernel#BigDecimal conversion is used because
  #               BigDecimal.new no longer exists in current bigdecimal
  #               releases.
  def initialize(i, j, p, ensemble)
    @i, @j = i.to_i, j.to_i
    @p, @ensemble = BigDecimal(p.to_s), BigDecimal(ensemble.to_s)
  end

  # Returns [i, j].
  def position
    [i, j]
  end

  # Orders rows by i, then by j.
  def <=>(other_row)
    position <=> other_row.position
  end

  # Returns "i,j,value" with value printed to 8 decimals; energy_term names
  # the instance variable to print (:p by default).
  def to_csv(energy_term: :p)
    "%d,%d,%.8f" % [i, j, instance_variable_get(:"@#{energy_term}")]
  end

  def inspect
    "#<Row2d (%d, %d), p: %s, ensemble: %s>" % [i, j, p, ensemble]
  end
end
39
+
40
# Returns one sorted row list per grid, all covering the same set of points.
#
# Every point present in any grid (see set_of_points) but missing from a
# given grid is filled in with Row2d.new(i, j, 0, Float::INFINITY).
# Each grid's distribution is computed once and reused for both the rows and
# the list of positions already covered.
def self.aligned_distributions(*energy_grids)
  point_set = set_of_points(*energy_grids)

  energy_grids.map do |grid|
    rows    = grid.distribution
    missing = point_set - rows.map(&:position)

    (rows + missing.map { |i, j| Row2d.new(i, j, 0, Float::INFINITY) }).sort
  end
end
47
+
48
# Returns the sorted, de-duplicated list of positions found in any of the
# given grids.
def self.set_of_points(*energy_grids)
  all_positions = energy_grids.flat_map { |grid| grid.map(&:position) }
  all_positions.uniq.sort
end
51
+
52
# Iterates over the rows of #distribution, forwarding the given block.
def each(&block)
  rows = distribution
  rows.each(&block)
end
55
+
56
# Plots the distribution as a heatmap of log(p) via
# Graphing::R.matrix_heatmap, using each row's i and j as coordinates.
#
# num_colors - Forwarded to matrix_heatmap.
#
# The distribution is computed once and reused for all three coordinate
# arrays.
def quick_plot(num_colors: 8)
  rows = distribution

  Graphing::R.matrix_heatmap(
    rows.map(&:i),
    rows.map(&:j),
    rows.map { |row| Math.log(row.p) },
    title:      "#{self.class.name} Matrix Heatmap",
    x_label:    "Distance from structure 2",
    y_label:    "Distance from structure 1",
    num_colors: num_colors
  )
end
67
+
68
# Returns the rows as CSV text, one row per line with a trailing newline.
# energy_term is forwarded to each row's own to_csv.
def to_csv(energy_term: :p)
  csv_lines = map { |row| row.to_csv(energy_term: energy_term) }
  csv_lines.join("\n") + "\n"
end
71
+
72
# Writes the CSV produced by to_csv to `filename`, replacing any existing
# content, and returns the value of the write call.
def to_csv!(filename, energy_term: :p)
  csv_text = to_csv(energy_term: energy_term)
  File.write(filename, csv_text)
end
75
+
76
# Short description: the class name followed by the inspected data.
def inspect
  "#<%s on %s>" % [self.class.name, data.inspect]
end
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,13 @@
1
module Wrnap
  module Package
    # Package wrapper that is called with a sequence and a structure and
    # exposes the parsed minimum free energy as #mfe.
    class Eval < Base
      self.call_with = [:seq, :str]

      attr_reader :mfe

      # Extracts the MFE from the response with the RNAfold MFE parser.
      def post_process
        parsed_mfe = Wrnap::Global::Parser.rnafold_mfe(response)
        @mfe = parsed_mfe
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # Maybe add something like flagsets so that common option groups can be combined together.
2
+ # Also, add a rerun feature.
3
+
4
module Wrnap
  module Package
    # Wrapper for the "FFTmfpt" executable; the response is read as a single
    # floating point number exposed as #mfpt.
    class FftMfpt < Base
      self.executable_name = "FFTmfpt"

      attr_reader :mfpt

      # Builds the command line: executable, stringified flags, then the
      # path returned by data.temp_fa_file!.
      def run_command(flags)
        "#{exec_name} #{stringify_flags(flags)} #{data.temp_fa_file!}"
      end

      # Converts the raw response with String#to_f.
      def post_process
        @mfpt = response.to_f
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ module Wrnap
2
+ module Package
3
+ class Fftbor < Xbor
4
# Returns the scaling factor reported in the response as a BigDecimal.
#
# The first line starting with "Scaling factor" and ending in
# ": <digits>.<digits>" is used. Raises TypeError when no such line exists.
# Uses Kernel#BigDecimal because BigDecimal.new no longer exists in current
# bigdecimal releases, and reads the capture from the matched line itself
# rather than from the $1 global.
def partition
  pattern = /^Scaling factor.*:\s+(\d+\.\d+)/
  matching_line = response.split(/\n/).find { |line| line =~ pattern }

  BigDecimal(matching_line && matching_line[pattern, 1])
end
8
+
9
# Returns the integer from the first "Number of structures: <n>" line of the
# response, or 0 when no line matches.
def total_count
  pattern = /^Number of structures: (\d+)/
  matching_line = response.split(/\n/).find { |line| line =~ pattern }

  matching_line ? matching_line[pattern, 1].to_i : 0
end
13
+
14
# Returns the second column of every parsed response row as a BigDecimal.
#
# Rows come from the class-level parse method. Kernel#BigDecimal is used
# because BigDecimal.new no longer exists in current bigdecimal releases.
def distribution
  self.class.parse(response).map { |row| BigDecimal(row[1]) }
end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,22 @@
1
module Wrnap
  module Package
    # Wrapper for the "FFTbor2D" executable.
    class Fftbor2d < EnergyGrid2d
      self.executable_name = "FFTbor2D"

      # Adds `S: :empty` unless the caller already supplied an M or S flag.
      self.default_flags = lambda do |_, flags|
        flags.keys.any? { |key| %i|M S|.include?(key) } ? {} : { S: :empty }
      end

      # Builds the command line: executable, stringified flags, then the
      # path returned by data.temp_fa_file!. A debug message is emitted
      # first through Wrnap.debugger.
      def run_command(flags)
        Wrnap.debugger { "Running #{exec_name} on #{data.inspect}" }

        "#{exec_name} #{stringify_flags(flags)} #{data.temp_fa_file!}"
      end

      # Splits the response into lines and each line into its tab-separated
      # fields.
      def distribution
        response.split(/\n/).map { |row| row.split(/\t/) }
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Wrnap
  module Package
    # Fftbor variant that runs the "FFThairpin" executable; everything else
    # is inherited unchanged.
    class Ffthairpin < Fftbor
      self.executable_name = "FFThairpin"
    end
  end
end
@@ -0,0 +1,7 @@
1
module Wrnap
  module Package
    # Fftbor variant that runs the "FFTmultiloop" executable; everything else
    # is inherited unchanged.
    class Fftmultiloop < Fftbor
      self.executable_name = "FFTmultiloop"
    end
  end
end
@@ -0,0 +1,25 @@
1
module Wrnap
  module Package
    # Wrapper whose default flags are { "-noPS" => :empty } and whose
    # response is parsed with the RNAfold parsers.
    class Fold < Base
      self.default_flags = { "-noPS" => :empty }

      attr_reader :mfe_rna, :structure, :mfe, :ensemble_energy

      # Parses the MFE structure and energy out of the response.
      #
      # Raises a RuntimeError showing both strings when the parsed structure
      # and the input sequence differ in length. The ensemble energy is only
      # parsed when the :p flag equals 0.
      def post_process
        folded = Wrnap::Global::Parser.rnafold_mfe_structure(response)

        if data.seq.length != folded.length
          raise "Sequence: '#{data.seq}'\nStructure: '#{folded}'"
        end

        # Compute everything before storing so a parser failure leaves no
        # partially assigned state.
        folded_rna = RNA.from_string(data.seq, folded)
        energy     = Wrnap::Global::Parser.rnafold_mfe(response)
        @mfe_rna, @structure, @mfe = folded_rna, folded, energy

        return unless flags[:p] == 0

        @ensemble_energy = Wrnap::Global::Parser.rnafold_ensemble_energy(response)
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Wrnap
  module Package
    # Wrapper that parses its response into a lookup table exposed as
    # #specific_heats.
    class Heat < Base
      attr_reader :specific_heats

      # Every response line is split on whitespace and converted to floats;
      # the first value of a line becomes the key and the second its value.
      def post_process
        rows = response.split(/\n/).map { |line| line.split(/\s+/).map(&:to_f) }

        @specific_heats = rows.each_with_object({}) do |(temp, specific_heat), heats|
          heats[temp] = specific_heat
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Wrnap
  module Package
    # Wrapper for the "kinwalker" executable; the trajectory section of the
    # response is parsed into Node objects.
    class Kinwalker < Base
      self.executable_name = "kinwalker"
      attr_reader :nodes

      # Takes the text after the last "TRAJECTORY" marker, drops empty lines
      # and the final remaining line, and builds one Node per line from its
      # whitespace-separated fields.
      def post_process
        trajectory = response.split("TRAJECTORY").last
        rows       = trajectory.split("\n").reject(&:empty?)

        @nodes = rows[0..-2].map { |row| Node.new(*row.split(/\s+/)) }
      end

      # Time recorded on the last trajectory node.
      def mfpt
        nodes.last.time
      end

      # One trajectory line: the structure string plus numeric columns
      # (floats, except `transcribed`, which is an integer).
      class Node
        attr_reader :structure, :energy, :time, :barrier, :energy_barrier, :transcribed

        def initialize(structure, energy, time, barrier, energy_barrier, transcribed)
          @structure      = structure
          @energy         = energy.to_f
          @time           = time.to_f
          @barrier        = barrier.to_f
          @energy_barrier = energy_barrier.to_f
          @transcribed    = transcribed.to_i
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # Maybe add something like flagsets so that common option groups can be combined together.
2
+ # Also, add a rerun feature.
3
+
4
module Wrnap
  module Package
    # Wrapper that chains from an EnergyGrid2d package: the previous
    # package's grid is written to a temporary CSV file which is passed to
    # the executable, and the response is read as a float exposed as #mfpt.
    class Mfpt < Base
      self.chains_from   = Wrnap::Package::EnergyGrid2d
      self.default_flags = ->(context, flags) { { X: :empty, H: :empty, N: context.data.seq.length, D: context.data.bp_distance, Q: "1e-8" } }
      # These flags aren't well setup for alternative options at the moment.

      attr_reader :mfpt

      # Returns the previous package's data object, extended in place with
      # an `energy_grid_csv` singleton method that writes the previous
      # package's CSV to a fresh temp file and returns its path.
      def transform_for_chaining(previous_package)
        previous_package.data.tap do |data|
          data.instance_variable_set(:@previous_package, previous_package)

          data.define_singleton_method(:energy_grid_csv) do
            # Keep the Tempfile referenced on the data object: an
            # unreferenced Tempfile is unlinked when it is garbage
            # collected, which could delete the CSV before it is read.
            @energy_grid_tempfile = Tempfile.new("rna")

            @energy_grid_tempfile.path.tap do |csv_path|
              @previous_package.to_csv!(csv_path)
            end
          end
        end
      end

      # Builds the command line: executable, stringified flags, then the
      # path of the freshly written energy grid CSV.
      def run_command(flags)
        "%s %s %s" % [
          exec_name,
          stringify_flags(flags),
          data.energy_grid_csv
        ]
      end

      # Converts the raw response with String#to_f.
      def post_process
        @mfpt = response.to_f
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Wrnap
  module Package
    # Wrapper called with a comment, sequence and structure; its input file
    # is piped into the executable through `cat`.
    class Plot < Base
      self.call_with     = [:comment, :seq, :str]
      self.default_flags = { t: 0, o: "svg" }

      # Builds "cat <file> | <executable> <flags>", where the file is the
      # path returned by data.temp_fa_file!.
      def run_command(flags)
        "cat #{data.temp_fa_file!} | #{exec_name} #{stringify_flags(flags)}"
      end
    end
  end
end