wrnap 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,229 @@
1
+ module Wrnap
2
+ module Graphing
3
+ module R
4
+ class << self
5
# Opens a fresh RinRuby session, yields it to the block, and always closes it.
#
# A RuntimeError whose message is exactly "Unsupported data type on R's end"
# is swallowed (the method then returns nil); every other exception propagates.
#
# @yield [r] the newly created RinRuby instance
# @return [Object, nil] the block's value, or nil when the error above was swallowed
def graph(&block)
  r_instance = RinRuby.new
  yield r_instance
rescue RuntimeError => e
  raise unless e.message == "Unsupported data type on R's end"
ensure
  # Close on every exit path; previously a raising block left the session open.
  r_instance.close if r_instance
end
12
+
13
# Draws one or more data series on a shared set of axes through R.
#
# @param data [Array<Hash>] series to plot, shaped like
#   [{ data: [[x_0, y_0], ..., [x_n, y_n]], legend: "Line 1" }, ...];
#   a series without :legend is labelled "Line <n>" (1-based)
# @param title [String, nil] main title; "Line Graph" is used when nil
# @param type [String] value interpolated into the `type` argument of R's lines()
# @param x_label [String] x-axis label
# @param y_label [String] y-axis label
# @param legend [String, false] position keyword passed to R's legend(); falsy skips the legend
# @param filename [String, false] when truthy the plot goes to R's pdf() device,
#   otherwise a quartz() device titled "Graph" is opened
# @return [Object, nil] whatever the surrounding `graph` call returns
def overlay(data, title: nil, type: ?l, x_label: "Independent", y_label: "Dependent", legend: "topleft", filename: false)
  x_points = data.map { |series| series[:data].map(&:first) }
  y_points = data.map { |series| series[:data].map(&:last) }

  # Axis limits are pushed outwards to the nearest integers.
  x_min, x_max = x_points.map(&:min).min.floor, x_points.map(&:max).max.ceil
  y_min, y_max = y_points.map(&:min).min.floor, y_points.map(&:max).max.ceil

  # writing_file? returns the target path with ".pdf" appended when missing;
  # use that value instead of the raw filename so the extension is applied.
  pdf_path = writing_file?(filename)

  graph do |r|
    r.eval("%s('%s', 6, 6)" % (pdf_path ? ["pdf", pdf_path] : ["quartz", "Graph"]))

    r.assign("legend.titles", data.each_with_index.map { |series, index| series[:legend] || "Line #{index + 1}" })
    r.eval("line.colors <- rainbow(%d)" % data.size)
    r.eval("plot(0, 0, type = 'n', cex = .75, cex.axis = .9, xlab = '', ylab = '', xlim = c(%d, %d), ylim = c(%d, %d))" % [
      x_min, x_max, y_min, y_max
    ])

    data.each_index do |index|
      r.assign("line_graph.x.%d" % index, x_points[index])
      r.assign("line_graph.y.%d" % index, y_points[index])

      r.eval <<-STR
        lines(
          line_graph.x.#{index},
          line_graph.y.#{index},
          col = line.colors[#{index + 1}],
          type = "#{type}",
          pch = #{index}
        )
      STR
    end

    r.eval <<-STR
      title(
        xlab = #{expressionify(x_label)},
        ylab = #{expressionify(y_label)},
        main = #{expressionify(title || "Line Graph")},
        cex.main = .9,
        cex.lab = .9
      )
    STR

    if legend
      # Series i is drawn with pch = i (0-based), so the legend needs 0..size-1.
      r.eval <<-STR
        legend(
          "#{legend}",
          legend.titles,
          bty = "o",
          bg = rgb(1, 1, 1, .5, 1),
          col = line.colors,
          lty = rep(1, #{data.size}),
          pch = 0:#{data.size - 1},
          cex = .6
        )
      STR
    end

    r.eval("dev.off()") if pdf_path
  end
end
76
+
77
# Plots a single series by delegating to `overlay` with the legend turned off.
#
# @param data [Array<Array>] points shaped like [[x_0, y_0], ..., [x_n, y_n]]
# @param title [String, nil] forwarded to `overlay`
# @param type [String] forwarded to `overlay`
# @param x_label [String] forwarded to `overlay`
# @param y_label [String] forwarded to `overlay`
# @param filename [String, false] forwarded to `overlay`
# @return [Object, nil] the result of `overlay`
def line_graph(data, title: nil, type: ?l, x_label: "Independent", y_label: "Dependent", filename: false)
  single_series = [{ data: data }]
  options = { title: title, type: type, x_label: x_label, y_label: y_label, legend: false, filename: filename }

  overlay(single_series, **options)
end
80
+
81
# Plots a single series as points (R type "p") by delegating to `line_graph`.
#
# @param data [Array<Array>] points shaped like [[x_0, y_0], ..., [x_n, y_n]]
# @param title [String, nil] plot title; "Scatterplot" is used when nil
# @param x_label [String] forwarded to `line_graph`
# @param y_label [String] forwarded to `line_graph`
# @param filename [String, false] forwarded to `line_graph`
# @return [Object, nil] the result of `line_graph`
def scatterplot(data, title: nil, x_label: "Independent", y_label: "Dependent", filename: false)
  plot_title = title || "Scatterplot"

  line_graph(data, title: plot_title, type: ?p, x_label: x_label, y_label: y_label, filename: filename)
end
84
+
85
# Plots a ROC curve for scored, labelled data, with the AUC shown in the title.
#
# @param data [Array<Array>] pairs like
#   [[-0.894, 1.0], [-0.950, 1.0], [0.516, -1.0], ..., [0.740, -1.0]];
#   handed unchanged to ROC.auc and ROC.curve_points
# @param title [String, nil] prefix for the title; the title is
#   "<title> (AUC: x.xxxx)" or just "AUC: x.xxxx" when nil
# @param baseline [Boolean] when true (default) the diagonal from (0, 0) to
#   (1, 1) is drawn as a second series; previously this flag was ignored and
#   the diagonal was always drawn
# @param filename [String, false] forwarded to `overlay`
# @return [Object, nil] the result of `overlay`
def roc(data, title: nil, baseline: true, filename: false)
  auc = ROC.auc(data)
  title_with_auc = title ? "%s (AUC: %.4f)" % [title, auc] : "AUC: %.4f" % auc

  series = [{ data: ROC.curve_points(data) }]
  series << { data: [[0, 0], [1, 1]] } if baseline

  overlay(
    series,
    title: title_with_auc,
    x_label: "False positive rate",
    y_label: "True positive rate",
    legend: false,
    filename: filename
  )
end
98
+
99
# Overlays several ROC curves, optionally annotating each legend entry with its AUC.
#
# @param data [Array<Hash>] one hash per curve, shaped like
#   [{ data: [[-0.894, 1.0], [0.516, -1.0], ...], legend: "ROC 1" }, ...]
# @param title [String, nil] forwarded to `overlay`
# @param auc_in_legend [Boolean] when true each legend becomes
#   "<legend> (AUC: x.xxxx)", or "AUC: x.xxxx" if the hash has no :legend
# @param filename [String, false] forwarded to `overlay`
# @return [Object, nil] the result of `overlay`
def roc_overlay(data, title: nil, auc_in_legend: true, filename: false)
  # Input hashes are merged into new ones; the originals are not modified.
  formatted_data = data.map do |series|
    replacements = { data: ROC.curve_points(series[:data]) }

    if auc_in_legend
      auc = ROC.auc(series[:data])
      replacements[:legend] = series[:legend] ? "%s (AUC: %.4f)" % [series[:legend], auc] : "AUC: %.4f" % auc
    end

    series.merge(replacements)
  end

  overlay(
    formatted_data,
    title: title,
    x_label: "False positive rate",
    y_label: "True positive rate",
    legend: "bottomright",
    filename: filename
  )
end
124
+
125
# Draws a histogram of numeric data through R's hist().
#
# @param data [Array<Numeric>] values to bin
# @param title [String, nil] main title; "Histogram" is used when nil
# @param x_label [String] x-axis label
# @param num_bins [Integer, false] when truthy it is passed as `breaks`
#   verbatim; otherwise explicit break points are computed from `bin_size`
# @param bin_size [Numeric] bin width used when `num_bins` is falsy
# @param x_arrow [Numeric, false] when truthy a dashed vertical line is drawn at this x
# @param relative [Boolean] true sets hist's `freq = F`, false sets `freq = T`
# @param filename [String, false] when truthy the plot goes to R's pdf() device,
#   otherwise a quartz() device titled "Histogram" is opened
# @return [Object, nil] whatever the surrounding `graph` call returns
def histogram(data, title: nil, x_label: "Bins", num_bins: false, bin_size: 1, x_arrow: false, relative: false, filename: false)
  half = bin_size / 2.0
  lower, upper = (data.min - half).floor, (data.max + half).ceil
  # Break points sit half a bin either side of the integer-rounded bounds.
  breaks = num_bins || (lower + half).step(upper + half, bin_size).to_a

  # writing_file? returns the target path with ".pdf" appended when missing;
  # use that value instead of the raw filename so the extension is applied.
  pdf_path = writing_file?(filename)

  graph do |r|
    r.assign("histogram.data", data)
    r.assign("histogram.breaks", breaks)

    r.eval("%s('%s', 6, 6)" % (pdf_path ? ["pdf", pdf_path] : ["quartz", "Histogram"]))

    r.eval <<-STR
      hist(
        histogram.data,
        breaks = histogram.breaks,
        xlab = #{expressionify(x_label)},
        main = #{expressionify(title || "Histogram")},
        freq = #{relative ? 'F' : 'T'},
        cex.main = 0.9,
        cex.lab = 0.9,
        cex.axis = 0.9
      )
    STR

    r.eval("abline(v = #{x_arrow}, lty = 'dashed')") if x_arrow

    r.eval("dev.off()") if pdf_path
  end
end
157
+
158
# Renders sparse (x, y, z) triples as a heatmap using R's Matrix package.
#
# If R does not report the Matrix package as installed, a message is printed
# and nothing is drawn.
#
# @param x [Array<Integer>] 0-based indices passed as `i` to sparseMatrix
# @param y [Array<Integer>] 0-based indices passed as `j` to sparseMatrix
# @param z [Array<Numeric>] cell values, parallel to x and y
# @param title [String, nil] main title; "Matrix Heatmap" is used when nil
# @param x_label [String] x-axis label (suffixed with " (1-indexed)")
# @param y_label [String] y-axis label (suffixed with " (1-indexed)")
# @param filename [String, false] read by `generate_graph` through the block binding
# @param num_colors [Integer] number of heat.colors() shades
# @return [Object, nil] whatever the surrounding `graph` call returns
def matrix_heatmap(x, y, z, title: nil, x_label: "Column index", y_label: "Row index", filename: false, num_colors: 64)
  # NOTE: generate_graph looks up the locals named `r` and `filename` through
  # the binding of the block it receives, so those two names must not change.
  graph do |r|
    if r.pull("ifelse('Matrix' %in% rownames(installed.packages()), 1, -1)") > 0
      rows, columns, values = x, y, z
      forced_square = x.max != y.max

      if forced_square
        # Pad with a zero cell at the far corner so the matrix becomes square.
        # New arrays are built so the caller's x, y and z are left untouched.
        corner = [x.max, y.max].max
        rows, columns, values = x + [corner], y + [corner], z + [0]
      end

      r.assign("matrix.i", rows)
      r.assign("matrix.j", columns)
      r.assign("matrix.x", values)
      r.eval <<-STR
        require("Matrix")
        matrix.data <- sparseMatrix(
          i = matrix.i,
          j = matrix.j,
          x = matrix.x,
          index1 = F
        )
      STR

      generate_graph("Heatmap") do
        # NOTE(review): the two print() calls look like leftover debugging
        # output; kept so the R-side behaviour is unchanged.
        <<-STR
          filtered.values <- Filter(function(i) { is.finite(i) & i != 0 }, matrix.x)
          print(apply(as.matrix(matrix.data), 2, rev))
          print(c(sort(filtered.values)[2], max(filtered.values)))

          image(
            x = 1:max(c(dim(matrix.data)[[1]], dim(matrix.data)[[2]])),
            y = 1:max(c(dim(matrix.data)[[1]], dim(matrix.data)[[2]])),
            z = as.matrix(matrix.data),
            col = rev(heat.colors(#{num_colors})),
            zlim = #{forced_square ? "c(sort(filtered.values)[2], max(filtered.values))" : "c(min(filtered.values), max(filtered.values))"},
            xlab = "#{x_label} (1-indexed)",
            ylab = "#{y_label} (1-indexed)"
          )
          title(#{expressionify(title || "Matrix Heatmap")})
        STR
      end
    else
      puts "Please install the Matrix package for R before using this function."
    end
  end
end
203
+
204
+ private
205
+
206
# Opens an R graphics device, evaluates the R code returned by the block, and
# closes the device again when writing to a file.
#
# The method reads the locals `r` (the R session) and `filename` from the
# binding of the block it is given, so the caller must have both in scope
# under exactly those names.
#
# @param window_title [String] title for the quartz() device when no file is written
# @yieldreturn [String] R source code to evaluate
# @return [Object, nil] result of the final `r.eval`, or nil when no file is written
def generate_graph(window_title = "ViennaRNA Graph in R", &block)
  r, filename = block.binding.eval("[r, filename]")

  # writing_file? returns the target path with ".pdf" appended when missing;
  # use that value instead of the raw filename so the extension is applied.
  pdf_path = writing_file?(filename)

  r.eval("%s('%s', 6, 6)" % (pdf_path ? ["pdf", pdf_path] : ["quartz", window_title]))

  r.eval(yield)

  r.eval("dev.off()") if pdf_path
end
218
+
219
# Tells whether output should go to a file.
#
# @param filename [String, false, nil] requested output path
# @return [String, false, nil] the falsy argument unchanged, otherwise the
#   path with ".pdf" appended unless it already ends with it (always truthy)
def writing_file?(filename)
  return filename unless filename

  filename.end_with?(".pdf") ? filename : "#{filename}.pdf"
end
222
+
223
# Prepares a label for interpolation into R source code.
#
# Labels that are R calls to expression(), paste() or paste0() are passed
# through verbatim; everything else is returned as a double-quoted literal via
# String#inspect. The check requires an opening parenthesis, so plain text
# that merely begins with "expression" or "paste" (e.g. "expression levels")
# is quoted instead of being sent to R as raw code.
#
# @param string [String] label text or an R expression/paste call
# @return [String] R source fragment
def expressionify(string)
  string =~ /\A(?:expression|paste0?)\s*\(/ ? string : string.inspect
end
226
+ end
227
+ end
228
+ end
229
+ end
@@ -0,0 +1,73 @@
1
+ module Wrnap
2
+ module Package
3
# Resolves a package name to whatever `const_missing` returns for its
# camelized form.
#
# @param package_name [#to_s] package identifier, e.g. :fold
# @return [Object] the truthy value returned by `const_missing`
# @raise [ArgumentError] when `const_missing` returns a falsy value
def self.lookup(package_name)
  resolved = const_missing(package_name.to_s.camelize)

  resolved || raise(ArgumentError, "#{package_name} can't be resolved as an executable")
end
6
+
7
# Common superclass for the executable wrappers in Wrnap::Package.
#
# Holds the per-class configuration (executable name, call arguments, default
# flags, chaining source) and normalises the input data into an RNA object.
class Base
  include Wrnap::Global::RunExtensions
  include Wrnap::Global::ChainExtensions

  # Default is a lambda producing "RNA" followed by the underscored last
  # segment of the instance's class name; subclasses in this file also
  # assign plain Strings.
  class_attribute :executable_name
  self.executable_name = ->(context) { "RNA#{context.class.name.split('::').last.underscore}" }

  # NOTE(review): presumably the data attributes handed to the executable — confirm in RunExtensions.
  class_attribute :call_with
  self.call_with = [:seq]

  # A Hash here; subclasses in this file also assign lambdas.
  class_attribute :default_flags
  self.default_flags = {}

  class_attribute :quote_flag_params
  self.quote_flag_params = []

  class_attribute :chains_from
  self.chains_from = Object

  class << self
    # Builds an instance from already-available output instead of running
    # the executable.
    #
    # @param data [Object, nil] anything accepted by #initialize
    # @param output [String] raw output text, or the path of an existing
    #   file whose chomped contents are used instead
    # @return [Base] the new instance, after `post_process` if it defines one
    def bootstrap(data: nil, output: "")
      new(data).tap do |object|
        object.instance_variable_set(:@response, File.exist?(output) ? File.read(output).chomp : output)
        object.post_process if object.respond_to?(:post_process)
      end
    end
  end

  attr_reader :data, :flags, :response, :runtime

  # @param data [Wrnap::Global::Rna, String, Array, Hash, nil] the input; a
  #   non-Array is wrapped in a one-element Array and dispatched on the
  #   classes of its elements (one Rna, one to three Strings, one Hash, one
  #   Array, or nil for a placeholder)
  # @param chaining [Boolean] when true `data` is handed to
  #   `transform_for_chaining` instead of being parsed
  # @raise [TypeError] when the element classes match none of the supported shapes
  def initialize(data, chaining: false)
    if chaining
      @data = transform_for_chaining(data)
    else
      data = [data] unless data.is_a?(Array)

      @data = case data.map(&:class)
      when [Wrnap::Global::Rna]             then data.first
      when *(1..3).map { |i| [String] * i } then RNA.from_string(*data)
      when [Hash]                           then RNA.from_hash(*data)
      when [Array]                          then RNA.from_array(*data)
      when [NilClass]                       then Wrnap::Global::Rna.placeholder
      else raise TypeError, "Unsupported Wrnap::Global::Rna#initialize format: #{data}"
      end
    end
  end

  # @return [String] YAML dump of this object
  def serialize
    YAML.dump(self)
  end

  # Delegates to the class-level `debugger`.
  def debugger(&block)
    self.class.debugger(&block)
  end

  # @return [String] summary with class name, runtime in seconds, data, flags
  #   and the sorted names of any additional instance variables
  def inspect
    "#<%s (%.2f sec): data: %s, flags: %s, vars: %s>" % [
      self.class.name,
      # runtime is nil until it has been set (e.g. right after .new or
      # .bootstrap); report 0.00 instead of raising NoMethodError.
      runtime ? runtime.real : 0.0,
      data,
      flags,
      (instance_variables - %i|@data @flags @response @runtime|).map(&:to_s).sort.join(", ")
    ]
  end
end
72
+ end
73
+ end
@@ -0,0 +1,81 @@
1
+ module Wrnap
2
+ module Package
3
+ class EnergyGrid2d < Base
4
+ include Enumerable
5
+
6
# Hook run when a class inherits from this one: prepends EnergyGrid2dWrapper
# so the wrapper's `distribution` runs ahead of the subclass's own.
#
# @param subclass [Class] the newly created subclass
def self.inherited(subclass)
  subclass.send(:prepend, EnergyGrid2dWrapper)
end
9
+
10
# Prepended to subclasses so their raw `distribution` rows are converted to
# Row2d objects, filtered to those with p > 0, and sorted.
module EnergyGrid2dWrapper
  # @return [Array<Row2d>] sorted rows whose probability is strictly positive
  def distribution
    rows = super.map { |fields| Row2d.new(*fields) }
    positive_rows = rows.select { |row| row.p > 0 }

    positive_rows.sort
  end
end
15
+
16
# One cell of a 2D energy grid: a position (i, j) with a probability and an
# ensemble value, both stored as BigDecimal.
class Row2d
  attr_reader :i, :j, :p, :ensemble

  # @param i [#to_i] first coordinate
  # @param j [#to_i] second coordinate
  # @param p [#to_s] probability, parsed from its string form
  # @param ensemble [#to_s] ensemble value, parsed from its string form
  def initialize(i, j, p, ensemble)
    @i, @j = i.to_i, j.to_i
    # Kernel#BigDecimal replaces BigDecimal.new, which no longer exists in
    # current releases of the bigdecimal library.
    @p, @ensemble = BigDecimal(p.to_s), BigDecimal(ensemble.to_s)
  end

  # @return [Array(Integer, Integer)] the [i, j] coordinates
  def position
    [i, j]
  end

  # Orders rows by i, then by j.
  def <=>(other_row)
    i == other_row.i ? j <=> other_row.j : i <=> other_row.i
  end

  # @param energy_term [Symbol] name of the instance variable to print (:p or :ensemble)
  # @return [String] "i,j,value" with the value printed to 8 decimal places
  def to_csv(energy_term: :p)
    "%d,%d,%.8f" % [i, j, instance_variable_get(:"@#{energy_term}")]
  end

  def inspect
    "#<Row2d (%d, %d), p: %s, ensemble: %s>" % [i, j, p, ensemble]
  end
end
39
+
40
# Pads every grid's distribution so all of them cover the same set of points.
#
# Points present in some grid but missing from another are added to the
# latter as Row2d entries with p = 0 and ensemble = Float::INFINITY.
#
# @param energy_grids [Array] grids responding to `distribution` and `map`
# @return [Array<Array<Row2d>>] one sorted distribution per input grid
def self.aligned_distributions(*energy_grids)
  all_points = set_of_points(*energy_grids)

  energy_grids.map do |grid|
    existing_rows = grid.distribution
    missing_points = all_points - grid.map(&:position)
    padding_rows = missing_points.map { |i, j| Row2d.new(i, j, 0, Float::INFINITY) }

    (existing_rows + padding_rows).sort
  end
end
47
+
48
# Collects the distinct positions that occur in any of the given grids.
#
# @param energy_grids [Array] grids whose elements respond to `position`
# @return [Array] sorted, de-duplicated positions
def self.set_of_points(*energy_grids)
  every_position = energy_grids.flat_map { |grid| grid.map(&:position) }

  every_position.uniq.sort
end
51
+
52
# Iterates over the rows of `distribution`.
#
# @yield [row] each row in turn
# @return [Object] whatever `distribution.each` returns for the given block
def each(&block)
  rows = distribution
  rows.each(&block)
end
55
+
56
# Plots the distribution as a heatmap of log-probabilities.
#
# @param num_colors [Integer] forwarded to Graphing::R.matrix_heatmap
# @return [Object, nil] the result of Graphing::R.matrix_heatmap
def quick_plot(num_colors: 8)
  # Fetch the rows once; the previous code called `distribution` separately
  # for each of the three coordinate lists.
  rows = distribution

  Graphing::R.matrix_heatmap(
    rows.map(&:i),
    rows.map(&:j),
    rows.map { |row| Math.log(row.p) },
    title: "#{self.class.name} Matrix Heatmap",
    x_label: "Distance from structure 2",
    y_label: "Distance from structure 1",
    num_colors: num_colors
  )
end
67
+
68
# Serialises every row as one CSV line.
#
# @param energy_term [Symbol] forwarded to each row's `to_csv`
# @return [String] newline-separated rows followed by a trailing newline
def to_csv(energy_term: :p)
  csv_lines = map { |row| row.to_csv(energy_term: energy_term) }

  "#{csv_lines.join("\n")}\n"
end
71
+
72
# Writes the CSV produced by `to_csv` to a file, replacing existing content.
#
# @param filename [String] path of the file to write
# @param energy_term [Symbol] forwarded to `to_csv`
# @return [Integer] number of bytes written
def to_csv!(filename, energy_term: :p)
  csv_text = to_csv(energy_term: energy_term)

  File.write(filename, csv_text)
end
75
+
76
# @return [String] the class name together with the inspected input data
def inspect
  "#<%s on %s>" % [self.class.name, data.inspect]
end
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,13 @@
1
module Wrnap
  module Package
    # Wrapper for an executable invoked with both sequence and structure;
    # exposes the minimum free energy parsed from its output.
    class Eval < Base
      self.call_with = %i|seq str|

      # @return [Object] value produced by Parser.rnafold_mfe (set by #post_process)
      attr_reader :mfe

      # Parses the MFE out of the raw response.
      def post_process
        parser = Wrnap::Global::Parser
        @mfe = parser.rnafold_mfe(response)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # Maybe add something like flagsets so that common option groups can be combined together.
2
+ # Also, add a rerun feature.
3
+
4
module Wrnap
  module Package
    # Wrapper for the "FFTmfpt" executable; exposes its numeric result as #mfpt.
    class FftMfpt < Base
      self.executable_name = "FFTmfpt"

      # @return [Float] the response converted with String#to_f (set by #post_process)
      attr_reader :mfpt

      # Builds the command line: executable, stringified flags, then the path
      # returned by `data.temp_fa_file!`.
      #
      # @param flags [Hash] flags handed to `stringify_flags`
      # @return [String] the command to run
      def run_command(flags)
        "#{exec_name} #{stringify_flags(flags)} #{data.temp_fa_file!}"
      end

      # Stores the response as a Float.
      def post_process
        @mfpt = response.to_f
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ module Wrnap
2
+ module Package
3
+ class Fftbor < Xbor
4
# Extracts the decimal number from the first response line that starts with
# "Scaling factor".
#
# @return [BigDecimal] the captured number
# @raise [TypeError] when no line matches (BigDecimal(nil))
def partition
  pattern = /^Scaling factor.*:\s+(\d+\.\d+)/
  matching_line = response.split(/\n/).find { |line| line =~ pattern }

  # Kernel#BigDecimal replaces the removed BigDecimal.new, and the capture is
  # read from the matched line itself rather than from the global $1.
  BigDecimal(matching_line && matching_line[pattern, 1])
end
8
+
9
# Reads the integer from the first response line that starts with
# "Number of structures: ".
#
# @return [Integer] the captured count, or 0 when no line matches
def total_count
  pattern = /^Number of structures: (\d+)/
  matching_line = response.split(/\n/).find { |line| line =~ pattern }

  matching_line ? matching_line[pattern, 1].to_i : 0
end
13
+
14
# Converts the second column of every parsed response row to a BigDecimal.
#
# @return [Array<BigDecimal>] one value per row returned by the class-level `parse`
def distribution
  # Kernel#BigDecimal replaces BigDecimal.new, which no longer exists in
  # current releases of the bigdecimal library.
  self.class.parse(response).map { |row| BigDecimal(row[1]) }
end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,22 @@
1
module Wrnap
  module Package
    # Wrapper for the "FFTbor2D" executable, producing a 2D energy grid.
    class Fftbor2d < EnergyGrid2d
      self.executable_name = "FFTbor2D"
      # Adds the S flag by default unless the caller supplied M or S.
      self.default_flags = ->(_, flags) { flags.key?(:M) || flags.key?(:S) ? {} : { S: :empty } }

      # Builds the command line: executable, stringified flags, then the path
      # returned by `data.temp_fa_file!`.
      #
      # @param flags [Hash] flags handed to `stringify_flags`
      # @return [String] the command to run
      def run_command(flags)
        Wrnap.debugger { "Running #{exec_name} on #{data.inspect}" }

        "#{exec_name} #{stringify_flags(flags)} #{data.temp_fa_file!}"
      end

      # @return [Array<Array<String>>] the response split into lines, each
      #   line split on tab characters
      def distribution
        response.split(/\n/).map do |line|
          line.split(/\t/)
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Wrnap
  module Package
    # Fftbor variant that runs the "FFThairpin" executable; all behaviour is
    # inherited from Fftbor.
    class Ffthairpin < Fftbor
      self.executable_name = "FFThairpin"
    end
  end
end
@@ -0,0 +1,7 @@
1
module Wrnap
  module Package
    # Fftbor variant that runs the "FFTmultiloop" executable; all behaviour is
    # inherited from Fftbor.
    class Fftmultiloop < Fftbor
      self.executable_name = "FFTmultiloop"
    end
  end
end
@@ -0,0 +1,25 @@
1
module Wrnap
  module Package
    # Wrapper for a folding executable; exposes the MFE structure, its energy
    # and (when requested via flags) the ensemble energy.
    class Fold < Base
      self.default_flags = { "-noPS" => :empty }

      attr_reader :mfe_rna, :structure, :mfe, :ensemble_energy

      # Parses the response into the reader attributes.
      #
      # @raise [RuntimeError] when the parsed structure and the input
      #   sequence differ in length
      def post_process
        parser = Wrnap::Global::Parser
        parsed_structure = parser.rnafold_mfe_structure(response)

        if data.seq.length != parsed_structure.length
          raise "Sequence: '#{data.seq}'\nStructure: '#{parsed_structure}'"
        end

        # Compute everything first so no attribute is set if parsing fails.
        folded_rna = RNA.from_string(data.seq, parsed_structure)
        energy = parser.rnafold_mfe(response)
        @mfe_rna, @structure, @mfe = folded_rna, parsed_structure, energy

        # The ensemble energy is only parsed when the p flag equals 0.
        @ensemble_energy = parser.rnafold_ensemble_energy(response) if flags[:p] == 0
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Wrnap
  module Package
    # Wrapper whose response is a table of numeric columns, one row per line;
    # exposes it as a Hash keyed by the first column.
    class Heat < Base
      # @return [Hash{Float => Float}] first column => second column (set by #post_process)
      attr_reader :specific_heats

      # Splits each response line on whitespace, converts the fields to
      # Floats and maps the first field to the second.
      def post_process
        numeric_rows = response.split(/\n/).map { |line| line.split(/\s+/).map(&:to_f) }

        @specific_heats = numeric_rows.each_with_object({}) do |(temp, specific_heat), heats|
          heats[temp] = specific_heat
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Wrnap
  module Package
    # Wrapper for the "kinwalker" executable; parses the trajectory section of
    # its output into Node objects.
    class Kinwalker < Base
      self.executable_name = "kinwalker"
      attr_reader :nodes

      # Takes the text after the last "TRAJECTORY" marker, drops empty lines
      # and the final remaining line, and builds one Node per line from its
      # whitespace-separated fields.
      def post_process
        trajectory = response.split("TRAJECTORY").last
        trajectory_lines = trajectory.split(?\n).reject(&:empty?)

        @nodes = trajectory_lines[0..-2].map do |line|
          Node.new(*line.split(/\s+/))
        end
      end

      # @return [Float] the time of the last node
      def mfpt
        nodes.last.time
      end

      # One step of the trajectory.
      class Node
        attr_reader :structure, :energy, :time, :barrier, :energy_barrier, :transcribed

        # All numeric arguments are converted with to_f, except
        # `transcribed`, which is converted with to_i.
        def initialize(structure, energy, time, barrier, energy_barrier, transcribed)
          @structure      = structure
          @energy         = energy.to_f
          @time           = time.to_f
          @barrier        = barrier.to_f
          @energy_barrier = energy_barrier.to_f
          @transcribed    = transcribed.to_i
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # Maybe add something like flagsets so that common option groups can be combined together.
2
+ # Also, add a rerun feature.
3
+
4
module Wrnap
  module Package
    # Computes a mean first passage time from the CSV energy grid of a
    # preceding EnergyGrid2d package.
    class Mfpt < Base
      self.chains_from = Wrnap::Package::EnergyGrid2d
      self.default_flags = ->(context, flags) { { X: :empty, H: :empty, N: context.data.seq.length, D: context.data.bp_distance, Q: "1e-8" } }
      # These flags aren't well setup for alternative options at the moment.

      # @return [Float] the response converted with String#to_f (set by #post_process)
      attr_reader :mfpt

      # Reuses the previous package's data object and gives it a singleton
      # `energy_grid_csv` method that writes the previous package's CSV to a
      # temporary file and returns the path.
      #
      # @param previous_package [EnergyGrid2d] package responding to `data` and `to_csv!`
      # @return [Object] the previous package's data object (modified in place)
      def transform_for_chaining(previous_package)
        previous_package.data.tap do |data|
          data.instance_eval do
            @previous_package = previous_package

            def energy_grid_csv
              # Keep the Tempfile referenced from the data object: an
              # unreferenced Tempfile is deleted when it is garbage
              # collected, which could remove the CSV before the external
              # command has read it.
              @energy_grid_tempfile = Tempfile.new("rna")

              @energy_grid_tempfile.path.tap do |energy_grid_csv|
                @previous_package.to_csv!(energy_grid_csv)
              end
            end
          end
        end
      end

      # Builds the command line: executable, stringified flags, then the path
      # of the freshly written energy grid CSV.
      #
      # @param flags [Hash] flags handed to `stringify_flags`
      # @return [String] the command to run
      def run_command(flags)
        "%s %s %s" % [
          exec_name,
          stringify_flags(flags),
          data.energy_grid_csv
        ]
      end

      # Stores the response as a Float.
      def post_process
        @mfpt = response.to_f
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Wrnap
  module Package
    # Wrapper for a plotting executable that reads its input from stdin.
    class Plot < Base
      self.call_with = %i|comment seq str|
      self.default_flags = { t: 0, o: "svg" }

      # Builds a shell pipeline that cats the file returned by
      # `data.temp_fa_file!` into the executable.
      #
      # @param flags [Hash] flags handed to `stringify_flags`
      # @return [String] the command to run
      def run_command(flags)
        "cat #{data.temp_fa_file!} | #{exec_name} #{stringify_flags(flags)}"
      end
    end
  end
end