wrnap 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wrnap/global/{chain_extensions.rb → chainer.rb} +6 -6
- data/lib/wrnap/global/entrez.rb +36 -0
- data/lib/wrnap/global/rna/context.rb +122 -0
- data/lib/wrnap/global/{rna_extensions.rb → rna/extensions.rb} +17 -17
- data/lib/wrnap/global/rna.rb +9 -1
- data/lib/wrnap/global/{run_extensions.rb → runner.rb} +1 -1
- data/lib/wrnap/graphing/r.rb +68 -61
- data/lib/wrnap/package/base.rb +2 -2
- data/lib/wrnap/package/population.rb +8 -4
- data/lib/wrnap/version.rb +1 -1
- data/lib/wrnap.rb +7 -3
- data/wrnap.gemspec +2 -0
- metadata +35 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 62bce1f01b0068d45befe18bcb9bb5b6891de764
|
|
4
|
+
data.tar.gz: 2ec02f8b44890f19b75995949b9146776642da9f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 28e711a4fd54857a15fa5ce3beac1f3b3d5e99824883d44b20ec5a73deff48e6191ef38f64c4d85a8198e481736c0a467ca1b88bea2f1fb243610913d47e79e7
|
|
7
|
+
data.tar.gz: 0dcdb3c401989fe413270b27334a91ee5a21f27c4f7ec9307251e2549c7bdde12c950c2757764a1d27e82618883245d56557fc3aea4550828e597fff0ca4cf18
|
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
module Wrnap
|
|
2
2
|
module Global
|
|
3
|
-
module
|
|
3
|
+
module Chainer
|
|
4
4
|
def self.included(base)
|
|
5
5
|
base.send(:include, InstanceMethods)
|
|
6
6
|
end
|
|
7
|
-
|
|
7
|
+
|
|
8
8
|
module InstanceMethods
|
|
9
9
|
def chain(package, flags = {})
|
|
10
10
|
class_chaining_to = Wrnap::Package.lookup(package)
|
|
11
|
-
|
|
11
|
+
|
|
12
12
|
unless instance_variable_defined?(:@response)
|
|
13
13
|
raise ArgumentError.new("Can only chain a package that is not the first to be called")
|
|
14
14
|
end
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
unless class_chaining_to.instance_methods.include?(:transform_for_chaining)
|
|
17
17
|
raise ArgumentError.new("#{class_chaining_to.name} doesn't support chaining because it doesn't define transform_for_chaining")
|
|
18
18
|
end
|
|
19
|
-
|
|
19
|
+
|
|
20
20
|
unless [chains_from].flatten.any?(&method(:kind_of?))
|
|
21
21
|
raise ArgumentError.new("#{class_chaining_to.name} doesn't support chaining from #{self.class.name} because it isn't in the chains_from list")
|
|
22
22
|
end
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
class_chaining_to.new(self, chaining: true).run(flags)
|
|
25
25
|
end
|
|
26
26
|
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
module Wrnap
|
|
2
|
+
module Global
|
|
3
|
+
module Entrez
|
|
4
|
+
class << self
|
|
5
|
+
def simple_rna_sequence(id, from, to)
|
|
6
|
+
sequence = rna_sequence_from_entrez(id, [from, to].min, 0..((to - from).abs))
|
|
7
|
+
|
|
8
|
+
to < from ? sequence.complement : sequence
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def rna_sequence_from_entrez(id, position, window, buffer_size = 0)
|
|
12
|
+
na_sequence_from_entrez(id, position, window, buffer_size).rna
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def na_sequence_from_entrez(id, position, window, buffer_size = 0)
|
|
16
|
+
Bio::Sequence::NA.new(sequence_from_entrez(id, position, Range.new(window.min - buffer_size, window.max + buffer_size)).seq)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def sequence_from_entrez(id, position, window)
|
|
20
|
+
Wrnap.debugger { "Retrieving sequence from Entrez: using nuccore DB (id: #{id}, seq_start: #{position + window.min}, seq_stop: #{position + window.max})" }
|
|
21
|
+
Wrnap.debugger { "> True starting position: #{position} with window #{window.min} to #{window.max}" }
|
|
22
|
+
|
|
23
|
+
fasta = ::Entrez.EFetch("nuccore", {
|
|
24
|
+
id: id,
|
|
25
|
+
seq_start: position + window.min,
|
|
26
|
+
seq_stop: position + window.max,
|
|
27
|
+
retmode: :fasta,
|
|
28
|
+
rettype: :text
|
|
29
|
+
}).response.body
|
|
30
|
+
|
|
31
|
+
Bio::FastaFormat.new(fasta)
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
module Wrnap
|
|
2
|
+
module Global
|
|
3
|
+
class Context < Rna
|
|
4
|
+
attr_reader :accession, :from, :to, :coord_options
|
|
5
|
+
|
|
6
|
+
class << self
|
|
7
|
+
def init_from_entrez(accession, from, to, coord_options = {})
|
|
8
|
+
new(
|
|
9
|
+
accession: accession,
|
|
10
|
+
from: from,
|
|
11
|
+
to: to,
|
|
12
|
+
coord_options: coord_options
|
|
13
|
+
)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def init_from_string(sequence, accession, from, to, coord_options = {})
|
|
17
|
+
new(
|
|
18
|
+
sequence: sequence,
|
|
19
|
+
accession: accession,
|
|
20
|
+
from: from,
|
|
21
|
+
to: to,
|
|
22
|
+
coord_options: coord_options
|
|
23
|
+
)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def initialize(sequence: nil, accession: nil, from: nil, to: nil, coord_options: {})
|
|
28
|
+
@accession, @from, @to, @coord_options = accession, from, to, coord_options
|
|
29
|
+
|
|
30
|
+
validate_coord_options
|
|
31
|
+
|
|
32
|
+
if sequence
|
|
33
|
+
@raw_sequence = (sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence).upcase
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Validates the window-extension options returned by +coord_options+.
#
# An empty hash (no extension requested) is always accepted. Otherwise the hash must have exactly the keys
# :direction and :length, where :length is a positive Integer and :direction is one of :up, :down, :both, 5 or 3.
#
# Returns nil when the options are acceptable.
# Raises ArgumentError naming the first rule that is violated.
def validate_coord_options
  return if coord_options.empty?

  required_keys = %i|direction length|
  found_keys    = coord_options.keys

  # Hash keys are unique, so "same size and no extras" means the two key lists hold the same members regardless of
  # order. (Comparing the Array of keys to a Set with == is never true, which would reject every non-empty hash.)
  unless found_keys.size == required_keys.size && (found_keys - required_keys).empty?
    raise ArgumentError.new("coord_options keys must contain only :direction, :length, found: %s" % found_keys.inspect)
  end

  # The value is formatted with inspect so that nil or non-numeric lengths still produce an ArgumentError.
  unless (length = coord_options[:length]).is_a?(Integer) && length > 0
    raise ArgumentError.new("coord_options length must be greater than 0, found: %s" % length.inspect)
  end

  unless [:up, :down, :both, 5, 3].include?(direction = coord_options[:direction])
    raise ArgumentError.new("coord_options directions is not a valid key, found: %s" % direction.inspect)
  end
end
|
|
52
|
+
|
|
53
|
+
def up_coord
|
|
54
|
+
[from, to].min
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def down_coord
|
|
58
|
+
[from, to].max
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def seq_from
|
|
62
|
+
up_coord + coord_window.min
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def seq_to
|
|
66
|
+
up_coord + coord_window.max
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def strand
|
|
70
|
+
plus_strand? ? :plus : :minus
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def plus_strand?
|
|
74
|
+
to > from
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def minus_strand?
|
|
78
|
+
!plus_strand?
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Returns the sequence for this context, oriented for its strand.
#
# When no sequence has been loaded yet it is retrieved with Entrez.rna_sequence_from_entrez for +accession+,
# starting at +up_coord+ over +coord_window+. On the minus strand the loaded sequence is replaced by its
# +complement+. That orientation step runs exactly once per loaded sequence, so repeated calls return the same
# value instead of flipping back and forth between the sequence and its complement.
def sequence
  unless @raw_sequence && @sequence_oriented
    loaded = @raw_sequence || Entrez.rna_sequence_from_entrez(accession, up_coord, coord_window)

    @raw_sequence      = minus_strand? ? loaded.complement : loaded
    # Remember that @raw_sequence is already oriented; if @raw_sequence is later removed, the guard above
    # still triggers a fresh load because @raw_sequence reads as nil.
    @sequence_oriented = true
  end

  @raw_sequence
end
|
|
85
|
+
|
|
86
|
+
alias :seq :sequence
|
|
87
|
+
|
|
88
|
+
# Applies new window-extension options to this context and returns self.
#
# coord_options - Hash of extension options; when empty, the options already stored on the context are kept.
#
# The (possibly replaced) options are re-validated with +validate_coord_options+, the context is flagged as
# extended, and any sequence cached in @raw_sequence is dropped.
def extend!(coord_options = {})
  tap do
    @coord_options = coord_options unless coord_options.empty?
    validate_coord_options
    @extended = true
    # No sequence is cached until one has been supplied or loaded; removing an undefined instance variable
    # raises NameError, so only remove it when it exists.
    remove_instance_variable(:@raw_sequence) if instance_variable_defined?(:@raw_sequence)
  end
end
|
|
96
|
+
|
|
97
|
+
def extended?
|
|
98
|
+
@extended
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Returns the Range of offsets, relative to +up_coord+, that this context covers.
#
# Without extension options the window is 0..(down_coord - up_coord). When coord_options has both :length and
# :direction the window is widened by :length at one end (or at both ends for :both); which end is chosen
# depends on the direction together with +strand+.
#
# Options from coord_options ex: { length: 300, direction: 3 }, { length: 250, direction: :both }, { length: 200, direction: :down }
def coord_window
  range = 0..(down_coord - up_coord)
  length, direction = coord_options[:length], coord_options[:direction]

  return range unless length && direction
  return Range.new(range.min - length, range.max + length) if direction == :both

  case [direction, strand]
  when [3, :plus], [:down, :plus], [5, :minus], [:up, :minus] then Range.new(range.min, range.max + length)
  when [5, :plus], [:up, :plus], [3, :minus], [:down, :minus] then Range.new(range.min - length, range.max)
  else
    Wrnap.debugger { "WARNING: value for :direction key in sequence retrieval needs to be one of 5, 3, :up, :down, :both - found (%s)" % direction.inspect }

    # Callers take .min / .max of the result, so fall back to the unextended window rather than returning
    # whatever the debugger call evaluates to.
    range
  end
end
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
@@ -1,35 +1,35 @@
|
|
|
1
1
|
module Wrnap
|
|
2
2
|
module Global
|
|
3
|
-
module
|
|
3
|
+
module Extensions
|
|
4
4
|
def self.included(base)
|
|
5
5
|
base.send(:include, InstanceMethods)
|
|
6
6
|
base.extend(ClassMethods)
|
|
7
7
|
base.extend(OneStructureBasedMethods)
|
|
8
8
|
base.extend(TwoStructureBasedMethods)
|
|
9
|
-
|
|
9
|
+
|
|
10
10
|
base.class_eval do
|
|
11
11
|
OneStructureBasedMethods.public_instance_methods.each do |class_method|
|
|
12
12
|
define_method(class_method) do |*args|
|
|
13
13
|
self.class.send(class_method, *[structure].concat(args))
|
|
14
14
|
end
|
|
15
15
|
end
|
|
16
|
-
|
|
16
|
+
|
|
17
17
|
TwoStructureBasedMethods.public_instance_methods.each do |class_method|
|
|
18
18
|
define_method(class_method) do |*args|
|
|
19
19
|
self.class.send(class_method, *[str_1, str_2].concat(args))
|
|
20
20
|
end
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
base.send(:include, InstanceMethods)
|
|
25
25
|
end
|
|
26
|
-
|
|
26
|
+
|
|
27
27
|
module ClassMethods
|
|
28
28
|
def generate_sequence(sequence_length)
|
|
29
29
|
# 0th order Markov chain w/ uniform probability distribution
|
|
30
30
|
Rna.init_from_string(sequence_length.times.inject("") { |string, _| string + %w[A U C G][rand(4)] })
|
|
31
31
|
end
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
def shuffle(sequence, token_length = 2)
|
|
34
34
|
Shuffle.new(sequence).shuffle(token_length)
|
|
35
35
|
end
|
|
@@ -39,35 +39,35 @@ module Wrnap
|
|
|
39
39
|
def dishuffle
|
|
40
40
|
self.class.shuffle(sequence, 2)
|
|
41
41
|
end
|
|
42
|
-
|
|
42
|
+
|
|
43
43
|
def gc_content
|
|
44
44
|
seq.split(//).select { |i| i =~ /[GC]/i }.size.to_f / seq.size
|
|
45
45
|
end
|
|
46
|
-
|
|
46
|
+
|
|
47
47
|
def boltzmann_probability(dangle: 2)
|
|
48
48
|
Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
|
|
49
49
|
end
|
|
50
50
|
end
|
|
51
|
-
|
|
51
|
+
|
|
52
52
|
module OneStructureBasedMethods
|
|
53
53
|
def max_bp_distance(structure)
|
|
54
54
|
base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
|
|
55
55
|
end
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
def base_pairs(structure)
|
|
58
58
|
get_pairings(structure).each_with_index.inject(Set.new) do |set, (j, i)|
|
|
59
59
|
j >= 0 ? set << Set[i, j] : set
|
|
60
60
|
end
|
|
61
61
|
end
|
|
62
|
-
|
|
62
|
+
|
|
63
63
|
def get_pairings(structure)
|
|
64
64
|
stack = []
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
structure.each_char.each_with_index.inject(Array.new(structure.length, -1)) do |array, (symbol, index)|
|
|
67
|
-
array.tap do
|
|
67
|
+
array.tap do
|
|
68
68
|
case symbol
|
|
69
69
|
when "(" then stack.push(index)
|
|
70
|
-
when ")" then
|
|
70
|
+
when ")" then
|
|
71
71
|
if stack.empty?
|
|
72
72
|
raise "Too many ')' in '#{structure}'"
|
|
73
73
|
else
|
|
@@ -83,14 +83,14 @@ module Wrnap
|
|
|
83
83
|
end
|
|
84
84
|
end
|
|
85
85
|
end
|
|
86
|
-
|
|
86
|
+
|
|
87
87
|
module TwoStructureBasedMethods
|
|
88
88
|
def bp_distance(structure_1, structure_2)
|
|
89
89
|
# Takes two structures and calculates the distance between them by |symmetric difference(bp_in_a, bp_in_b)|
|
|
90
90
|
raise "The two structures are not the same length" unless structure_1.length == structure_2.length
|
|
91
|
-
|
|
91
|
+
|
|
92
92
|
bp_set_1, bp_set_2 = base_pairs(structure_1), base_pairs(structure_2)
|
|
93
|
-
|
|
93
|
+
|
|
94
94
|
((bp_set_1 - bp_set_2) + (bp_set_2 - bp_set_1)).count
|
|
95
95
|
end
|
|
96
96
|
end
|
data/lib/wrnap/global/rna.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
module Wrnap
|
|
2
2
|
module Global
|
|
3
3
|
class Rna
|
|
4
|
-
include
|
|
4
|
+
include Extensions
|
|
5
5
|
|
|
6
6
|
attr_accessor :comment
|
|
7
7
|
attr_reader :sequence, :structure, :second_structure
|
|
@@ -44,6 +44,10 @@ module Wrnap
|
|
|
44
44
|
end
|
|
45
45
|
end
|
|
46
46
|
|
|
47
|
+
def init_from_context(context: [], rna: [])
|
|
48
|
+
init_from_string(Context.init_from_entrez(*context), *rna)
|
|
49
|
+
end
|
|
50
|
+
|
|
47
51
|
def init_from_self(rna)
|
|
48
52
|
# This happens when you call a Wrnap library function with the output of something like Wrnap::Fold.run(...).mfe
|
|
49
53
|
new(
|
|
@@ -102,6 +106,8 @@ module Wrnap
|
|
|
102
106
|
self.class.init_from_string(seq, structure_1.is_a?(Symbol) ? send(structure_1) : structure_1, nil, name)
|
|
103
107
|
end
|
|
104
108
|
|
|
109
|
+
alias :one_str :one_structure
|
|
110
|
+
|
|
105
111
|
def two_structures(structure_1, structure_2)
|
|
106
112
|
self.class.init_from_string(
|
|
107
113
|
seq,
|
|
@@ -110,6 +116,8 @@ module Wrnap
|
|
|
110
116
|
)
|
|
111
117
|
end
|
|
112
118
|
|
|
119
|
+
alias :two_str :two_structures
|
|
120
|
+
|
|
113
121
|
def write_fa!(filename)
|
|
114
122
|
filename.tap do |filename|
|
|
115
123
|
File.open(filename, ?w) do |file|
|
data/lib/wrnap/graphing/r.rb
CHANGED
|
@@ -9,52 +9,52 @@ module Wrnap
|
|
|
9
9
|
raise unless e.message == "Unsupported data type on R's end"
|
|
10
10
|
end
|
|
11
11
|
end
|
|
12
|
-
|
|
12
|
+
|
|
13
13
|
def overlay(data, title: nil, type: ?l, x_label: "Independent", y_label: "Dependent", legend: "topleft", filename: false)
|
|
14
14
|
# data: [{ data: [[x_0, y_0], ..., [x_n, y_n]], legend: "Line 1" }, ...]
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
x_points = data.map { |hash| hash[:data].map(&:first) }
|
|
17
17
|
y_points = data.map { |hash| hash[:data].map(&:last) }
|
|
18
18
|
x_range = Range.new(x_points.map(&:min).min.floor, x_points.map(&:max).max.ceil)
|
|
19
19
|
y_range = Range.new(y_points.map(&:min).min.floor, y_points.map(&:max).max.ceil)
|
|
20
|
-
|
|
20
|
+
|
|
21
21
|
graph do |r|
|
|
22
22
|
r.eval("%s('%s', 6, 6)" % [
|
|
23
|
-
writing_file?(filename) ?
|
|
24
|
-
writing_file?(filename) ? filename : "Graph",
|
|
23
|
+
writing_file?(filename) ? filetype(filename) : "quartz",
|
|
24
|
+
writing_file?(filename) ? filename : "Graph",
|
|
25
25
|
])
|
|
26
|
-
|
|
26
|
+
|
|
27
27
|
r.assign("legend.titles", data.each_with_index.map { |hash, index| hash[:legend] || "Line #{index + 1}" })
|
|
28
28
|
r.eval("line.colors <- rainbow(%d)" % data.size)
|
|
29
29
|
r.eval("plot(0, 0, type = 'n', cex = .75, cex.axis = .9, xlab = '', ylab = '', xlim = c(%d, %d), ylim = c(%d, %d))" % [
|
|
30
30
|
x_range.min, x_range.max, y_range.min, y_range.max
|
|
31
31
|
])
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
data.each_with_index do |hash, index|
|
|
34
34
|
r.assign("line_graph.x.%d" % index, x_points[index])
|
|
35
35
|
r.assign("line_graph.y.%d" % index, y_points[index])
|
|
36
|
-
|
|
36
|
+
|
|
37
37
|
r.eval <<-STR
|
|
38
38
|
lines(
|
|
39
|
-
line_graph.x.#{index},
|
|
40
|
-
line_graph.y.#{index},
|
|
39
|
+
line_graph.x.#{index},
|
|
40
|
+
line_graph.y.#{index},
|
|
41
41
|
col = line.colors[#{index + 1}],
|
|
42
42
|
type = "#{type}",
|
|
43
43
|
pch = #{index}
|
|
44
44
|
)
|
|
45
45
|
STR
|
|
46
46
|
end
|
|
47
|
-
|
|
47
|
+
|
|
48
48
|
r.eval <<-STR
|
|
49
49
|
title(
|
|
50
|
-
xlab = #{expressionify(x_label)},
|
|
51
|
-
ylab = #{expressionify(y_label)},
|
|
50
|
+
xlab = #{expressionify(x_label)},
|
|
51
|
+
ylab = #{expressionify(y_label)},
|
|
52
52
|
main = #{expressionify(title || "Line Graph")},
|
|
53
53
|
cex.main = .9,
|
|
54
54
|
cex.lab = .9
|
|
55
55
|
)
|
|
56
56
|
STR
|
|
57
|
-
|
|
57
|
+
|
|
58
58
|
if legend
|
|
59
59
|
r.eval <<-STR
|
|
60
60
|
legend(
|
|
@@ -69,7 +69,7 @@ module Wrnap
|
|
|
69
69
|
)
|
|
70
70
|
STR
|
|
71
71
|
end
|
|
72
|
-
|
|
72
|
+
|
|
73
73
|
r.eval("dev.off()") if writing_file?(filename)
|
|
74
74
|
end
|
|
75
75
|
end
|
|
@@ -77,84 +77,84 @@ module Wrnap
|
|
|
77
77
|
def line_graph(data, title: nil, type: ?l, x_label: "Independent", y_label: "Dependent", filename: false)
|
|
78
78
|
overlay([{ data: data }], title: title, type: type, x_label: x_label, y_label: y_label, legend: false, filename: filename)
|
|
79
79
|
end
|
|
80
|
-
|
|
80
|
+
|
|
81
81
|
def scatterplot(data, title: nil, x_label: "Independent", y_label: "Dependent", filename: false)
|
|
82
82
|
line_graph(data, title: title || "Scatterplot", type: ?p, x_label: x_label, y_label: y_label, filename: filename)
|
|
83
83
|
end
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
def roc(data, title: nil, baseline: true, filename: false)
|
|
86
86
|
# data: [[-0.894, 1.0], [-0.950, 1.0], [0.516, -1.0], ..., [0.815, -1.0], [0.740, -1.0]]
|
|
87
87
|
auc = ROC.auc(data)
|
|
88
88
|
title_with_auc = title ? "%s (AUC: %.4f)" % [title, auc] : "AUC: %.4f" % auc
|
|
89
89
|
overlay(
|
|
90
|
-
[{ data: ROC.curve_points(data) }, { data: [[0, 0], [1, 1]] }],
|
|
91
|
-
title: title_with_auc,
|
|
92
|
-
x_label: "False positive rate",
|
|
93
|
-
y_label: "True positive rate",
|
|
94
|
-
legend: false,
|
|
90
|
+
[{ data: ROC.curve_points(data) }, { data: [[0, 0], [1, 1]] }],
|
|
91
|
+
title: title_with_auc,
|
|
92
|
+
x_label: "False positive rate",
|
|
93
|
+
y_label: "True positive rate",
|
|
94
|
+
legend: false,
|
|
95
95
|
filename: filename
|
|
96
96
|
)
|
|
97
97
|
end
|
|
98
|
-
|
|
98
|
+
|
|
99
99
|
def roc_overlay(data, title: nil, auc_in_legend: true, filename: false)
|
|
100
100
|
# [{ data: [[-0.894, 1.0], [-0.950, 1.0], [0.516, -1.0], ..., [0.815, -1.0], [0.740, -1.0]], legend: "ROC 1" }, ...]
|
|
101
101
|
formatted_data = data.map do |hash|
|
|
102
102
|
curve_points = ROC.curve_points(hash[:data])
|
|
103
|
-
|
|
103
|
+
|
|
104
104
|
if auc_in_legend
|
|
105
105
|
auc = ROC.auc(hash[:data])
|
|
106
106
|
legend = hash[:legend] ? "%s (AUC: %.4f)" % [hash[:legend], auc] : "AUC: %.4f" % auc
|
|
107
|
-
|
|
107
|
+
|
|
108
108
|
hash.merge({ data: curve_points, legend: legend })
|
|
109
109
|
else
|
|
110
110
|
hash.merge({ data: curve_points })
|
|
111
111
|
end
|
|
112
112
|
end
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
|
|
114
|
+
|
|
115
115
|
overlay(
|
|
116
|
-
formatted_data,
|
|
117
|
-
title: title,
|
|
118
|
-
x_label: "False positive rate",
|
|
119
|
-
y_label: "True positive rate",
|
|
120
|
-
legend: "bottomright",
|
|
116
|
+
formatted_data,
|
|
117
|
+
title: title,
|
|
118
|
+
x_label: "False positive rate",
|
|
119
|
+
y_label: "True positive rate",
|
|
120
|
+
legend: "bottomright",
|
|
121
121
|
filename: filename
|
|
122
122
|
)
|
|
123
123
|
end
|
|
124
|
-
|
|
124
|
+
|
|
125
125
|
def histogram(data, title: nil, x_label: "Bins", num_bins: false, bin_size: 1, x_arrow: false, relative: false, filename: false)
|
|
126
126
|
half = bin_size / 2.0
|
|
127
127
|
range = Range.new((data.min - half).floor, (data.max + half).ceil)
|
|
128
128
|
breaks = num_bins ? num_bins : (range.min + half).step(range.max + half, bin_size).to_a
|
|
129
|
-
|
|
129
|
+
|
|
130
130
|
graph do |r|
|
|
131
131
|
r.assign("histogram.data", data)
|
|
132
132
|
r.assign("histogram.breaks", breaks)
|
|
133
|
-
|
|
133
|
+
|
|
134
134
|
r.eval("%s('%s', 6, 6)" % [
|
|
135
|
-
writing_file?(filename) ?
|
|
136
|
-
writing_file?(filename) ? filename : "
|
|
135
|
+
writing_file?(filename) ? filetype(filename) : "quartz",
|
|
136
|
+
writing_file?(filename) ? filename : "Graph",
|
|
137
137
|
])
|
|
138
|
-
|
|
138
|
+
|
|
139
139
|
r.eval <<-STR
|
|
140
140
|
hist(
|
|
141
|
-
histogram.data,
|
|
142
|
-
breaks = histogram.breaks,
|
|
143
|
-
xlab = #{expressionify(x_label)},
|
|
144
|
-
main = #{expressionify(title || "Histogram")},
|
|
141
|
+
histogram.data,
|
|
142
|
+
breaks = histogram.breaks,
|
|
143
|
+
xlab = #{expressionify(x_label)},
|
|
144
|
+
main = #{expressionify(title || "Histogram")},
|
|
145
145
|
freq = #{relative ? 'F' : 'T'},
|
|
146
146
|
cex.main = 0.9,
|
|
147
147
|
cex.lab = 0.9,
|
|
148
148
|
cex.axis = 0.9
|
|
149
149
|
)
|
|
150
150
|
STR
|
|
151
|
-
|
|
151
|
+
|
|
152
152
|
r.eval("abline(v = #{x_arrow}, lty = 'dashed')") if x_arrow
|
|
153
|
-
|
|
153
|
+
|
|
154
154
|
r.eval("dev.off()") if writing_file?(filename)
|
|
155
155
|
end
|
|
156
156
|
end
|
|
157
|
-
|
|
157
|
+
|
|
158
158
|
def matrix_heatmap(x, y, z, title: nil, x_label: "Column index", y_label: "Row index", filename: false, num_colors: 64)
|
|
159
159
|
graph do |r|
|
|
160
160
|
if r.pull("ifelse('Matrix' %in% rownames(installed.packages()), 1, -1)") > 0
|
|
@@ -163,7 +163,7 @@ module Wrnap
|
|
|
163
163
|
y << [x, y].map(&:max).max
|
|
164
164
|
z << 0
|
|
165
165
|
end
|
|
166
|
-
|
|
166
|
+
|
|
167
167
|
r.assign("matrix.i", x)
|
|
168
168
|
r.assign("matrix.j", y)
|
|
169
169
|
r.assign("matrix.x", z)
|
|
@@ -176,16 +176,16 @@ module Wrnap
|
|
|
176
176
|
index1 = F
|
|
177
177
|
)
|
|
178
178
|
STR
|
|
179
|
-
|
|
179
|
+
|
|
180
180
|
generate_graph("Heatmap") do
|
|
181
181
|
<<-STR
|
|
182
182
|
filtered.values <- Filter(function(i) { is.finite(i) & i != 0 }, matrix.x)
|
|
183
183
|
print(apply(as.matrix(matrix.data), 2, rev))
|
|
184
184
|
print(c(sort(filtered.values)[2], max(filtered.values)))
|
|
185
|
-
|
|
185
|
+
|
|
186
186
|
image(
|
|
187
|
-
x = 1:max(c(dim(matrix.data)[[1]], dim(matrix.data)[[2]])),
|
|
188
|
-
y = 1:max(c(dim(matrix.data)[[1]], dim(matrix.data)[[2]])),
|
|
187
|
+
x = 1:max(c(dim(matrix.data)[[1]], dim(matrix.data)[[2]])),
|
|
188
|
+
y = 1:max(c(dim(matrix.data)[[1]], dim(matrix.data)[[2]])),
|
|
189
189
|
z = as.matrix(matrix.data),
|
|
190
190
|
col = rev(heat.colors(#{num_colors})),
|
|
191
191
|
zlim = #{forced_square ? "c(sort(filtered.values)[2], max(filtered.values))" : "c(min(filtered.values), max(filtered.values))"},
|
|
@@ -200,30 +200,37 @@ module Wrnap
|
|
|
200
200
|
end
|
|
201
201
|
end
|
|
202
202
|
end
|
|
203
|
-
|
|
203
|
+
|
|
204
204
|
private
|
|
205
|
-
|
|
205
|
+
|
|
206
206
|
def generate_graph(window_title = "ViennaRNA Graph in R", &block)
|
|
207
207
|
r, filename = block.binding.eval("[r, filename]")
|
|
208
|
-
|
|
208
|
+
|
|
209
209
|
r.eval("%s('%s', 6, 6)" % [
|
|
210
|
-
writing_file?(filename) ?
|
|
211
|
-
writing_file?(filename) ? filename :
|
|
210
|
+
writing_file?(filename) ? filetype(filename) : "quartz",
|
|
211
|
+
writing_file?(filename) ? filename : "Graph",
|
|
212
212
|
])
|
|
213
|
-
|
|
213
|
+
|
|
214
214
|
r.eval(yield)
|
|
215
|
-
|
|
215
|
+
|
|
216
216
|
r.eval("dev.off()") if writing_file?(filename)
|
|
217
217
|
end
|
|
218
|
-
|
|
218
|
+
|
|
219
|
+
# Maps an output filename to a symbol naming its file type, based on the filename's extension.
#
# filename - String path of the file to write; nil or false is treated as having no extension.
#
# Returns :pdf or :eps.
# Raises ArgumentError when the extension is anything else (including missing).
#
# NOTE(review): callers in this file interpolate the result as the name of the R function that opens the output
# device ("%s('%s', 6, 6)"). Base R provides pdf() and postscript(); confirm that an eps() function is available
# on the R side before relying on the :eps branch.
def filetype(filename)
  # Match the extension case-insensitively so that e.g. "plot.PDF" is accepted like "plot.pdf".
  case File.extname(filename || "").downcase
  when ".pdf" then :pdf
  when ".eps" then :eps
  else raise(ArgumentError.new("You must explicitly provide an extension of .pdf or .eps so we know what type of file to make."))
  end
end
|
|
225
|
+
|
|
219
226
|
def writing_file?(filename)
|
|
220
|
-
filename
|
|
227
|
+
filename
|
|
221
228
|
end
|
|
222
|
-
|
|
229
|
+
|
|
223
230
|
def expressionify(string)
|
|
224
231
|
%w|expression paste|.any?(&string.method(:start_with?)) ? string : string.inspect
|
|
225
232
|
end
|
|
226
233
|
end
|
|
227
234
|
end
|
|
228
235
|
end
|
|
229
|
-
end
|
|
236
|
+
end
|
data/lib/wrnap/package/base.rb
CHANGED
|
@@ -5,8 +5,8 @@ module Wrnap
|
|
|
5
5
|
end
|
|
6
6
|
|
|
7
7
|
class Base
|
|
8
|
-
include Wrnap::Global::
|
|
9
|
-
include Wrnap::Global::
|
|
8
|
+
include Wrnap::Global::Runner
|
|
9
|
+
include Wrnap::Global::Chainer
|
|
10
10
|
|
|
11
11
|
class_attribute :executable_name
|
|
12
12
|
self.executable_name = ->(context) { "RNA#{context.class.name.split('::').last.underscore}" }
|
|
@@ -3,7 +3,7 @@ module Wrnap
|
|
|
3
3
|
class Population < Base
|
|
4
4
|
THREE_COLUMN_REGEX = /^([+-]\d+\.\d+\t){2}[+-]\d+\.\d+$/
|
|
5
5
|
|
|
6
|
-
attr_reader :str_1_to_str_2, :str_1_to_str_1
|
|
6
|
+
attr_reader :str_1_to_str_2, :str_1_to_str_1, :equilibrium
|
|
7
7
|
|
|
8
8
|
self.default_flags = ->(context, flags) do
|
|
9
9
|
{
|
|
@@ -82,9 +82,13 @@ module Wrnap
|
|
|
82
82
|
end
|
|
83
83
|
|
|
84
84
|
def post_process
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
if flags.include?("-spectral-e")
|
|
86
|
+
@equilibrium = 10 ** response.strip.to_f
|
|
87
|
+
else
|
|
88
|
+
time_points, str_1_to_str_2, str_1_to_str_1 = response.split(/\n/).select { |line| line =~ THREE_COLUMN_REGEX }.map { |line| line.split(/\t/).map(&:to_f) }.transpose
|
|
89
|
+
@str_1_to_str_2 = PopulationProportion.new(time_points, str_1_to_str_2)
|
|
90
|
+
@str_1_to_str_1 = PopulationProportion.new(time_points, str_1_to_str_1)
|
|
91
|
+
end
|
|
88
92
|
end
|
|
89
93
|
end
|
|
90
94
|
end
|
data/lib/wrnap/version.rb
CHANGED
data/lib/wrnap.rb
CHANGED
|
@@ -6,15 +6,19 @@ require "rinruby"
|
|
|
6
6
|
require "tempfile"
|
|
7
7
|
require "bigdecimal"
|
|
8
8
|
require "rroc"
|
|
9
|
+
require "bio"
|
|
10
|
+
require "entrez"
|
|
9
11
|
require "active_support/inflector"
|
|
10
12
|
require "active_support/core_ext/class"
|
|
11
13
|
|
|
12
14
|
require "wrnap/version"
|
|
13
|
-
require "wrnap/global/
|
|
15
|
+
require "wrnap/global/rna/extensions"
|
|
14
16
|
require "wrnap/global/rna"
|
|
17
|
+
require "wrnap/global/rna/context"
|
|
18
|
+
require "wrnap/global/chainer"
|
|
19
|
+
require "wrnap/global/entrez"
|
|
15
20
|
require "wrnap/global/parser"
|
|
16
|
-
require "wrnap/global/
|
|
17
|
-
require "wrnap/global/chain_extensions"
|
|
21
|
+
require "wrnap/global/runner"
|
|
18
22
|
require "wrnap/graphing/r"
|
|
19
23
|
require "wrnap/package/base"
|
|
20
24
|
|
data/wrnap.gemspec
CHANGED
|
@@ -27,4 +27,6 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_runtime_dependency "shuffle", "~> 0.1"
|
|
28
28
|
spec.add_runtime_dependency "rinruby", "~> 2.0"
|
|
29
29
|
spec.add_runtime_dependency "rroc", "~> 0.1"
|
|
30
|
+
spec.add_runtime_dependency "bio", "~> 1.4"
|
|
31
|
+
spec.add_runtime_dependency "entrez", "~> 0.5"
|
|
30
32
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: wrnap
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Evan Senter
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-
|
|
11
|
+
date: 2014-05-06 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -94,6 +94,34 @@ dependencies:
|
|
|
94
94
|
- - "~>"
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
96
|
version: '0.1'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: bio
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - "~>"
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '1.4'
|
|
104
|
+
type: :runtime
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - "~>"
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '1.4'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: entrez
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - "~>"
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '0.5'
|
|
118
|
+
type: :runtime
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - "~>"
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: '0.5'
|
|
97
125
|
description: ''
|
|
98
126
|
email:
|
|
99
127
|
- evansenter@gmail.com
|
|
@@ -107,11 +135,13 @@ files:
|
|
|
107
135
|
- README.md
|
|
108
136
|
- Rakefile
|
|
109
137
|
- lib/wrnap.rb
|
|
110
|
-
- lib/wrnap/global/
|
|
138
|
+
- lib/wrnap/global/chainer.rb
|
|
139
|
+
- lib/wrnap/global/entrez.rb
|
|
111
140
|
- lib/wrnap/global/parser.rb
|
|
112
141
|
- lib/wrnap/global/rna.rb
|
|
113
|
-
- lib/wrnap/global/
|
|
114
|
-
- lib/wrnap/global/
|
|
142
|
+
- lib/wrnap/global/rna/context.rb
|
|
143
|
+
- lib/wrnap/global/rna/extensions.rb
|
|
144
|
+
- lib/wrnap/global/runner.rb
|
|
115
145
|
- lib/wrnap/graphing/r.rb
|
|
116
146
|
- lib/wrnap/package/base.rb
|
|
117
147
|
- lib/wrnap/package/energy_grid_2d.rb
|