wrnap 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 78511600a0ab8cc9fc08c3a64c677794744fee4d
4
+ data.tar.gz: 2b940a1ccf7159bb14d5d53d22c7c58c1466a1b1
5
+ SHA512:
6
+ metadata.gz: e6e3f134d8b2724af60d26fc07b62077489844c2d74a0258fb82a3753e0c6196ec7f029f88186985bf52bc075acdea55427b7d514133c8073aa645918d73d90e
7
+ data.tar.gz: 5da6d3dfe4888a5c1b7092ab1e1e93fa0cfd8504f9153cf2fc4cc62c9a65a823f3c3eff5194a17e3481a711fbead3abdba7d28b97c34fd1e5936d4d6b49982d9
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in wrnap.gemspec
4
+ gemspec
data/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) {{{year}}} {{{fullname}}}
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # Wrnap
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/wrnap.png)](http://badge.fury.io/rb/wrnap)
4
+
5
+ A simple gem for facilitating bindings to various RNA CLI packages (namely http://www.tbi.univie.ac.at/~ivo/RNA/). Note that this gem makes no effort to build and install any wrapped packages at install-time, and instead relies on their presence on the host machine. It also includes a lot of utilities surrounding RNA sequence / structure parsing, graphing using R (via RinRuby) and other analysis tools. Used privately as the foundation for much of the research I do at http://bioinformatics.bc.edu/clotelab/
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'wrnap'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install wrnap
20
+
21
+ ## Usage
22
+
23
+ Simple use case:
24
+
25
+ > require "wrnap"
26
+ #=> true
27
+ > rna = Wrnap::Package::Fold.run(seq: "CCUCGAGGGGAACCCGAAAGGGACCCGAGAGG")
28
+ #=> #<Wrnap::Fold:0x007f9c48839dc0>
29
+ > rna.structure
30
+ #=> "((((..(((...(((....))).)))..))))"
31
+ > rna.mfe
32
+ #=> -19.7
33
+
34
+ ... now an even easier way ...
35
+
36
+ > mfe_rna = RNA("CCUCGAGGGGAACCCGAAAGGGACCCGAGAGG").run(:fold).mfe_rna
37
+ #=> echo CCUCGAGGGGAACCCGAAAGGGACCCGAGAGG | rnafold --noPS
38
+ #=> Total runtime: 0.013 sec.
39
+ #=> #<Wrnap::Rna CCUCGAGGGGAACCCGAAAG... ((((..(((...(((....) [truncated]>
40
+
41
+ ## Contributing
42
+
43
+ 1. Fork it ( https://github.com/[my-github-username]/wrnap/fork )
44
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
45
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
46
+ 4. Push to the branch (`git push origin my-new-feature`)
47
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,29 @@
1
module Wrnap
  module Global
    # Mixin that lets an object which already holds a @response hand itself
    # to another package.
    module ChainExtensions
      # Include hook: mixes InstanceMethods into the including class.
      def self.included(base)
        base.send(:include, InstanceMethods)
      end

      module InstanceMethods
        # Builds the package found by Wrnap::Package.lookup(package) around
        # this object and runs it.
        #
        # @param package identifier passed to Wrnap::Package.lookup
        # @param flags [Hash] forwarded unchanged to the new package's #run
        # @return whatever the chained package's #run returns
        # @raise [ArgumentError] when @response has not been set on the
        #   receiver, when the target class has no transform_for_chaining
        #   instance method, or when the receiver is not a kind of any entry
        #   in chains_from
        def chain(package, flags = {})
          target = Wrnap::Package.lookup(package)

          unless instance_variable_defined?(:@response)
            raise ArgumentError, "Can only chain a package that is not the first to be called"
          end

          unless target.method_defined?(:transform_for_chaining)
            raise ArgumentError, "#{target.name} doesn't support chaining because it doesn't define transform_for_chaining"
          end

          # NOTE(review): chains_from is read from the receiver, while the
          # error message below talks about the target class -- confirm which
          # side is meant to own the list.
          accepted = [chains_from].flatten.any? { |candidate| kind_of?(candidate) }

          unless accepted
            raise ArgumentError, "#{target.name} doesn't support chaining from #{self.class.name} because it isn't in the chains_from list"
          end

          target.new(self, chaining: true).run(flags)
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Wrnap
  module Global
    # Text helpers that pick values out of a multi-line response string
    # (presumably RNAfold output, judging by the method names).
    module Parser
      REGEXP = {
        number: /-?\d*\.\d*/,
        mfe:    / \(\s*(-?\d*\.\d*)\)$/
      }

      class << self
        # @param response [String] raw multi-line output
        # @return [String] first whitespace-delimited token of the second line
        def rnafold_mfe_structure(response)
          second_line = response.split(/\n/)[1]
          second_line.split(/\s+/)[0]
        end

        # @param response [String] raw multi-line output
        # @return [Float] the number captured from the trailing " (x.xx)" on
        #   the second line
        def rnafold_mfe(response)
          second_line = response.split(/\n/)[1]
          captured    = REGEXP[:mfe].match(second_line)
          captured[1].to_f
        end

        # @param response [String] raw multi-line output
        # @return [Float] the number found in the last whitespace-separated
        #   token of the third line
        def rnafold_ensemble_energy(response)
          third_line = response.split(/\n/)[2]
          last_token = third_line.split(/\s/)[-1]
          REGEXP[:number].match(last_token)[0].to_f
        end
      end
    end
  end
end
@@ -0,0 +1,148 @@
1
+ module Wrnap
2
+ module Global
3
+ class Rna
4
+ include RnaExtensions
5
+
6
+ attr_accessor :comment
7
+ attr_reader :sequence, :structure, :second_structure
8
+
9
+ class << self
10
+ def init_from_string(sequence, structure = nil, second_structure = nil, comment = nil)
11
+ new(
12
+ sequence: sequence,
13
+ structure: structure,
14
+ second_structure: second_structure,
15
+ comment: comment
16
+ )
17
+ end
18
+
19
+ def init_from_hash(hash)
20
+ new(
21
+ sequence: hash[:sequence] || hash[:seq],
22
+ structure: hash[:structure] || hash[:str_1] || hash[:str],
23
+ second_structure: hash[:second_structure] || hash[:str_2],
24
+ comment: hash[:comment] || hash[:name]
25
+ )
26
+ end
27
+
28
+ def init_from_array(array)
29
+ init_from_string(*array)
30
+ end
31
+
32
+ def init_from_fasta(string)
33
+ if File.exist?(string)
34
+ comment = File.basename(string, string.include?(?.) ? ".%s" % string.split(?.)[-1] : "")
35
+ string = File.read(string).chomp
36
+ end
37
+
38
+ init_from_string(*string.split(/\n/).reject { |line| line.start_with?(">") }[0, 3]).tap do |rna|
39
+ if (line = string.split(/\n/).first).start_with?(">") && !(file_comment = line.gsub(/^>\s*/, "")).empty?
40
+ rna.comment = file_comment
41
+ elsif comment
42
+ rna.comment = comment
43
+ end
44
+ end
45
+ end
46
+
47
+ def init_from_self(rna)
48
+ # This happens when you call a Wrnap library function with the output of something like Wrnap::Fold.run(...).mfe
49
+ new(
50
+ sequence: rna.sequence,
51
+ strucutre: rna.structure,
52
+ second_strucutre: rna.second_structure,
53
+ comment: rna.comment
54
+ )
55
+ end
56
+
57
+ alias_method :placeholder, :new
58
+ end
59
+
60
+ def initialize(sequence: "", structure: "", second_structure: "", comment: "")
61
+ @sequence, @comment = sequence.kind_of?(Rna) ? sequence.seq : sequence, comment
62
+
63
+ [:structure, :second_structure].each do |structure_symbol|
64
+ instance_variable_set(
65
+ :"@#{structure_symbol}",
66
+ case structure_value = eval("#{structure_symbol}")
67
+ when :empty then empty_structure
68
+ when :mfe then RNA(sequence).run(:fold).mfe_rna.structure
69
+ when String then structure_value
70
+ when Hash then
71
+ if structure_value.keys.count > 1
72
+ Wrnap.debugger { "The following options hash has more than one key. This will probably produce unpredictable results: %s" % structure_value.inspect }
73
+ end
74
+
75
+ RNA(sequence).run(*structure_value.keys, *structure_value.values).mfe_rna.structure
76
+ end
77
+ )
78
+ end
79
+
80
+ if str && seq.length != str.length
81
+ Wrnap.debugger { "The sequence length (%d) doesn't match the structure length (%d)" % [seq, str].map(&:length) }
82
+ end
83
+
84
+ if str_2 && str_1.length != str_2.length
85
+ Wrnap.debugger { "The first structure length (%d) doesn't match the second structure length (%d)" % [str_1, str_2].map(&:length) }
86
+ end
87
+ end
88
+
89
+ alias :seq :sequence
90
+ alias :str :structure
91
+ alias :str_1 :structure
92
+ alias :str_2 :second_structure
93
+ alias :name :comment
94
+
95
+ def empty_structure
96
+ "." * seq.length
97
+ end
98
+
99
+ alias :empty_str :empty_structure
100
+
101
+ def one_structure(structure_1)
102
+ self.class.init_from_string(seq, structure_1.is_a?(Symbol) ? send(structure_1) : structure_1, nil, name)
103
+ end
104
+
105
+ def two_structures(structure_1, structure_2)
106
+ self.class.init_from_string(
107
+ seq,
108
+ *[structure_1, structure_2].map { |argument| argument.is_a?(Symbol) ? send(argument) : argument },
109
+ name
110
+ )
111
+ end
112
+
113
+ def write_fa!(filename)
114
+ filename.tap do |filename|
115
+ File.open(filename, ?w) do |file|
116
+ file.write("> %s\n" % name) if name
117
+ file.write("%s\n" % seq) if seq
118
+ file.write("%s\n" % str_1) if str_1
119
+ file.write("%s\n" % str_2) if str_2
120
+ end
121
+ end
122
+ end
123
+
124
+ def temp_fa_file!
125
+ write_fa!(Tempfile.new("rna")).path
126
+ end
127
+
128
+ def run(package_name, options = {})
129
+ Wrnap::Package.lookup(package_name).run(self, options)
130
+ end
131
+
132
+ def method_missing(name, *args, &block)
133
+ if (name_str = "#{name}") =~ /^run_\w+$/
134
+ run(name_str.gsub(/^run_/, ""), *args)
135
+ else super end
136
+ end
137
+
138
+ def inspect
139
+ "#<RNA: %s>" % [
140
+ ("#{seq[0, 20] + (seq.length > 20 ? '... [%d]' % seq.length : '')}" if seq && !seq.empty?),
141
+ ("#{str_1[0, 20] + (str_1.length > 20 ? ' [%d]' % seq.length : '')}" if str_1 && !str_1.empty?),
142
+ ("#{str_2[0, 20] + (str_2.length > 20 ? ' [%d]' % seq.length : '')}" if str_2 && !str_1.empty?),
143
+ (name ? name : "#{self.class.name}")
144
+ ].compact.join(", ")
145
+ end
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,99 @@
1
+ module Wrnap
2
+ module Global
3
+ module RnaExtensions
4
+ def self.included(base)
5
+ base.send(:include, InstanceMethods)
6
+ base.extend(ClassMethods)
7
+ base.extend(OneStructureBasedMethods)
8
+ base.extend(TwoStructureBasedMethods)
9
+
10
+ base.class_eval do
11
+ OneStructureBasedMethods.public_instance_methods.each do |class_method|
12
+ define_method(class_method) do |*args|
13
+ self.class.send(class_method, *[structure].concat(args))
14
+ end
15
+ end
16
+
17
+ TwoStructureBasedMethods.public_instance_methods.each do |class_method|
18
+ define_method(class_method) do |*args|
19
+ self.class.send(class_method, *[str_1, str_2].concat(args))
20
+ end
21
+ end
22
+ end
23
+
24
+ base.send(:include, InstanceMethods)
25
+ end
26
+
27
+ module ClassMethods
28
+ def generate_sequence(sequence_length)
29
+ # 0th order Markov chain w/ uniform probability distribution
30
+ Rna.init_from_string(sequence_length.times.inject("") { |string, _| string + %w[A U C G][rand(4)] })
31
+ end
32
+
33
+ def shuffle(sequence, token_length = 2)
34
+ Shuffle.new(sequence).shuffle(token_length)
35
+ end
36
+ end
37
+
38
+ module InstanceMethods
39
+ def dishuffle
40
+ self.class.shuffle(sequence, 2)
41
+ end
42
+
43
+ def gc_content
44
+ seq.split(//).select { |i| i =~ /[GC]/i }.size.to_f / seq.size
45
+ end
46
+
47
+ def boltzmann_probability(dangle: 2)
48
+ Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
49
+ end
50
+ end
51
+
52
+ module OneStructureBasedMethods
53
+ def max_bp_distance(structure)
54
+ base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
55
+ end
56
+
57
+ def base_pairs(structure)
58
+ get_pairings(structure).each_with_index.inject(Set.new) do |set, (j, i)|
59
+ j >= 0 ? set << Set[i, j] : set
60
+ end
61
+ end
62
+
63
+ def get_pairings(structure)
64
+ stack = []
65
+
66
+ structure.each_char.each_with_index.inject(Array.new(structure.length, -1)) do |array, (symbol, index)|
67
+ array.tap do
68
+ case symbol
69
+ when "(" then stack.push(index)
70
+ when ")" then
71
+ if stack.empty?
72
+ raise "Too many ')' in '#{structure}'"
73
+ else
74
+ stack.pop.tap do |opening|
75
+ array[opening] = index
76
+ array[index] = opening
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end.tap do
82
+ raise "Too many '(' in '#{structure}'" unless stack.empty?
83
+ end
84
+ end
85
+ end
86
+
87
+ module TwoStructureBasedMethods
88
+ def bp_distance(structure_1, structure_2)
89
+ # Takes two structures and calculates the distance between them by |symmetric difference(bp_in_a, bp_in_b)|
90
+ raise "The two structures are not the same length" unless structure_1.length == structure_2.length
91
+
92
+ bp_set_1, bp_set_2 = base_pairs(structure_1), base_pairs(structure_2)
93
+
94
+ ((bp_set_1 - bp_set_2) + (bp_set_2 - bp_set_1)).count
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,98 @@
1
+ module Wrnap
2
+ module Global
3
+ module RunExtensions
4
+ def self.included(base)
5
+ base.send(:include, InstanceMethods)
6
+ base.extend(ClassMethods)
7
+ end
8
+
9
+ module ClassMethods
10
+ def exec_exists?(name)
11
+ !%x[which RNA#{name.to_s.downcase}].empty? || !%x[which #{name.to_s.downcase}].empty?
12
+ end
13
+
14
+ def run(*data)
15
+ flags = data.length > 1 && data.last.is_a?(Hash) ? data.pop : {}
16
+ new(data).run(flags)
17
+ end
18
+ end
19
+
20
+ module InstanceMethods
21
+ def run(flags = {})
22
+ unless response
23
+ tap do
24
+ @runtime = Benchmark.measure do
25
+ pre_run_check
26
+ merged_flags = recursively_merge_flags(flags)
27
+ runnable_command = run_command(merged_flags)
28
+
29
+ Wrnap.debugger { runnable_command }
30
+
31
+ @response = %x[#{runnable_command}]
32
+ post_process if respond_to?(:post_process)
33
+ end
34
+
35
+ Wrnap.debugger { "Total runtime: %.3f sec." % runtime.real }
36
+ end
37
+ else
38
+ self
39
+ end
40
+ end
41
+
42
+ def pre_run_check
43
+ if %x[which #{exec_name}].empty?
44
+ raise RuntimeError.new("#{exec_name} is not defined on this machine")
45
+ end
46
+ end
47
+
48
+ def exec_name
49
+ executable_name.respond_to?(:call) ? executable_name[self] : executable_name
50
+ end
51
+
52
+ def recursively_merge_flags(flags)
53
+ rmerge = ->(old_hash, new_hash) do
54
+ inner_hash = {}
55
+
56
+ old_hash.merge(new_hash) do |key, old_value, new_value|
57
+ inner_hash[key] = [old_value, new_value].map(&:class).uniq == [Hash] ? rmerge[old_value, new_value] : new_value
58
+ end
59
+ end
60
+
61
+ rmerge[base_flags(flags), flags].tap do |merged_flags|
62
+ Wrnap.debugger { "%s: %s" % [self.class.name, merged_flags.inspect] }
63
+ end
64
+ end
65
+
66
+ def base_flags(flags)
67
+ default_flags.respond_to?(:call) ? default_flags[self, flags] : default_flags
68
+ end
69
+
70
+ def run_command(flags)
71
+ "echo %s | %s %s" % [
72
+ "'%s'" % call_with.map { |datum| data.send(datum) }.join(?\n),
73
+ exec_name,
74
+ stringify_flags(flags)
75
+ ]
76
+ end
77
+
78
+ def stringify_flags(flags)
79
+ flags.inject("") do |string, (flag, value)|
80
+ parameter = if value == :empty || value.class == TrueClass
81
+ " -%s" % flag
82
+ else
83
+ if quote_flag_params.include?(flag)
84
+ " -%s '%s'" % [flag, value.to_s.gsub(/'/) { %|\'| }]
85
+ else
86
+ " -%s %s" % [flag, value]
87
+ end
88
+ end
89
+
90
+ (string + parameter).strip
91
+ end.tap do
92
+ @flags = flags
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end