wrnap 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ce3134651886e323b97794153a83b97ada50a5db
4
- data.tar.gz: a01c0eb283ca43609e06997deee79787dc98de00
3
+ metadata.gz: 29f26144274a4bd80974e243be7cf969b5f2f49a
4
+ data.tar.gz: 79793475465dacb80644623212f352010a07c910
5
5
  SHA512:
6
- metadata.gz: 4c5ed38ddcc0570d2ec46f41e46332d4e71d5b5c756f3c027c8e9e2afddca3e8f7766912246af8cf39bd44c7b78c2a89983a4e9af629902ccaebbff4f576ca1c
7
- data.tar.gz: 91d22c1f48cc245ba1135324f874858d8f2ffc0834e4b9d0383d44ed8425ecf53a3070ca8e189d9a9cbd2ce467803022185eb26deb1f1c948e2caca7c5ec97c1
6
+ metadata.gz: e614c3d69a0dbe91fe8c623c0246e90ddf0fa22fc2f31dccd847fd4bc6394899f6726e606e773927222e6d7707d1261fb2d1f71d952468fe27230cb140704806
7
+ data.tar.gz: 39640135b688f94e711bc956ea81dad208008a1a0a5bc5ca21ce6515c9641b6d66d1b5a86648fe34a97a8d436385c908b9c75dd2d6aa7a5b6ed71e962eaad545
@@ -2,15 +2,34 @@ module Wrnap
2
2
  module Etl
3
3
  module Infernal
4
4
  class << self
5
# Parses Infernal output into one parsed hit per alignment.
#
# @param file [String] path to a file holding the output, or the output text
#   itself (anything for which File.exist? is false is treated as text)
# @param lines_per_hit [Integer] number of non-empty lines that make up one hit
#   in the "Hit alignments" section (defaults to the previously hard-coded 10)
# @return [Array] one parse_hit result per hit; empty when the output reports
#   that no hits satisfied the reporting thresholds
def parse_file(file, lines_per_hit: 10)
  output = File.exist?(file) ? File.read(file) : file

  # Plain substring test: no captures are needed, so no regex is required.
  return [] if output.include?("No hits detected that satisfy reporting thresholds")

  # Drop everything up to and including the "Hit alignments:" header and
  # everything from the statistics summary onwards; what remains is the hits.
  hit_lines = output.
    gsub(/^(.*\n)*Hit alignments:\n/, "").
    gsub(/Internal CM pipeline statistics summary:\n(.*\n)*$/, "").
    strip.split("\n").reject(&:empty?)

  hit_lines.each_slice(lines_per_hit).map { |lines| parse_hit(lines.join("\n")) }
end
17
+
5
18
  # Diff of parse_hit(output): parses the text of a single Infernal hit.
  # 0.11.0 adds an optional name of the form "<id> <seq_from> <seq_to>", built
  # only when identifier_regex matches, and assigns it as the comment of the
  # object returned by Stockholm.fit_structure_to_sequence.
  # The two arguments handed to Stockholm are the captured sequence text
  # (upcased, every non-AUGC character replaced by ".") and the "CS" line run
  # through convert_infernal_to_dot_bracket.
  # 0.11.0 also narrows the sequence capture from (.*) to (\S+).
  def parse_hit(output)
19
+ name = if output =~ (identifier_regex = />>\s+(\S+)(.*\n){3}.*\s(\d+)\s+(\d+)\s+[\+-].*\n/)
20
+ # This is a pretty fancy regex, and there's no guarantee that the data has this info, so let's just test the waters here.
21
+ _, id, _, seq_from, seq_to, _ = output.match(identifier_regex).to_a
22
+ "%s %d %d" % [id.split(?|).last, seq_from, seq_to]
23
+ end
24
+
6
25
  Stockholm.fit_structure_to_sequence(
7
26
  *output.split(?\n).as do |infernal|
8
27
  [
9
- infernal.select { |line| line =~ /^.*\d+\s+.*\s+\d+\s*$/ }.last.match(/^.*\s+(.*)\s+\d+\s*$/)[1].upcase.gsub(/[^AUGC]/, ?.),
28
+ infernal.select { |line| line =~ /^.*\d+\s+.*\s+\d+\s*$/ }.last.match(/^.*\s+(\S+)\s+\d+\s*$/)[1].upcase.gsub(/[^AUGC]/, ?.),
10
29
  convert_infernal_to_dot_bracket(infernal.find { |line| line =~ /CS\s*$/ }.gsub(/\s+CS\s*$/, "").strip)
11
30
  ]
12
31
  end
13
- )
32
+ ).tap { |rna| rna.comment = name if name }
14
33
  end
15
34
 
16
35
  def convert_infernal_to_dot_bracket(structure)
@@ -5,27 +5,29 @@ module Wrnap
5
5
  attr_reader :accession, :from, :to, :coord_options
6
6
 
7
7
  class << self
8
# Diff of the init_from_entrez factory: builds a new instance from an accession
# and from/to coordinates (no sequence is passed), handing `options` through
# unchanged. 0.11.0 additionally accepts a block and forwards it to `new`.
- def init_from_entrez(accession, from, to, options = {})
8
+ def init_from_entrez(accession, from, to, options = {}, &block)
9
9
  new(
10
10
  accession: accession,
11
11
  from: from,
12
12
  to: to,
13
- options: options
13
+ options: options,
14
+ &block
14
15
  )
15
16
  end
16
17
 
17
# Diff of the init_from_string factory: same as init_from_entrez but the caller
# supplies the sequence explicitly alongside the accession and coordinates.
# 0.11.0 additionally accepts a block and forwards it to `new`.
- def init_from_string(sequence, accession, from, to, options = {})
18
+ def init_from_string(sequence, accession, from, to, options = {}, &block)
18
19
  new(
19
20
  sequence: sequence,
20
21
  accession: accession,
21
22
  from: from,
22
23
  to: to,
23
- options: options
24
+ options: options,
25
+ &block
24
26
  )
25
27
  end
26
28
  end
27
29
 
28
- def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {})
30
+ def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {}, &block)
29
31
  options = { coords: {}, rna: {} }.merge(options)
30
32
 
31
33
  @accession, @from, @to, @coord_options = accession, from, to, options[:coords]
@@ -36,12 +38,13 @@ module Wrnap
36
38
  @raw_sequence = (sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence).upcase
37
39
  end
38
40
 
39
- super({
41
+ super(
40
42
  sequence: self.sequence,
41
43
  structure: options[:rna][:structure] || options[:rna][:str_1] || options[:rna][:str],
42
44
  second_structure: options[:rna][:second_structure] || options[:rna][:str_2],
43
- comment: options[:rna][:comment] || options[:rna][:name] || identifier
44
- })
45
+ comment: options[:rna][:comment] || options[:rna][:name] || identifier,
46
+ &block
47
+ )
45
48
 
46
49
  remove_instance_variable(:@sequence)
47
50
  end
@@ -1,113 +1,115 @@
1
1
  module Wrnap
2
2
  module Global
3
- module Extensions
4
- def self.included(base)
5
- base.send(:include, InstanceMethods)
6
- base.extend(ClassMethods)
7
- base.extend(OneStructureBasedMethods)
8
- base.extend(TwoStructureBasedMethods)
9
-
10
- base.class_eval do
11
- OneStructureBasedMethods.public_instance_methods.each do |class_method|
12
- define_method(class_method) do |*args|
13
- self.class.send(class_method, *[structure].concat(args))
3
+ class Rna
4
+ module Extensions
5
+ def self.included(base)
6
+ base.send(:include, InstanceMethods)
7
+ base.extend(ClassMethods)
8
+ base.extend(OneStructureBasedMethods)
9
+ base.extend(TwoStructureBasedMethods)
10
+
11
+ base.class_eval do
12
+ OneStructureBasedMethods.public_instance_methods.each do |class_method|
13
+ define_method(class_method) do |*args|
14
+ self.class.send(class_method, *[structure].concat(args))
15
+ end
14
16
  end
15
- end
16
17
 
17
- TwoStructureBasedMethods.public_instance_methods.each do |class_method|
18
- define_method(class_method) do |*args|
19
- self.class.send(class_method, *[str_1, str_2].concat(args))
18
+ TwoStructureBasedMethods.public_instance_methods.each do |class_method|
19
+ define_method(class_method) do |*args|
20
+ self.class.send(class_method, *[str_1, str_2].concat(args))
21
+ end
20
22
  end
21
23
  end
22
- end
23
-
24
- base.send(:include, InstanceMethods)
25
- end
26
24
 
27
- module ClassMethods
28
- def generate_sequence(sequence_length)
29
- # 0th order Markov chain w/ uniform probability distribution
30
- Rna.init_from_string(sequence_length.times.inject("") { |string, _| string + %w[A U C G][rand(4)] })
25
+ base.send(:include, InstanceMethods)
31
26
  end
32
27
 
33
- def shuffle(sequence, token_length = 2)
34
- Shuffle.new(sequence).shuffle(token_length)
35
- end
28
+ module ClassMethods
29
+ def generate_sequence(sequence_length)
30
+ # 0th order Markov chain w/ uniform probability distribution
31
+ Rna.init_from_string(sequence_length.times.inject("") { |string, _| string + %w[A U C G][rand(4)] })
32
+ end
36
33
 
37
- def structure_from_bp_list(length, base_pairs)
38
- base_pairs.to_a.map(&:to_a).map(&:sort).inject("." * length) { |structure, (i, j)| structure.tap { structure[i] = ?(; structure[j] = ?) } }
39
- end
40
- end
34
+ def shuffle(sequence, token_length = 2)
35
+ Shuffle.new(sequence).shuffle(token_length)
36
+ end
41
37
 
42
- module InstanceMethods
43
- def dishuffle
44
- self.class.shuffle(sequence, 2)
38
+ def structure_from_bp_list(length, base_pairs)
39
+ base_pairs.to_a.map(&:to_a).map(&:sort).inject("." * length) { |structure, (i, j)| structure.tap { structure[i] = ?(; structure[j] = ?) } }
40
+ end
45
41
  end
46
42
 
47
- def gc_content
48
- seq.split(//).select { |i| i =~ /[GC]/i }.size.to_f / seq.size
49
- end
43
+ module InstanceMethods
44
+ def dishuffle
45
+ self.class.shuffle(sequence, 2)
46
+ end
50
47
 
51
- def boltzmann_probability(dangle: 2)
52
- Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
48
+ def gc_content
49
+ seq.split(//).select { |i| i =~ /[GC]/i }.size.to_f / seq.size
50
+ end
51
+
52
+ def boltzmann_probability(dangle: 2)
53
+ Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
54
+ end
53
55
  end
54
- end
55
56
 
56
- module OneStructureBasedMethods
57
- def helices(structure)
58
- array = base_pairs(structure).sort_by(&:first).map(&:to_a)
57
+ module OneStructureBasedMethods
58
+ def helices(structure)
59
+ array = base_pairs(structure).sort_by(&:first).map(&:to_a)
59
60
 
60
- unless array.empty?
61
- array[1..-1].inject([[array.first]]) do |bins, (i, j)|
62
- bins.tap { bins[-1][-1] == [i - 1, j + 1] ? bins[-1] << [i, j] : bins << [[i, j]] }
61
+ unless array.empty?
62
+ array[1..-1].inject([[array.first]]) do |bins, (i, j)|
63
+ bins.tap { bins[-1][-1] == [i - 1, j + 1] ? bins[-1] << [i, j] : bins << [[i, j]] }
64
+ end
65
+ else
66
+ []
63
67
  end
64
- else
65
- []
66
68
  end
67
- end
68
69
 
69
- def max_bp_distance(structure)
70
- base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
71
- end
70
+ def max_bp_distance(structure)
71
+ base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
72
+ end
72
73
 
73
- def base_pairs(structure)
74
- get_pairings(structure).each_with_index.inject(Set.new) do |set, (j, i)|
75
- j >= 0 ? set << Set[i, j] : set
74
+ def base_pairs(structure)
75
+ get_pairings(structure).each_with_index.inject(Set.new) do |set, (j, i)|
76
+ j >= 0 ? set << Set[i, j] : set
77
+ end
76
78
  end
77
- end
78
79
 
79
- def get_pairings(structure)
80
- stack = []
81
-
82
- structure.each_char.each_with_index.inject(Array.new(structure.length, -1)) do |array, (symbol, index)|
83
- array.tap do
84
- case symbol
85
- when "(" then stack.push(index)
86
- when ")" then
87
- if stack.empty?
88
- raise "Too many ')' in '#{structure}'"
89
- else
90
- stack.pop.tap do |opening|
91
- array[opening] = index
92
- array[index] = opening
93
- end
94
- end
95
- end
96
- end
97
- end.tap do
98
- raise "Too many '(' in '#{structure}'" unless stack.empty?
99
- end
80
+ def get_pairings(structure)
81
+ stack = []
82
+
83
+ structure.each_char.each_with_index.inject(Array.new(structure.length, -1)) do |array, (symbol, index)|
84
+ array.tap do
85
+ case symbol
86
+ when "(" then stack.push(index)
87
+ when ")" then
88
+ if stack.empty?
89
+ raise "Too many ')' in '#{structure}'"
90
+ else
91
+ stack.pop.tap do |opening|
92
+ array[opening] = index
93
+ array[index] = opening
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end.tap do
99
+ raise "Too many '(' in '#{structure}'" unless stack.empty?
100
+ end
101
+ end
100
102
  end
101
- end
102
103
 
103
- module TwoStructureBasedMethods
104
- def bp_distance(structure_1, structure_2)
105
- # Takes two structures and calculates the distance between them by |symmetric difference(bp_in_a, bp_in_b)|
106
- raise "The two structures are not the same length" unless structure_1.length == structure_2.length
104
+ module TwoStructureBasedMethods
105
+ def bp_distance(structure_1, structure_2)
106
+ # Takes two structures and calculates the distance between them by |symmetric difference(bp_in_a, bp_in_b)|
107
+ raise "The two structures are not the same length" unless structure_1.length == structure_2.length
107
108
 
108
- bp_set_1, bp_set_2 = base_pairs(structure_1), base_pairs(structure_2)
109
+ bp_set_1, bp_set_2 = base_pairs(structure_1), base_pairs(structure_2)
109
110
 
110
- ((bp_set_1 - bp_set_2) + (bp_set_2 - bp_set_1)).count
111
+ ((bp_set_1 - bp_set_2) + (bp_set_2 - bp_set_1)).count
112
+ end
111
113
  end
112
114
  end
113
115
  end
@@ -0,0 +1,42 @@
1
require "forwardable"

module Wrnap
  module Global
    class Rna
      # Mixin giving a host object a free-form metadata store.
      #
      # The host is expected to expose a `metadata` reader returning a
      # Metadata::Container. Including this module adds `meta`, `meta_rna` and
      # an `md` delegator that returns the container's underlying data hash.
      module Metadata
        def self.included(base)
          base.send(:include, InstanceMethods)
        end

        module InstanceMethods
          def self.included(base)
            # def_delegator comes from Forwardable; extend here so the mixin
            # does not depend on the host having done so first (extending a
            # second time is a no-op).
            base.extend(Forwardable)

            base.class_eval do
              def_delegator :@metadata, :data, :md
            end
          end

          # Evaluates the optional block in the context of the metadata
          # container and returns the container.
          def meta(&block)
            metadata.tap { metadata.instance_eval(&block) if block_given? }
          end

          # Same as #meta, but returns the object the container was created
          # for, which allows chaining on the RNA itself.
          def meta_rna(&block)
            metadata.__rna__.tap { meta(&block) }
          end
        end

        # Open-ended key/value store. Unknown zero-argument calls read a key;
        # unknown one-argument calls (with or without a trailing "=") write it.
        class Container
          attr_reader :__rna__, :data

          # @param rna the object this container belongs to
          def initialize(rna)
            @__rna__, @data = rna, {}
          end

          def method_missing(name, *args, &block)
            case args.size
            when 0 then data[name]
            when 1 then data[name.to_s.chomp("=").to_sym] = args.first
            else super
            end
          end

          # Reports setter-style names and keys that are already stored, so
          # respond_to? agrees with what method_missing can meaningfully answer
          # without claiming conversion hooks such as to_ary or to_hash.
          def respond_to_missing?(name, include_private = false)
            key = name.to_s
            key.end_with?("=") || data.key?(key.to_sym) || super
          end
        end
      end
    end
  end
end
@@ -1,43 +1,47 @@
1
1
  module Wrnap
2
2
  module Global
3
3
  class Rna
4
+ extend Forwardable
4
5
  include Extensions
6
+ include Metadata
5
7
 
6
8
  CANONICAL_BASES = Set.new << Set.new([?G, ?C]) << Set.new([?A, ?U]) << Set.new([?G, ?U])
7
9
 
8
10
  attr_accessor :comment
9
- attr_reader :sequence, :structure, :second_structure
11
+ attr_reader :sequence, :structure, :second_structure, :metadata
10
12
 
11
13
  class << self
12
# Diff of Rna.init_from_string: positional-argument factory that maps
# sequence, structure, second_structure and comment onto the keyword arguments
# of `new`. 0.11.0 additionally accepts a block and forwards it to `new`.
- def init_from_string(sequence, structure = nil, second_structure = nil, comment = nil)
14
+ def init_from_string(sequence, structure = nil, second_structure = nil, comment = nil, &block)
13
15
  new(
14
16
  sequence: sequence,
15
17
  structure: structure,
16
18
  second_structure: second_structure,
17
- comment: comment
19
+ comment: comment,
20
+ &block
18
21
  )
19
22
  end
20
23
 
21
# Diff of Rna.init_from_hash: factory reading a hash that may use long or short
# keys (:sequence/:seq, :structure/:str_1/:str, :second_structure/:str_2,
# :comment/:name); the first non-nil alternative wins for each field.
# 0.11.0 additionally accepts a block and forwards it to `new`.
- def init_from_hash(hash)
24
+ def init_from_hash(hash, &block)
22
25
  new(
23
26
  sequence: hash[:sequence] || hash[:seq],
24
27
  structure: hash[:structure] || hash[:str_1] || hash[:str],
25
28
  second_structure: hash[:second_structure] || hash[:str_2],
26
- comment: hash[:comment] || hash[:name]
29
+ comment: hash[:comment] || hash[:name],
30
+ &block
27
31
  )
28
32
  end
29
33
 
30
# Diff of Rna.init_from_array: splats the array into init_from_string, so the
# elements are taken in that method's positional order.
# 0.11.0 additionally accepts a block and forwards it.
- def init_from_array(array)
31
- init_from_string(*array)
34
+ def init_from_array(array, &block)
35
+ init_from_string(*array, &block)
32
36
  end
33
37
 
34
- def init_from_fasta(string)
38
+ def init_from_fasta(string, &block)
35
39
  if File.exist?(string)
36
40
  comment = File.basename(string, string.include?(?.) ? ".%s" % string.split(?.)[-1] : "")
37
41
  string = File.read(string).chomp
38
42
  end
39
43
 
40
- init_from_string(*string.split(/\n/).reject { |line| line.start_with?(">") }[0, 3]).tap do |rna|
44
+ init_from_string(*string.split(/\n/).reject { |line| line.start_with?(">") }[0, 3], &block).tap do |rna|
41
45
  if (line = string.split(/\n/).first).start_with?(">") && !(file_comment = line.gsub(/^>\s*/, "")).empty?
42
46
  rna.comment = file_comment
43
47
  elsif comment
@@ -46,25 +50,26 @@ module Wrnap
46
50
  end
47
51
  end
48
52
 
49
# Diff of Rna.init_from_context: delegates to Context.init_from_entrez, passing
# the positional context arguments through and the coords/rna hashes as
# keywords. 0.11.0 additionally accepts a block and forwards it.
- def init_from_context(*context, coords: {}, rna: {})
50
- Context.init_from_entrez(*context, coords: coords, rna: rna)
53
+ def init_from_context(*context, coords: {}, rna: {}, &block)
54
+ Context.init_from_entrez(*context, coords: coords, rna: rna, &block)
51
55
  end
52
56
 
53
# Builds a fresh instance copying sequence, structure, second structure and
# comment from an existing RNA-like object; the optional block is forwarded
# to `new`.
#
# This happens when you call a Wrnap library function with the output of
# something like Wrnap::Fold.run(...).mfe
#
# The keywords were previously misspelled (`strucutre:`, `second_strucutre:`),
# which does not match the `structure:` / `second_structure:` keywords that
# `initialize` declares.
def init_from_self(rna, &block)
  new(
    sequence: rna.sequence,
    structure: rna.structure,
    second_structure: rna.second_structure,
    comment: rna.comment,
    &block
  )
end
62
67
 
63
68
  alias_method :placeholder, :new
64
69
  end
65
70
 
66
- def initialize(sequence: "", structure: "", second_structure: "", comment: "")
67
- @sequence, @comment = (sequence.kind_of?(Rna) ? sequence.seq : sequence).upcase, comment
71
+ def initialize(sequence: "", structure: "", second_structure: "", comment: "", &block)
72
+ @sequence, @comment, @metadata = (sequence.kind_of?(Rna) ? sequence.seq : sequence).upcase, comment, Metadata::Container.new(self)
68
73
 
69
74
  [:structure, :second_structure].each do |structure_symbol|
70
75
  instance_variable_set(
@@ -83,6 +88,8 @@ module Wrnap
83
88
  )
84
89
  end
85
90
 
91
+ metadata.instance_eval(&block) if block_given?
92
+
86
93
  if str && seq.length != str.length
87
94
  Wrnap.debugger { "The sequence length (%d) doesn't match the structure length (%d)" % [seq, str].map(&:length) }
88
95
  end
@@ -126,13 +133,6 @@ module Wrnap
126
133
 
127
134
  alias :two_str :two_structures
128
135
 
129
- def print_full
130
- puts name if name
131
- puts seq if seq
132
- puts str_1 if str_1
133
- puts str_2 if str_2
134
- end
135
-
136
136
  def write_fa!(filename)
137
137
  filename.tap do |filename|
138
138
  File.open(filename, ?w) do |file|
@@ -162,11 +162,20 @@ module Wrnap
162
162
  else super end
163
163
  end
164
164
 
165
# Prints a FASTA-like summary to stdout: "> name", the sequence, each structure
# that is present, and the metadata hash as JSON when it is not empty.
#
# Note that this shadows Kernel#pp on instances of the class.
#
# The metadata line used to be guarded by `if meta`; `meta` always returns the
# container object, so the guard never failed and the container's default
# to_s was printed. The stored data (`md`) is printed instead, in the same
# JSON form that #inspect uses.
def pp
  puts("> %s" % name) if name
  puts("%s" % seq) if seq
  puts("%s" % str_1) if str_1
  puts("%s" % str_2) if str_2
  puts("%s" % md.to_json) unless md.empty?
end
172
+
165
173
  def inspect
166
174
  "#<RNA: %s>" % [
167
175
  ("#{seq[0, 20] + (seq.length > 20 ? '... [%d]' % seq.length : '')}" if seq && !seq.empty?),
168
176
  ("#{str_1[0, 20] + (str_1.length > 20 ? ' [%d]' % seq.length : '')}" if str_1 && !str_1.empty?),
169
177
  ("#{str_2[0, 20] + (str_2.length > 20 ? ' [%d]' % seq.length : '')}" if str_2 && !str_1.empty?),
178
+ (md.to_json unless md.empty?),
170
179
  (name ? name : "#{self.class.name}")
171
180
  ].compact.join(", ")
172
181
  end
data/lib/wrnap/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Wrnap
2
- VERSION = "0.10.0"
2
+ VERSION = "0.11.0"
3
3
  end
data/lib/wrnap.rb CHANGED
@@ -14,6 +14,7 @@ require "active_support/core_ext/class"
14
14
 
15
15
  require "wrnap/version"
16
16
  require "wrnap/global/rna/extensions"
17
+ require "wrnap/global/rna/metadata"
17
18
  require "wrnap/global/rna"
18
19
  require "wrnap/global/rna/context"
19
20
  require "wrnap/global/chainer"
@@ -66,23 +67,23 @@ end
66
67
 
67
68
  # This dirties up the public namespace, but I use it so many times that I want a shorthand to it
68
69
  unless defined? RNA
69
- def RNA(*args)
70
- RNA.from_array(args)
70
+ def RNA(*args, &block)
71
+ RNA.from_array(args, &block)
71
72
  end
72
73
  end
73
74
 
74
75
  module RNA
75
- def self.load_all(pattern = "*.fa")
76
- Dir[File.directory?(pattern) ? pattern + "/*.fa" : pattern].map { |file| RNA.from_fasta(file) }
76
+ def self.load_all(pattern = "*.fa", &block)
77
+ Dir[File.directory?(pattern) ? pattern + "/*.fa" : pattern].map { |file| RNA.from_fasta(file, &block) }
77
78
  end
78
79
 
79
- def self.random(size, *args)
80
- RNA.from_array(args.unshift(Wrnap::Global::Rna.generate_sequence(size).seq))
80
+ def self.random(size, *args, &block)
81
+ RNA.from_array(args.unshift(Wrnap::Global::Rna.generate_sequence(size).seq), &block)
81
82
  end
82
83
 
83
84
  def self.method_missing(name, *args, &block)
84
85
  if "#{name}" =~ /^from_\w+$/
85
- Wrnap::Global::Rna.send("init_#{name}", *args)
86
+ Wrnap::Global::Rna.send("init_#{name}", *args, &block)
86
87
  else super end
87
88
  end
88
89
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wrnap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Evan Senter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-03 00:00:00.000000000 Z
11
+ date: 2014-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -157,6 +157,7 @@ files:
157
157
  - lib/wrnap/global/rna.rb
158
158
  - lib/wrnap/global/rna/context.rb
159
159
  - lib/wrnap/global/rna/extensions.rb
160
+ - lib/wrnap/global/rna/metadata.rb
160
161
  - lib/wrnap/global/runner.rb
161
162
  - lib/wrnap/graphing/r.rb
162
163
  - lib/wrnap/package/base.rb