wrnap 0.10.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ce3134651886e323b97794153a83b97ada50a5db
4
- data.tar.gz: a01c0eb283ca43609e06997deee79787dc98de00
3
+ metadata.gz: 29f26144274a4bd80974e243be7cf969b5f2f49a
4
+ data.tar.gz: 79793475465dacb80644623212f352010a07c910
5
5
  SHA512:
6
- metadata.gz: 4c5ed38ddcc0570d2ec46f41e46332d4e71d5b5c756f3c027c8e9e2afddca3e8f7766912246af8cf39bd44c7b78c2a89983a4e9af629902ccaebbff4f576ca1c
7
- data.tar.gz: 91d22c1f48cc245ba1135324f874858d8f2ffc0834e4b9d0383d44ed8425ecf53a3070ca8e189d9a9cbd2ce467803022185eb26deb1f1c948e2caca7c5ec97c1
6
+ metadata.gz: e614c3d69a0dbe91fe8c623c0246e90ddf0fa22fc2f31dccd847fd4bc6394899f6726e606e773927222e6d7707d1261fb2d1f71d952468fe27230cb140704806
7
+ data.tar.gz: 39640135b688f94e711bc956ea81dad208008a1a0a5bc5ca21ce6515c9641b6d66d1b5a86648fe34a97a8d436385c908b9c75dd2d6aa7a5b6ed71e962eaad545
@@ -2,15 +2,34 @@ module Wrnap
2
2
  module Etl
3
3
  module Infernal
4
4
  class << self
5
+ def parse_file(file)
6
+ output = File.exist?(file) ? File.read(file) : file
7
+
8
+ if output =~ /No hits detected that satisfy reporting thresholds/
9
+ []
10
+ else
11
+ output.
12
+ gsub(/^(.*\n)*Hit alignments:\n/, "").
13
+ gsub(/Internal CM pipeline statistics summary:\n(.*\n)*$/, "").
14
+ strip.split(?\n).reject(&:empty?).each_slice(10).map { |lines| parse_hit(lines.join(?\n)) }
15
+ end
16
+ end
17
+
5
18
  def parse_hit(output)
19
+ name = if output =~ (identifier_regex = />>\s+(\S+)(.*\n){3}.*\s(\d+)\s+(\d+)\s+[\+-].*\n/)
20
+ # This is a pretty fancy regex, and there's no guarantee that the data has this info, so let's just test the waters here.
21
+ _, id, _, seq_from, seq_to, _ = output.match(identifier_regex).to_a
22
+ "%s %d %d" % [id.split(?|).last, seq_from, seq_to]
23
+ end
24
+
6
25
  Stockholm.fit_structure_to_sequence(
7
26
  *output.split(?\n).as do |infernal|
8
27
  [
9
- infernal.select { |line| line =~ /^.*\d+\s+.*\s+\d+\s*$/ }.last.match(/^.*\s+(.*)\s+\d+\s*$/)[1].upcase.gsub(/[^AUGC]/, ?.),
28
+ infernal.select { |line| line =~ /^.*\d+\s+.*\s+\d+\s*$/ }.last.match(/^.*\s+(\S+)\s+\d+\s*$/)[1].upcase.gsub(/[^AUGC]/, ?.),
10
29
  convert_infernal_to_dot_bracket(infernal.find { |line| line =~ /CS\s*$/ }.gsub(/\s+CS\s*$/, "").strip)
11
30
  ]
12
31
  end
13
- )
32
+ ).tap { |rna| rna.comment = name if name }
14
33
  end
15
34
 
16
35
  def convert_infernal_to_dot_bracket(structure)
@@ -5,27 +5,29 @@ module Wrnap
5
5
  attr_reader :accession, :from, :to, :coord_options
6
6
 
7
7
  class << self
8
- def init_from_entrez(accession, from, to, options = {})
8
+ def init_from_entrez(accession, from, to, options = {}, &block)
9
9
  new(
10
10
  accession: accession,
11
11
  from: from,
12
12
  to: to,
13
- options: options
13
+ options: options,
14
+ &block
14
15
  )
15
16
  end
16
17
 
17
- def init_from_string(sequence, accession, from, to, options = {})
18
+ def init_from_string(sequence, accession, from, to, options = {}, &block)
18
19
  new(
19
20
  sequence: sequence,
20
21
  accession: accession,
21
22
  from: from,
22
23
  to: to,
23
- options: options
24
+ options: options,
25
+ &block
24
26
  )
25
27
  end
26
28
  end
27
29
 
28
- def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {})
30
+ def initialize(sequence: nil, accession: nil, from: nil, to: nil, options: {}, &block)
29
31
  options = { coords: {}, rna: {} }.merge(options)
30
32
 
31
33
  @accession, @from, @to, @coord_options = accession, from, to, options[:coords]
@@ -36,12 +38,13 @@ module Wrnap
36
38
  @raw_sequence = (sequence.is_a?(String) ? Bio::Sequence::NA.new(sequence) : sequence).upcase
37
39
  end
38
40
 
39
- super({
41
+ super(
40
42
  sequence: self.sequence,
41
43
  structure: options[:rna][:structure] || options[:rna][:str_1] || options[:rna][:str],
42
44
  second_structure: options[:rna][:second_structure] || options[:rna][:str_2],
43
- comment: options[:rna][:comment] || options[:rna][:name] || identifier
44
- })
45
+ comment: options[:rna][:comment] || options[:rna][:name] || identifier,
46
+ &block
47
+ )
45
48
 
46
49
  remove_instance_variable(:@sequence)
47
50
  end
@@ -1,113 +1,115 @@
1
1
  module Wrnap
2
2
  module Global
3
- module Extensions
4
- def self.included(base)
5
- base.send(:include, InstanceMethods)
6
- base.extend(ClassMethods)
7
- base.extend(OneStructureBasedMethods)
8
- base.extend(TwoStructureBasedMethods)
9
-
10
- base.class_eval do
11
- OneStructureBasedMethods.public_instance_methods.each do |class_method|
12
- define_method(class_method) do |*args|
13
- self.class.send(class_method, *[structure].concat(args))
3
+ class Rna
4
+ module Extensions
5
+ def self.included(base)
6
+ base.send(:include, InstanceMethods)
7
+ base.extend(ClassMethods)
8
+ base.extend(OneStructureBasedMethods)
9
+ base.extend(TwoStructureBasedMethods)
10
+
11
+ base.class_eval do
12
+ OneStructureBasedMethods.public_instance_methods.each do |class_method|
13
+ define_method(class_method) do |*args|
14
+ self.class.send(class_method, *[structure].concat(args))
15
+ end
14
16
  end
15
- end
16
17
 
17
- TwoStructureBasedMethods.public_instance_methods.each do |class_method|
18
- define_method(class_method) do |*args|
19
- self.class.send(class_method, *[str_1, str_2].concat(args))
18
+ TwoStructureBasedMethods.public_instance_methods.each do |class_method|
19
+ define_method(class_method) do |*args|
20
+ self.class.send(class_method, *[str_1, str_2].concat(args))
21
+ end
20
22
  end
21
23
  end
22
- end
23
-
24
- base.send(:include, InstanceMethods)
25
- end
26
24
 
27
- module ClassMethods
28
- def generate_sequence(sequence_length)
29
- # 0th order Markov chain w/ uniform probability distribution
30
- Rna.init_from_string(sequence_length.times.inject("") { |string, _| string + %w[A U C G][rand(4)] })
25
+ base.send(:include, InstanceMethods)
31
26
  end
32
27
 
33
- def shuffle(sequence, token_length = 2)
34
- Shuffle.new(sequence).shuffle(token_length)
35
- end
28
+ module ClassMethods
29
+ def generate_sequence(sequence_length)
30
+ # 0th order Markov chain w/ uniform probability distribution
31
+ Rna.init_from_string(sequence_length.times.inject("") { |string, _| string + %w[A U C G][rand(4)] })
32
+ end
36
33
 
37
- def structure_from_bp_list(length, base_pairs)
38
- base_pairs.to_a.map(&:to_a).map(&:sort).inject("." * length) { |structure, (i, j)| structure.tap { structure[i] = ?(; structure[j] = ?) } }
39
- end
40
- end
34
+ def shuffle(sequence, token_length = 2)
35
+ Shuffle.new(sequence).shuffle(token_length)
36
+ end
41
37
 
42
- module InstanceMethods
43
- def dishuffle
44
- self.class.shuffle(sequence, 2)
38
+ def structure_from_bp_list(length, base_pairs)
39
+ base_pairs.to_a.map(&:to_a).map(&:sort).inject("." * length) { |structure, (i, j)| structure.tap { structure[i] = ?(; structure[j] = ?) } }
40
+ end
45
41
  end
46
42
 
47
- def gc_content
48
- seq.split(//).select { |i| i =~ /[GC]/i }.size.to_f / seq.size
49
- end
43
+ module InstanceMethods
44
+ def dishuffle
45
+ self.class.shuffle(sequence, 2)
46
+ end
50
47
 
51
- def boltzmann_probability(dangle: 2)
52
- Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
48
+ def gc_content
49
+ seq.split(//).select { |i| i =~ /[GC]/i }.size.to_f / seq.size
50
+ end
51
+
52
+ def boltzmann_probability(dangle: 2)
53
+ Math.exp(-run(:eval, d: dangle).mfe / Wrnap::RT) / Math.exp(-run(:fold, d: dangle, p: 0).ensemble_energy / Wrnap::RT)
54
+ end
53
55
  end
54
- end
55
56
 
56
- module OneStructureBasedMethods
57
- def helices(structure)
58
- array = base_pairs(structure).sort_by(&:first).map(&:to_a)
57
+ module OneStructureBasedMethods
58
+ def helices(structure)
59
+ array = base_pairs(structure).sort_by(&:first).map(&:to_a)
59
60
 
60
- unless array.empty?
61
- array[1..-1].inject([[array.first]]) do |bins, (i, j)|
62
- bins.tap { bins[-1][-1] == [i - 1, j + 1] ? bins[-1] << [i, j] : bins << [[i, j]] }
61
+ unless array.empty?
62
+ array[1..-1].inject([[array.first]]) do |bins, (i, j)|
63
+ bins.tap { bins[-1][-1] == [i - 1, j + 1] ? bins[-1] << [i, j] : bins << [[i, j]] }
64
+ end
65
+ else
66
+ []
63
67
  end
64
- else
65
- []
66
68
  end
67
- end
68
69
 
69
- def max_bp_distance(structure)
70
- base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
71
- end
70
+ def max_bp_distance(structure)
71
+ base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
72
+ end
72
73
 
73
- def base_pairs(structure)
74
- get_pairings(structure).each_with_index.inject(Set.new) do |set, (j, i)|
75
- j >= 0 ? set << Set[i, j] : set
74
+ def base_pairs(structure)
75
+ get_pairings(structure).each_with_index.inject(Set.new) do |set, (j, i)|
76
+ j >= 0 ? set << Set[i, j] : set
77
+ end
76
78
  end
77
- end
78
79
 
79
- def get_pairings(structure)
80
- stack = []
81
-
82
- structure.each_char.each_with_index.inject(Array.new(structure.length, -1)) do |array, (symbol, index)|
83
- array.tap do
84
- case symbol
85
- when "(" then stack.push(index)
86
- when ")" then
87
- if stack.empty?
88
- raise "Too many ')' in '#{structure}'"
89
- else
90
- stack.pop.tap do |opening|
91
- array[opening] = index
92
- array[index] = opening
93
- end
94
- end
95
- end
96
- end
97
- end.tap do
98
- raise "Too many '(' in '#{structure}'" unless stack.empty?
99
- end
80
+ def get_pairings(structure)
81
+ stack = []
82
+
83
+ structure.each_char.each_with_index.inject(Array.new(structure.length, -1)) do |array, (symbol, index)|
84
+ array.tap do
85
+ case symbol
86
+ when "(" then stack.push(index)
87
+ when ")" then
88
+ if stack.empty?
89
+ raise "Too many ')' in '#{structure}'"
90
+ else
91
+ stack.pop.tap do |opening|
92
+ array[opening] = index
93
+ array[index] = opening
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end.tap do
99
+ raise "Too many '(' in '#{structure}'" unless stack.empty?
100
+ end
101
+ end
100
102
  end
101
- end
102
103
 
103
- module TwoStructureBasedMethods
104
- def bp_distance(structure_1, structure_2)
105
- # Takes two structures and calculates the distance between them by |symmetric difference(bp_in_a, bp_in_b)|
106
- raise "The two structures are not the same length" unless structure_1.length == structure_2.length
104
+ module TwoStructureBasedMethods
105
+ def bp_distance(structure_1, structure_2)
106
+ # Takes two structures and calculates the distance between them by |symmetric difference(bp_in_a, bp_in_b)|
107
+ raise "The two structures are not the same length" unless structure_1.length == structure_2.length
107
108
 
108
- bp_set_1, bp_set_2 = base_pairs(structure_1), base_pairs(structure_2)
109
+ bp_set_1, bp_set_2 = base_pairs(structure_1), base_pairs(structure_2)
109
110
 
110
- ((bp_set_1 - bp_set_2) + (bp_set_2 - bp_set_1)).count
111
+ ((bp_set_1 - bp_set_2) + (bp_set_2 - bp_set_1)).count
112
+ end
111
113
  end
112
114
  end
113
115
  end
@@ -0,0 +1,42 @@
1
+ module Wrnap
2
+ module Global
3
+ class Rna
4
+ module Metadata
5
+ def self.included(base)
6
+ base.send(:include, InstanceMethods)
7
+ end
8
+
9
+ module InstanceMethods
10
+ def self.included(base)
11
+ base.class_eval do
12
+ def_delegator :@metadata, :data, :md
13
+ end
14
+ end
15
+
16
+ def meta(&block)
17
+ metadata.tap { metadata.instance_eval(&block) if block_given? }
18
+ end
19
+
20
+ def meta_rna(&block)
21
+ metadata.__rna__.tap { meta(&block) }
22
+ end
23
+ end
24
+
25
+ class Container
26
+ attr_reader :__rna__, :data
27
+
28
+ def initialize(rna)
29
+ @__rna__, @data = rna, {}
30
+ end
31
+
32
+ def method_missing(name, *args, &block)
33
+ case args.size
34
+ when 0 then data[name]
35
+ when 1 then data[name.to_s.gsub(/=$/, "").to_sym] = args.first
36
+ else super end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -1,43 +1,47 @@
1
1
  module Wrnap
2
2
  module Global
3
3
  class Rna
4
+ extend Forwardable
4
5
  include Extensions
6
+ include Metadata
5
7
 
6
8
  CANONICAL_BASES = Set.new << Set.new([?G, ?C]) << Set.new([?A, ?U]) << Set.new([?G, ?U])
7
9
 
8
10
  attr_accessor :comment
9
- attr_reader :sequence, :structure, :second_structure
11
+ attr_reader :sequence, :structure, :second_structure, :metadata
10
12
 
11
13
  class << self
12
- def init_from_string(sequence, structure = nil, second_structure = nil, comment = nil)
14
+ def init_from_string(sequence, structure = nil, second_structure = nil, comment = nil, &block)
13
15
  new(
14
16
  sequence: sequence,
15
17
  structure: structure,
16
18
  second_structure: second_structure,
17
- comment: comment
19
+ comment: comment,
20
+ &block
18
21
  )
19
22
  end
20
23
 
21
- def init_from_hash(hash)
24
+ def init_from_hash(hash, &block)
22
25
  new(
23
26
  sequence: hash[:sequence] || hash[:seq],
24
27
  structure: hash[:structure] || hash[:str_1] || hash[:str],
25
28
  second_structure: hash[:second_structure] || hash[:str_2],
26
- comment: hash[:comment] || hash[:name]
29
+ comment: hash[:comment] || hash[:name],
30
+ &block
27
31
  )
28
32
  end
29
33
 
30
- def init_from_array(array)
31
- init_from_string(*array)
34
+ def init_from_array(array, &block)
35
+ init_from_string(*array, &block)
32
36
  end
33
37
 
34
- def init_from_fasta(string)
38
+ def init_from_fasta(string, &block)
35
39
  if File.exist?(string)
36
40
  comment = File.basename(string, string.include?(?.) ? ".%s" % string.split(?.)[-1] : "")
37
41
  string = File.read(string).chomp
38
42
  end
39
43
 
40
- init_from_string(*string.split(/\n/).reject { |line| line.start_with?(">") }[0, 3]).tap do |rna|
44
+ init_from_string(*string.split(/\n/).reject { |line| line.start_with?(">") }[0, 3], &block).tap do |rna|
41
45
  if (line = string.split(/\n/).first).start_with?(">") && !(file_comment = line.gsub(/^>\s*/, "")).empty?
42
46
  rna.comment = file_comment
43
47
  elsif comment
@@ -46,25 +50,26 @@ module Wrnap
46
50
  end
47
51
  end
48
52
 
49
- def init_from_context(*context, coords: {}, rna: {})
50
- Context.init_from_entrez(*context, coords: coords, rna: rna)
53
+ def init_from_context(*context, coords: {}, rna: {}, &block)
54
+ Context.init_from_entrez(*context, coords: coords, rna: rna, &block)
51
55
  end
52
56
 
53
- def init_from_self(rna)
57
+ def init_from_self(rna, &block)
54
58
  # This happens when you call a Wrnap library function with the output of something like Wrnap::Fold.run(...).mfe
55
59
  new(
56
60
  sequence: rna.sequence,
57
61
  strucutre: rna.structure,
58
62
  second_strucutre: rna.second_structure,
59
- comment: rna.comment
63
+ comment: rna.comment,
64
+ &block
60
65
  )
61
66
  end
62
67
 
63
68
  alias_method :placeholder, :new
64
69
  end
65
70
 
66
- def initialize(sequence: "", structure: "", second_structure: "", comment: "")
67
- @sequence, @comment = (sequence.kind_of?(Rna) ? sequence.seq : sequence).upcase, comment
71
+ def initialize(sequence: "", structure: "", second_structure: "", comment: "", &block)
72
+ @sequence, @comment, @metadata = (sequence.kind_of?(Rna) ? sequence.seq : sequence).upcase, comment, Metadata::Container.new(self)
68
73
 
69
74
  [:structure, :second_structure].each do |structure_symbol|
70
75
  instance_variable_set(
@@ -83,6 +88,8 @@ module Wrnap
83
88
  )
84
89
  end
85
90
 
91
+ metadata.instance_eval(&block) if block_given?
92
+
86
93
  if str && seq.length != str.length
87
94
  Wrnap.debugger { "The sequence length (%d) doesn't match the structure length (%d)" % [seq, str].map(&:length) }
88
95
  end
@@ -126,13 +133,6 @@ module Wrnap
126
133
 
127
134
  alias :two_str :two_structures
128
135
 
129
- def print_full
130
- puts name if name
131
- puts seq if seq
132
- puts str_1 if str_1
133
- puts str_2 if str_2
134
- end
135
-
136
136
  def write_fa!(filename)
137
137
  filename.tap do |filename|
138
138
  File.open(filename, ?w) do |file|
@@ -162,11 +162,20 @@ module Wrnap
162
162
  else super end
163
163
  end
164
164
 
165
+ def pp
166
+ puts("> %s" % name) if name
167
+ puts("%s" % seq) if seq
168
+ puts("%s" % str_1) if str_1
169
+ puts("%s" % str_2) if str_2
170
+ puts("%s" % meta) if meta
171
+ end
172
+
165
173
  def inspect
166
174
  "#<RNA: %s>" % [
167
175
  ("#{seq[0, 20] + (seq.length > 20 ? '... [%d]' % seq.length : '')}" if seq && !seq.empty?),
168
176
  ("#{str_1[0, 20] + (str_1.length > 20 ? ' [%d]' % seq.length : '')}" if str_1 && !str_1.empty?),
169
177
  ("#{str_2[0, 20] + (str_2.length > 20 ? ' [%d]' % seq.length : '')}" if str_2 && !str_1.empty?),
178
+ (md.to_json unless md.empty?),
170
179
  (name ? name : "#{self.class.name}")
171
180
  ].compact.join(", ")
172
181
  end
data/lib/wrnap/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Wrnap
2
- VERSION = "0.10.0"
2
+ VERSION = "0.11.0"
3
3
  end
data/lib/wrnap.rb CHANGED
@@ -14,6 +14,7 @@ require "active_support/core_ext/class"
14
14
 
15
15
  require "wrnap/version"
16
16
  require "wrnap/global/rna/extensions"
17
+ require "wrnap/global/rna/metadata"
17
18
  require "wrnap/global/rna"
18
19
  require "wrnap/global/rna/context"
19
20
  require "wrnap/global/chainer"
@@ -66,23 +67,23 @@ end
66
67
 
67
68
  # This dirties up the public namespace, but I use it so many times that I want a shorthand to it
68
69
  unless defined? RNA
69
- def RNA(*args)
70
- RNA.from_array(args)
70
+ def RNA(*args, &block)
71
+ RNA.from_array(args, &block)
71
72
  end
72
73
  end
73
74
 
74
75
  module RNA
75
- def self.load_all(pattern = "*.fa")
76
- Dir[File.directory?(pattern) ? pattern + "/*.fa" : pattern].map { |file| RNA.from_fasta(file) }
76
+ def self.load_all(pattern = "*.fa", &block)
77
+ Dir[File.directory?(pattern) ? pattern + "/*.fa" : pattern].map { |file| RNA.from_fasta(file, &block) }
77
78
  end
78
79
 
79
- def self.random(size, *args)
80
- RNA.from_array(args.unshift(Wrnap::Global::Rna.generate_sequence(size).seq))
80
+ def self.random(size, *args, &block)
81
+ RNA.from_array(args.unshift(Wrnap::Global::Rna.generate_sequence(size).seq), &block)
81
82
  end
82
83
 
83
84
  def self.method_missing(name, *args, &block)
84
85
  if "#{name}" =~ /^from_\w+$/
85
- Wrnap::Global::Rna.send("init_#{name}", *args)
86
+ Wrnap::Global::Rna.send("init_#{name}", *args, &block)
86
87
  else super end
87
88
  end
88
89
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wrnap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Evan Senter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-03 00:00:00.000000000 Z
11
+ date: 2014-07-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -157,6 +157,7 @@ files:
157
157
  - lib/wrnap/global/rna.rb
158
158
  - lib/wrnap/global/rna/context.rb
159
159
  - lib/wrnap/global/rna/extensions.rb
160
+ - lib/wrnap/global/rna/metadata.rb
160
161
  - lib/wrnap/global/runner.rb
161
162
  - lib/wrnap/graphing/r.rb
162
163
  - lib/wrnap/package/base.rb