wrnap 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 29f26144274a4bd80974e243be7cf969b5f2f49a
4
- data.tar.gz: 79793475465dacb80644623212f352010a07c910
3
+ metadata.gz: 5565b1bf13a5afcdda34f97af733e20c56aa8f72
4
+ data.tar.gz: 69bb1996d55296c10808270587fd8e7b0ba5cc71
5
5
  SHA512:
6
- metadata.gz: e614c3d69a0dbe91fe8c623c0246e90ddf0fa22fc2f31dccd847fd4bc6394899f6726e606e773927222e6d7707d1261fb2d1f71d952468fe27230cb140704806
7
- data.tar.gz: 39640135b688f94e711bc956ea81dad208008a1a0a5bc5ca21ce6515c9641b6d66d1b5a86648fe34a97a8d436385c908b9c75dd2d6aa7a5b6ed71e962eaad545
6
+ metadata.gz: a09b6dcf15dcac6f804776c136792f93316e294ca1d8c1230140219e5ce44ed3cd3a72c09b192ab34b2d6bb754841bc7b421fd737f05c11ee88aa5640b2eb1d2
7
+ data.tar.gz: 36a11bd543ade0dc5e0d267e16117a44b1dc5d8afa4d89e8c1921382afdfb0311094a5fa9f7e893397978dcb0c7f12f63a5ab4b447ce3c74e94ffce40f433f38
@@ -1,6 +1,10 @@
1
1
  module Wrnap
2
2
  module Etl
3
3
  module Infernal
4
+ NAME_REGEX = />>\s+(\S+)(.*\n){3}.*\s(\d+)\s+(\d+)\s+[\+-].*\n/
5
+ HIT_SEQUENCE = /^.*\d+\s+(.*)\s+\d+\s*$/
6
+ LOCAL_END = /\*\[\s*\d+\s*\]\*/
7
+
4
8
  class << self
5
9
  def parse_file(file)
6
10
  output = File.exist?(file) ? File.read(file) : file
@@ -11,25 +15,28 @@ module Wrnap
11
15
  output.
12
16
  gsub(/^(.*\n)*Hit alignments:\n/, "").
13
17
  gsub(/Internal CM pipeline statistics summary:\n(.*\n)*$/, "").
14
- strip.split(?\n).reject(&:empty?).each_slice(10).map { |lines| parse_hit(lines.join(?\n)) }
18
+ strip.split(?\n).reject(&:empty?).each_slice(10).map { |lines| parse_hit(lines.join(?\n)) }.compact
15
19
  end
16
20
  end
17
21
 
18
22
  def parse_hit(output)
19
- name = if output =~ (identifier_regex = />>\s+(\S+)(.*\n){3}.*\s(\d+)\s+(\d+)\s+[\+-].*\n/)
23
+ name = if output =~ NAME_REGEX
20
24
  # This is a pretty fancy regex, and there's no guarantee that the data has this info, so let's just test the waters here.
21
- _, id, _, seq_from, seq_to, _ = output.match(identifier_regex).to_a
25
+ _, id, _, seq_from, seq_to, _ = output.match(NAME_REGEX).to_a
22
26
  "%s %d %d" % [id.split(?|).last, seq_from, seq_to]
23
27
  end
24
28
 
25
- Stockholm.fit_structure_to_sequence(
26
- *output.split(?\n).as do |infernal|
27
- [
28
- infernal.select { |line| line =~ /^.*\d+\s+.*\s+\d+\s*$/ }.last.match(/^.*\s+(\S+)\s+\d+\s*$/)[1].upcase.gsub(/[^AUGC]/, ?.),
29
- convert_infernal_to_dot_bracket(infernal.find { |line| line =~ /CS\s*$/ }.gsub(/\s+CS\s*$/, "").strip)
30
- ]
31
- end
32
- ).tap { |rna| rna.comment = name if name }
29
+ unless (hit_sequence = pull_infernal_hit_sequence(output)) =~ LOCAL_END
30
+ Stockholm.fit_structure_to_sequence(hit_sequence, pull_infernal_hit_structure(output)).tap { |rna| rna.comment = name if name }
31
+ end
32
+ end
33
+
34
+ def pull_infernal_hit_sequence(output)
35
+ output.split(?\n).select { |line| line =~ HIT_SEQUENCE }.last.match(HIT_SEQUENCE)[1].upcase
36
+ end
37
+
38
+ def pull_infernal_hit_structure(output)
39
+ convert_infernal_to_dot_bracket(output.split(?\n).find { |line| line =~ /CS\s*$/ }.gsub(/\s+CS\s*$/, "").strip)
33
40
  end
34
41
 
35
42
  def convert_infernal_to_dot_bracket(structure)
@@ -7,7 +7,6 @@ module Wrnap
7
7
  base.extend(ClassMethods)
8
8
  base.extend(OneStructureBasedMethods)
9
9
  base.extend(TwoStructureBasedMethods)
10
-
11
10
  base.class_eval do
12
11
  OneStructureBasedMethods.public_instance_methods.each do |class_method|
13
12
  define_method(class_method) do |*args|
@@ -55,18 +54,6 @@ module Wrnap
55
54
  end
56
55
 
57
56
  module OneStructureBasedMethods
58
- def helices(structure)
59
- array = base_pairs(structure).sort_by(&:first).map(&:to_a)
60
-
61
- unless array.empty?
62
- array[1..-1].inject([[array.first]]) do |bins, (i, j)|
63
- bins.tap { bins[-1][-1] == [i - 1, j + 1] ? bins[-1] << [i, j] : bins << [[i, j]] }
64
- end
65
- else
66
- []
67
- end
68
- end
69
-
70
57
  def max_bp_distance(structure)
71
58
  base_pairs(structure).count + ((structure.length - 3) / 2.0).floor
72
59
  end
@@ -0,0 +1,36 @@
1
+ module Wrnap
2
+ module Global
3
+ class Rna
4
+ module HelixFunctions
5
+ def helices
6
+ array = base_pairs.sort_by(&:first).map(&:to_a)
7
+
8
+ unless array.empty?
9
+ array[1..-1].inject([[array.first]]) do |bins, (i, j)|
10
+ bins.tap { bins[-1][-1] == [i - 1, j + 1] ? bins[-1] << [i, j] : bins << [[i, j]] }
11
+ end
12
+ else
13
+ []
14
+ end
15
+ end
16
+
17
+ def collapsed_helices
18
+ helices.map { |((i, j), *rest)| Helix.new(i, j, rest.length + 1) }
19
+ end
20
+ end
21
+
22
+ class Helix
23
+ attr_reader :i, :j
24
+ attr_accessor :length
25
+
26
+ def initialize(i, j, length)
27
+ @i, @j, @length = i, j, length
28
+ end
29
+
30
+ def name
31
+ "(%d, %d)" % [i, j]
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,85 @@
1
+ module Wrnap
2
+ module Global
3
+ class Rna
4
+ module TreeFunctions
5
+ def with_tree
6
+ meta_rna { |metadata| tree(TreePlanter.new(metadata.__rna__)) }
7
+ end
8
+
9
+ def trunk; md[:tree] || with_tree.trunk; end
10
+ end
11
+
12
+ class TreePlanter
13
+ attr_reader :rna, :root
14
+
15
+ def initialize(rna, tree = false)
16
+ @rna = rna
17
+ @root = tree || build_tree
18
+ end
19
+
20
+ def build_tree
21
+ rna.collapsed_helices.inject(Tree::TreeNode.new(:root, rna)) do |tree, helix|
22
+ node = Tree::TreeNode.new(helix.name, helix)
23
+
24
+ if tree.is_root?
25
+ tree << node
26
+ elsif helix.i > tree.content.j
27
+ # It's a sibling, pop up until we're at its parent node.
28
+ tree = tree.parent until tree.is_root? || tree.content.j > helix.i
29
+ node.tap { tree << node }
30
+ elsif helix.j < tree.content.j
31
+ # Going deeper.
32
+ tree << node
33
+ end
34
+ end.root
35
+ end
36
+
37
+ def coalesce
38
+ self.class.new(rna, root.dup).tap { |tree| tree.merge_interior_loops! }
39
+ end
40
+
41
+ def coalesce!
42
+ tap { merge_interior_loops! }
43
+ end
44
+
45
+ def merge_interior_loops!
46
+ root.tap do
47
+ self.class.postorder_traversal(root) do |node|
48
+ if node.children.count == 1 && !node.is_root?
49
+ child = node.children.first
50
+ node.parent.add(child)
51
+ node.remove_from_parent!
52
+ end
53
+ end
54
+ end
55
+ end
56
+
57
+ def depth_signature
58
+ root.map(&:node_depth)
59
+ end
60
+
61
+ def pp
62
+ root.print_tree and nil
63
+ end
64
+
65
+ def inspect
66
+ "#<TreePlanter: %s>" % depth_signature.inspect
67
+ end
68
+
69
+ alias :to_s :inspect
70
+
71
+ class << self
72
+ def preorder_traversal(node, &block)
73
+ node.children.map { |child| preorder_traversal(child, &block) }
74
+ yield node
75
+ end
76
+
77
+ def postorder_traversal(node, &block)
78
+ node.children.map { |child| postorder_traversal(child, &block) }
79
+ yield node
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
@@ -1,9 +1,11 @@
1
1
  module Wrnap
2
2
  module Global
3
3
  class Rna
4
- extend Forwardable
4
+ extend Forwardable
5
5
  include Extensions
6
6
  include Metadata
7
+ include TreeFunctions
8
+ include HelixFunctions
7
9
 
8
10
  CANONICAL_BASES = Set.new << Set.new([?G, ?C]) << Set.new([?A, ?U]) << Set.new([?G, ?U])
9
11
 
@@ -175,7 +177,7 @@ module Wrnap
175
177
  ("#{seq[0, 20] + (seq.length > 20 ? '... [%d]' % seq.length : '')}" if seq && !seq.empty?),
176
178
  ("#{str_1[0, 20] + (str_1.length > 20 ? ' [%d]' % seq.length : '')}" if str_1 && !str_1.empty?),
177
179
  ("#{str_2[0, 20] + (str_2.length > 20 ? ' [%d]' % seq.length : '')}" if str_2 && !str_1.empty?),
178
- (md.to_json unless md.empty?),
180
+ (md.inspect unless md.empty?),
179
181
  (name ? name : "#{self.class.name}")
180
182
  ].compact.join(", ")
181
183
  end
data/lib/wrnap/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Wrnap
2
- VERSION = "0.11.0"
2
+ VERSION = "0.12.0"
3
3
  end
data/lib/wrnap.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "yaml"
2
2
  require "benchmark"
3
3
  require "set"
4
+ require "tree"
4
5
  require "shuffle"
5
6
  require "rinruby"
6
7
  require "tempfile"
@@ -15,6 +16,8 @@ require "active_support/core_ext/class"
15
16
  require "wrnap/version"
16
17
  require "wrnap/global/rna/extensions"
17
18
  require "wrnap/global/rna/metadata"
19
+ require "wrnap/global/rna/tree"
20
+ require "wrnap/global/rna/helix"
18
21
  require "wrnap/global/rna"
19
22
  require "wrnap/global/rna/context"
20
23
  require "wrnap/global/chainer"
data/wrnap.gemspec CHANGED
@@ -30,4 +30,5 @@ Gem::Specification.new do |spec|
30
30
  spec.add_runtime_dependency "bio", "~> 1.4"
31
31
  spec.add_runtime_dependency "bio-stockholm", "~> 0.0.1"
32
32
  spec.add_runtime_dependency "entrez", "~> 0.5"
33
+ spec.add_runtime_dependency "rubytree", "~> 0.9"
33
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wrnap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Evan Senter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-16 00:00:00.000000000 Z
11
+ date: 2014-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -136,6 +136,20 @@ dependencies:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0.5'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rubytree
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.9'
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.9'
139
153
  description: ''
140
154
  email:
141
155
  - evansenter@gmail.com
@@ -157,7 +171,9 @@ files:
157
171
  - lib/wrnap/global/rna.rb
158
172
  - lib/wrnap/global/rna/context.rb
159
173
  - lib/wrnap/global/rna/extensions.rb
174
+ - lib/wrnap/global/rna/helix.rb
160
175
  - lib/wrnap/global/rna/metadata.rb
176
+ - lib/wrnap/global/rna/tree.rb
161
177
  - lib/wrnap/global/runner.rb
162
178
  - lib/wrnap/graphing/r.rb
163
179
  - lib/wrnap/package/base.rb