wrnap 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/wrnap/etl/infernal.rb +23 -0
- data/lib/wrnap/etl/stockholm.rb +49 -0
- data/lib/wrnap/global/rna.rb +9 -0
- data/lib/wrnap/version.rb +1 -1
- data/lib/wrnap.rb +3 -0
- data/wrnap.gemspec +1 -0
- metadata +17 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce3134651886e323b97794153a83b97ada50a5db
|
4
|
+
data.tar.gz: a01c0eb283ca43609e06997deee79787dc98de00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c5ed38ddcc0570d2ec46f41e46332d4e71d5b5c756f3c027c8e9e2afddca3e8f7766912246af8cf39bd44c7b78c2a89983a4e9af629902ccaebbff4f576ca1c
|
7
|
+
data.tar.gz: 91d22c1f48cc245ba1135324f874858d8f2ffc0834e4b9d0383d44ed8425ecf53a3070ca8e189d9a9cbd2ce467803022185eb26deb1f1c948e2caca7c5ec97c1
|
data/lib/wrnap/etl/infernal.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Wrnap
  module Etl
    # Helpers for turning the text of an Infernal hit alignment into a sequence / dot-bracket structure pair.
    module Infernal
      class << self
        # Pulls the aligned sequence and the consensus structure ("CS") annotation out of an Infernal hit
        # alignment and hands both to Stockholm.fit_structure_to_sequence.
        #
        # @param output [String] newline-separated text of one hit alignment
        # @return the result of Stockholm.fit_structure_to_sequence for the extracted sequence and structure
        # @raise [ArgumentError] if +output+ contains no sequence row or no CS row
        def parse_hit(output)
          lines = output.split("\n")

          # Sequence rows are the lines shaped like "... <digits> <text> <digits>". When several rows match,
          # the last one is used (presumably the target sequence rather than the model consensus; confirm
          # against real Infernal output).
          sequence_line  = lines.select { |line| line =~ /^.*\d+\s+.*\s+\d+\s*$/ }.last
          structure_line = lines.find { |line| line =~ /CS\s*$/ }

          raise ArgumentError, "no sequence line found in Infernal output" unless sequence_line
          raise ArgumentError, "no CS (consensus structure) line found in Infernal output" unless structure_line

          # Upcase the residues and turn everything that is not A, U, G or C into a gap symbol.
          sequence  = sequence_line.match(/^.*\s+(.*)\s+\d+\s*$/)[1].upcase.tr("^AUGC", ".")
          structure = convert_infernal_to_dot_bracket(structure_line.gsub(/\s+CS\s*$/, "").strip)

          Stockholm.fit_structure_to_sequence(sequence, structure)
        end

        # Translates a WUSS-style structure annotation into plain dot-bracket notation.
        # See http://jalview-rnasupport.blogspot.com/2010/06/parsing-wuss-notation-of-rna-secondary.html
        #
        # ":", ",", "_" and "-" become "."; "<", "[" and "{" become "("; ">", "]" and "}" become ")".
        # Every other character is passed through untouched.
        #
        # @param structure [String] structure annotation as it appears on the CS line
        # @return [String] a new string of the same length in dot-bracket notation
        def convert_infernal_to_dot_bracket(structure)
          # A trailing "-" inside a tr set is a literal dash, not a range.
          structure.tr(":,_-", ".").tr("<[{", "(").tr(">]}", ")")
        end
      end
    end
  end
end
|
data/lib/wrnap/etl/stockholm.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
module Wrnap
  module Etl
    # Loads Stockholm alignments and fits the alignment's consensus structure onto individual sequences.
    module Stockholm
      class << self
        # Reads the first entry of a Stockholm file and fits its "SS_cons" consensus structure onto every
        # sequence of that entry.
        #
        # @param file path handed to Bio::Stockholm::Reader.parse_from_file
        # @return [Array] one fitted RNA per record, in record order
        # @raise [ArgumentError] if the file yields no entry or the entry has no "SS_cons" annotation
        def load_all(file)
          entry = Bio::Stockholm::Reader.parse_from_file(file)[0]
          raise ArgumentError, "no Stockholm entry found in #{file}" unless entry

          consensus = entry.gc_features["SS_cons"]
          raise ArgumentError, "Stockholm entry in #{file} has no SS_cons annotation" unless consensus

          structure = dot_bracket_from_stockholm(consensus)

          entry.records.map(&:last).map(&:sequence).map do |sequence|
            fit_structure_to_sequence(sequence, structure)
          end
        end

        # Rewrites angle-bracket pairs as parentheses; every other character is left as is.
        # NOTE(review): other bracket or unpaired symbols are not translated here; confirm whether
        # SS_cons lines handled by this loader can contain them.
        #
        # @param structure [String] consensus structure using "<" and ">"
        # @return [String] the same structure using "(" and ")"
        def dot_bracket_from_stockholm(structure)
          structure.tr("<>", "()")
        end

        # Runs the full pipeline for one aligned sequence: keep only usable consensus pairs, drop gap
        # columns, then drop pairs that are too close together.
        #
        # @param sequence [String] aligned sequence, "." marking gaps
        # @param consensus_structure [String] dot-bracket consensus structure of the alignment
        # @return the RNA object produced by theta_filter
        def fit_structure_to_sequence(sequence, consensus_structure)
          balanced = balanced_consensus_from_sequence(sequence, consensus_structure)
          theta_filter(prune_gaps(balanced))
        end

        # Builds an RNA whose structure keeps only those consensus base pairs that this sequence can form.
        #
        # A pair survives when its two nucleotides are listed in Rna::CANONICAL_BASES and neither of its
        # positions is a gap in the sequence under a non-"." structure symbol.
        #
        # @param sequence [String] aligned sequence, "." marking gaps
        # @param structure [String] dot-bracket consensus structure
        # @return the RNA built by Wrnap::Global::Rna.init_from_string
        def balanced_consensus_from_sequence(sequence, structure)
          canonical_pairs = Wrnap::Global::Rna.base_pairs(structure).map(&:to_a).select do |i, j|
            Wrnap::Global::Rna::CANONICAL_BASES.include?(Set.new([sequence[i], sequence[j]]))
          end

          # Columns where the sequence has a gap but the consensus carries a structure symbol.
          # Collected once up front so each pair is checked a single time.
          gapped_columns = Set.new(
            sequence.chars.zip(structure.chars).each_with_index.select do |(nucleotide, symbol), _|
              nucleotide == "." && symbol != "."
            end.map(&:last)
          )

          surviving_pairs = canonical_pairs.reject do |pair|
            pair.any? { |position| gapped_columns.include?(position) }
          end

          Wrnap::Global::Rna.init_from_string(
            sequence,
            Wrnap::Global::Rna.structure_from_bp_list(sequence.length, surviving_pairs)
          )
        end

        # Removes every alignment column whose nucleotide is a gap (".") from both sequence and structure.
        #
        # @param rna object responding to +seq+ and +str+
        # @return the RNA built by Wrnap::Global::Rna.init_from_array from the remaining columns
        def prune_gaps(rna)
          kept_columns = rna.seq.chars.zip(rna.str.chars).reject { |nucleotide, _| nucleotide == "." }

          Wrnap::Global::Rna.init_from_array(kept_columns.transpose.map(&:join))
        end

        # Drops base pairs whose two positions are 3 or fewer indices apart.
        # Needs to happen after gap pruning, because pruning changes the distance between positions.
        #
        # @param rna object responding to +seq+ and +base_pairs+
        # @return the RNA built by Wrnap::Global::Rna.init_from_string with only the remaining pairs
        def theta_filter(rna)
          distant_pairs = rna.base_pairs.map(&:to_a).select { |i, j| (j - i).abs > 3 }

          Wrnap::Global::Rna.init_from_string(
            rna.seq,
            Wrnap::Global::Rna.structure_from_bp_list(rna.seq.length, distant_pairs)
          )
        end
      end
    end
  end
end
|
data/lib/wrnap/global/rna.rb
CHANGED
@@ -3,6 +3,8 @@ module Wrnap
|
|
3
3
|
class Rna
|
4
4
|
include Extensions
|
5
5
|
|
6
|
+
# Unordered nucleotide pairs accepted as base pairs: G-C, A-U and G-U.
# Both the table and its member sets are frozen so this shared constant cannot be mutated by accident.
CANONICAL_BASES = Set.new([%w[G C], %w[A U], %w[G U]].map { |pair| Set.new(pair).freeze }).freeze
|
7
|
+
|
6
8
|
attr_accessor :comment
|
7
9
|
attr_reader :sequence, :structure, :second_structure
|
8
10
|
|
@@ -124,6 +126,13 @@ module Wrnap
|
|
124
126
|
|
125
127
|
alias :two_str :two_structures
|
126
128
|
|
129
|
+
# Writes the name, the sequence and both structures to standard output, one per line and in that
# order, skipping any value that is nil or false.
def print_full
  [:name, :seq, :str_1, :str_2].each do |attribute|
    value = send(attribute)
    puts value if value
  end

  nil
end
|
135
|
+
|
127
136
|
def write_fa!(filename)
|
128
137
|
filename.tap do |filename|
|
129
138
|
File.open(filename, ?w) do |file|
|
data/lib/wrnap/version.rb
CHANGED
data/lib/wrnap.rb
CHANGED
@@ -7,6 +7,7 @@ require "tempfile"
|
|
7
7
|
require "bigdecimal"
|
8
8
|
require "rroc"
|
9
9
|
require "bio"
|
10
|
+
require "bio-stockholm"
|
10
11
|
require "entrez"
|
11
12
|
require "active_support/inflector"
|
12
13
|
require "active_support/core_ext/class"
|
@@ -19,6 +20,8 @@ require "wrnap/global/chainer"
|
|
19
20
|
require "wrnap/global/entrez"
|
20
21
|
require "wrnap/global/parser"
|
21
22
|
require "wrnap/global/runner"
|
23
|
+
require "wrnap/etl/infernal"
|
24
|
+
require "wrnap/etl/stockholm"
|
22
25
|
require "wrnap/graphing/r"
|
23
26
|
require "wrnap/package/base"
|
24
27
|
|
data/wrnap.gemspec
CHANGED
@@ -28,5 +28,6 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_runtime_dependency "rinruby", "~> 2.0"
|
29
29
|
spec.add_runtime_dependency "rroc", "~> 0.1"
|
30
30
|
spec.add_runtime_dependency "bio", "~> 1.4"
|
31
|
+
spec.add_runtime_dependency "bio-stockholm", "~> 0.0.1"
|
31
32
|
spec.add_runtime_dependency "entrez", "~> 0.5"
|
32
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wrnap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Evan Senter
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '1.4'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: bio-stockholm
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.0.1
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.0.1
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: entrez
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,6 +149,8 @@ files:
|
|
135
149
|
- README.md
|
136
150
|
- Rakefile
|
137
151
|
- lib/wrnap.rb
|
152
|
+
- lib/wrnap/etl/infernal.rb
|
153
|
+
- lib/wrnap/etl/stockholm.rb
|
138
154
|
- lib/wrnap/global/chainer.rb
|
139
155
|
- lib/wrnap/global/entrez.rb
|
140
156
|
- lib/wrnap/global/parser.rb
|