wrnap 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wrnap/etl/infernal.rb +23 -0
- data/lib/wrnap/etl/stockholm.rb +49 -0
- data/lib/wrnap/global/rna.rb +9 -0
- data/lib/wrnap/version.rb +1 -1
- data/lib/wrnap.rb +3 -0
- data/wrnap.gemspec +1 -0
- metadata +17 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce3134651886e323b97794153a83b97ada50a5db
|
4
|
+
data.tar.gz: a01c0eb283ca43609e06997deee79787dc98de00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c5ed38ddcc0570d2ec46f41e46332d4e71d5b5c756f3c027c8e9e2afddca3e8f7766912246af8cf39bd44c7b78c2a89983a4e9af629902ccaebbff4f576ca1c
|
7
|
+
data.tar.gz: 91d22c1f48cc245ba1135324f874858d8f2ffc0834e4b9d0383d44ed8425ecf53a3070ca8e189d9a9cbd2ce467803022185eb26deb1f1c948e2caca7c5ec97c1
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Wrnap
  module Etl
    # Turns Infernal's textual alignment output into a sequence / dot-bracket
    # structure pair and hands it to Stockholm.fit_structure_to_sequence.
    module Infernal
      class << self
        # Parses a single hit from Infernal alignment output.
        #
        # The sequence is taken from the LAST line that contains a number,
        # some text and a trailing number (the aligned target row); it is
        # upcased and every character other than A, U, G or C becomes ".".
        # The structure is taken from the FIRST line that ends in "CS".
        #
        # @param output [String] raw alignment text for one hit
        # @return whatever Stockholm.fit_structure_to_sequence returns for the
        #   extracted sequence and structure
        # @raise [ArgumentError] if the sequence row or the CS row is missing
        def parse_hit(output)
          lines = output.split(?\n)

          sequence_line = lines.select { |line| line =~ /^.*\d+\s+.*\s+\d+\s*$/ }.last
          raise ArgumentError, "no aligned sequence line found in Infernal output" unless sequence_line

          structure_line = lines.find { |line| line =~ /CS\s*$/ }
          raise ArgumentError, "no CS (consensus structure) line found in Infernal output" unless structure_line

          # Any line accepted by the selector above also satisfies this
          # pattern, so the match cannot be nil here.
          sequence  = sequence_line.match(/^.*\s+(.*)\s+\d+\s*$/)[1].upcase.gsub(/[^AUGC]/, ?.)
          structure = convert_infernal_to_dot_bracket(structure_line.gsub(/\s+CS\s*$/, "").strip)

          Stockholm.fit_structure_to_sequence(sequence, structure)
        end

        # Converts a WUSS-annotated structure string to plain dot-bracket.
        #
        # Every bracket flavour (<>, [], {}) becomes "(" / ")"; existing
        # parentheses and dots are kept. The unpaired WUSS symbols
        # (":", ",", "_", "-", "~") and pseudoknot letters become ".", since
        # plain dot-bracket has no way to express them. The conversion is
        # character-for-character, so the length is unchanged.
        #
        # @param structure [String] WUSS structure string
        # @return [String] dot-bracket string of the same length
        def convert_infernal_to_dot_bracket(structure)
          # http://jalview-rnasupport.blogspot.com/2010/06/parsing-wuss-notation-of-rna-secondary.html
          structure.gsub(/[:,_\-~A-Za-z]/, ?.).tr("<[{", "(").tr(">]}", ")")
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Wrnap
  module Etl
    # Projects a consensus secondary structure onto individual aligned
    # sequences, producing one Wrnap::Global::Rna per sequence.
    module Stockholm
      class << self
        # Loads the first entry returned by Bio::Stockholm::Reader for +file+
        # and fits its "SS_cons" structure to every record's sequence.
        #
        # @param file passed straight to Bio::Stockholm::Reader.parse_from_file
        # @return [Array] one fitted RNA per record
        def load_all(file)
          alignment = Bio::Stockholm::Reader.parse_from_file(file)[0]
          sequences = alignment.records.map(&:last).map(&:sequence)
          structure = dot_bracket_from_stockholm(alignment.gc_features["SS_cons"])

          sequences.map { |sequence| fit_structure_to_sequence(sequence, structure) }
        end

        # Normalises a Stockholm SS_cons string to plain dot-bracket.
        #
        # SS_cons may use the full WUSS alphabet, so all bracket flavours
        # (<>, [], {}) become "(" / ")", and the unpaired symbols
        # (":", ",", "_", "-", "~") and pseudoknot letters become ".".
        # The conversion is character-for-character.
        #
        # @param structure [String] consensus structure annotation
        # @return [String] dot-bracket string of the same length
        def dot_bracket_from_stockholm(structure)
          structure.gsub(/[:,_\-~A-Za-z]/, ?.).tr("<[{", "(").tr(">]}", ")")
        end

        # Runs the full pipeline: keep valid pairs, drop gap columns, then
        # drop pairs that are too close together.
        #
        # @param sequence [String] aligned sequence, with "." marking gaps
        # @param consensus_structure [String] dot-bracket structure of the same alignment
        def fit_structure_to_sequence(sequence, consensus_structure)
          theta_filter(prune_gaps(balanced_consensus_from_sequence(sequence, consensus_structure)))
        end

        # Builds an RNA whose structure keeps only those consensus base pairs
        # that this particular sequence can actually form.
        #
        # @param sequence [String] aligned sequence (gaps still present)
        # @param structure [String] dot-bracket consensus structure
        def balanced_consensus_from_sequence(sequence, structure)
          # Start from the pairs whose two nucleotides are a canonical pairing.
          canonical_pairs = Wrnap::Global::Rna.base_pairs(structure).map(&:to_a).select do |i, j|
            Wrnap::Global::Rna::CANONICAL_BASES.include?(Set.new([sequence[i], sequence[j]]))
          end

          # Then discard any pair touching a column where the sequence has a
          # gap but the consensus structure is not unpaired.
          columns = sequence.split(//).zip(structure.split(//)).each_with_index
          surviving_pairs = columns.inject(canonical_pairs) do |pairs, ((nucleotide, symbol), i)|
            if nucleotide == ?. && symbol != ?.
              pairs.reject { |pair| pair.any? { |j| i == j } }
            else
              pairs
            end
          end

          Wrnap::Global::Rna.init_from_string(
            sequence,
            Wrnap::Global::Rna.structure_from_bp_list(sequence.length, surviving_pairs)
          )
        end

        # Removes every alignment column whose nucleotide is ".", from both
        # the sequence and the structure.
        #
        # @param rna object responding to #seq and #str
        def prune_gaps(rna)
          kept_columns = rna.seq.split(//).zip(rna.str.split(//)).reject { |nucleotide, _| nucleotide == ?. }

          Wrnap::Global::Rna.init_from_array(kept_columns.transpose.map(&:join))
        end

        # Drops base pairs whose two positions are 3 or fewer indices apart.
        #
        # @param rna object responding to #seq and #base_pairs
        def theta_filter(rna)
          # Needs to happen after gap pruning.
          distant_pairs = rna.base_pairs.map(&:to_a).select { |i, j| (j - i).abs > 3 }

          Wrnap::Global::Rna.init_from_string(
            rna.seq,
            Wrnap::Global::Rna.structure_from_bp_list(rna.seq.length, distant_pairs)
          )
        end
      end
    end
  end
end
|
data/lib/wrnap/global/rna.rb
CHANGED
@@ -3,6 +3,8 @@ module Wrnap
|
|
3
3
|
class Rna
|
4
4
|
include Extensions
|
5
5
|
|
6
|
+
# Unordered nucleotide pairs treated as canonical base pairs: G-C, A-U and
# G-U. Frozen so the shared constant cannot be mutated by accident.
CANONICAL_BASES = [%w[G C], %w[A U], %w[G U]].map { |pair| Set.new(pair).freeze }.to_set.freeze
|
7
|
+
|
6
8
|
attr_accessor :comment
|
7
9
|
attr_reader :sequence, :structure, :second_structure
|
8
10
|
|
@@ -124,6 +126,13 @@ module Wrnap
|
|
124
126
|
|
125
127
|
alias :two_str :two_structures
|
126
128
|
|
129
|
+
def print_full
  # Print the name, sequence and both structures, one per line, in that
  # order; any field that is nil or false is skipped.
  %i[name seq str_1 str_2].each do |field|
    value = send(field)
    puts value if value
  end
end
|
135
|
+
|
127
136
|
def write_fa!(filename)
|
128
137
|
filename.tap do |filename|
|
129
138
|
File.open(filename, ?w) do |file|
|
data/lib/wrnap/version.rb
CHANGED
data/lib/wrnap.rb
CHANGED
@@ -7,6 +7,7 @@ require "tempfile"
|
|
7
7
|
require "bigdecimal"
|
8
8
|
require "rroc"
|
9
9
|
require "bio"
|
10
|
+
require "bio-stockholm"
|
10
11
|
require "entrez"
|
11
12
|
require "active_support/inflector"
|
12
13
|
require "active_support/core_ext/class"
|
@@ -19,6 +20,8 @@ require "wrnap/global/chainer"
|
|
19
20
|
require "wrnap/global/entrez"
|
20
21
|
require "wrnap/global/parser"
|
21
22
|
require "wrnap/global/runner"
|
23
|
+
require "wrnap/etl/infernal"
|
24
|
+
require "wrnap/etl/stockholm"
|
22
25
|
require "wrnap/graphing/r"
|
23
26
|
require "wrnap/package/base"
|
24
27
|
|
data/wrnap.gemspec
CHANGED
@@ -28,5 +28,6 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_runtime_dependency "rinruby", "~> 2.0"
|
29
29
|
spec.add_runtime_dependency "rroc", "~> 0.1"
|
30
30
|
spec.add_runtime_dependency "bio", "~> 1.4"
|
31
|
+
spec.add_runtime_dependency "bio-stockholm", "~> 0.0.1"
|
31
32
|
spec.add_runtime_dependency "entrez", "~> 0.5"
|
32
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wrnap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Evan Senter
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '1.4'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: bio-stockholm
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.0.1
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.0.1
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: entrez
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,6 +149,8 @@ files:
|
|
135
149
|
- README.md
|
136
150
|
- Rakefile
|
137
151
|
- lib/wrnap.rb
|
152
|
+
- lib/wrnap/etl/infernal.rb
|
153
|
+
- lib/wrnap/etl/stockholm.rb
|
138
154
|
- lib/wrnap/global/chainer.rb
|
139
155
|
- lib/wrnap/global/entrez.rb
|
140
156
|
- lib/wrnap/global/parser.rb
|