rbfam 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/modules/alignment.rb +60 -0
- data/lib/modules/family.rb +60 -0
- data/lib/modules/sequence.rb +95 -0
- data/lib/modules/utils.rb +34 -0
- data/lib/rbfam.rb +14 -0
- data/lib/scripts/sequences_in_mysql.rb +49 -0
- metadata +115 -0
@@ -0,0 +1,60 @@
|
|
1
|
+
module Rbfam
  # An Rfam alignment for one family, exposed as a list of Rbfam::Sequence
  # entries that can be parsed from the Rfam server, saved to the sequences
  # table, or loaded back from it.
  class Alignment
    # Matches alignment rows of the form "<accession>/<from>-<to> <aligned RNA>".
    LINE_REGEXP = /^([\w\.]+)\/(\d+)\-(\d+)\s+([AUGC\.]+)$/

    attr_reader :family, :seed

    # @param family [#family_name] the family this alignment belongs to
    def initialize(family)
      @family = family
    end

    # Downloads and parses the alignment, keeping only rows that match
    # LINE_REGEXP.
    #
    # The parsed list is memoized on the first call, so a later call with a
    # different +alignment+ returns the entries that were already parsed.
    # +seed+ records whether that first parse used the :seed alignment.
    #
    # @param alignment [Symbol] alignment type sent to the server as alnType
    # @return [Array<Rbfam::Sequence>]
    # @raise [RuntimeError] when the server does not answer with status 200
    def entries(alignment = :seed)
      @parsed_entries ||= begin
        rows = pull_from_server(alignment).split(/\n/).reject do |line|
          line =~ /^#/
        end.select do |line|
          line =~ LINE_REGEXP
        end

        parsed = rows.map { |row| parse_line(row) }
        @seed = alignment == :seed
        parsed
      end
    end

    # Saves every entry, tagging each with this alignment's +seed+ flag.
    def save_entries!
      entries.each { |sequence| sequence.save!(seed: seed) }
    end

    # Replaces the memoized entries with the rows stored for this family.
    #
    # @param options [Hash] extra conditions merged into the table query
    # @return [Array<Rbfam::Sequence>]
    def load_entries!(options = {})
      Rbfam.script("sequences_in_mysql")

      @parsed_entries = SequenceTable.where({ family: family.family_name }.merge(options)).map do |entry|
        entry.to_rbfam_sequence(family)
      end
    end

    private

    # Fetches the alignment text once and memoizes the parsed response body.
    def pull_from_server(alignment)
      url = "http://rfam.sanger.ac.uk/family/alignment/download/format?acc=%s&alnType=%s&nseLabels=1&format=pfam&download=0" % [
        family.family_name,
        alignment
      ]
      # Only announce the request when it is actually going to be made.
      puts "GET: %s" % url unless @response

      @response ||= begin
        party = HTTParty.get(url)

        unless party.response.code == "200"
          # The name lives on the family; Alignment has no family_name of its own.
          raise RuntimeError, "HTTParty raised the following error when retrieving family %s: %s %s" % [
            family.family_name,
            party.response.code,
            party.response.message
          ]
        end

        puts "RESPONSE: 200 OK"
        party.parsed_response
      end
    end

    # Builds a sequence from the accession and coordinates captured by LINE_REGEXP.
    def parse_line(line)
      line_match = line.match(LINE_REGEXP)

      Rbfam::Sequence.new(family, line_match[1], line_match[2].to_i, line_match[3].to_i, autoload: { length: 300, extend: 3 })
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Rbfam
  # An Rfam family identified by its accession, with access to the family's
  # regions as Rbfam::Sequence entries.
  class Family
    attr_reader :family_name

    # Shortcut for family RF00167.
    def self.purine
      new("RF00167")
    end

    # Shortcut for family RF00059.
    def self.tpp
      new("RF00059")
    end

    # @param family_name [String] accession used in server requests and table queries
    def initialize(family_name)
      @family_name = family_name
    end

    # @return [Rbfam::Alignment] a new alignment wrapper for this family
    def alignment
      Rbfam::Alignment.new(self)
    end

    # Downloads the family's regions and parses every line that does not start
    # with "#". The result is memoized.
    #
    # @return [Array<Rbfam::Sequence>]
    def entries
      @parsed_entries ||= begin
        data_lines = pull_from_server.split(/\n/).reject { |line| line =~ /^#/ }
        data_lines.map { |line| parse_line(line) }
      end
    end

    # Replaces the memoized entries with the rows stored for this family.
    #
    # @param options [Hash] extra conditions merged into the table query
    # @return [Array<Rbfam::Sequence>]
    def load_entries!(options = {})
      Rbfam.script("sequences_in_mysql")

      conditions = { family: family_name }.merge(options)
      @parsed_entries = SequenceTable.where(conditions).map { |entry| entry.to_rbfam_sequence(self) }
    end

    # Saves every entry with its default options.
    def save_entries!
      entries.each { |entry| entry.save! }
    end

    private

    # Fetches the regions listing once and memoizes the parsed response body.
    #
    # Memoizing an expression that can raise is not a great pattern, but a
    # failed request is not expected in normal use and is meant to blow up hard.
    def pull_from_server
      url = "http://rfam.sanger.ac.uk/family/regions?entry=%s" % family_name
      puts "GET: %s" % url unless @response

      @response ||= begin
        party = HTTParty.get(url)

        unless party.response.code == "200"
          raise RuntimeError, "HTTParty raised the following error when retrieving family %s: %s %s" % [
            family_name,
            party.response.code,
            party.response.message
          ]
        end

        puts "RESPONSE: 200 OK"
        party.parsed_response
      end
    end

    # Builds a sequence from a tab-separated line: column 0 is the accession,
    # columns 2 and 3 are the coordinates.
    def parse_line(line)
      columns = line.split(/\t/)

      Rbfam::Sequence.new(self, columns[0], columns[2].to_i, columns[3].to_i, autoload: { length: 300, extend: 3 })
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module Rbfam
  # One region of a family: an accession plus from/to coordinates, with lazy
  # access to the underlying RNA sequence.
  class Sequence
    attr_reader :family, :accession, :from, :to

    # @param family [#family_name] owning family
    # @param accession [String] identifier passed to the Entrez lookup
    # @param from [Integer] first coordinate
    # @param to [Integer] second coordinate; the entry is on the plus strand when to > from
    # @param options [Hash] :sequence supplies a sequence instead of fetching one;
    #   :autoload fetches immediately (a Hash value is used as the window options)
    def initialize(family, accession, from, to, options = {})
      @family, @accession, @from, @to = family, accession, from, to

      @raw_sequence = options[:sequence] if options[:sequence]

      if options[:autoload]
        sequence(options[:autoload].is_a?(Hash) ? options[:autoload] : {})
      end
    end

    # Stores this entry in the sequences table.
    #
    # @param options [Hash] :seed is written to the row's seed column
    # @return whatever SequenceTable.create returns
    def save!(options = {})
      Rbfam.script("sequences_in_mysql")

      window = coord_window({ length: 300, extend: 3 })

      SequenceTable.create({
        family: family.family_name,
        accession: accession,
        sequence: sequence,
        sequence_length: sequence.length,
        from: from,
        to: to,
        seq_from: up_coord + window.min,
        seq_to: up_coord + window.max,
        seed: options[:seed]
      })
    end

    # Smaller of the two coordinates.
    def up_coord
      [from, to].min
    end

    # Larger of the two coordinates.
    def down_coord
      [from, to].max
    end

    # @return [Symbol] :plus or :minus
    def strand
      plus_strand? ? :plus : :minus
    end

    def plus_strand?
      to > from
    end

    def minus_strand?
      !plus_strand?
    end

    # Returns the sequence, fetching it from Entrez on first use unless one was
    # supplied to the constructor. Minus-strand entries are passed through
    # +complement+ exactly once; the oriented result is memoized so repeated
    # calls return the same value instead of flipping it back and forth.
    #
    # @param options [Hash] window options; only consulted when the sequence is fetched
    def sequence(options = {})
      @raw_sequence ||= Rbfam::Utils.rna_sequence_from_entrez(accession, up_coord, coord_window(options))
      @oriented_sequence ||= minus_strand? ? @raw_sequence.complement : @raw_sequence
    end

    alias :seq :sequence

    # Memoized structure from ViennaRna::Fold for this sequence.
    def mfe_structure
      @mfe_structure ||= ViennaRna::Fold.run(seq).structure
    end

    # Memoized ViennaRna::Fftbor run on this sequence and its MFE structure.
    def fftbor
      @fftbor ||= ViennaRna::Fftbor.run(seq: seq, str: mfe_structure)
    end

    # Offsets, relative to up_coord, of the region to retrieve.
    #
    # With both :length and :extend given and the region shorter than :length,
    # the window is grown to :length on the side selected by :extend (3 or 5)
    # and the strand. In every other case the region's own range is returned;
    # the two warning branches no longer leak the nil returned by +puts+.
    #
    # @param options [Hash] optional :length and :extend
    # @return [Range]
    def coord_window(options = {})
      range = 0..(down_coord - up_coord)
      return range unless options[:length] && options[:extend]

      if range.count >= options[:length]
        puts "WARNING: %s %d-%d (%s) is length %d, but only %d nt. have been requested. Providing the full sequence anyways." % [
          accession,
          from,
          to,
          strand,
          range.count,
          options[:length]
        ]
        return range
      end

      length_difference = options[:length] - range.count

      case [options[:extend], strand]
      when [3, :plus], [5, :minus] then Range.new(range.min, range.max + length_difference)
      when [5, :plus], [3, :minus] then Range.new(range.min - length_difference, range.max)
      else
        puts "WARNING: value for :extend key in sequence retreival needs to be one of 5, 3 - found (%s)" % options[:extend]
        # Fall back to the unextended region so callers still get a Range.
        range
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require "entrez"
|
2
|
+
|
3
|
+
module Rbfam
  # Helpers for pulling sequence data from Entrez and wrapping it in Bio
  # sequence objects.
  module Utils
    # Fetches a nucleic-acid sequence and returns the result of calling +rna+ on it.
    #
    # @param id identifier sent to Entrez
    # @param position [Integer] base coordinate the window offsets are added to
    # @param window [Range] offsets relative to +position+
    # @param buffer_size [Integer] amount by which both ends of the window are widened
    def self.rna_sequence_from_entrez(id, position, window, buffer_size = 0)
      na_sequence_from_entrez(id, position, window, buffer_size).rna
    end

    # Fetches the window, widened by +buffer_size+ at both ends, as a Bio::Sequence::NA.
    def self.na_sequence_from_entrez(id, position, window, buffer_size = 0)
      widened = Range.new(window.min - buffer_size, window.max + buffer_size)
      record = sequence_from_entrez(id, position, widened)

      Bio::Sequence::NA.new(record.seq)
    end

    # Fetches the window and wraps the record's sequence in a Bio::Sequence::AA.
    def self.aa_sequence_from_entrez(id, position, window)
      record = sequence_from_entrez(id, position, window)

      Bio::Sequence::AA.new(record.seq)
    end

    # Requests the region from the nuccore database and parses the response
    # body as FASTA.
    #
    # NOTE(review): the retmode/rettype values look swapped compared with the
    # usual EFetch usage (rettype=fasta, retmode=text) - confirm before changing.
    #
    # @return [Bio::FastaFormat]
    def self.sequence_from_entrez(id, position, window)
      seq_start = position + window.min
      seq_stop = position + window.max

      puts "Retrieving sequence from Entrez: using nuccore DB (id: #{id}, seq_start: #{seq_start}, seq_stop: #{seq_stop})"
      puts "> True starting position: #{position} with window #{window.min} to #{window.max}"

      request = {
        id: id,
        seq_start: seq_start,
        seq_stop: seq_stop,
        retmode: :fasta,
        rettype: :text
      }
      fasta = Entrez.EFetch("nuccore", request).response.body

      Bio::FastaFormat.new(fasta)
    end
  end
end
|
data/lib/rbfam.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "bio"
|
2
|
+
require "vienna_rna"
|
3
|
+
require "httparty"
|
4
|
+
require "active_support/inflector"
|
5
|
+
|
6
|
+
module Rbfam
  # Register an autoload for every file under modules/, using the camelized
  # file name as the constant name.
  modules_pattern = File.join(File.dirname(__FILE__), "modules", "*")

  Dir.glob(modules_pattern).each do |path|
    constant_name = File.basename(path, ".rb").camelize.to_sym
    autoload constant_name, path
  end

  # Requires a file from the scripts/ directory next to this file.
  #
  # @param name [String] script name, with or without a trailing ".rb"
  def self.script(name)
    script_name = File.basename(name, '.rb')

    require "#{File.dirname(__FILE__)}/scripts/#{script_name}.rb"
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require "mysql2"
|
2
|
+
require "active_record"
|
3
|
+
|
4
|
+
# Adds an identity method to every object so it can be used as a block
# shorthand (map(&:this)) elsewhere in this script.
class Object
  def this
    self
  end
end
|
5
|
+
|
6
|
+
# ActiveRecord model backed by the "sequences" table.
class SequenceTable < ActiveRecord::Base
  self.table_name = "sequences"

  validates_uniqueness_of :accession, scope: [:seq_from, :seq_to]

  class << self
    # Connects to MySQL as root, creates the "rbfam" database when it is not
    # listed by "show databases", then reconnects with that database selected.
    def connect
      config = { adapter: "mysql2", username: "root", reconnect: true }
      ActiveRecord::Base.establish_connection(config)

      database_names = ActiveRecord::Base.connection.execute("show databases").map { |row| row }.flatten
      ActiveRecord::Base.connection.create_database("rbfam") unless database_names.include?("rbfam")

      ActiveRecord::Base.establish_connection(config.merge(database: "rbfam"))

      # Optional hook: only invoked when something has defined inline_rails.
      inline_rails if defined?(inline_rails)
    end
  end

  # Rebuilds an Rbfam::Sequence from this row, passing the stored sequence
  # along so it does not have to be fetched again.
  #
  # @param family [Rbfam::Family] family to attach to the sequence
  def to_rbfam_sequence(family)
    Rbfam::Sequence.new(family, accession, from, to, sequence: sequence)
  end
end
|
27
|
+
|
28
|
+
# Connect as soon as this script is loaded.
SequenceTable.connect

# Migration that creates the sequences table.
class BuildSequence < ActiveRecord::Migration
  def self.up
    create_table(:sequences) do |t|
      t.string :family
      t.string :accession
      t.text :sequence
      t.integer :sequence_length
      t.integer :from
      t.integer :to
      t.integer :seq_from
      t.integer :seq_to
      t.boolean :seed, default: false
      t.timestamps
    end
  end
end

# Create the table on first use, when "show tables" does not list it yet.
existing_tables = ActiveRecord::Base.connection.execute("show tables").map { |row| row }.flatten
BuildSequence.up unless existing_tables.include?("sequences")
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rbfam
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Evan Senter
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bio
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.4.2
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.2
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: entrez
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.5.8.1
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.5.8.1
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: httparty
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.8.3
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.8.3
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: vienna_rna
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.1.3
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.1.3
|
78
|
+
description: Light wrapper for RFam data in Ruby.
|
79
|
+
email: evansenter@gmail.com
|
80
|
+
executables: []
|
81
|
+
extensions: []
|
82
|
+
extra_rdoc_files: []
|
83
|
+
files:
|
84
|
+
- ./lib/modules/alignment.rb
|
85
|
+
- ./lib/modules/family.rb
|
86
|
+
- ./lib/modules/sequence.rb
|
87
|
+
- ./lib/modules/utils.rb
|
88
|
+
- ./lib/rbfam.rb
|
89
|
+
- ./lib/scripts/sequences_in_mysql.rb
|
90
|
+
homepage: http://rubygems.org/gems/rbfam
|
91
|
+
licenses: []
|
92
|
+
post_install_message:
|
93
|
+
rdoc_options: []
|
94
|
+
require_paths:
|
95
|
+
- lib
|
96
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 1.8.24
|
111
|
+
signing_key:
|
112
|
+
specification_version: 3
|
113
|
+
summary: Bindings to Rfam.
|
114
|
+
test_files: []
|
115
|
+
has_rdoc:
|