rbfam 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/modules/alignment.rb +60 -0
- data/lib/modules/family.rb +60 -0
- data/lib/modules/sequence.rb +95 -0
- data/lib/modules/utils.rb +34 -0
- data/lib/rbfam.rb +14 -0
- data/lib/scripts/sequences_in_mysql.rb +49 -0
- metadata +115 -0
@@ -0,0 +1,60 @@
|
|
1
|
+
module Rbfam
  # Downloads an Rfam alignment for one family and turns each alignment row
  # into an Rbfam::Sequence.
  #
  # Parsed entries are memoized in @parsed_entries, so once #entries (or
  # #load_entries!) has run, later calls to #entries return the cached list
  # regardless of the +alignment+ argument passed.
  class Alignment
    # Matches one alignment row: "<accession>/<from>-<to>  <aligned sequence>".
    LINE_REGEXP = /^([\w\.]+)\/(\d+)\-(\d+)\s+([AUGC\.]+)$/

    attr_reader :family, :seed

    # family - object responding to #family_name (e.g. an Rbfam::Family).
    def initialize(family)
      @family = family
    end

    # Returns the parsed alignment rows as Rbfam::Sequence objects.
    #
    # alignment - alignment type interpolated into the download URL
    #             (default :seed).
    #
    # Also records in @seed whether the fetched alignment was the seed one.
    # Raises RuntimeError when the server does not answer with HTTP 200.
    def entries(alignment = :seed)
      @parsed_entries ||= begin
        # Comment lines start with "#", which LINE_REGEXP can never match, so
        # a single grep drops them together with every other non-row line.
        rows   = pull_from_server(alignment).split(/\n/).grep(LINE_REGEXP)
        parsed = rows.map { |row| parse_line(row) }
        @seed  = alignment == :seed
        parsed
      end
    end

    # Saves every entry, tagging each with the current seed flag.
    def save_entries!
      entries.each { |sequence| sequence.save!(seed: seed) }
    end

    # Replaces the cached entries with the rows SequenceTable returns for this
    # family; +options+ are merged into the query conditions.
    def load_entries!(options = {})
      Rbfam.script("sequences_in_mysql")

      @parsed_entries = SequenceTable.where({ family: family.family_name }.merge(options)).map do |entry|
        entry.to_rbfam_sequence(family)
      end
    end

    private

    # Fetches the raw alignment text, memoizing the body in @response.
    # Raises RuntimeError (naming the family, status code and message) on any
    # response code other than "200".
    def pull_from_server(alignment)
      url = "http://rfam.sanger.ac.uk/family/alignment/download/format?acc=%s&alnType=%s&nseLabels=1&format=pfam&download=0" % [
        family.family_name,
        alignment
      ]
      puts "GET: %s" % url unless @response

      @response ||= begin
        party = HTTParty.get(url)

        unless party.response.code == "200"
          # The family name lives on the family object; this class has no
          # family_name method of its own.
          raise RuntimeError, "HTTParty raised the following error when retrieving family %s: %s %s" % [
            family.family_name,
            party.response.code,
            party.response.message
          ]
        end

        puts "RESPONSE: 200 OK"
        party.parsed_response
      end
    end

    # Builds a Sequence from one alignment row (accession, from, to); the
    # aligned sequence text itself (capture 4) is not used.
    def parse_line(line)
      line_match = line.match(LINE_REGEXP)

      Rbfam::Sequence.new(family, line_match[1], line_match[2].to_i, line_match[3].to_i, autoload: { length: 300, extend: 3 })
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Rbfam
  # An Rfam family identified by its accession (e.g. "RF00167"), with helpers
  # to fetch its regions from the Rfam server or from the local database.
  class Family
    attr_reader :family_name

    # Shortcut for the family with accession RF00167.
    def self.purine
      new("RF00167")
    end

    # Shortcut for the family with accession RF00059.
    def self.tpp
      new("RF00059")
    end

    def initialize(family_name)
      @family_name = family_name
    end

    # Returns a new Alignment bound to this family.
    def alignment
      Rbfam::Alignment.new(self)
    end

    # Returns one Rbfam::Sequence per non-comment line of the regions
    # download; the list is memoized after the first call.
    def entries
      @parsed_entries ||= begin
        data_lines = pull_from_server.split(/\n/).reject { |line| line =~ /^#/ }
        data_lines.map { |line| parse_line(line) }
      end
    end

    # Replaces the cached entries with rows from SequenceTable for this
    # family; +options+ are merged into the query conditions.
    def load_entries!(options = {})
      Rbfam.script("sequences_in_mysql")

      conditions = { family: family_name }.merge(options)
      @parsed_entries = SequenceTable.where(conditions).map do |row|
        row.to_rbfam_sequence(self)
      end
    end

    # Calls save! on every entry.
    def save_entries!
      entries.each { |entry| entry.save! }
    end

    private

    # Downloads the regions listing, caching the body after the first
    # successful request. A non-200 answer raises RuntimeError on purpose:
    # the failure is not expected in normal use and should be loud.
    def pull_from_server
      url = "http://rfam.sanger.ac.uk/family/regions?entry=%s" % family_name
      puts "GET: %s" % url unless @response
      return @response if @response

      party = HTTParty.get(url)

      unless party.response.code == "200"
        details = [family_name, party.response.code, party.response.message]
        raise RuntimeError, "HTTParty raised the following error when retrieving family %s: %s %s" % details
      end

      puts "RESPONSE: 200 OK"
      @response = party.parsed_response
    end

    # Builds a Sequence from one tab-separated line: column 0 is used as the
    # accession, columns 2 and 3 as the integer from/to coordinates.
    def parse_line(line)
      columns = line.split(/\t/)
      accession = columns[0]
      from = columns[2].to_i
      to = columns[3].to_i

      Rbfam::Sequence.new(self, accession, from, to, autoload: { length: 300, extend: 3 })
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module Rbfam
  # One region of a family: an accession plus from/to coordinates, with lazy
  # retrieval of the nucleotide sequence.
  class Sequence
    attr_reader :family, :accession, :from, :to

    # family    - object responding to #family_name.
    # accession - identifier passed to the Entrez lookup.
    # from, to  - coordinates; to > from is treated as the plus strand.
    # options   - :sequence  supplies the raw sequence instead of fetching it;
    #             :autoload  fetches immediately (a Hash value is forwarded to
    #                        #sequence as the window options).
    def initialize(family, accession, from, to, options = {})
      @family, @accession, @from, @to = family, accession, from, to

      @raw_sequence = options[:sequence] if options[:sequence]

      if options[:autoload]
        sequence(options[:autoload].is_a?(Hash) ? options[:autoload] : {})
      end
    end

    # Stores this sequence through SequenceTable. seq_from/seq_to are computed
    # from a fixed window of length 300 extended in the 3' direction.
    #
    # options - :seed value written to the seed column.
    def save!(options = {})
      Rbfam.script("sequences_in_mysql")

      oriented = sequence
      # Compute the window once so any warning it emits is printed once.
      window = coord_window({ length: 300, extend: 3 })

      SequenceTable.create({
        family:          family.family_name,
        accession:       accession,
        sequence:        oriented,
        sequence_length: oriented.length,
        from:            from,
        to:              to,
        seq_from:        up_coord + window.min,
        seq_to:          up_coord + window.max,
        seed:            options[:seed]
      })
    end

    # Smaller of the two coordinates.
    def up_coord
      [from, to].min
    end

    # Larger of the two coordinates.
    def down_coord
      [from, to].max
    end

    # :plus or :minus, see #plus_strand?.
    def strand
      plus_strand? ? :plus : :minus
    end

    def plus_strand?
      to > from
    end

    def minus_strand?
      !plus_strand?
    end

    # Returns the strand-oriented sequence, fetching it from Entrez on first
    # use when none was supplied. +options+ (window settings) only matter for
    # that first fetch.
    #
    # The orientation step (#complement for minus-strand entries) is applied
    # exactly once and the result memoized; re-applying it on every call would
    # make the returned sequence alternate between orientations.
    #
    # NOTE(review): a minus-strand sequence supplied via options[:sequence]
    # must respond to #complement (a plain String does not) — confirm what the
    # database-loaded path is expected to hold.
    def sequence(options = {})
      @sequence ||= begin
        @raw_sequence ||= Rbfam::Utils.rna_sequence_from_entrez(accession, up_coord, coord_window(options))
        minus_strand? ? @raw_sequence.complement : @raw_sequence
      end
    end

    alias :seq :sequence

    def mfe_structure
      @mfe_structure ||= ViennaRna::Fold.run(seq).structure
    end

    def fftbor
      @fftbor ||= ViennaRna::Fftbor.run(seq: seq, str: mfe_structure)
    end

    # Returns the window, as a Range of offsets relative to #up_coord, that
    # should be retrieved for this entry.
    #
    # options - :length  desired total window length
    #           :extend  3 or 5, the end toward which a too-short region is
    #                    padded (interpreted relative to the strand)
    #
    # Without both options the region's own span is returned. A region that is
    # longer than :length, or an :extend value other than 3/5, produces a
    # warning and falls back to the region's own span, so callers always get a
    # Range back.
    def coord_window(options = {})
      range = 0..(down_coord - up_coord)
      return range unless options[:length] && options[:extend]

      span = range.size

      if span >= options[:length]
        # Only warn when the region really exceeds the request; an exact fit
        # needs no adjustment and no warning.
        if span > options[:length]
          puts "WARNING: %s %d-%d (%s) is length %d, but only %d nt. have been requested. Providing the full sequence anyways." % [
            accession,
            from,
            to,
            strand,
            span,
            options[:length]
          ]
        end

        return range
      end

      length_difference = options[:length] - span

      case [options[:extend], strand]
      when [3, :plus], [5, :minus] then Range.new(range.min, range.max + length_difference)
      when [5, :plus], [3, :minus] then Range.new(range.min - length_difference, range.max)
      else
        puts "WARNING: value for :extend key in sequence retreival needs to be one of 5, 3 - found (%s)" % options[:extend]
        range
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require "entrez"
|
2
|
+
|
3
|
+
module Rbfam
  # Helpers for pulling sequence records from Entrez (nuccore database).
  module Utils
    # Fetches a nucleotide sequence and returns the result of calling #rna on
    # it. Arguments are the same as for na_sequence_from_entrez.
    def self.rna_sequence_from_entrez(id, position, window, buffer_size = 0)
      nucleotides = na_sequence_from_entrez(id, position, window, buffer_size)
      nucleotides.rna
    end

    # Fetches a record and wraps its sequence in Bio::Sequence::NA.
    #
    # id          - Entrez record id.
    # position    - base coordinate the window offsets are added to.
    # window      - Range of offsets relative to +position+.
    # buffer_size - widens the window by this amount on both ends.
    def self.na_sequence_from_entrez(id, position, window, buffer_size = 0)
      padded_window = Range.new(window.min - buffer_size, window.max + buffer_size)
      record = sequence_from_entrez(id, position, padded_window)

      Bio::Sequence::NA.new(record.seq)
    end

    # Fetches a record and wraps its sequence in Bio::Sequence::AA.
    def self.aa_sequence_from_entrez(id, position, window)
      record = sequence_from_entrez(id, position, window)

      Bio::Sequence::AA.new(record.seq)
    end

    # Requests positions (position + window.min) through
    # (position + window.max) of record +id+ from nuccore and parses the
    # response body as FASTA.
    #
    # NOTE(review): retmode/rettype are sent as :fasta/:text respectively;
    # confirm against the E-utilities documentation that this is the intended
    # pairing.
    def self.sequence_from_entrez(id, position, window)
      seq_start = position + window.min
      seq_stop  = position + window.max

      puts "Retrieving sequence from Entrez: using nuccore DB (id: #{id}, seq_start: #{seq_start}, seq_stop: #{seq_stop})"
      puts "> True starting position: #{position} with window #{window.min} to #{window.max}"

      query = {
        id:        id,
        seq_start: seq_start,
        seq_stop:  seq_stop,
        retmode:   :fasta,
        rettype:   :text
      }
      fasta = Entrez.EFetch("nuccore", query).response.body

      Bio::FastaFormat.new(fasta)
    end
  end
end
|
data/lib/rbfam.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "bio"
|
2
|
+
require "vienna_rna"
|
3
|
+
require "httparty"
|
4
|
+
require "active_support/inflector"
|
5
|
+
|
6
|
+
module Rbfam
  # Register an autoload for every file under modules/, using the camelized
  # file name (without ".rb") as the constant name.
  module_glob = File.join(File.dirname(__FILE__), "/modules/*")

  Dir[module_glob].each do |path|
    constant_name = File.basename(path, ".rb").camelize.to_sym
    autoload constant_name, path
  end

  # Requires scripts/<name>.rb from next to this file. +name+ may be given
  # with or without the ".rb" extension.
  def self.script(name)
    script_name = File.basename(name, '.rb')
    require "#{File.dirname(__FILE__)}/scripts/#{script_name}.rb"
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require "mysql2"
|
2
|
+
require "active_record"
|
3
|
+
|
4
|
+
# Adds an identity method to every object so it can be passed as a block via
# Symbol#to_proc (e.g. map(&:this)).
class Object
  def this
    self
  end
end
|
5
|
+
|
6
|
+
# ActiveRecord model over the "sequences" table.
class SequenceTable < ActiveRecord::Base
  self.table_name = "sequences"

  validates_uniqueness_of :accession, scope: %i[seq_from seq_to]

  # Connects to MySQL as "root", creates the "rbfam" database when it is not
  # listed by "show databases", then reconnects with that database selected.
  # Finally calls inline_rails when that name is defined.
  def self.connect
    config = { adapter: "mysql2", username: "root", reconnect: true }
    ActiveRecord::Base.establish_connection(config)

    database_names = ActiveRecord::Base.connection.execute("show databases").map { |row| row }.flatten
    ActiveRecord::Base.connection.create_database("rbfam") unless database_names.include?("rbfam")

    ActiveRecord::Base.establish_connection(config.merge(database: "rbfam"))

    inline_rails if defined?(inline_rails)
  end

  # Builds an Rbfam::Sequence for +family+ from this row, passing the stored
  # sequence along so it is not fetched again.
  def to_rbfam_sequence(family)
    Rbfam::Sequence.new(family, accession, from, to, sequence: sequence)
  end
end
|
27
|
+
|
28
|
+
# Establish the database connection as soon as this script is loaded; the
# table check further down in this file relies on it.
SequenceTable.connect
|
29
|
+
|
30
|
+
# Migration that creates the "sequences" table.
class BuildSequence < ActiveRecord::Migration
  def self.up
    create_table :sequences do |t|
      # Identity of the region.
      t.string  :family
      t.string  :accession

      # Stored sequence text and its length.
      t.text    :sequence
      t.integer :sequence_length

      # Original coordinates, and the coordinates of the stored window.
      t.integer :from
      t.integer :to
      t.integer :seq_from
      t.integer :seq_to

      t.boolean :seed, default: false
      t.timestamps
    end
  end
end
|
46
|
+
|
47
|
+
# Create the "sequences" table on first use, i.e. when "show tables" does not
# list it yet.
existing_tables = ActiveRecord::Base.connection.execute("show tables").map(&:this).flatten
BuildSequence.up unless existing_tables.include?("sequences")
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rbfam
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Evan Senter
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bio
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.4.2
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.2
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: entrez
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.5.8.1
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.5.8.1
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: httparty
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.8.3
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.8.3
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: vienna_rna
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.1.3
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.1.3
|
78
|
+
description: Light wrapper for RFam data in Ruby.
|
79
|
+
email: evansenter@gmail.com
|
80
|
+
executables: []
|
81
|
+
extensions: []
|
82
|
+
extra_rdoc_files: []
|
83
|
+
files:
|
84
|
+
- ./lib/modules/alignment.rb
|
85
|
+
- ./lib/modules/family.rb
|
86
|
+
- ./lib/modules/sequence.rb
|
87
|
+
- ./lib/modules/utils.rb
|
88
|
+
- ./lib/rbfam.rb
|
89
|
+
- ./lib/scripts/sequences_in_mysql.rb
|
90
|
+
homepage: http://rubygems.org/gems/rbfam
|
91
|
+
licenses: []
|
92
|
+
post_install_message:
|
93
|
+
rdoc_options: []
|
94
|
+
require_paths:
|
95
|
+
- lib
|
96
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 1.8.24
|
111
|
+
signing_key:
|
112
|
+
specification_version: 3
|
113
|
+
summary: Bindings to Rfam.
|
114
|
+
test_files: []
|
115
|
+
has_rdoc:
|