rbfam 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,60 @@
1
module Rbfam
  # Downloads and parses the Rfam alignment for a single family.
  #
  # One Alignment instance caches a single pull: the first call to #entries
  # decides which alignment type is fetched, and both the raw response and the
  # parsed entries are memoized afterwards. #seed records whether that cached
  # pull was the seed alignment, which #save_entries! passes on when saving.
  class Alignment
    # One alignment row: "<name>/<from>-<to>", whitespace, then a run made up
    # only of A, U, G, C and "." characters. Captures name, from, to and the
    # aligned string, in that order.
    LINE_REGEXP = /^([\w\.]+)\/(\d+)\-(\d+)\s+([AUGC\.]+)$/

    attr_reader :family, :seed

    # @param family [#family_name] object whose family_name is used as the
    #   accession in the download URL (an Rbfam::Family elsewhere in this gem)
    def initialize(family)
      @family = family
    end

    # Returns the parsed sequences of the alignment, fetching them on first use.
    #
    # The result is memoized regardless of the argument, so a later call with a
    # different alignment type returns the entries of the first pull.
    #
    # @param alignment [Symbol] alignment type placed in the URL's alnType
    # @return [Array<Rbfam::Sequence>]
    # @raise [RuntimeError] when the server does not answer with status 200
    def entries(alignment = :seed)
      @parsed_entries ||= begin
        # Lines starting with "#" can never match LINE_REGEXP (its first
        # character class is [\w.]), so a single filter covers them as well.
        rows = pull_from_server(alignment).split(/\n/).select { |line| line =~ LINE_REGEXP }
        parsed = rows.map { |line| parse_line(line) }
        @seed = alignment == :seed
        parsed
      end
    end

    # Saves every entry, tagging each with the seed flag of the cached pull.
    def save_entries!
      entries.each { |sequence| sequence.save!(seed: seed) }
    end

    # Replaces the cached entries with rows read through SequenceTable.
    # Does not touch #seed.
    #
    # @param options [Hash] extra conditions merged into the family filter
    # @return [Array<Rbfam::Sequence>]
    def load_entries!(options = {})
      Rbfam.script("sequences_in_mysql")

      @parsed_entries = SequenceTable.where({ family: family.family_name }.merge(options)).map do |entry|
        entry.to_rbfam_sequence(family)
      end
    end

    private

    # Fetches the alignment body once and memoizes it in @response.
    #
    # @param alignment [Symbol] alignment type placed in the URL's alnType
    # @return [Object] HTTParty's parsed_response for the request
    # @raise [RuntimeError] when the response code is not "200"
    def pull_from_server(alignment)
      # The original guard read the misspelled @reponse, so the GET line was
      # logged on every call even when the cached body was returned.
      return @response if @response

      url = "http://rfam.sanger.ac.uk/family/alignment/download/format?acc=%s&alnType=%s&nseLabels=1&format=pfam&download=0" % [
        family.family_name,
        alignment
      ]
      puts "GET: %s" % url

      party = HTTParty.get(url)

      unless party.response.code == "200"
        # Alignment has no family_name of its own; the name lives on the family.
        raise RuntimeError, "HTTParty raised the following error when retrieving family %s: %s %s" % [
          family.family_name,
          party.response.code,
          party.response.message
        ]
      end

      puts "RESPONSE: 200 OK"
      @response = party.parsed_response
    end

    # Builds a Sequence from one row that matches LINE_REGEXP.
    def parse_line(line)
      line_match = line.match(LINE_REGEXP)

      Rbfam::Sequence.new(family, line_match[1], line_match[2].to_i, line_match[3].to_i, autoload: { length: 300, extend: 3 })
    end
  end
end
@@ -0,0 +1,60 @@
1
module Rbfam
  # An Rfam family identified by its accession string (for example "RF00167").
  # Lists the family's regions from the Rfam web site or from SequenceTable.
  class Family
    attr_reader :family_name

    class << self
      # Shortcuts for two fixed accessions.
      def purine; new("RF00167"); end
      def tpp; new("RF00059"); end
    end

    # @param family_name [String] accession used in URLs and database filters
    def initialize(family_name)
      @family_name = family_name
    end

    # @return [Rbfam::Alignment] a new alignment wrapper on every call
    def alignment
      Rbfam::Alignment.new(self)
    end

    # Returns the family's regions as sequences, fetching them on first use.
    #
    # Lines starting with "#" are skipped. Blank lines are skipped too, since
    # they would otherwise become sequences with a nil accession.
    #
    # @return [Array<Rbfam::Sequence>]
    # @raise [RuntimeError] when the server does not answer with status 200
    def entries
      @parsed_entries ||= pull_from_server.split(/\n/).reject do |line|
        line =~ /^#/ || line.strip.empty?
      end.map { |line| parse_line(line) }
    end

    # Replaces the cached entries with rows read through SequenceTable.
    #
    # @param options [Hash] extra conditions merged into the family filter
    # @return [Array<Rbfam::Sequence>]
    def load_entries!(options = {})
      Rbfam.script("sequences_in_mysql")

      @parsed_entries = SequenceTable.where({ family: family_name }.merge(options)).map do |entry|
        entry.to_rbfam_sequence(self)
      end
    end

    # Calls save! (without options) on every entry.
    def save_entries!
      entries.each(&:save!)
    end

    private

    # Fetches the regions listing once and memoizes it.
    #
    # A failed request raises instead of being memoized, so the next call
    # tries again; the author's original note says this is meant to fail loudly.
    #
    # @return [Object] HTTParty's parsed_response for the request
    # @raise [RuntimeError] when the response code is not "200"
    def pull_from_server
      return @response if @response

      url = "http://rfam.sanger.ac.uk/family/regions?entry=%s" % family_name
      puts "GET: %s" % url

      party = HTTParty.get(url)

      unless party.response.code == "200"
        raise RuntimeError, "HTTParty raised the following error when retrieving family %s: %s %s" % [
          family_name,
          party.response.code,
          party.response.message
        ]
      end

      puts "RESPONSE: 200 OK"
      @response = party.parsed_response
    end

    # Builds a Sequence from one tab-separated row: field 0 is used as the
    # accession, fields 2 and 3 as the integer from/to coordinates.
    def parse_line(line)
      split_line = line.split(/\t/)

      Rbfam::Sequence.new(self, split_line[0], split_line[2].to_i, split_line[3].to_i, autoload: { length: 300, extend: 3 })
    end
  end
end
@@ -0,0 +1,95 @@
1
module Rbfam
  # One region of a family: an accession plus from/to coordinates, with the
  # nucleotide sequence either supplied up front or fetched on demand.
  class Sequence
    attr_reader :family, :accession, :from, :to

    # @param family [#family_name] owning family
    # @param accession [String] identifier passed to the Entrez lookup
    # @param from [Integer] first coordinate
    # @param to [Integer] second coordinate; the region is on the plus strand
    #   only when to > from
    # @param options [Hash]
    #   :sequence - a sequence to use instead of fetching one
    #   :autoload - when truthy, fetch the sequence immediately; a Hash value
    #               is passed to #sequence as the window options
    def initialize(family, accession, from, to, options = {})
      @family, @accession, @from, @to = family, accession, from, to

      @raw_sequence = options[:sequence] if options[:sequence]

      if options[:autoload]
        sequence(options[:autoload].is_a?(Hash) ? options[:autoload] : {})
      end
    end

    # Writes this region through SequenceTable.create.
    #
    # seq_from/seq_to are derived from a fixed 300 nt, 3'-extended window.
    #
    # @param options [Hash] :seed is stored in the row's seed column
    def save!(options = {})
      Rbfam.script("sequences_in_mysql")

      # Computed once so a warning from coord_window is printed a single time.
      window = coord_window({ length: 300, extend: 3 })
      stored = sequence

      SequenceTable.create({
        family:          family.family_name,
        accession:       accession,
        sequence:        stored,
        sequence_length: stored.length,
        from:            from,
        to:              to,
        seq_from:        up_coord + window.min,
        seq_to:          up_coord + window.max,
        seed:            options[:seed]
      })
    end

    # Smaller of the two coordinates.
    def up_coord
      [from, to].min
    end

    # Larger of the two coordinates.
    def down_coord
      [from, to].max
    end

    # @return [Symbol] :plus when to > from, otherwise :minus
    def strand
      plus_strand? ? :plus : :minus
    end

    def plus_strand?
      to > from
    end

    def minus_strand?
      !plus_strand?
    end

    # Returns the sequence, fetching it via Rbfam::Utils when none was given.
    #
    # On the minus strand the raw sequence is passed through #complement. The
    # oriented result is memoized so that happens exactly once; previously
    # every call complemented the stored value again, so consecutive calls
    # returned alternating results.
    #
    # NOTE(review): a sequence supplied through options[:sequence] is also
    # complemented on the minus strand (as before) and therefore has to
    # respond to #complement - confirm that is intended for rows loaded from
    # the database.
    #
    # @param options [Hash] window options (see #coord_window); only used by
    #   the call that actually fetches
    def sequence(options = {})
      @sequence ||= begin
        @raw_sequence ||= Rbfam::Utils.rna_sequence_from_entrez(accession, up_coord, coord_window(options))
        minus_strand? ? @raw_sequence.complement : @raw_sequence
      end
    end

    alias :seq :sequence

    def mfe_structure
      @mfe_structure ||= ViennaRna::Fold.run(seq).structure
    end

    def fftbor
      @fftbor ||= ViennaRna::Fftbor.run(seq: seq, str: mfe_structure)
    end

    # Window of offsets, relative to up_coord, that should be retrieved.
    #
    # Without both :length and :extend this is 0..(down_coord - up_coord).
    # With both, a region shorter than :length is padded on one side: the far
    # end for [3, :plus] and [5, :minus], the near end for [5, :plus] and
    # [3, :minus]. A region that is already long enough, or an :extend value
    # other than 3 or 5, prints a warning and yields the unpadded range (these
    # paths used to return nil, the result of puts).
    #
    # @param options [Hash] :length (Integer) and :extend (3 or 5)
    # @return [Range]
    def coord_window(options = {})
      range = 0..(down_coord - up_coord)
      return range unless options[:length] && options[:extend]

      # Number of positions in range, without enumerating it.
      span = down_coord - up_coord + 1

      if span < options[:length]
        length_difference = options[:length] - span

        case [options[:extend], strand]
        when [3, :plus], [5, :minus] then Range.new(range.min, range.max + length_difference)
        when [5, :plus], [3, :minus] then Range.new(range.min - length_difference, range.max)
        else
          puts "WARNING: value for :extend key in sequence retreival needs to be one of 5, 3 - found (%s)" % options[:extend]
          range
        end
      else
        puts "WARNING: %s %d-%d (%s) is length %d, but only %d nt. have been requested. Providing the full sequence anyways." % [
          accession,
          from,
          to,
          strand,
          span,
          options[:length]
        ]
        range
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require "entrez"
2
+
3
module Rbfam
  # Helpers that fetch a stretch of a record from Entrez and wrap it in the
  # matching BioRuby sequence class.
  module Utils
    # Like na_sequence_from_entrez, converted with #rna.
    def self.rna_sequence_from_entrez(id, position, window, buffer_size = 0)
      na_sequence_from_entrez(id, position, window, buffer_size).rna
    end

    # Fetches the window, widened by buffer_size on both ends, as a
    # Bio::Sequence::NA.
    def self.na_sequence_from_entrez(id, position, window, buffer_size = 0)
      padded_window = Range.new(window.min - buffer_size, window.max + buffer_size)
      fasta_record  = sequence_from_entrez(id, position, padded_window)

      Bio::Sequence::NA.new(fasta_record.seq)
    end

    # Fetches the window as a Bio::Sequence::AA.
    def self.aa_sequence_from_entrez(id, position, window)
      fasta_record = sequence_from_entrez(id, position, window)

      Bio::Sequence::AA.new(fasta_record.seq)
    end

    # Requests positions (position + window.min)..(position + window.max) of
    # record `id` from the "nuccore" database and parses the response body
    # with Bio::FastaFormat.
    #
    # NOTE(review): the E-utilities documentation pairs rettype=fasta with
    # retmode=text; the two values below are the other way round - confirm
    # before changing, since the request evidently worked for the author.
    def self.sequence_from_entrez(id, position, window)
      first_offset = window.min
      last_offset  = window.max
      seq_start    = position + first_offset
      seq_stop     = position + last_offset

      puts "Retrieving sequence from Entrez: using nuccore DB (id: #{id}, seq_start: #{seq_start}, seq_stop: #{seq_stop})"
      puts "> True starting position: #{position} with window #{first_offset} to #{last_offset}"

      query = {
        id:        id,
        seq_start: seq_start,
        seq_stop:  seq_stop,
        retmode:   :fasta,
        rettype:   :text
      }
      fasta = Entrez.EFetch("nuccore", query).response.body

      Bio::FastaFormat.new(fasta)
    end
  end
end
data/lib/rbfam.rb ADDED
@@ -0,0 +1,14 @@
1
+ require "bio"
2
+ require "vienna_rna"
3
+ require "httparty"
4
+ require "active_support/inflector"
5
+
6
module Rbfam
  # Register each entry of the sibling "modules" directory for autoloading
  # under the camelized form of its base name (".rb" stripped).
  module_glob = File.join(File.dirname(__FILE__), "/modules/*")

  Dir[module_glob].each do |path|
    constant_name = File.basename(path, ".rb").camelize.to_sym
    autoload constant_name, path
  end

  # Requires a file from the sibling "scripts" directory.
  #
  # @param name [String] script name, with or without a ".rb" suffix
  # @return [Boolean] whatever Kernel#require returns
  def self.script(name)
    script_name = File.basename(name, '.rb')

    require File.dirname(__FILE__) + "/scripts/#{script_name}.rb"
  end
end
@@ -0,0 +1,49 @@
1
+ require "mysql2"
2
+ require "active_record"
3
+
4
# Adds an identity method to every object so that it can be passed as a
# symbol-to-proc block (`map(&:this)`).
class Object
  def this
    self
  end
end
5
+
6
# ActiveRecord model over the "sequences" table.
class SequenceTable < ActiveRecord::Base
  self.table_name = "sequences"

  validates_uniqueness_of :accession, scope: [:seq_from, :seq_to]

  # Connects to MySQL as root, creates the "rbfam" database when it is not
  # listed by "show databases", then reconnects with that database selected.
  def self.connect
    config = { adapter: "mysql2", username: "root", reconnect: true }
    ActiveRecord::Base.establish_connection(config)

    known_databases = ActiveRecord::Base.connection.execute("show databases").map { |i| i }.flatten
    ActiveRecord::Base.connection.create_database("rbfam") unless known_databases.include?("rbfam")

    ActiveRecord::Base.establish_connection(config.merge(database: "rbfam"))

    # inline_rails is not defined anywhere in this file; presumably a hook
    # supplied by the host environment - it is only called when present.
    inline_rails if defined?(inline_rails)
  end

  # Converts this row into an Rbfam::Sequence for the given family, handing
  # over the stored sequence so nothing is fetched again.
  def to_rbfam_sequence(family)
    Rbfam::Sequence.new(family, accession, from, to, sequence: sequence)
  end
end
27
+
28
# Open the database connection as soon as this script is loaded; the table
# check at the bottom depends on it.
SequenceTable.connect

# Migration that creates the "sequences" table.
class BuildSequence < ActiveRecord::Migration
  def self.up
    create_table :sequences do |t|
      t.string  :family
      t.string  :accession
      t.text    :sequence
      t.integer :sequence_length
      t.integer :from
      t.integer :to
      t.integer :seq_from
      t.integer :seq_to
      t.boolean :seed, default: false
      t.timestamps
    end
  end
end

# Run the migration only when "show tables" does not already list the table.
existing_tables = ActiveRecord::Base.connection.execute("show tables").map(&:this).flatten
BuildSequence.up unless existing_tables.include?("sequences")
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rbfam
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Evan Senter
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bio
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.4.2
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.2
30
+ - !ruby/object:Gem::Dependency
31
+ name: entrez
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 0.5.8.1
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 0.5.8.1
46
+ - !ruby/object:Gem::Dependency
47
+ name: httparty
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 0.8.3
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.8.3
62
+ - !ruby/object:Gem::Dependency
63
+ name: vienna_rna
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 0.1.3
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 0.1.3
78
+ description: Light wrapper for RFam data in Ruby.
79
+ email: evansenter@gmail.com
80
+ executables: []
81
+ extensions: []
82
+ extra_rdoc_files: []
83
+ files:
84
+ - ./lib/modules/alignment.rb
85
+ - ./lib/modules/family.rb
86
+ - ./lib/modules/sequence.rb
87
+ - ./lib/modules/utils.rb
88
+ - ./lib/rbfam.rb
89
+ - ./lib/scripts/sequences_in_mysql.rb
90
+ homepage: http://rubygems.org/gems/rbfam
91
+ licenses: []
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ requirements: []
109
+ rubyforge_project:
110
+ rubygems_version: 1.8.24
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: Bindings to Rfam.
114
+ test_files: []
115
+ has_rdoc: