rbfam 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +95 -0
- data/Rakefile +3 -0
- data/db/config.yml +5 -0
- data/db/migrate/20140114232757_add_alignment_table.rb +14 -0
- data/db/migrate/20140114232805_add_sequence_table.rb +17 -0
- data/db/migrate/20140114232810_add_family_table.rb +15 -0
- data/db/schema.rb +44 -0
- data/lib/rbfam.rb +10 -3
- data/lib/rbfam/modules/alignment.rb +15 -17
- data/lib/rbfam/modules/family.rb +17 -19
- data/lib/rbfam/modules/sequence.rb +4 -14
- data/lib/rbfam/scripts/sequences_in_mysql.rb +38 -21
- data/lib/rbfam/version.rb +3 -0
- data/load_rbfam_seed.rb +28 -0
- data/rbfam.gemspec +34 -0
- data/rfam_12.seed.utf8 +139884 -0
- metadata +169 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a266530dd38f3e5f9505a9f4ed0ce6ca4ccdea89
|
4
|
+
data.tar.gz: 2d0376d8ba04b9afe22a7d434f75d9e8bc17a4d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1534185f816b75dc489fd5b92bb37f5e133a1cf2c6f9c7dbb65fda8184bb8cd7d7ffb5b40e6ca10a608b75355a209246ad41843e9def666f8c9a07422d846a7
|
7
|
+
data.tar.gz: cbdf33c78a5a248a5eb59a02a39affaa6b3ed66fd51651a68e8ac0db18c2ed22157b891a4a2a5c16d37fff48a4c0a8d20a37d008b5e2da251c16639f914402af
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
rbfam (0.2.0)
|
5
|
+
activerecord
|
6
|
+
activesupport
|
7
|
+
bio (~> 1.4, >= 1.4.2)
|
8
|
+
bio-stockholm (~> 0.0, >= 0.0.1)
|
9
|
+
entrez (~> 0.5, >= 0.5.8.1)
|
10
|
+
httparty (~> 0.8, >= 0.8.3)
|
11
|
+
mysql2 (~> 0.3, >= 0.3.14)
|
12
|
+
nokogiri (~> 1.6, >= 1.6.1)
|
13
|
+
parallel (~> 1.3, >= 1.3.2)
|
14
|
+
|
15
|
+
GEM
|
16
|
+
remote: https://rubygems.org/
|
17
|
+
specs:
|
18
|
+
actionpack (3.2.21)
|
19
|
+
activemodel (= 3.2.21)
|
20
|
+
activesupport (= 3.2.21)
|
21
|
+
builder (~> 3.0.0)
|
22
|
+
erubis (~> 2.7.0)
|
23
|
+
journey (~> 1.0.4)
|
24
|
+
rack (~> 1.4.5)
|
25
|
+
rack-cache (~> 1.2)
|
26
|
+
rack-test (~> 0.6.1)
|
27
|
+
sprockets (~> 2.2.1)
|
28
|
+
activemodel (3.2.21)
|
29
|
+
activesupport (= 3.2.21)
|
30
|
+
builder (~> 3.0.0)
|
31
|
+
activerecord (3.2.21)
|
32
|
+
activemodel (= 3.2.21)
|
33
|
+
activesupport (= 3.2.21)
|
34
|
+
arel (~> 3.0.2)
|
35
|
+
tzinfo (~> 0.3.29)
|
36
|
+
activesupport (3.2.21)
|
37
|
+
i18n (~> 0.6, >= 0.6.4)
|
38
|
+
multi_json (~> 1.0)
|
39
|
+
arel (3.0.3)
|
40
|
+
bio (1.4.3.0001)
|
41
|
+
bio-stockholm (0.0.1)
|
42
|
+
builder (3.0.4)
|
43
|
+
entrez (0.5.8.1)
|
44
|
+
httparty
|
45
|
+
erubis (2.7.0)
|
46
|
+
hike (1.2.3)
|
47
|
+
httparty (0.13.3)
|
48
|
+
json (~> 1.8)
|
49
|
+
multi_xml (>= 0.5.2)
|
50
|
+
i18n (0.6.11)
|
51
|
+
journey (1.0.4)
|
52
|
+
json (1.8.1)
|
53
|
+
mini_portile (0.6.1)
|
54
|
+
multi_json (1.10.1)
|
55
|
+
multi_xml (0.5.5)
|
56
|
+
mysql2 (0.3.17)
|
57
|
+
nokogiri (1.6.5)
|
58
|
+
mini_portile (~> 0.6.0)
|
59
|
+
parallel (1.3.3)
|
60
|
+
rack (1.4.5)
|
61
|
+
rack-cache (1.2)
|
62
|
+
rack (>= 0.4)
|
63
|
+
rack-ssl (1.3.4)
|
64
|
+
rack
|
65
|
+
rack-test (0.6.2)
|
66
|
+
rack (>= 1.0)
|
67
|
+
railties (3.2.21)
|
68
|
+
actionpack (= 3.2.21)
|
69
|
+
activesupport (= 3.2.21)
|
70
|
+
rack-ssl (~> 1.3.2)
|
71
|
+
rake (>= 0.8.7)
|
72
|
+
rdoc (~> 3.4)
|
73
|
+
thor (>= 0.14.6, < 2.0)
|
74
|
+
rake (10.4.2)
|
75
|
+
rdoc (3.12.2)
|
76
|
+
json (~> 1.4)
|
77
|
+
sprockets (2.2.3)
|
78
|
+
hike (~> 1.2)
|
79
|
+
multi_json (~> 1.0)
|
80
|
+
rack (~> 1.0)
|
81
|
+
tilt (~> 1.1, != 1.3.0)
|
82
|
+
standalone_migrations (2.1.5)
|
83
|
+
activerecord (~> 3.2)
|
84
|
+
railties (~> 3.2)
|
85
|
+
rake (~> 10.0)
|
86
|
+
thor (0.19.1)
|
87
|
+
tilt (1.4.1)
|
88
|
+
tzinfo (0.3.42)
|
89
|
+
|
90
|
+
PLATFORMS
|
91
|
+
ruby
|
92
|
+
|
93
|
+
DEPENDENCIES
|
94
|
+
rbfam!
|
95
|
+
standalone_migrations (~> 2.1, >= 2.1.5)
|
data/Rakefile
ADDED
data/db/config.yml
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
class AddAlignmentTable < ActiveRecord::Migration
|
2
|
+
def up
|
3
|
+
create_table :alignments do |table|
|
4
|
+
table.timestamps
|
5
|
+
table.belongs_to :family
|
6
|
+
table.text :stockholm, limit: 4294967295
|
7
|
+
table.text :consensus_structure
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def down
|
12
|
+
drop_table :alignments
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
class AddSequenceTable < ActiveRecord::Migration
|
2
|
+
def up
|
3
|
+
create_table :sequences do |table|
|
4
|
+
table.timestamps
|
5
|
+
table.belongs_to :alignment
|
6
|
+
table.string :accession
|
7
|
+
table.text :stripped_sequence
|
8
|
+
table.text :alignment_sequence
|
9
|
+
table.integer :from
|
10
|
+
table.integer :to
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def down
|
15
|
+
drop_table :sequences
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class AddFamilyTable < ActiveRecord::Migration
|
2
|
+
def up
|
3
|
+
create_table :families do |table|
|
4
|
+
table.timestamps
|
5
|
+
table.string :name
|
6
|
+
table.string :description
|
7
|
+
end
|
8
|
+
|
9
|
+
add_index :families, :name, unique: true
|
10
|
+
end
|
11
|
+
|
12
|
+
def down
|
13
|
+
drop_table :families
|
14
|
+
end
|
15
|
+
end
|
data/db/schema.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# This file is auto-generated from the current state of the database. Instead
|
3
|
+
# of editing this file, please use the migrations feature of Active Record to
|
4
|
+
# incrementally modify your database, and then regenerate this schema definition.
|
5
|
+
#
|
6
|
+
# Note that this schema.rb definition is the authoritative source for your
|
7
|
+
# database schema. If you need to create the application database on another
|
8
|
+
# system, you should be using db:schema:load, not running all the migrations
|
9
|
+
# from scratch. The latter is a flawed and unsustainable approach (the more migrations
|
10
|
+
# you'll amass, the slower it'll run and the greater likelihood for issues).
|
11
|
+
#
|
12
|
+
# It's strongly recommended to check this file into your version control system.
|
13
|
+
|
14
|
+
ActiveRecord::Schema.define(:version => 20140114232810) do
|
15
|
+
|
16
|
+
create_table "alignments", :force => true do |t|
|
17
|
+
t.datetime "created_at", :null => false
|
18
|
+
t.datetime "updated_at", :null => false
|
19
|
+
t.integer "family_id"
|
20
|
+
t.text "stockholm", :limit => 2147483647
|
21
|
+
t.text "consensus_structure"
|
22
|
+
end
|
23
|
+
|
24
|
+
create_table "families", :force => true do |t|
|
25
|
+
t.datetime "created_at", :null => false
|
26
|
+
t.datetime "updated_at", :null => false
|
27
|
+
t.string "name"
|
28
|
+
t.string "description"
|
29
|
+
end
|
30
|
+
|
31
|
+
add_index "families", ["name"], :name => "index_families_on_name", :unique => true
|
32
|
+
|
33
|
+
create_table "sequences", :force => true do |t|
|
34
|
+
t.datetime "created_at", :null => false
|
35
|
+
t.datetime "updated_at", :null => false
|
36
|
+
t.integer "alignment_id"
|
37
|
+
t.string "accession"
|
38
|
+
t.text "stripped_sequence"
|
39
|
+
t.text "alignment_sequence"
|
40
|
+
t.integer "from"
|
41
|
+
t.integer "to"
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
data/lib/rbfam.rb
CHANGED
@@ -1,15 +1,22 @@
|
|
1
|
-
require "
|
1
|
+
require "active_record"
|
2
|
+
require "active_support/inflector"
|
2
3
|
require "bio"
|
3
|
-
require "
|
4
|
+
require "bio-stockholm"
|
4
5
|
require "entrez"
|
5
6
|
require "httparty"
|
6
|
-
require "
|
7
|
+
require "json"
|
8
|
+
require "mysql2"
|
9
|
+
require "parallel"
|
7
10
|
|
8
11
|
%W|helpers modules|.each do |folder|
|
9
12
|
Dir[File.join(File.dirname(__FILE__), "rbfam", folder, "*.rb")].each { |name| require "rbfam/#{folder}/#{File.basename(name, '.rb')}" }
|
10
13
|
end
|
11
14
|
|
12
15
|
module Rbfam
|
16
|
+
def self.connect(config: nil)
|
17
|
+
ActiveRecord::Base.establish_connection(config || YAML.load_file(File.join(File.dirname(__FILE__), "..", "db", "config.yml"))["development"])
|
18
|
+
end
|
19
|
+
|
13
20
|
def self.script(name)
|
14
21
|
require "rbfam/scripts/#{File.basename(name, '.rb')}"
|
15
22
|
end
|
@@ -1,46 +1,44 @@
|
|
1
1
|
module Rbfam
|
2
2
|
class Alignment
|
3
3
|
include Rbfam::CommonHelpers
|
4
|
-
|
4
|
+
|
5
5
|
LINE_REGEXP = /^([\w\.]+)\/(\d+)\-(\d+)\s+([AUGC\.]+)$/
|
6
|
-
|
6
|
+
|
7
7
|
attr_reader :family, :seed
|
8
|
-
|
8
|
+
|
9
9
|
def initialize(family)
|
10
10
|
@family = family
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
def entries(options = {})
|
14
14
|
options = { alignment: :seed, limit: false }.merge(options)
|
15
|
-
|
16
|
-
@parsed_entries ||= (pull_from_server(options[:alignment]) || "").split(/\n/).reject do |line|
|
15
|
+
|
16
|
+
@parsed_entries ||= (pull_from_server(options[:alignment]) || "").split(/\n/).reject do |line|
|
17
17
|
line =~ /^#/
|
18
|
-
end.select do |line|
|
18
|
+
end.select do |line|
|
19
19
|
line =~ LINE_REGEXP
|
20
20
|
end[options[:limit] ? 0...options[:limit] : 0..-1].map(&method(:parse_line)).tap do
|
21
21
|
@seed = options[:alignment] == :seed
|
22
22
|
end
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
def save_entries!
|
26
26
|
entries.each { |sequence| sequence.save!(seed: seed) }
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
def load_entries!(options = {})
|
30
|
-
options = { extended: false }.merge(options)
|
31
|
-
|
32
30
|
@parsed_entries = family.load_entries!(options)
|
33
31
|
end
|
34
|
-
|
32
|
+
|
35
33
|
private
|
36
|
-
|
34
|
+
|
37
35
|
def pull_from_server(alignment)
|
38
36
|
url = "http://rfam.sanger.ac.uk/family/alignment/download/format?acc=%s&alnType=%s&nseLabels=1&format=pfam&download=0" % [
|
39
37
|
family.family_name,
|
40
38
|
alignment
|
41
39
|
]
|
42
40
|
puts "GET: %s" % url unless @reponse
|
43
|
-
|
41
|
+
|
44
42
|
@response ||= if (party = HTTParty.get(url)).response.code == "200"
|
45
43
|
puts "RESPONSE: 200 OK"
|
46
44
|
party.parsed_response
|
@@ -52,11 +50,11 @@ module Rbfam
|
|
52
50
|
])
|
53
51
|
end
|
54
52
|
end
|
55
|
-
|
53
|
+
|
56
54
|
def parse_line(line)
|
57
55
|
line_match = line.match(LINE_REGEXP)
|
58
|
-
|
56
|
+
|
59
57
|
Rbfam::Sequence.new(family, line_match[1], line_match[2].to_i, line_match[3].to_i, autoload: true)
|
60
58
|
end
|
61
59
|
end
|
62
|
-
end
|
60
|
+
end
|
data/lib/rbfam/modules/family.rb
CHANGED
@@ -1,56 +1,54 @@
|
|
1
1
|
module Rbfam
|
2
2
|
class Family
|
3
3
|
include Rbfam::CommonHelpers
|
4
|
-
|
4
|
+
|
5
5
|
attr_reader :family_name, :description
|
6
|
-
|
6
|
+
|
7
7
|
class << self
|
8
8
|
def method_missing(name, *args, &block)
|
9
9
|
READABLE[name.to_s] ? new(READABLE[name.to_s]) : super
|
10
10
|
end
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
def initialize(family_name, description = "")
|
14
14
|
@family_name = family_name
|
15
15
|
@description = description
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
def id
|
19
19
|
("%s %s" % [family_name, description.downcase]).strip.gsub(/\W+/, "_")
|
20
20
|
end
|
21
|
-
|
21
|
+
|
22
22
|
def alignment
|
23
23
|
Rbfam::Alignment.new(self)
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
def entries(options = {})
|
27
27
|
options = { limit: false }.merge(options)
|
28
|
-
|
28
|
+
|
29
29
|
@parsed_entries ||= pull_from_server.split(/\n/).reject { |line| line =~ /^#/ }[options[:limit] ? 0...options[:limit] : 0..-1].map(&method(:parse_line))
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
def load_entries!(options = {})
|
33
|
-
options = { extended: false, seed: true }.merge(options)
|
34
|
-
|
35
33
|
Rbfam.script("sequences_in_mysql")
|
36
|
-
|
37
|
-
|
34
|
+
|
35
|
+
Rbfam::DB::Sequence.joins(alignment: :family).where(families: { name: family_name }.merge(options)).map do |entry|
|
38
36
|
entry.to_rbfam(self)
|
39
37
|
end
|
40
38
|
end
|
41
|
-
|
39
|
+
|
42
40
|
def save_entries!
|
43
41
|
entries.each(&:save!)
|
44
42
|
end
|
45
|
-
|
43
|
+
|
46
44
|
private
|
47
|
-
|
45
|
+
|
48
46
|
def pull_from_server
|
49
47
|
# It isn't the greatest design pattern to memoize a block where a branch has unmanaged exception raising, but for my uses that should never
|
50
48
|
# happen and needs to blow up hard if it does.
|
51
49
|
url = "http://rfam.sanger.ac.uk/family/regions?entry=%s" % family_name
|
52
50
|
puts "GET: %s" % url unless @reponse
|
53
|
-
|
51
|
+
|
54
52
|
@reponse ||= if (party = HTTParty.get(url)).response.code == "200"
|
55
53
|
puts "RESPONSE: 200 OK"
|
56
54
|
party.parsed_response
|
@@ -62,11 +60,11 @@ module Rbfam
|
|
62
60
|
])
|
63
61
|
end
|
64
62
|
end
|
65
|
-
|
63
|
+
|
66
64
|
def parse_line(line)
|
67
65
|
split_line = line.split(/\t/)
|
68
|
-
|
66
|
+
|
69
67
|
Rbfam::Sequence.new(self, split_line[0], split_line[2].to_i, split_line[3].to_i, autoload: true)
|
70
68
|
end
|
71
69
|
end
|
72
|
-
end
|
70
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Rbfam
|
2
|
-
class Sequence
|
2
|
+
class Sequence < ActiveRecord::Base
|
3
3
|
attr_reader :family, :accession, :from, :to, :coord_options
|
4
4
|
|
5
5
|
def initialize(family, accession, from, to, options = {})
|
@@ -66,25 +66,15 @@ module Rbfam
|
|
66
66
|
!plus_strand?
|
67
67
|
end
|
68
68
|
|
69
|
-
def sequence
|
69
|
+
alias_method :seq, def sequence
|
70
70
|
@raw_sequence ||= Rbfam::Utils.rna_sequence_from_entrez(accession, up_coord, coord_window)
|
71
71
|
@raw_sequence = minus_strand? ? @raw_sequence.complement : @raw_sequence
|
72
72
|
end
|
73
73
|
|
74
|
-
alias :seq :sequence
|
75
|
-
|
76
|
-
def mfe_structure
|
77
|
-
@mfe_structure ||= ViennaRna::Fold.run(seq).structure
|
78
|
-
end
|
79
|
-
|
80
74
|
def description
|
81
75
|
("%s %s %s" % [accession, from, to]).gsub(/\W+/, "_")
|
82
76
|
end
|
83
77
|
|
84
|
-
def fftbor
|
85
|
-
@fftbor ||= ViennaRna::Fftbor.run(seq: seq, str: mfe_structure)
|
86
|
-
end
|
87
|
-
|
88
78
|
def extend!(coord_options = {})
|
89
79
|
tap do
|
90
80
|
@extended = true
|
@@ -109,7 +99,7 @@ module Rbfam
|
|
109
99
|
case [coord_options[:direction], strand]
|
110
100
|
when [3, :plus], [5, :minus] then Range.new(range.min, range.max + coord_options[:length])
|
111
101
|
when [5, :plus], [3, :minus] then Range.new(range.min - coord_options[:length], range.max)
|
112
|
-
else puts "WARNING: value for :direction key in sequence
|
102
|
+
else puts "WARNING: value for :direction key in sequence retrieval needs to be one of 5, 3, :both - found (%s)" % coord_options[:direction].inspect
|
113
103
|
end
|
114
104
|
end
|
115
105
|
else
|
@@ -121,4 +111,4 @@ module Rbfam
|
|
121
111
|
"#<Rbfam::Sequence #{description} #{seq[0, 20] + ('...' if seq.length > 20)}>"
|
122
112
|
end
|
123
113
|
end
|
124
|
-
end
|
114
|
+
end
|