rbfam 0.1.3 → 0.2.0
This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +95 -0
- data/Rakefile +3 -0
- data/db/config.yml +5 -0
- data/db/migrate/20140114232757_add_alignment_table.rb +14 -0
- data/db/migrate/20140114232805_add_sequence_table.rb +17 -0
- data/db/migrate/20140114232810_add_family_table.rb +15 -0
- data/db/schema.rb +44 -0
- data/lib/rbfam.rb +10 -3
- data/lib/rbfam/modules/alignment.rb +15 -17
- data/lib/rbfam/modules/family.rb +17 -19
- data/lib/rbfam/modules/sequence.rb +4 -14
- data/lib/rbfam/scripts/sequences_in_mysql.rb +38 -21
- data/lib/rbfam/version.rb +3 -0
- data/load_rbfam_seed.rb +28 -0
- data/rbfam.gemspec +34 -0
- data/rfam_12.seed.utf8 +139884 -0
- metadata +169 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a266530dd38f3e5f9505a9f4ed0ce6ca4ccdea89
|
4
|
+
data.tar.gz: 2d0376d8ba04b9afe22a7d434f75d9e8bc17a4d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1534185f816b75dc489fd5b92bb37f5e133a1cf2c6f9c7dbb65fda8184bb8cd7d7ffb5b40e6ca10a608b75355a209246ad41843e9def666f8c9a07422d846a7
|
7
|
+
data.tar.gz: cbdf33c78a5a248a5eb59a02a39affaa6b3ed66fd51651a68e8ac0db18c2ed22157b891a4a2a5c16d37fff48a4c0a8d20a37d008b5e2da251c16639f914402af
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
rbfam (0.2.0)
|
5
|
+
activerecord
|
6
|
+
activesupport
|
7
|
+
bio (~> 1.4, >= 1.4.2)
|
8
|
+
bio-stockholm (~> 0.0, >= 0.0.1)
|
9
|
+
entrez (~> 0.5, >= 0.5.8.1)
|
10
|
+
httparty (~> 0.8, >= 0.8.3)
|
11
|
+
mysql2 (~> 0.3, >= 0.3.14)
|
12
|
+
nokogiri (~> 1.6, >= 1.6.1)
|
13
|
+
parallel (~> 1.3, >= 1.3.2)
|
14
|
+
|
15
|
+
GEM
|
16
|
+
remote: https://rubygems.org/
|
17
|
+
specs:
|
18
|
+
actionpack (3.2.21)
|
19
|
+
activemodel (= 3.2.21)
|
20
|
+
activesupport (= 3.2.21)
|
21
|
+
builder (~> 3.0.0)
|
22
|
+
erubis (~> 2.7.0)
|
23
|
+
journey (~> 1.0.4)
|
24
|
+
rack (~> 1.4.5)
|
25
|
+
rack-cache (~> 1.2)
|
26
|
+
rack-test (~> 0.6.1)
|
27
|
+
sprockets (~> 2.2.1)
|
28
|
+
activemodel (3.2.21)
|
29
|
+
activesupport (= 3.2.21)
|
30
|
+
builder (~> 3.0.0)
|
31
|
+
activerecord (3.2.21)
|
32
|
+
activemodel (= 3.2.21)
|
33
|
+
activesupport (= 3.2.21)
|
34
|
+
arel (~> 3.0.2)
|
35
|
+
tzinfo (~> 0.3.29)
|
36
|
+
activesupport (3.2.21)
|
37
|
+
i18n (~> 0.6, >= 0.6.4)
|
38
|
+
multi_json (~> 1.0)
|
39
|
+
arel (3.0.3)
|
40
|
+
bio (1.4.3.0001)
|
41
|
+
bio-stockholm (0.0.1)
|
42
|
+
builder (3.0.4)
|
43
|
+
entrez (0.5.8.1)
|
44
|
+
httparty
|
45
|
+
erubis (2.7.0)
|
46
|
+
hike (1.2.3)
|
47
|
+
httparty (0.13.3)
|
48
|
+
json (~> 1.8)
|
49
|
+
multi_xml (>= 0.5.2)
|
50
|
+
i18n (0.6.11)
|
51
|
+
journey (1.0.4)
|
52
|
+
json (1.8.1)
|
53
|
+
mini_portile (0.6.1)
|
54
|
+
multi_json (1.10.1)
|
55
|
+
multi_xml (0.5.5)
|
56
|
+
mysql2 (0.3.17)
|
57
|
+
nokogiri (1.6.5)
|
58
|
+
mini_portile (~> 0.6.0)
|
59
|
+
parallel (1.3.3)
|
60
|
+
rack (1.4.5)
|
61
|
+
rack-cache (1.2)
|
62
|
+
rack (>= 0.4)
|
63
|
+
rack-ssl (1.3.4)
|
64
|
+
rack
|
65
|
+
rack-test (0.6.2)
|
66
|
+
rack (>= 1.0)
|
67
|
+
railties (3.2.21)
|
68
|
+
actionpack (= 3.2.21)
|
69
|
+
activesupport (= 3.2.21)
|
70
|
+
rack-ssl (~> 1.3.2)
|
71
|
+
rake (>= 0.8.7)
|
72
|
+
rdoc (~> 3.4)
|
73
|
+
thor (>= 0.14.6, < 2.0)
|
74
|
+
rake (10.4.2)
|
75
|
+
rdoc (3.12.2)
|
76
|
+
json (~> 1.4)
|
77
|
+
sprockets (2.2.3)
|
78
|
+
hike (~> 1.2)
|
79
|
+
multi_json (~> 1.0)
|
80
|
+
rack (~> 1.0)
|
81
|
+
tilt (~> 1.1, != 1.3.0)
|
82
|
+
standalone_migrations (2.1.5)
|
83
|
+
activerecord (~> 3.2)
|
84
|
+
railties (~> 3.2)
|
85
|
+
rake (~> 10.0)
|
86
|
+
thor (0.19.1)
|
87
|
+
tilt (1.4.1)
|
88
|
+
tzinfo (0.3.42)
|
89
|
+
|
90
|
+
PLATFORMS
|
91
|
+
ruby
|
92
|
+
|
93
|
+
DEPENDENCIES
|
94
|
+
rbfam!
|
95
|
+
standalone_migrations (~> 2.1, >= 2.1.5)
|
data/Rakefile
ADDED
data/db/config.yml
ADDED
data/db/migrate/20140114232757_add_alignment_table.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
class AddAlignmentTable < ActiveRecord::Migration
|
2
|
+
def up
|
3
|
+
create_table :alignments do |table|
|
4
|
+
table.timestamps
|
5
|
+
table.belongs_to :family
|
6
|
+
table.text :stockholm, limit: 4294967295
|
7
|
+
table.text :consensus_structure
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def down
|
12
|
+
drop_table :alignments
|
13
|
+
end
|
14
|
+
end
|
data/db/migrate/20140114232805_add_sequence_table.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
class AddSequenceTable < ActiveRecord::Migration
|
2
|
+
def up
|
3
|
+
create_table :sequences do |table|
|
4
|
+
table.timestamps
|
5
|
+
table.belongs_to :alignment
|
6
|
+
table.string :accession
|
7
|
+
table.text :stripped_sequence
|
8
|
+
table.text :alignment_sequence
|
9
|
+
table.integer :from
|
10
|
+
table.integer :to
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def down
|
15
|
+
drop_table :sequences
|
16
|
+
end
|
17
|
+
end
|
data/db/migrate/20140114232810_add_family_table.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
class AddFamilyTable < ActiveRecord::Migration
|
2
|
+
def up
|
3
|
+
create_table :families do |table|
|
4
|
+
table.timestamps
|
5
|
+
table.string :name
|
6
|
+
table.string :description
|
7
|
+
end
|
8
|
+
|
9
|
+
add_index :families, :name, unique: true
|
10
|
+
end
|
11
|
+
|
12
|
+
def down
|
13
|
+
drop_table :families
|
14
|
+
end
|
15
|
+
end
|
data/db/schema.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# This file is auto-generated from the current state of the database. Instead
|
3
|
+
# of editing this file, please use the migrations feature of Active Record to
|
4
|
+
# incrementally modify your database, and then regenerate this schema definition.
|
5
|
+
#
|
6
|
+
# Note that this schema.rb definition is the authoritative source for your
|
7
|
+
# database schema. If you need to create the application database on another
|
8
|
+
# system, you should be using db:schema:load, not running all the migrations
|
9
|
+
# from scratch. The latter is a flawed and unsustainable approach (the more migrations
|
10
|
+
# you'll amass, the slower it'll run and the greater likelihood for issues).
|
11
|
+
#
|
12
|
+
# It's strongly recommended to check this file into your version control system.
|
13
|
+
|
14
|
+
ActiveRecord::Schema.define(:version => 20140114232810) do
|
15
|
+
|
16
|
+
create_table "alignments", :force => true do |t|
|
17
|
+
t.datetime "created_at", :null => false
|
18
|
+
t.datetime "updated_at", :null => false
|
19
|
+
t.integer "family_id"
|
20
|
+
t.text "stockholm", :limit => 2147483647
|
21
|
+
t.text "consensus_structure"
|
22
|
+
end
|
23
|
+
|
24
|
+
create_table "families", :force => true do |t|
|
25
|
+
t.datetime "created_at", :null => false
|
26
|
+
t.datetime "updated_at", :null => false
|
27
|
+
t.string "name"
|
28
|
+
t.string "description"
|
29
|
+
end
|
30
|
+
|
31
|
+
add_index "families", ["name"], :name => "index_families_on_name", :unique => true
|
32
|
+
|
33
|
+
create_table "sequences", :force => true do |t|
|
34
|
+
t.datetime "created_at", :null => false
|
35
|
+
t.datetime "updated_at", :null => false
|
36
|
+
t.integer "alignment_id"
|
37
|
+
t.string "accession"
|
38
|
+
t.text "stripped_sequence"
|
39
|
+
t.text "alignment_sequence"
|
40
|
+
t.integer "from"
|
41
|
+
t.integer "to"
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
data/lib/rbfam.rb
CHANGED
@@ -1,15 +1,22 @@
|
|
1
|
-
require "
|
1
|
+
require "active_record"
|
2
|
+
require "active_support/inflector"
|
2
3
|
require "bio"
|
3
|
-
require "
|
4
|
+
require "bio-stockholm"
|
4
5
|
require "entrez"
|
5
6
|
require "httparty"
|
6
|
-
require "
|
7
|
+
require "json"
|
8
|
+
require "mysql2"
|
9
|
+
require "parallel"
|
7
10
|
|
8
11
|
%W|helpers modules|.each do |folder|
|
9
12
|
Dir[File.join(File.dirname(__FILE__), "rbfam", folder, "*.rb")].each { |name| require "rbfam/#{folder}/#{File.basename(name, '.rb')}" }
|
10
13
|
end
|
11
14
|
|
12
15
|
module Rbfam
|
16
|
+
def self.connect(config: nil)
|
17
|
+
ActiveRecord::Base.establish_connection(config || YAML.load_file(File.join(File.dirname(__FILE__), "..", "db", "config.yml"))["development"])
|
18
|
+
end
|
19
|
+
|
13
20
|
def self.script(name)
|
14
21
|
require "rbfam/scripts/#{File.basename(name, '.rb')}"
|
15
22
|
end
|
data/lib/rbfam/modules/alignment.rb
CHANGED
@@ -1,46 +1,44 @@
|
|
1
1
|
module Rbfam
|
2
2
|
class Alignment
|
3
3
|
include Rbfam::CommonHelpers
|
4
|
-
|
4
|
+
|
5
5
|
LINE_REGEXP = /^([\w\.]+)\/(\d+)\-(\d+)\s+([AUGC\.]+)$/
|
6
|
-
|
6
|
+
|
7
7
|
attr_reader :family, :seed
|
8
|
-
|
8
|
+
|
9
9
|
def initialize(family)
|
10
10
|
@family = family
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
def entries(options = {})
|
14
14
|
options = { alignment: :seed, limit: false }.merge(options)
|
15
|
-
|
16
|
-
@parsed_entries ||= (pull_from_server(options[:alignment]) || "").split(/\n/).reject do |line|
|
15
|
+
|
16
|
+
@parsed_entries ||= (pull_from_server(options[:alignment]) || "").split(/\n/).reject do |line|
|
17
17
|
line =~ /^#/
|
18
|
-
end.select do |line|
|
18
|
+
end.select do |line|
|
19
19
|
line =~ LINE_REGEXP
|
20
20
|
end[options[:limit] ? 0...options[:limit] : 0..-1].map(&method(:parse_line)).tap do
|
21
21
|
@seed = options[:alignment] == :seed
|
22
22
|
end
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
def save_entries!
|
26
26
|
entries.each { |sequence| sequence.save!(seed: seed) }
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
def load_entries!(options = {})
|
30
|
-
options = { extended: false }.merge(options)
|
31
|
-
|
32
30
|
@parsed_entries = family.load_entries!(options)
|
33
31
|
end
|
34
|
-
|
32
|
+
|
35
33
|
private
|
36
|
-
|
34
|
+
|
37
35
|
def pull_from_server(alignment)
|
38
36
|
url = "http://rfam.sanger.ac.uk/family/alignment/download/format?acc=%s&alnType=%s&nseLabels=1&format=pfam&download=0" % [
|
39
37
|
family.family_name,
|
40
38
|
alignment
|
41
39
|
]
|
42
40
|
puts "GET: %s" % url unless @reponse
|
43
|
-
|
41
|
+
|
44
42
|
@response ||= if (party = HTTParty.get(url)).response.code == "200"
|
45
43
|
puts "RESPONSE: 200 OK"
|
46
44
|
party.parsed_response
|
@@ -52,11 +50,11 @@ module Rbfam
|
|
52
50
|
])
|
53
51
|
end
|
54
52
|
end
|
55
|
-
|
53
|
+
|
56
54
|
def parse_line(line)
|
57
55
|
line_match = line.match(LINE_REGEXP)
|
58
|
-
|
56
|
+
|
59
57
|
Rbfam::Sequence.new(family, line_match[1], line_match[2].to_i, line_match[3].to_i, autoload: true)
|
60
58
|
end
|
61
59
|
end
|
62
|
-
end
|
60
|
+
end
|
data/lib/rbfam/modules/family.rb
CHANGED
@@ -1,56 +1,54 @@
|
|
1
1
|
module Rbfam
|
2
2
|
class Family
|
3
3
|
include Rbfam::CommonHelpers
|
4
|
-
|
4
|
+
|
5
5
|
attr_reader :family_name, :description
|
6
|
-
|
6
|
+
|
7
7
|
class << self
|
8
8
|
def method_missing(name, *args, &block)
|
9
9
|
READABLE[name.to_s] ? new(READABLE[name.to_s]) : super
|
10
10
|
end
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
def initialize(family_name, description = "")
|
14
14
|
@family_name = family_name
|
15
15
|
@description = description
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
def id
|
19
19
|
("%s %s" % [family_name, description.downcase]).strip.gsub(/\W+/, "_")
|
20
20
|
end
|
21
|
-
|
21
|
+
|
22
22
|
def alignment
|
23
23
|
Rbfam::Alignment.new(self)
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
def entries(options = {})
|
27
27
|
options = { limit: false }.merge(options)
|
28
|
-
|
28
|
+
|
29
29
|
@parsed_entries ||= pull_from_server.split(/\n/).reject { |line| line =~ /^#/ }[options[:limit] ? 0...options[:limit] : 0..-1].map(&method(:parse_line))
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
def load_entries!(options = {})
|
33
|
-
options = { extended: false, seed: true }.merge(options)
|
34
|
-
|
35
33
|
Rbfam.script("sequences_in_mysql")
|
36
|
-
|
37
|
-
|
34
|
+
|
35
|
+
Rbfam::DB::Sequence.joins(alignment: :family).where(families: { name: family_name }.merge(options)).map do |entry|
|
38
36
|
entry.to_rbfam(self)
|
39
37
|
end
|
40
38
|
end
|
41
|
-
|
39
|
+
|
42
40
|
def save_entries!
|
43
41
|
entries.each(&:save!)
|
44
42
|
end
|
45
|
-
|
43
|
+
|
46
44
|
private
|
47
|
-
|
45
|
+
|
48
46
|
def pull_from_server
|
49
47
|
# It isn't the greatest design pattern to memoize a block where a branch has unmanaged exception raising, but for my uses that should never
|
50
48
|
# happen and needs to blow up hard if it does.
|
51
49
|
url = "http://rfam.sanger.ac.uk/family/regions?entry=%s" % family_name
|
52
50
|
puts "GET: %s" % url unless @reponse
|
53
|
-
|
51
|
+
|
54
52
|
@reponse ||= if (party = HTTParty.get(url)).response.code == "200"
|
55
53
|
puts "RESPONSE: 200 OK"
|
56
54
|
party.parsed_response
|
@@ -62,11 +60,11 @@ module Rbfam
|
|
62
60
|
])
|
63
61
|
end
|
64
62
|
end
|
65
|
-
|
63
|
+
|
66
64
|
def parse_line(line)
|
67
65
|
split_line = line.split(/\t/)
|
68
|
-
|
66
|
+
|
69
67
|
Rbfam::Sequence.new(self, split_line[0], split_line[2].to_i, split_line[3].to_i, autoload: true)
|
70
68
|
end
|
71
69
|
end
|
72
|
-
end
|
70
|
+
end
|
data/lib/rbfam/modules/sequence.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Rbfam
|
2
|
-
class Sequence
|
2
|
+
class Sequence < ActiveRecord::Base
|
3
3
|
attr_reader :family, :accession, :from, :to, :coord_options
|
4
4
|
|
5
5
|
def initialize(family, accession, from, to, options = {})
|
@@ -66,25 +66,15 @@ module Rbfam
|
|
66
66
|
!plus_strand?
|
67
67
|
end
|
68
68
|
|
69
|
-
def sequence
|
69
|
+
alias_method :seq, def sequence
|
70
70
|
@raw_sequence ||= Rbfam::Utils.rna_sequence_from_entrez(accession, up_coord, coord_window)
|
71
71
|
@raw_sequence = minus_strand? ? @raw_sequence.complement : @raw_sequence
|
72
72
|
end
|
73
73
|
|
74
|
-
alias :seq :sequence
|
75
|
-
|
76
|
-
def mfe_structure
|
77
|
-
@mfe_structure ||= ViennaRna::Fold.run(seq).structure
|
78
|
-
end
|
79
|
-
|
80
74
|
def description
|
81
75
|
("%s %s %s" % [accession, from, to]).gsub(/\W+/, "_")
|
82
76
|
end
|
83
77
|
|
84
|
-
def fftbor
|
85
|
-
@fftbor ||= ViennaRna::Fftbor.run(seq: seq, str: mfe_structure)
|
86
|
-
end
|
87
|
-
|
88
78
|
def extend!(coord_options = {})
|
89
79
|
tap do
|
90
80
|
@extended = true
|
@@ -109,7 +99,7 @@ module Rbfam
|
|
109
99
|
case [coord_options[:direction], strand]
|
110
100
|
when [3, :plus], [5, :minus] then Range.new(range.min, range.max + coord_options[:length])
|
111
101
|
when [5, :plus], [3, :minus] then Range.new(range.min - coord_options[:length], range.max)
|
112
|
-
else puts "WARNING: value for :direction key in sequence
|
102
|
+
else puts "WARNING: value for :direction key in sequence retrieval needs to be one of 5, 3, :both - found (%s)" % coord_options[:direction].inspect
|
113
103
|
end
|
114
104
|
end
|
115
105
|
else
|
@@ -121,4 +111,4 @@ module Rbfam
|
|
121
111
|
"#<Rbfam::Sequence #{description} #{seq[0, 20] + ('...' if seq.length > 20)}>"
|
122
112
|
end
|
123
113
|
end
|
124
|
-
end
|
114
|
+
end
|