rbfam 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rbfam.rb CHANGED
@@ -4,7 +4,9 @@ require "entrez"
4
4
  require "httparty"
5
5
  require "active_support/inflector"
6
6
 
7
- Dir[File.join(File.dirname(__FILE__), "rbfam", "modules", "*.rb")].each { |name| require "rbfam/modules/#{File.basename(name, '.rb')}" }
7
# Require every Ruby file under rbfam/helpers, then every file under
# rbfam/modules. Files are located relative to this file's directory but
# required through the load path as "rbfam/<folder>/<basename>".
["helpers", "modules"].each do |folder|
  pattern = File.join(File.dirname(__FILE__), "rbfam", folder, "*.rb")

  Dir[pattern].each do |path|
    require "rbfam/#{folder}/#{File.basename(path, '.rb')}"
  end
end
8
10
 
9
11
  module Rbfam
10
12
  def self.script(name)
@@ -0,0 +1,14 @@
1
module Rbfam
  # Mixin for classes that memoize their parsed entries in @parsed_entries
  # and expose them through an #entries(options) method.
  module CommonHelpers
    # Hook run when the module is included; mixes InstanceMethods into the
    # including class.
    def self.included(base)
      base.send(:include, InstanceMethods)
    end

    module InstanceMethods
      # Discards any memoized entries and re-runs #entries with the given
      # options.
      #
      # @param options [Hash] passed through unchanged to #entries
      # @return whatever the includer's #entries returns
      def entries!(options = {})
        # remove_instance_variable raises NameError when the variable was
        # never set, so only clear the cache if one actually exists.
        remove_instance_variable(:@parsed_entries) if instance_variable_defined?(:@parsed_entries)
        entries(options)
      end
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  module Rbfam
2
2
  class Alignment
3
+ include Rbfam::CommonHelpers
4
+
3
5
  LINE_REGEXP = /^([\w\.]+)\/(\d+)\-(\d+)\s+([AUGC\.]+)$/
4
6
 
5
7
  attr_reader :family, :seed
@@ -8,13 +10,15 @@ module Rbfam
8
10
  @family = family
9
11
  end
10
12
 
11
- def entries(alignment = :seed)
12
- @parsed_entries ||= pull_from_server(alignment).split(/\n/).reject do |line|
13
+ def entries(options = {})
14
+ options = { alignment: :seed, limit: false }.merge(options)
15
+
16
+ @parsed_entries ||= pull_from_server(options[:alignment]).split(/\n/).reject do |line|
13
17
  line =~ /^#/
14
18
  end.select do |line|
15
19
  line =~ LINE_REGEXP
16
- end.map(&method(:parse_line)).tap do
17
- @seed = alignment == :seed
20
+ end[options[:limit] ? 0...options[:limit] : 0..-1].map(&method(:parse_line)).tap do
21
+ @seed = options[:alignment] == :seed
18
22
  end
19
23
  end
20
24
 
@@ -23,11 +27,9 @@ module Rbfam
23
27
  end
24
28
 
25
29
  def load_entries!(options = {})
26
- Rbfam.script("sequences_in_mysql")
30
+ options = { extended: false }.merge(options)
27
31
 
28
- @parsed_entries = SequenceTable.where({ family: family.family_name }.merge(options)).map do |entry|
29
- entry.to_rbfam_sequence(family)
30
- end
32
+ @parsed_entries = family.load_entries!(options)
31
33
  end
32
34
 
33
35
  private
@@ -43,7 +45,7 @@ module Rbfam
43
45
  puts "RESPONSE: 200 OK"
44
46
  party.parsed_response
45
47
  else
46
- raise RuntimeError.new("HTTParty raised the following error when retrieving family %s: %s %s" % [
48
+ raise RuntimeError.new("HTTParty raised the following error when retrieving alignment %s: %s %s" % [
47
49
  family_name,
48
50
  party.response.code,
49
51
  party.response.message
@@ -1,11 +1,17 @@
1
1
  module Rbfam
2
2
  class Family
3
+ include Rbfam::CommonHelpers
4
+
3
5
  attr_reader :family_name
4
6
 
5
7
  class << self
6
8
  def purine; new("RF00167"); end
7
9
  def tpp; new("RF00059"); end
8
10
  def secis_1; new("RF00031"); end
11
+ def trna; new("RF00005"); end
12
+ def let_7; new("RF00027"); end
13
+ def snora71; new("RF00056"); end
14
+ def u7; new("RF00066"); end
9
15
  end
10
16
 
11
17
  def initialize(family_name)
@@ -16,15 +22,19 @@ module Rbfam
16
22
  Rbfam::Alignment.new(self)
17
23
  end
18
24
 
19
- def entries
20
- @parsed_entries ||= pull_from_server.split(/\n/).reject { |line| line =~ /^#/ }.map(&method(:parse_line))
25
# Returns the parsed entries for this family, memoized in @parsed_entries.
#
# On the first call the server response is split into lines, lines starting
# with "#" are dropped, the remainder is optionally truncated and each line
# is handed to parse_line. Later calls return the memoized array, so their
# options have no effect until @parsed_entries is cleared.
#
# @param options [Hash] :limit - number of leading lines to keep
#   (default false, meaning keep every line)
# @return [Array] one parse_line result per retained line
def entries(options = {})
  options = { limit: false }.merge(options)

  @parsed_entries ||= begin
    data_lines = pull_from_server.split(/\n/).reject { |line| line =~ /^#/ }
    window = options[:limit] ? 0...options[:limit] : 0..-1

    data_lines[window].map(&method(:parse_line))
  end
end
22
30
 
23
31
  def load_entries!(options = {})
32
+ options = { extended: false }.merge(options)
33
+
24
34
  Rbfam.script("sequences_in_mysql")
25
35
 
26
36
  @parsed_entries = SequenceTable.where({ family: family_name }.merge(options)).map do |entry|
27
- entry.to_rbfam_sequence(self)
37
+ entry.to_rbfam(self)
28
38
  end
29
39
  end
30
40
 
@@ -1,16 +1,20 @@
1
1
  module Rbfam
2
2
  class Sequence
3
- attr_reader :family, :accession, :from, :to
3
+ attr_reader :family, :accession, :from, :to, :coord_options
4
4
 
5
5
  def initialize(family, accession, from, to, options = {})
6
- @family, @accession, @from, @to = family, accession, from, to
6
+ @family = family
7
+ @accession = accession
8
+ @from = from
9
+ @to = to
10
+ @coord_options = options[:autoload].is_a?(Hash) ? options[:autoload] : {}
7
11
 
8
12
  if options[:sequence]
9
- @raw_sequence = (options[:sequence].is_a?(String) ? Bio::Sequence::NA.new(options[:sequence]) : options[:sequence])
13
+ @raw_sequence = (options[:sequence].is_a?(String) ? Bio::Sequence::NA.new(options[:sequence]) : options[:sequence]).upcase
10
14
  end
11
15
 
12
16
  if options[:autoload]
13
- sequence(options[:autoload].is_a?(Hash) ? options[:autoload] : {})
17
+ sequence
14
18
  end
15
19
  end
16
20
 
@@ -24,8 +28,8 @@ module Rbfam
24
28
  sequence_length: sequence.length,
25
29
  from: from,
26
30
  to: to,
27
- seq_from: up_coord + coord_window(options[:coord_window] || {}).min,
28
- seq_to: up_coord + coord_window(options[:coord_window] || {}).max,
31
+ seq_from: seq_from,
32
+ seq_to: seq_to,
29
33
  seed: options[:seed]
30
34
  })
31
35
  end
@@ -38,6 +42,14 @@ module Rbfam
38
42
  [from, to].max
39
43
  end
40
44
 
45
# Start coordinate of the retrieved sequence: up_coord shifted by the
# smallest offset in coord_window.
def seq_from
  anchor = up_coord
  anchor + coord_window.min
end
48
+
49
# End coordinate of the retrieved sequence: up_coord shifted by the
# largest offset in coord_window.
def seq_to
  anchor = up_coord
  anchor + coord_window.max
end
52
+
41
53
  def strand
42
54
  plus_strand? ? :plus : :minus
43
55
  end
@@ -50,8 +62,8 @@ module Rbfam
50
62
  !plus_strand?
51
63
  end
52
64
 
53
- def sequence(options = {})
54
- @raw_sequence ||= Rbfam::Utils.rna_sequence_from_entrez(accession, up_coord, coord_window(options))
65
# Returns the nucleotide sequence for this entry.
#
# If no sequence is loaded yet it is fetched through
# Rbfam::Utils.rna_sequence_from_entrez using the accession, up_coord and
# coord_window. For minus-strand entries the loaded sequence is complemented.
#
# The complement is applied only once per instance. It used to be re-applied
# on every call, so consecutive calls on a minus-strand entry alternated
# between the complemented and the uncomplemented sequence.
#
# @return the sequence object (the value of @raw_sequence)
def sequence
  @raw_sequence ||= Rbfam::Utils.rna_sequence_from_entrez(accession, up_coord, coord_window)

  unless @strand_applied
    @raw_sequence = @raw_sequence.complement if minus_strand?
    @strand_applied = true
  end

  @raw_sequence
end
57
69
 
@@ -61,23 +73,27 @@ module Rbfam
61
73
  @mfe_structure ||= ViennaRna::Fold.run(seq).structure
62
74
  end
63
75
 
76
# Identifier built from the accession and the from/to coordinates, with
# every run of non-word characters collapsed into a single underscore.
#
# @return [String]
def description
  label = format("%s %s %s", accession, from, to)
  label.gsub(/\W+/, "_")
end
79
+
64
80
  def fftbor
65
81
  @fftbor ||= ViennaRna::Fftbor.run(seq: seq, str: mfe_structure)
66
82
  end
67
83
 
68
- def coord_window(options = {})
69
- # Ex: { length: 300, extend: 3 }
84
+ def coord_window
85
+ # Options from @coord_options ex: { length: 300, extend: 3 }
70
86
 
71
87
  range = 0..(down_coord - up_coord)
72
88
 
73
- if options[:length] && options[:extend]
74
- if range.count < options[:length]
75
- length_difference = options[:length] - range.count
89
+ if @coord_options[:length] && @coord_options[:extend]
90
+ if range.count < @coord_options[:length]
91
+ length_difference = @coord_options[:length] - range.count
76
92
 
77
- case [options[:extend], strand]
93
+ case [@coord_options[:extend], strand]
78
94
  when [3, :plus], [5, :minus] then Range.new(range.min, range.max + length_difference)
79
95
  when [5, :plus], [3, :minus] then Range.new(range.min - length_difference, range.max)
80
- else puts "WARNING: value for :extend key in sequence retreival needs to be one of 5, 3 - found (%s)" % options[:extend]
96
+ else puts "WARNING: value for :extend key in sequence retreival needs to be one of 5, 3 - found (%s)" % @coord_options[:extend]
81
97
  end
82
98
  else
83
99
  puts "WARNING: %s %d-%d (%s) is length %d, but only %d nt. have been requested. Providing the full sequence anyways." % [
@@ -86,12 +102,16 @@ module Rbfam
86
102
  to,
87
103
  strand,
88
104
  range.count,
89
- options[:length]
105
+ @coord_options[:length]
90
106
  ]
91
107
  end
92
108
  else
93
109
  range
94
110
  end
95
111
  end
112
+
113
# Short debug representation: the description followed by at most the first
# 20 characters of seq, with "..." appended when seq is longer than that.
#
# The ellipsis is appended conditionally; the earlier form added the result
# of a modifier-if (nil for short sequences) to a String, which raised
# TypeError for any sequence of 20 characters or fewer.
#
# @return [String]
def inspect
  preview = seq[0, 20]
  preview += "..." if seq.length > 20

  "#<Rbfam::Sequence #{description} #{preview}>"
end
96
116
  end
97
117
  end
@@ -1,3 +1,5 @@
1
+ # SequenceTable(id: integer, family: string, accession: string, sequence: text, sequence_length: integer, from: integer, to: integer, seq_from: integer, seq_to: integer, seed: boolean, created_at: datetime, updated_at: datetime, extended: boolean)
2
+
1
3
  require "mysql2"
2
4
  require "active_record"
3
5
 
@@ -6,7 +8,7 @@ class Object; def this; self; end; end
6
8
  class SequenceTable < ActiveRecord::Base
7
9
  self.table_name = "sequences"
8
10
 
9
- validates_uniqueness_of :accession, scope: [:sequence, :structure]
11
+ validates_uniqueness_of :accession, scope: [:sequence, :seq_from, :seq_to]
10
12
 
11
13
  def self.connect
12
14
  ActiveRecord::Base.establish_connection(config = { adapter: "mysql2", username: "root", reconnect: true })
@@ -20,34 +22,10 @@ class SequenceTable < ActiveRecord::Base
20
22
  inline_rails if defined?(inline_rails)
21
23
  end
22
24
 
23
- def clean_description
24
- ("%s %s %s" % [accession, seq_from, seq_to]).gsub(/[^A-Za-z0-9]/, "_")
25
- end
26
-
27
- def to_rbfam_sequence(family = nil)
25
# Converts this record into an Rbfam::Sequence carrying the stored sequence.
#
# @param family [Rbfam::Family, nil] family to attach; when nil, a new
#   Rbfam::Family is built from this record's own family value
# @return [Rbfam::Sequence]
def to_rbfam(family = nil)
  # Should use a singleton pattern here to look up the family.
  #
  # The parameter shadows the record's family attribute, so the fallback has
  # to read it through self; otherwise Family.new would always receive nil.
  family ||= Rbfam::Family.new(self.family)

  Rbfam::Sequence.new(family, accession, from, to, sequence: sequence)
end
30
29
  end
31
30
 
32
- SequenceTable.connect
33
-
34
- class BuildSequence < ActiveRecord::Migration
35
- def self.up
36
- create_table :sequences do |table|
37
- table.string :family
38
- table.string :accession
39
- table.text :sequence
40
- table.integer :sequence_length
41
- table.integer :from
42
- table.integer :to
43
- table.integer :seq_from
44
- table.integer :seq_to
45
- table.boolean :seed, default: false
46
- table.timestamps
47
- end
48
- end
49
- end
50
-
51
- unless ActiveRecord::Base.connection.execute("show tables").map(&:this).flatten.include?("sequences")
52
- BuildSequence.up
53
- end
31
+ SequenceTable.connect
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbfam
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-16 00:00:00.000000000 Z
12
+ date: 2012-09-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bio
@@ -81,6 +81,7 @@ executables: []
81
81
  extensions: []
82
82
  extra_rdoc_files: []
83
83
  files:
84
+ - lib/rbfam/helpers/common.rb
84
85
  - lib/rbfam/modules/alignment.rb
85
86
  - lib/rbfam/modules/family.rb
86
87
  - lib/rbfam/modules/sequence.rb