bio 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. data/COPYING +56 -0
  2. data/COPYING.ja +51 -0
  3. data/ChangeLog +540 -0
  4. data/GPL +340 -0
  5. data/LEGAL +141 -0
  6. data/LGPL +504 -0
  7. data/README.rdoc +4 -2
  8. data/Rakefile +2 -2
  9. data/bioruby.gemspec +17 -29
  10. data/doc/Tutorial.rd +118 -90
  11. data/doc/Tutorial.rd.html +124 -87
  12. data/lib/bio/appl/blast.rb +2 -2
  13. data/lib/bio/appl/blast/format0.rb +1 -1
  14. data/lib/bio/appl/fasta.rb +5 -12
  15. data/lib/bio/appl/fasta/format10.rb +96 -6
  16. data/lib/bio/appl/gcg/msf.rb +11 -14
  17. data/lib/bio/appl/pts1.rb +0 -4
  18. data/lib/bio/appl/sim4/report.rb +50 -17
  19. data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
  20. data/lib/bio/db/biosql/sequence.rb +234 -298
  21. data/lib/bio/db/embl/embl.rb +0 -3
  22. data/lib/bio/db/genbank/common.rb +3 -1
  23. data/lib/bio/io/biosql/ar-biosql.rb +257 -0
  24. data/lib/bio/io/biosql/biosql.rb +39 -0
  25. data/lib/bio/io/biosql/config/database.yml +5 -4
  26. data/lib/bio/io/ncbirest.rb +12 -5
  27. data/lib/bio/io/pubmed.rb +5 -1
  28. data/lib/bio/io/sql.rb +43 -150
  29. data/lib/bio/sequence/compat.rb +5 -1
  30. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
  31. data/lib/bio/version.rb +1 -1
  32. data/test/data/gcg/pileup-aa.msf +67 -0
  33. data/test/data/sim4/complement-A4.sim4 +43 -0
  34. data/test/data/sim4/simple-A4.sim4 +25 -0
  35. data/test/data/sim4/simple2-A4.sim4 +25 -0
  36. data/test/functional/bio/io/test_pubmed.rb +129 -0
  37. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
  38. data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
  39. data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
  40. data/test/unit/bio/appl/sim4/test_report.rb +869 -0
  41. data/test/unit/bio/appl/test_blast.rb +1 -1
  42. data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
  43. data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
  44. data/test/unit/bio/test_feature.rb +18 -17
  45. data/test/unit/bio/test_reference.rb +18 -18
  46. data/test/unit/bio/test_sequence.rb +1 -1
  47. metadata +18 -30
  48. data/lib/bio/io/biosql/biodatabase.rb +0 -64
  49. data/lib/bio/io/biosql/bioentry.rb +0 -29
  50. data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
  51. data/lib/bio/io/biosql/bioentry_path.rb +0 -12
  52. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
  53. data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
  54. data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
  55. data/lib/bio/io/biosql/biosequence.rb +0 -11
  56. data/lib/bio/io/biosql/comment.rb +0 -7
  57. data/lib/bio/io/biosql/dbxref.rb +0 -13
  58. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
  59. data/lib/bio/io/biosql/location.rb +0 -32
  60. data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
  61. data/lib/bio/io/biosql/ontology.rb +0 -10
  62. data/lib/bio/io/biosql/reference.rb +0 -9
  63. data/lib/bio/io/biosql/seqfeature.rb +0 -32
  64. data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
  65. data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
  66. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
  67. data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
  68. data/lib/bio/io/biosql/taxon.rb +0 -12
  69. data/lib/bio/io/biosql/taxon_name.rb +0 -9
  70. data/lib/bio/io/biosql/term.rb +0 -27
  71. data/lib/bio/io/biosql/term_dbxref.rb +0 -11
  72. data/lib/bio/io/biosql/term_path.rb +0 -12
  73. data/lib/bio/io/biosql/term_relationship.rb +0 -13
  74. data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
  75. data/lib/bio/io/biosql/term_synonym.rb +0 -10
@@ -448,9 +448,6 @@ class EMBL < EMBLDB
448
448
  # taxonomy classification
449
449
  alias classification oc
450
450
 
451
- # features
452
- alias features ft
453
-
454
451
 
455
452
  # converts the entry to Bio::Sequence object
456
453
  # ---
@@ -158,7 +158,9 @@ module Common
158
158
  authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
159
159
  hash['authors'] = authors
160
160
  when /TITLE/
161
- hash['title'] = truncate(tag_cut(field)) + '.'
161
+ hash['title'] = truncate(tag_cut(field))
162
+ # CHECK: GenBank does not actually require a dot at the end of TITLE
163
+ #+ '.'
162
164
  when /JOURNAL/
163
165
  journal = truncate(tag_cut(field))
164
166
  if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
@@ -0,0 +1,257 @@
1
+ module Bio
2
+ class SQL
3
+ class Biodatabase < DummyBase
4
+ has_many :bioentries, :class_name =>"Bioentry", :foreign_key => "biodatabase_id"
5
+ validates_uniqueness_of :name
6
+ end
7
+ class BioentryDbxref < DummyBase
8
+ #delete set_sequence_name nil
9
+ set_primary_key nil #bioentry_id,dbxref_id
10
+ belongs_to :bioentry, :class_name => "Bioentry"
11
+ belongs_to :dbxref, :class_name => "Dbxref"
12
+ end
13
+
14
+ class BioentryPath < DummyBase
15
+ set_primary_key nil
16
+ #delete set_sequence_name nil
17
+ belongs_to :term, :class_name => "Term"
18
+ #TODO: needs to be fixed before we can proceed.
19
+ belongs_to :object_bioentry, :class_name=>"Bioentry"
20
+ belongs_to :subject_bioentry, :class_name=>"Bioentry"
21
+ end #BioentryPath
22
+
23
+ class BioentryQualifierValue < DummyBase
24
+ #NOTE: added rank to primary_keys, now it's finished.
25
+ set_primary_keys :bioentry_id, :term_id, :rank
26
+ belongs_to :bioentry, :class_name => "Bioentry"
27
+ belongs_to :term, :class_name => "Term"
28
+ end #BioentryQualifierValue
29
+
30
+ class Bioentry < DummyBase
31
+ belongs_to :biodatabase, :class_name => "Biodatabase"
32
+ belongs_to :taxon, :class_name => "Taxon"
33
+ has_one :biosequence
34
+ #, :class_name => "Biosequence", :foreign_key => "bioentry_id"
35
+ has_many :comments, :class_name =>"Comment", :order =>'rank'
36
+ has_many :seqfeatures, :class_name => "Seqfeature", :order=>'rank'
37
+ has_many :bioentry_references, :class_name=>"BioentryReference" #, :foreign_key => "bioentry_id"
38
+ has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
39
+ has_many :object_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"object_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
40
+ has_many :subject_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"subject_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
41
+ has_many :object_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"object_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
42
+ has_many :subject_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"subject_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
43
+
44
+ has_many :cdsfeatures, :class_name=>"Seqfeature", :foreign_key =>"bioentry_id", :conditions=>["term.name='CDS'"], :include=>"type_term"
45
+ has_many :references, :through=>:bioentry_references, :class_name => "Reference"
46
+ has_many :terms, :through=>:bioentry_qualifier_values, :class_name => "Term"
47
+ #NOTE: added order_by for multiple hit and manage ranks correctly
48
+ has_many :bioentry_qualifier_values, :order=>"bioentry_id,term_id,rank", :class_name => "BioentryQualifierValue"
49
+
50
+ #required for creation:
51
+ #name, accession, version
52
+ # validates_uniqueness_of :accession, :scope=>[:biodatabase_id]
53
+ # validates_uniqueness_of :name, :scope=>[:biodatabase_id]
54
+ # validates_uniqueness_of :identifier, :scope=>[:biodatabase_id]
55
+
56
+ end
57
+ class BioentryReference < DummyBase
58
+ set_primary_keys :bioentry_id, :reference_id, :rank
59
+ belongs_to :bioentry, :class_name => "Bioentry"
60
+ belongs_to :reference , :class_name => "Reference"
61
+ end
62
+ class BioentryRelationship < DummyBase
63
+ #delete set_primary_key "bioentry_relationship_id"
64
+ set_sequence_name "bieontry_relationship_pk_seq"
65
+ belongs_to :object_bioentry, :class_name => "Bioentry"
66
+ belongs_to :subject_bioentry, :class_name => "Bioentry"
67
+ belongs_to :term
68
+ end
69
+ class Biosequence < DummyBase
70
+ set_primary_keys :bioentry_id, :version
71
+ #delete set_sequence_name "biosequence_pk_seq"
72
+ belongs_to :bioentry, :foreign_key=>"bioentry_id"
73
+ #has_one :bioentry
74
+ #, :class_name => "Bioentry"
75
+ end
76
+ class Comment < DummyBase
77
+ belongs_to :bioentry, :class_name => "Bioentry"
78
+ end
79
+ class DbxrefQualifierValue < DummyBase
80
+ #think to use composite primary key
81
+ set_primary_key nil #dbxref_id, term_id, rank
82
+ #delete set_sequence_name nil
83
+ belongs_to :dbxref, :class_name => "Dbxref"
84
+ belongs_to :term, :class_name => "Term"
85
+ end
86
+ class Dbxref < DummyBase
87
+ #set_sequence_name "dbxref_pk_seq"
88
+ has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
89
+ has_many :locations, :class_name => "Location"
90
+ has_many :references, :class_name=>"Reference"
91
+ has_many :term_dbxrefs, :class_name => "TermDbxref"
92
+ has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
93
+ #TODO: check whether there is a has_and_belongs_to_many relationship with bioentry, as specified in the schema overview.
94
+ end
95
+ class LocationQualifierValue < DummyBase
96
+ set_primary_key nil #location_id, term_id
97
+ #delete set_sequence_name nil
98
+ belongs_to :location, :class_name => "Location"
99
+ belongs_to :term, :class_name => "Term"
100
+ end
101
+ class Location < DummyBase
102
+ #set_sequence_name "location_pk_seq"
103
+ belongs_to :seqfeature, :class_name => "Seqfeature"
104
+ belongs_to :dbxref, :class_name => "Dbxref"
105
+ belongs_to :term, :class_name => "Term"
106
+ has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
107
+
108
+ def to_s
109
+ if strand==-1
110
+ str="complement("+start_pos.to_s+".."+end_pos.to_s+")"
111
+ else
112
+ str=start_pos.to_s+".."+end_pos.to_s
113
+ end
114
+ return str
115
+ end
116
+
117
+ def sequence
118
+ seq=""
119
+ unless self.seqfeature.bioentry.biosequence.seq.nil?
120
+ seq=Bio::Sequence::NA.new(self.seqfeature.bioentry.biosequence.seq[start_pos-1..end_pos-1])
121
+ seq.reverse_complement! if strand==-1
122
+ end
123
+ return seq
124
+ end
125
+
126
+
127
+
128
+ end
129
+ class Ontology < DummyBase
130
+ has_many :terms, :class_name => "Term"
131
+ has_many :term_paths, :class_name => "TermPath"
132
+ has_many :term_relationships, :class_name => "TermRelationship"
133
+ end
134
+ class Reference < DummyBase
135
+ belongs_to :dbxref, :class_name => "Dbxref"
136
+ has_many :bioentry_references, :class_name=>"BioentryReference"
137
+ has_many :bioentries, :through=>:bioentry_references
138
+ end
139
+ class SeqfeatureDbxref < DummyBase
140
+ set_primary_keys :seqfeature_id, :dbxref_id
141
+ #delete set_sequence_name nil
142
+ belongs_to :seqfeature, :class_name => "Seqfeature", :foreign_key => "seqfeature_id"
143
+ belongs_to :dbxref, :class_name => "Dbxref", :foreign_key => "dbxref_id"
144
+ end
145
+ class SeqfeaturePath < DummyBase
146
+ set_primary_keys :object_seqfeature_id, :subject_seqfeature_id, :term_id
147
+ set_sequence_name nil
148
+ belongs_to :object_seqfeature, :class_name => "Seqfeature", :foreign_key => "object_seqfeature_id"
149
+ belongs_to :subject_seqfeature, :class_name => "Seqfeature", :foreign_key => "subject_seqfeature_id"
150
+ belongs_to :term, :class_name => "Term"
151
+ end
152
+ class SeqfeatureQualifierValue < DummyBase
153
+ set_primary_keys :seqfeature_id, :term_id, :rank
154
+ set_sequence_name nil
155
+ belongs_to :seqfeature
156
+ belongs_to :term, :class_name => "Term"
157
+ end
158
+ class Seqfeature <DummyBase
159
+ set_sequence_name "seqfeature_pk_seq"
160
+ belongs_to :bioentry
161
+ #, :class_name => "Bioentry"
162
+ belongs_to :type_term, :class_name => "Term", :foreign_key => "type_term_id"
163
+ belongs_to :source_term, :class_name => "Term", :foreign_key =>"source_term_id"
164
+ has_many :seqfeature_dbxrefs, :class_name => "SeqfeatureDbxref", :foreign_key => "seqfeature_id"
165
+ has_many :seqfeature_qualifier_values, :order=>'rank', :foreign_key => "seqfeature_id"
166
+ #, :class_name => "SeqfeatureQualifierValue"
167
+ has_many :locations, :class_name => "Location", :order=>'rank'
168
+ has_many :object_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "object_seqfeature_id"
169
+ has_many :subject_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "subject_seqfeature_id"
170
+ has_many :object_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "object_seqfeature_id"
171
+ has_many :subject_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "subject_seqfeature_id"
172
+
173
+ #get the subsequence described by the locations objects
174
+ def sequence
175
+ return self.locations.inject(Bio::Sequence::NA.new("")){|seq, location| seq<<location.sequence}
176
+ end
177
+
178
+ #translate the subsequences represented by the feature and its locations
179
+ #not considering the qualifiers
180
+ #Return a Bio::Sequence::AA object
181
+ def translate(*args)
182
+ self.sequence.translate(*args)
183
+ end
184
+ end
185
+ class SeqfeatureRelationship <DummyBase
186
+ set_sequence_name "seqfeatue_relationship_pk_seq"
187
+ belongs_to :term, :class_name => "Term"
188
+ belongs_to :object_seqfeature, :class_name => "Seqfeature"
189
+ belongs_to :subject_seqfeature, :class_name => "Seqfeature"
190
+ end
191
+ class TaxonName < DummyBase
192
+ set_primary_keys :taxon_id, :name, :name_class
193
+ belongs_to :taxon, :class_name => "Taxon"
194
+ end
195
+ class Taxon < DummyBase
196
+ set_sequence_name "taxon_pk_seq"
197
+ has_many :taxon_names, :class_name => "TaxonName"
198
+ has_one :taxon_scientific_name, :class_name => "TaxonName", :conditions=>"name_class = 'scientific name'"
199
+ has_one :taxon_genbank_common_name, :class_name => "TaxonName", :conditions=>"name_class = 'genbank common name'"
200
+ has_one :bioentry, :class_name => "Bioentry"
201
+ end
202
+ class TermDbxref < DummyBase
203
+ set_primary_key nil #term_id, dbxref_id
204
+ #delete set_sequence_name nil
205
+ belongs_to :term, :class_name => "Term"
206
+ belongs_to :dbxref, :class_name => "Dbxref"
207
+ end
208
+ class TermPath < DummyBase
209
+ set_sequence_name "term_path_pk_seq"
210
+ belongs_to :ontology, :class_name => "Ontology"
211
+ belongs_to :subject_term, :class_name => "Term"
212
+ belongs_to :object_term, :class_name => "Term"
213
+ belongs_to :predicate_term, :class_name => "Term"
214
+ end
215
+ class Term < DummyBase
216
+ belongs_to :ontology, :class_name => "Ontology"
217
+ has_many :seqfeature_qualifier_values, :class_name => "SeqfeatureQualifierValue"
218
+ has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
219
+ has_many :bioentry_qualifer_values, :class_name => "BioentryQualifierValue"
220
+ has_many :bioentries, :through=>:bioentry_qualifier_values
221
+ has_many :locations, :class_name => "Location"
222
+ has_many :seqfeature_relationships, :class_name => "SeqfeatureRelationship"
223
+ has_many :term_dbxrefs, :class_name => "TermDbxref"
224
+ has_many :term_relationship_terms, :class_name => "TermRelationshipTerm"
225
+ has_many :term_synonyms, :class_name => "TermSynonym"
226
+ has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
227
+ has_many :seqfeature_types, :class_name => "Seqfeature", :foreign_key => "type_term_id"
228
+ has_many :seqfeature_sources, :class_name => "Seqfeature", :foreign_key => "source_term_id"
229
+ has_many :term_path_subjects, :class_name => "TermPath", :foreign_key => "subject_term_id"
230
+ has_many :term_path_predicates, :class_name => "TermPath", :foreign_key => "predicate_term_id"
231
+ has_many :term_path_objects, :class_name => "TermPath", :foreign_key => "object_term_id"
232
+ has_many :term_relationship_subjects, :class_name => "TermRelationship", :foreign_key =>"subject_term_id"
233
+ has_many :term_relationship_predicates, :class_name => "TermRelationship", :foreign_key =>"predicate_term_id"
234
+ has_many :term_relationship_objects, :class_name => "TermRelationship", :foreign_key =>"object_term_id"
235
+ has_many :seqfeature_paths, :class_name => "SeqfeaturePath"
236
+ end
237
+ class TermRelationship < DummyBase
238
+ set_sequence_name "term_relationship_pk_seq"
239
+ belongs_to :ontology, :class_name => "Ontology"
240
+ belongs_to :subject_term, :class_name => "Term"
241
+ belongs_to :predicate_term, :class_name => "Term"
242
+ belongs_to :object_term, :class_name => "Term"
243
+ has_one :term_relationship_term, :class_name => "TermRelationshipTerm"
244
+ end
245
+ class TermRelationshipTerm < DummyBase
246
+ #delete set_sequence_name nil
247
+ set_primary_key :term_relationship_id
248
+ belongs_to :term_relationship, :class_name => "TermRelationship"
249
+ belongs_to :term, :class_name => "Term"
250
+ end
251
+ class TermSynonym < DummyBase
252
+ #delete set_sequence_name nil
253
+ set_primary_key nil
254
+ belongs_to :term, :class_name => "Term"
255
+ end
256
+ end #SQL
257
+ end #Bio
@@ -0,0 +1,39 @@
1
+ #require 'dm-ar-finders'
2
+ #require 'dm-core'
3
+ require 'erb'
4
+ require 'composite_primary_keys'
5
+
6
+ module Bio
7
+ class SQL
8
+ class DummyBase < ActiveRecord::Base
9
+ #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
10
+ #NOTE: this class will not establish the connection automatically
11
+ self.abstract_class = true
12
+ self.pluralize_table_names = false
13
+ #prepend the table name to the usual id, to avoid specifying the primary id for every table
14
+ self.primary_key_prefix_type = :table_name_with_underscore
15
+ #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'./config', 'database.yml'))).result)
16
+ #self.configurations=biosql_configurations
17
+ #self.establish_connection "development"
18
+ end #DummyBase
19
+
20
+ require 'bio/io/biosql/ar-biosql'
21
+
22
+ # #no check is made
23
+ def self.establish_connection(configurations, env)
24
+ # #configurations is a hash similar to what YAML returns.
25
+
26
+ #configurations.assert_valid_keys('development', 'production','test')
27
+ #configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
28
+ DummyBase.configurations = configurations
29
+ connection = DummyBase.establish_connection "#{env}"
30
+ #Init of basis terms and ontologies
31
+ Ontology.first(:conditions => ["name = ?", 'Annotation Tags']) || Ontology.create({:name => 'Annotation Tags'})
32
+ Ontology.first(:conditions => ["name = ?", 'SeqFeature Keys']) || Ontology.create({:name => 'SeqFeature Keys'})
33
+ Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources']) ||Ontology.create({:name => 'SeqFeature Sources'})
34
+ Term.first(:conditions => ["name = ?", 'EMBLGenBankSwit']) || Term.create({:name => 'EMBLGenBankSwit', :ontology => Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources'])})
35
+ connection
36
+ end #establish_connection
37
+
38
+ end #SQL
39
+ end #Bio
@@ -2,10 +2,11 @@
2
2
  #User can configure its db here
3
3
 
4
4
  development:
5
- adapter: postgresql
6
- database: biorails_development
7
- username: rails
8
- password:
5
+ adapter: jdbcmysql
6
+ database: bioseq
7
+ username: febo
8
+ password:
9
+ hostname: localhost
9
10
 
10
11
  test:
11
12
  adapter: postgresql
@@ -26,8 +26,10 @@ class REST
26
26
  # weekdays for any series of more than 100 requests.
27
27
  # -> Not implemented yet in BioRuby
28
28
 
29
- # Make no more than one request every 3 seconds.
30
- NCBI_INTERVAL = 3
29
+ # Make no more than one request every 1 second.
30
+ # (NCBI's restriction is "Make no more than 3 requests every 1 second.",
31
+ # but limited to 1/sec partly to keep the value an integer.)
32
+ NCBI_INTERVAL = 1
31
33
  @@last_access = nil
32
34
 
33
35
  private
@@ -127,10 +129,10 @@ class REST
127
129
  # * _mindate_: 2001
128
130
  # * _maxdate_: 2002/01/01
129
131
  # * _datetype_: "edat"
130
- # * _limit_: maximum number of entries to be returned (0 for unlimited)
132
+ # * _limit_: maximum number of entries to be returned (0 for unlimited; nil for the "retmax" value in the hash or the internal default value (=100))
131
133
  # * _step_: maximum number of entries retrieved at a time
132
134
  # *Returns*:: array of entry IDs or a number of results
133
- def esearch(str, hash = {}, limit = 100, step = 10000)
135
+ def esearch(str, hash = {}, limit = nil, step = 10000)
134
136
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
135
137
  opts = {
136
138
  "tool" => "bioruby",
@@ -143,12 +145,17 @@ class REST
143
145
  count = esearch_count(str, opts)
144
146
  return count
145
147
  else
148
+ retstart = 0
149
+ retstart = hash["retstart"].to_i if hash["retstart"]
150
+
151
+ limit ||= hash["retmax"].to_i if hash["retmax"]
152
+ limit ||= 100 # default limit is 100
146
153
  limit = esearch_count(str, opts) if limit == 0 # unlimit
147
154
 
148
155
  list = []
149
156
  0.step(limit, step) do |i|
150
157
  retmax = [step, limit - i].min
151
- opts.update("retmax" => retmax, "retstart" => i)
158
+ opts.update("retmax" => retmax, "retstart" => i + retstart)
152
159
  ncbi_access_wait
153
160
  response = Bio::Command.post_form(serv, opts)
154
161
  result = response.body
@@ -117,7 +117,11 @@ class PubMed < Bio::NCBI::REST
117
117
  def efetch(ids, hash = {})
118
118
  opts = { "db" => "pubmed", "rettype" => "medline" }
119
119
  opts.update(hash)
120
- super(ids, opts)
120
+ result = super(ids, opts)
121
+ if !opts["retmode"] or opts["retmode"] == "text"
122
+ result = result.split(/\n\n+/)
123
+ end
124
+ result
121
125
  end
122
126
 
123
127
  # Search the PubMed database by given keywords using entrez query and returns
@@ -1,186 +1,79 @@
1
+ #module Bio
2
+ # class SQL
3
+ # #no check is made
4
+ # def self.establish_connection(configurations, env)
5
+ # #configurations is an hash similar what YAML returns.
6
+ # #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
7
+ # configurations.assert_valid_keys('development', 'production','test')
8
+ # configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
9
+ # DummyBase.configurations = configurations
10
+ # DummyBase.establish_connection "#{env}"
11
+ #end
1
12
 
2
- require 'rubygems'
3
- require 'erb'
4
- require 'composite_primary_keys'
13
+
14
+ #require 'rubygems'
15
+ #require 'composite_primary_keys'
16
+ #require 'erb'
5
17
  # BiosqlPlug
6
18
 
7
19
  =begin
8
20
  Ok Hilmar gives to me some clarification
9
21
  1) The "EMBL/GenBank/SwissProt" name in the term table is only a convention, assuming the data was loaded from GenBank, EMBL and SwissProt formats.
10
- If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
22
+ If your features come from other ways, for example blast or alignment or whatever, the user has to take care of the source.
11
23
 
12
24
 
13
25
  =end
14
26
  =begin
15
27
  TODO:
16
28
  1) source_term_id => source_term; check beforehand whether the source term is present and at which level; the root should always be something like "EMBL/GenBank/SwissProt" or contextualized.
17
- 2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
29
+ 2) In the DummyBase class, remove the connection setup and use Bio::ArSQL.establish_connection, which reads info from a yml file.
18
30
  3) Chk Locations in Biofeatures ArSQL
19
31
  =end
20
32
  module Bio
21
- class SQL
22
- #no check is made
23
- def self.establish_connection(configurations, env)
24
- #configurations is an hash similar what YAML returns.
25
- #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
26
- configurations.assert_valid_keys('development', 'production','test')
27
- configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
28
- DummyBase.configurations = configurations
29
- DummyBase.establish_connection "#{env}"
30
- end
31
-
33
+ class SQL
34
+
35
+ require 'bio/io/biosql/biosql'
36
+ autoload :Sequence, 'bio/db/biosql/sequence'
37
+
32
38
  def self.fetch_id(id)
33
39
  Bio::SQL::Bioentry.find(id)
34
40
  end
35
-
41
+
36
42
  def self.fetch_accession(accession)
37
- accession = accession.upcase
38
- Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
43
+ # Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
44
+ Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession.upcase))
39
45
  end
40
-
46
+
41
47
  def self.exists_accession(accession)
42
- Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
48
+ # Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
49
+ !Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil?
43
50
  end
44
-
51
+
45
52
  def self.exists_database(name)
46
- Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
53
+ # Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
54
+ !Bio::SQL::Biodatabase.first(:name=>name).nil?
47
55
  end
48
-
56
+
49
57
  def self.list_entries
50
- Bio::SQL::Bioentry.find(:all).collect{|entry|
58
+ Bio::SQL::Bioentry.all.collect do|entry|
51
59
  {:id=>entry.bioentry_id, :accession=>entry.accession}
52
- }
60
+ end
53
61
  end
54
-
62
+
55
63
  def self.list_databases
56
- Bio::SQL::Biodatabase.find(:all).collect{|entry|
64
+ Bio::SQL::Biodatabase.all.collect do|entry|
57
65
  {:id=>entry.biodatabase_id, :name => entry.name}
58
- }
66
+ end
59
67
  end
60
-
68
+
61
69
  def self.delete_entry_id(id)
62
- Bioentry.delete(id)
70
+ Bio::SQL::Bioentry.delete(id)
63
71
  end
64
-
72
+
65
73
  def self.delete_entry_accession(accession)
66
- Bioentry.delete(Bioentry.find_by_accession(accession))
74
+ Bio::SQL::Bioentry.find_by_accession(accession.upcase).destroy!
67
75
  end
68
-
69
-
70
- class DummyBase < ActiveRecord::Base
71
- #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
72
- #NOTE: this class will not establish the connection automatically
73
- self.abstract_class = true
74
- self.pluralize_table_names = false
75
- #prepend table name to the usual id, avoid to specify primary id for every table
76
- self.primary_key_prefix_type = :table_name_with_underscore
77
- #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
78
- #self.configurations=biosql_configurations
79
- #self.establish_connection "development"
80
- end #DummyBase
81
-
82
- autoload :Biodatabase, 'bio/io/biosql/biodatabase'
83
- autoload :Bioentry, 'bio/io/biosql/bioentry'
84
- autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
85
- autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
86
- autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
87
- autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
88
- autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
89
- autoload :Biosequence, 'bio/io/biosql/biosequence'
90
- autoload :Comment, 'bio/io/biosql/comment'
91
- autoload :Dbxref, 'bio/io/biosql/dbxref'
92
- autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
93
- autoload :Location, 'bio/io/biosql/location'
94
- autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
95
- autoload :Ontology, 'bio/io/biosql/ontology'
96
- autoload :Reference, 'bio/io/biosql/reference'
97
- autoload :Seqfeature, 'bio/io/biosql/seqfeature'
98
- autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
99
- autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
100
- autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
101
- autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
102
- autoload :Taxon, 'bio/io/biosql/taxon'
103
- autoload :TaxonName, 'bio/io/biosql/taxon_name'
104
- autoload :Term, 'bio/io/biosql/term'
105
- autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
106
- autoload :TermPath, 'bio/io/biosql/term_path'
107
- autoload :TermRelationship, 'bio/io/biosql/term_relationship'
108
- autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
109
- autoload :Sequence, 'bio/db/biosql/sequence'
110
- end #biosql
111
-
112
- end #Bio
113
76
 
114
- if __FILE__ == $0
115
- require 'rubygems'
116
- require 'composite_primary_keys'
117
- require 'bio'
118
- require 'pp'
119
-
120
- # pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
121
- connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
122
- #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
123
- if true
124
- #Bio::SQL.list_entries
125
-
126
- # biosequence = data.to_biosequence
127
- # puts biosequence.output(:genbank)
128
- db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
129
- db.save!
130
-
131
- puts "### FileFile.auto"
132
- if ARGV.size > 0
133
- #embl = Bio::FlatFile.auto(ARGF.read)
134
- Bio::FlatFile.auto(ARGF) do |ff|
135
- ff.each do |data|
136
- biosequence=data.to_biosequence
137
- puts biosequence.output(:fasta)
138
- sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
139
- sqlseq.save
140
- sqlseq.to_biosequence.output(:fasta)
141
- end
142
- end
143
- else
144
- require 'bio/io/fetch'
145
- server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
146
- data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
147
- end
77
+ end #biosql
148
78
 
149
-
150
- # sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
151
- # sqlseq.save
152
- # sqlseq_bioseq=sqlseq.to_biosequence
153
- # puts sqlseq_bioseq.output(:genbank)
154
-
155
-
156
-
157
- # bioseq = Bio::SQL.fetch_accession('AJ224122')
158
- # pp bioseq
159
- # pp bioseq.entry_id
160
- #TODO create a test only for tables not sequence here
161
- # pp bioseq.molecule_type
162
- #pp bioseq.molecule_type.class
163
- #bioseq.molecule_type_update('dna', 1)
164
- ## pp Bio::SQL::Taxon.find(8121).taxon_names
165
-
166
- #sqlseq.to_biosequence
167
-
168
- # sqlseq.delete
169
-
170
- # db.destroy
171
- end
172
- #pp bioseq.molecule_type
173
- #term = Bio::SQL::Term.find_by_name('mol_type')
174
- #pp term
175
- #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
176
- #pp bioseq.entry.bioentry_qualifier_values.inspect
177
- #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
178
- #pp primo.class
179
- # pp primo.value='dna'
180
- # pp primo.save
181
- #pp bioseq.molecule_type= 'prova'
182
-
183
- #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
184
-
185
-
186
- end
79
+ end #Bio