bio 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. data/COPYING +56 -0
  2. data/COPYING.ja +51 -0
  3. data/ChangeLog +540 -0
  4. data/GPL +340 -0
  5. data/LEGAL +141 -0
  6. data/LGPL +504 -0
  7. data/README.rdoc +4 -2
  8. data/Rakefile +2 -2
  9. data/bioruby.gemspec +17 -29
  10. data/doc/Tutorial.rd +118 -90
  11. data/doc/Tutorial.rd.html +124 -87
  12. data/lib/bio/appl/blast.rb +2 -2
  13. data/lib/bio/appl/blast/format0.rb +1 -1
  14. data/lib/bio/appl/fasta.rb +5 -12
  15. data/lib/bio/appl/fasta/format10.rb +96 -6
  16. data/lib/bio/appl/gcg/msf.rb +11 -14
  17. data/lib/bio/appl/pts1.rb +0 -4
  18. data/lib/bio/appl/sim4/report.rb +50 -17
  19. data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
  20. data/lib/bio/db/biosql/sequence.rb +234 -298
  21. data/lib/bio/db/embl/embl.rb +0 -3
  22. data/lib/bio/db/genbank/common.rb +3 -1
  23. data/lib/bio/io/biosql/ar-biosql.rb +257 -0
  24. data/lib/bio/io/biosql/biosql.rb +39 -0
  25. data/lib/bio/io/biosql/config/database.yml +5 -4
  26. data/lib/bio/io/ncbirest.rb +12 -5
  27. data/lib/bio/io/pubmed.rb +5 -1
  28. data/lib/bio/io/sql.rb +43 -150
  29. data/lib/bio/sequence/compat.rb +5 -1
  30. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
  31. data/lib/bio/version.rb +1 -1
  32. data/test/data/gcg/pileup-aa.msf +67 -0
  33. data/test/data/sim4/complement-A4.sim4 +43 -0
  34. data/test/data/sim4/simple-A4.sim4 +25 -0
  35. data/test/data/sim4/simple2-A4.sim4 +25 -0
  36. data/test/functional/bio/io/test_pubmed.rb +129 -0
  37. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
  38. data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
  39. data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
  40. data/test/unit/bio/appl/sim4/test_report.rb +869 -0
  41. data/test/unit/bio/appl/test_blast.rb +1 -1
  42. data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
  43. data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
  44. data/test/unit/bio/test_feature.rb +18 -17
  45. data/test/unit/bio/test_reference.rb +18 -18
  46. data/test/unit/bio/test_sequence.rb +1 -1
  47. metadata +18 -30
  48. data/lib/bio/io/biosql/biodatabase.rb +0 -64
  49. data/lib/bio/io/biosql/bioentry.rb +0 -29
  50. data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
  51. data/lib/bio/io/biosql/bioentry_path.rb +0 -12
  52. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
  53. data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
  54. data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
  55. data/lib/bio/io/biosql/biosequence.rb +0 -11
  56. data/lib/bio/io/biosql/comment.rb +0 -7
  57. data/lib/bio/io/biosql/dbxref.rb +0 -13
  58. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
  59. data/lib/bio/io/biosql/location.rb +0 -32
  60. data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
  61. data/lib/bio/io/biosql/ontology.rb +0 -10
  62. data/lib/bio/io/biosql/reference.rb +0 -9
  63. data/lib/bio/io/biosql/seqfeature.rb +0 -32
  64. data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
  65. data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
  66. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
  67. data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
  68. data/lib/bio/io/biosql/taxon.rb +0 -12
  69. data/lib/bio/io/biosql/taxon_name.rb +0 -9
  70. data/lib/bio/io/biosql/term.rb +0 -27
  71. data/lib/bio/io/biosql/term_dbxref.rb +0 -11
  72. data/lib/bio/io/biosql/term_path.rb +0 -12
  73. data/lib/bio/io/biosql/term_relationship.rb +0 -13
  74. data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
  75. data/lib/bio/io/biosql/term_synonym.rb +0 -10
@@ -448,9 +448,6 @@ class EMBL < EMBLDB
448
448
  # taxonomy classification
449
449
  alias classification oc
450
450
 
451
- # features
452
- alias features ft
453
-
454
451
 
455
452
  # converts the entry to Bio::Sequence object
456
453
  # ---
@@ -158,7 +158,9 @@ module Common
158
158
  authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
159
159
  hash['authors'] = authors
160
160
  when /TITLE/
161
- hash['title'] = truncate(tag_cut(field)) + '.'
161
+ hash['title'] = truncate(tag_cut(field))
162
+ # CHECK: GenBank does not actually require a dot at the end of TITLE
163
+ #+ '.'
162
164
  when /JOURNAL/
163
165
  journal = truncate(tag_cut(field))
164
166
  if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
@@ -0,0 +1,257 @@
1
+ module Bio
2
+ class SQL
3
+ class Biodatabase < DummyBase
4
+ has_many :bioentries, :class_name =>"Bioentry", :foreign_key => "biodatabase_id"
5
+ validates_uniqueness_of :name
6
+ end
7
+ class BioentryDbxref < DummyBase
8
+ #delete set_sequence_name nil
9
+ set_primary_key nil #bioentry_id,dbxref_id
10
+ belongs_to :bioentry, :class_name => "Bioentry"
11
+ belongs_to :dbxref, :class_name => "Dbxref"
12
+ end
13
+
14
+ class BioentryPath < DummyBase
15
+ set_primary_key nil
16
+ #delete set_sequence_name nil
17
+ belongs_to :term, :class_name => "Term"
18
+ #TODO: needs to be fixed before we can proceed.
19
+ belongs_to :object_bioentry, :class_name=>"Bioentry"
20
+ belongs_to :subject_bioentry, :class_name=>"Bioentry"
21
+ end #BioentryPath
22
+
23
+ class BioentryQualifierValue < DummyBase
24
+ #NOTE: added rank to primary_keys, now it's finished.
25
+ set_primary_keys :bioentry_id, :term_id, :rank
26
+ belongs_to :bioentry, :class_name => "Bioentry"
27
+ belongs_to :term, :class_name => "Term"
28
+ end #BioentryQualifierValue
29
+
30
+ class Bioentry < DummyBase
31
+ belongs_to :biodatabase, :class_name => "Biodatabase"
32
+ belongs_to :taxon, :class_name => "Taxon"
33
+ has_one :biosequence
34
+ #, :class_name => "Biosequence", :foreign_key => "bioentry_id"
35
+ has_many :comments, :class_name =>"Comment", :order =>'rank'
36
+ has_many :seqfeatures, :class_name => "Seqfeature", :order=>'rank'
37
+ has_many :bioentry_references, :class_name=>"BioentryReference" #, :foreign_key => "bioentry_id"
38
+ has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
39
+ has_many :object_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"object_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
40
+ has_many :subject_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"subject_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
41
+ has_many :object_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"object_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
42
+ has_many :subject_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"subject_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
43
+
44
+ has_many :cdsfeatures, :class_name=>"Seqfeature", :foreign_key =>"bioentry_id", :conditions=>["term.name='CDS'"], :include=>"type_term"
45
+ has_many :references, :through=>:bioentry_references, :class_name => "Reference"
46
+ has_many :terms, :through=>:bioentry_qualifier_values, :class_name => "Term"
47
+ #NOTE: added order_by for multiple hit and manage ranks correctly
48
+ has_many :bioentry_qualifier_values, :order=>"bioentry_id,term_id,rank", :class_name => "BioentryQualifierValue"
49
+
50
+ #required for creation:
51
+ #name, accession, version
52
+ # validates_uniqueness_of :accession, :scope=>[:biodatabase_id]
53
+ # validates_uniqueness_of :name, :scope=>[:biodatabase_id]
54
+ # validates_uniqueness_of :identifier, :scope=>[:biodatabase_id]
55
+
56
+ end
57
+ class BioentryReference < DummyBase
58
+ set_primary_keys :bioentry_id, :reference_id, :rank
59
+ belongs_to :bioentry, :class_name => "Bioentry"
60
+ belongs_to :reference , :class_name => "Reference"
61
+ end
62
+ class BioentryRelationship < DummyBase
63
+ #delete set_primary_key "bioentry_relationship_id"
64
+ set_sequence_name "bieontry_relationship_pk_seq"
65
+ belongs_to :object_bioentry, :class_name => "Bioentry"
66
+ belongs_to :subject_bioentry, :class_name => "Bioentry"
67
+ belongs_to :term
68
+ end
69
+ class Biosequence < DummyBase
70
+ set_primary_keys :bioentry_id, :version
71
+ #delete set_sequence_name "biosequence_pk_seq"
72
+ belongs_to :bioentry, :foreign_key=>"bioentry_id"
73
+ #has_one :bioentry
74
+ #, :class_name => "Bioentry"
75
+ end
76
+ class Comment < DummyBase
77
+ belongs_to :bioentry, :class_name => "Bioentry"
78
+ end
79
+ class DbxrefQualifierValue < DummyBase
80
+ #think to use composite primary key
81
+ set_primary_key nil #dbxref_id, term_id, rank
82
+ #delete set_sequence_name nil
83
+ belongs_to :dbxref, :class_name => "Dbxref"
84
+ belongs_to :term, :class_name => "Term"
85
+ end
86
+ class Dbxref < DummyBase
87
+ #set_sequence_name "dbxref_pk_seq"
88
+ has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
89
+ has_many :locations, :class_name => "Location"
90
+ has_many :references, :class_name=>"Reference"
91
+ has_many :term_dbxrefs, :class_name => "TermDbxref"
92
+ has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
93
+ #TODO: check whether there is a has_and_belongs_to_many relationship with bioentry, as specified in the schema overview.
94
+ end
95
+ class LocationQualifierValue < DummyBase
96
+ set_primary_key nil #location_id, term_id
97
+ #delete set_sequence_name nil
98
+ belongs_to :location, :class_name => "Location"
99
+ belongs_to :term, :class_name => "Term"
100
+ end
101
+ class Location < DummyBase
102
+ #set_sequence_name "location_pk_seq"
103
+ belongs_to :seqfeature, :class_name => "Seqfeature"
104
+ belongs_to :dbxref, :class_name => "Dbxref"
105
+ belongs_to :term, :class_name => "Term"
106
+ has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
107
+
108
+ def to_s
109
+ if strand==-1
110
+ str="complement("+start_pos.to_s+".."+end_pos.to_s+")"
111
+ else
112
+ str=start_pos.to_s+".."+end_pos.to_s
113
+ end
114
+ return str
115
+ end
116
+
117
+ def sequence
118
+ seq=""
119
+ unless self.seqfeature.bioentry.biosequence.seq.nil?
120
+ seq=Bio::Sequence::NA.new(self.seqfeature.bioentry.biosequence.seq[start_pos-1..end_pos-1])
121
+ seq.reverse_complement! if strand==-1
122
+ end
123
+ return seq
124
+ end
125
+
126
+
127
+
128
+ end
129
+ class Ontology < DummyBase
130
+ has_many :terms, :class_name => "Term"
131
+ has_many :term_paths, :class_name => "TermPath"
132
+ has_many :term_relationships, :class_name => "TermRelationship"
133
+ end
134
+ class Reference < DummyBase
135
+ belongs_to :dbxref, :class_name => "Dbxref"
136
+ has_many :bioentry_references, :class_name=>"BioentryReference"
137
+ has_many :bioentries, :through=>:bioentry_references
138
+ end
139
+ class SeqfeatureDbxref < DummyBase
140
+ set_primary_keys :seqfeature_id, :dbxref_id
141
+ #delete set_sequence_name nil
142
+ belongs_to :seqfeature, :class_name => "Seqfeature", :foreign_key => "seqfeature_id"
143
+ belongs_to :dbxref, :class_name => "Dbxref", :foreign_key => "dbxref_id"
144
+ end
145
+ class SeqfeaturePath < DummyBase
146
+ set_primary_keys :object_seqfeature_id, :subject_seqfeature_id, :term_id
147
+ set_sequence_name nil
148
+ belongs_to :object_seqfeature, :class_name => "Seqfeature", :foreign_key => "object_seqfeature_id"
149
+ belongs_to :subject_seqfeature, :class_name => "Seqfeature", :foreign_key => "subject_seqfeature_id"
150
+ belongs_to :term, :class_name => "Term"
151
+ end
152
+ class SeqfeatureQualifierValue < DummyBase
153
+ set_primary_keys :seqfeature_id, :term_id, :rank
154
+ set_sequence_name nil
155
+ belongs_to :seqfeature
156
+ belongs_to :term, :class_name => "Term"
157
+ end
158
+ class Seqfeature <DummyBase
159
+ set_sequence_name "seqfeature_pk_seq"
160
+ belongs_to :bioentry
161
+ #, :class_name => "Bioentry"
162
+ belongs_to :type_term, :class_name => "Term", :foreign_key => "type_term_id"
163
+ belongs_to :source_term, :class_name => "Term", :foreign_key =>"source_term_id"
164
+ has_many :seqfeature_dbxrefs, :class_name => "SeqfeatureDbxref", :foreign_key => "seqfeature_id"
165
+ has_many :seqfeature_qualifier_values, :order=>'rank', :foreign_key => "seqfeature_id"
166
+ #, :class_name => "SeqfeatureQualifierValue"
167
+ has_many :locations, :class_name => "Location", :order=>'rank'
168
+ has_many :object_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "object_seqfeature_id"
169
+ has_many :subject_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "subject_seqfeature_id"
170
+ has_many :object_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "object_seqfeature_id"
171
+ has_many :subject_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "subject_seqfeature_id"
172
+
173
+ #get the subsequence described by the locations objects
174
+ def sequence
175
+ return self.locations.inject(Bio::Sequence::NA.new("")){|seq, location| seq<<location.sequence}
176
+ end
177
+
178
+ #translate the subsequences represented by the feature and its locations
179
+ #not considering the qualifiers
180
+ #Return a Bio::Sequence::AA object
181
+ def translate(*args)
182
+ self.sequence.translate(*args)
183
+ end
184
+ end
185
+ class SeqfeatureRelationship <DummyBase
186
+ set_sequence_name "seqfeatue_relationship_pk_seq"
187
+ belongs_to :term, :class_name => "Term"
188
+ belongs_to :object_seqfeature, :class_name => "Seqfeature"
189
+ belongs_to :subject_seqfeature, :class_name => "Seqfeature"
190
+ end
191
+ class TaxonName < DummyBase
192
+ set_primary_keys :taxon_id, :name, :name_class
193
+ belongs_to :taxon, :class_name => "Taxon"
194
+ end
195
+ class Taxon < DummyBase
196
+ set_sequence_name "taxon_pk_seq"
197
+ has_many :taxon_names, :class_name => "TaxonName"
198
+ has_one :taxon_scientific_name, :class_name => "TaxonName", :conditions=>"name_class = 'scientific name'"
199
+ has_one :taxon_genbank_common_name, :class_name => "TaxonName", :conditions=>"name_class = 'genbank common name'"
200
+ has_one :bioentry, :class_name => "Bioentry"
201
+ end
202
+ class TermDbxref < DummyBase
203
+ set_primary_key nil #term_id, dbxref_id
204
+ #delete set_sequence_name nil
205
+ belongs_to :term, :class_name => "Term"
206
+ belongs_to :dbxref, :class_name => "Dbxref"
207
+ end
208
+ class TermPath < DummyBase
209
+ set_sequence_name "term_path_pk_seq"
210
+ belongs_to :ontology, :class_name => "Ontology"
211
+ belongs_to :subject_term, :class_name => "Term"
212
+ belongs_to :object_term, :class_name => "Term"
213
+ belongs_to :predicate_term, :class_name => "Term"
214
+ end
215
+ class Term < DummyBase
216
+ belongs_to :ontology, :class_name => "Ontology"
217
+ has_many :seqfeature_qualifier_values, :class_name => "SeqfeatureQualifierValue"
218
+ has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
219
+ has_many :bioentry_qualifer_values, :class_name => "BioentryQualifierValue"
220
+ has_many :bioentries, :through=>:bioentry_qualifier_values
221
+ has_many :locations, :class_name => "Location"
222
+ has_many :seqfeature_relationships, :class_name => "SeqfeatureRelationship"
223
+ has_many :term_dbxrefs, :class_name => "TermDbxref"
224
+ has_many :term_relationship_terms, :class_name => "TermRelationshipTerm"
225
+ has_many :term_synonyms, :class_name => "TermSynonym"
226
+ has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
227
+ has_many :seqfeature_types, :class_name => "Seqfeature", :foreign_key => "type_term_id"
228
+ has_many :seqfeature_sources, :class_name => "Seqfeature", :foreign_key => "source_term_id"
229
+ has_many :term_path_subjects, :class_name => "TermPath", :foreign_key => "subject_term_id"
230
+ has_many :term_path_predicates, :class_name => "TermPath", :foreign_key => "predicate_term_id"
231
+ has_many :term_path_objects, :class_name => "TermPath", :foreign_key => "object_term_id"
232
+ has_many :term_relationship_subjects, :class_name => "TermRelationship", :foreign_key =>"subject_term_id"
233
+ has_many :term_relationship_predicates, :class_name => "TermRelationship", :foreign_key =>"predicate_term_id"
234
+ has_many :term_relationship_objects, :class_name => "TermRelationship", :foreign_key =>"object_term_id"
235
+ has_many :seqfeature_paths, :class_name => "SeqfeaturePath"
236
+ end
237
+ class TermRelationship < DummyBase
238
+ set_sequence_name "term_relationship_pk_seq"
239
+ belongs_to :ontology, :class_name => "Ontology"
240
+ belongs_to :subject_term, :class_name => "Term"
241
+ belongs_to :predicate_term, :class_name => "Term"
242
+ belongs_to :object_term, :class_name => "Term"
243
+ has_one :term_relationship_term, :class_name => "TermRelationshipTerm"
244
+ end
245
+ class TermRelationshipTerm < DummyBase
246
+ #delete set_sequence_name nil
247
+ set_primary_key :term_relationship_id
248
+ belongs_to :term_relationship, :class_name => "TermRelationship"
249
+ belongs_to :term, :class_name => "Term"
250
+ end
251
+ class TermSynonym < DummyBase
252
+ #delete set_sequence_name nil
253
+ set_primary_key nil
254
+ belongs_to :term, :class_name => "Term"
255
+ end
256
+ end #SQL
257
+ end #Bio
@@ -0,0 +1,39 @@
1
+ #require 'dm-ar-finders'
2
+ #require 'dm-core'
3
+ require 'erb'
4
+ require 'composite_primary_keys'
5
+
6
+ module Bio
7
+ class SQL
8
+ class DummyBase < ActiveRecord::Base
9
+ #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
10
+ #NOTE: this class will not establish the connection automatically
11
+ self.abstract_class = true
12
+ self.pluralize_table_names = false
13
+ #prepend table name to the usual id, avoid to specify primary id for every table
14
+ self.primary_key_prefix_type = :table_name_with_underscore
15
+ #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'./config', 'database.yml'))).result)
16
+ #self.configurations=biosql_configurations
17
+ #self.establish_connection "development"
18
+ end #DummyBase
19
+
20
+ require 'bio/io/biosql/ar-biosql'
21
+
22
+ # #no check is made
23
+ def self.establish_connection(configurations, env)
24
+ # #configurations is a hash similar to what YAML returns.
25
+
26
+ #configurations.assert_valid_keys('development', 'production','test')
27
+ #configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
28
+ DummyBase.configurations = configurations
29
+ connection = DummyBase.establish_connection "#{env}"
30
+ #Init of basis terms and ontologies
31
+ Ontology.first(:conditions => ["name = ?", 'Annotation Tags']) || Ontology.create({:name => 'Annotation Tags'})
32
+ Ontology.first(:conditions => ["name = ?", 'SeqFeature Keys']) || Ontology.create({:name => 'SeqFeature Keys'})
33
+ Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources']) ||Ontology.create({:name => 'SeqFeature Sources'})
34
+ Term.first(:conditions => ["name = ?", 'EMBLGenBankSwit']) || Term.create({:name => 'EMBLGenBankSwit', :ontology => Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources'])})
35
+ connection
36
+ end #establish_connection
37
+
38
+ end #SQL
39
+ end #Bio
@@ -2,10 +2,11 @@
2
2
  #User can configure its db here
3
3
 
4
4
  development:
5
- adapter: postgresql
6
- database: biorails_development
7
- username: rails
8
- password:
5
+ adapter: jdbcmysql
6
+ database: bioseq
7
+ username: febo
8
+ password:
9
+ hostname: localhost
9
10
 
10
11
  test:
11
12
  adapter: postgresql
@@ -26,8 +26,10 @@ class REST
26
26
  # weekdays for any series of more than 100 requests.
27
27
  # -> Not implemented yet in BioRuby
28
28
 
29
- # Make no more than one request every 3 seconds.
30
- NCBI_INTERVAL = 3
29
+ # Make no more than one request every 1 second.
30
+ # (NCBI's restriction is "Make no more than 3 requests every 1 second.",
31
+ # but limited to 1/sec partly because of keeping the value in integer.)
32
+ NCBI_INTERVAL = 1
31
33
  @@last_access = nil
32
34
 
33
35
  private
@@ -127,10 +129,10 @@ class REST
127
129
  # * _mindate_: 2001
128
130
  # * _maxdate_: 2002/01/01
129
131
  # * _datetype_: "edat"
130
- # * _limit_: maximum number of entries to be returned (0 for unlimited)
132
+ # * _limit_: maximum number of entries to be returned (0 for unlimited; nil for the "retmax" value in the hash or the internal default value (=100))
131
133
  # * _step_: maximum number of entries retrieved at a time
132
134
  # *Returns*:: array of entry IDs or a number of results
133
- def esearch(str, hash = {}, limit = 100, step = 10000)
135
+ def esearch(str, hash = {}, limit = nil, step = 10000)
134
136
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
135
137
  opts = {
136
138
  "tool" => "bioruby",
@@ -143,12 +145,17 @@ class REST
143
145
  count = esearch_count(str, opts)
144
146
  return count
145
147
  else
148
+ retstart = 0
149
+ retstart = hash["retstart"].to_i if hash["retstart"]
150
+
151
+ limit ||= hash["retmax"].to_i if hash["retmax"]
152
+ limit ||= 100 # default limit is 100
146
153
  limit = esearch_count(str, opts) if limit == 0 # unlimit
147
154
 
148
155
  list = []
149
156
  0.step(limit, step) do |i|
150
157
  retmax = [step, limit - i].min
151
- opts.update("retmax" => retmax, "retstart" => i)
158
+ opts.update("retmax" => retmax, "retstart" => i + retstart)
152
159
  ncbi_access_wait
153
160
  response = Bio::Command.post_form(serv, opts)
154
161
  result = response.body
@@ -117,7 +117,11 @@ class PubMed < Bio::NCBI::REST
117
117
  def efetch(ids, hash = {})
118
118
  opts = { "db" => "pubmed", "rettype" => "medline" }
119
119
  opts.update(hash)
120
- super(ids, opts)
120
+ result = super(ids, opts)
121
+ if !opts["retmode"] or opts["retmode"] == "text"
122
+ result = result.split(/\n\n+/)
123
+ end
124
+ result
121
125
  end
122
126
 
123
127
  # Search the PubMed database by given keywords using entrez query and returns
@@ -1,186 +1,79 @@
1
+ #module Bio
2
+ # class SQL
3
+ # #no check is made
4
+ # def self.establish_connection(configurations, env)
5
+ # #configurations is a hash similar to what YAML returns.
6
+ # #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
7
+ # configurations.assert_valid_keys('development', 'production','test')
8
+ # configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
9
+ # DummyBase.configurations = configurations
10
+ # DummyBase.establish_connection "#{env}"
11
+ #end
1
12
 
2
- require 'rubygems'
3
- require 'erb'
4
- require 'composite_primary_keys'
13
+
14
+ #require 'rubygems'
15
+ #require 'composite_primary_keys'
16
+ #require 'erb'
5
17
  # BiosqlPlug
6
18
 
7
19
  =begin
8
20
  OK, Hilmar gave me some clarification:
9
21
  1) The "EMBL/GenBank/SwissProt" name in the term table is only a convention, assuming data loaded from GenBank, EMBL and SwissProt formats.
10
- If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
22
+ If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
11
23
 
12
24
 
13
25
  =end
14
26
  =begin
15
27
  TODO:
16
28
  1) source_term_id => source_term and check beforehand whether the source term is present or not and at which level; the root should always be something like "EMBL/GenBank/SwissProt" or contextualized.
17
- 2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
29
+ 2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
18
30
  3) Chk Locations in Biofeatures ArSQL
19
31
  =end
20
32
  module Bio
21
- class SQL
22
- #no check is made
23
- def self.establish_connection(configurations, env)
24
- #configurations is an hash similar what YAML returns.
25
- #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
26
- configurations.assert_valid_keys('development', 'production','test')
27
- configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
28
- DummyBase.configurations = configurations
29
- DummyBase.establish_connection "#{env}"
30
- end
31
-
33
+ class SQL
34
+
35
+ require 'bio/io/biosql/biosql'
36
+ autoload :Sequence, 'bio/db/biosql/sequence'
37
+
32
38
  def self.fetch_id(id)
33
39
  Bio::SQL::Bioentry.find(id)
34
40
  end
35
-
41
+
36
42
  def self.fetch_accession(accession)
37
- accession = accession.upcase
38
- Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
43
+ # Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
44
+ Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession.upcase))
39
45
  end
40
-
46
+
41
47
  def self.exists_accession(accession)
42
- Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
48
+ # Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
49
+ !Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil?
43
50
  end
44
-
51
+
45
52
  def self.exists_database(name)
46
- Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
53
+ # Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
54
+ !Bio::SQL::Biodatabase.first(:name=>name).nil?
47
55
  end
48
-
56
+
49
57
  def self.list_entries
50
- Bio::SQL::Bioentry.find(:all).collect{|entry|
58
+ Bio::SQL::Bioentry.all.collect do|entry|
51
59
  {:id=>entry.bioentry_id, :accession=>entry.accession}
52
- }
60
+ end
53
61
  end
54
-
62
+
55
63
  def self.list_databases
56
- Bio::SQL::Biodatabase.find(:all).collect{|entry|
64
+ Bio::SQL::Biodatabase.all.collect do|entry|
57
65
  {:id=>entry.biodatabase_id, :name => entry.name}
58
- }
66
+ end
59
67
  end
60
-
68
+
61
69
  def self.delete_entry_id(id)
62
- Bioentry.delete(id)
70
+ Bio::SQL::Bioentry.delete(id)
63
71
  end
64
-
72
+
65
73
  def self.delete_entry_accession(accession)
66
- Bioentry.delete(Bioentry.find_by_accession(accession))
74
+ Bio::SQL::Bioentry.find_by_accession(accession.upcase).destroy!
67
75
  end
68
-
69
-
70
- class DummyBase < ActiveRecord::Base
71
- #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
72
- #NOTE: this class will not establish the connection automatically
73
- self.abstract_class = true
74
- self.pluralize_table_names = false
75
- #prepend table name to the usual id, avoid to specify primary id for every table
76
- self.primary_key_prefix_type = :table_name_with_underscore
77
- #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
78
- #self.configurations=biosql_configurations
79
- #self.establish_connection "development"
80
- end #DummyBase
81
-
82
- autoload :Biodatabase, 'bio/io/biosql/biodatabase'
83
- autoload :Bioentry, 'bio/io/biosql/bioentry'
84
- autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
85
- autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
86
- autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
87
- autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
88
- autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
89
- autoload :Biosequence, 'bio/io/biosql/biosequence'
90
- autoload :Comment, 'bio/io/biosql/comment'
91
- autoload :Dbxref, 'bio/io/biosql/dbxref'
92
- autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
93
- autoload :Location, 'bio/io/biosql/location'
94
- autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
95
- autoload :Ontology, 'bio/io/biosql/ontology'
96
- autoload :Reference, 'bio/io/biosql/reference'
97
- autoload :Seqfeature, 'bio/io/biosql/seqfeature'
98
- autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
99
- autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
100
- autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
101
- autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
102
- autoload :Taxon, 'bio/io/biosql/taxon'
103
- autoload :TaxonName, 'bio/io/biosql/taxon_name'
104
- autoload :Term, 'bio/io/biosql/term'
105
- autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
106
- autoload :TermPath, 'bio/io/biosql/term_path'
107
- autoload :TermRelationship, 'bio/io/biosql/term_relationship'
108
- autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
109
- autoload :Sequence, 'bio/db/biosql/sequence'
110
- end #biosql
111
-
112
- end #Bio
113
76
 
114
- if __FILE__ == $0
115
- require 'rubygems'
116
- require 'composite_primary_keys'
117
- require 'bio'
118
- require 'pp'
119
-
120
- # pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
121
- connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
122
- #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
123
- if true
124
- #Bio::SQL.list_entries
125
-
126
- # biosequence = data.to_biosequence
127
- # puts biosequence.output(:genbank)
128
- db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
129
- db.save!
130
-
131
- puts "### FileFile.auto"
132
- if ARGV.size > 0
133
- #embl = Bio::FlatFile.auto(ARGF.read)
134
- Bio::FlatFile.auto(ARGF) do |ff|
135
- ff.each do |data|
136
- biosequence=data.to_biosequence
137
- puts biosequence.output(:fasta)
138
- sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
139
- sqlseq.save
140
- sqlseq.to_biosequence.output(:fasta)
141
- end
142
- end
143
- else
144
- require 'bio/io/fetch'
145
- server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
146
- data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
147
- end
77
+ end #biosql
148
78
 
149
-
150
- # sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
151
- # sqlseq.save
152
- # sqlseq_bioseq=sqlseq.to_biosequence
153
- # puts sqlseq_bioseq.output(:genbank)
154
-
155
-
156
-
157
- # bioseq = Bio::SQL.fetch_accession('AJ224122')
158
- # pp bioseq
159
- # pp bioseq.entry_id
160
- #TODO create a test only for tables not sequence here
161
- # pp bioseq.molecule_type
162
- #pp bioseq.molecule_type.class
163
- #bioseq.molecule_type_update('dna', 1)
164
- ## pp Bio::SQL::Taxon.find(8121).taxon_names
165
-
166
- #sqlseq.to_biosequence
167
-
168
- # sqlseq.delete
169
-
170
- # db.destroy
171
- end
172
- #pp bioseq.molecule_type
173
- #term = Bio::SQL::Term.find_by_name('mol_type')
174
- #pp term
175
- #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
176
- #pp bioseq.entry.bioentry_qualifier_values.inspect
177
- #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
178
- #pp primo.class
179
- # pp primo.value='dna'
180
- # pp primo.save
181
- #pp bioseq.molecule_type= 'prova'
182
-
183
- #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
184
-
185
-
186
- end
79
+ end #Bio