bio 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +56 -0
- data/COPYING.ja +51 -0
- data/ChangeLog +540 -0
- data/GPL +340 -0
- data/LEGAL +141 -0
- data/LGPL +504 -0
- data/README.rdoc +4 -2
- data/Rakefile +2 -2
- data/bioruby.gemspec +17 -29
- data/doc/Tutorial.rd +118 -90
- data/doc/Tutorial.rd.html +124 -87
- data/lib/bio/appl/blast.rb +2 -2
- data/lib/bio/appl/blast/format0.rb +1 -1
- data/lib/bio/appl/fasta.rb +5 -12
- data/lib/bio/appl/fasta/format10.rb +96 -6
- data/lib/bio/appl/gcg/msf.rb +11 -14
- data/lib/bio/appl/pts1.rb +0 -4
- data/lib/bio/appl/sim4/report.rb +50 -17
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
- data/lib/bio/db/biosql/sequence.rb +234 -298
- data/lib/bio/db/embl/embl.rb +0 -3
- data/lib/bio/db/genbank/common.rb +3 -1
- data/lib/bio/io/biosql/ar-biosql.rb +257 -0
- data/lib/bio/io/biosql/biosql.rb +39 -0
- data/lib/bio/io/biosql/config/database.yml +5 -4
- data/lib/bio/io/ncbirest.rb +12 -5
- data/lib/bio/io/pubmed.rb +5 -1
- data/lib/bio/io/sql.rb +43 -150
- data/lib/bio/sequence/compat.rb +5 -1
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
- data/lib/bio/version.rb +1 -1
- data/test/data/gcg/pileup-aa.msf +67 -0
- data/test/data/sim4/complement-A4.sim4 +43 -0
- data/test/data/sim4/simple-A4.sim4 +25 -0
- data/test/data/sim4/simple2-A4.sim4 +25 -0
- data/test/functional/bio/io/test_pubmed.rb +129 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
- data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
- data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
- data/test/unit/bio/appl/sim4/test_report.rb +869 -0
- data/test/unit/bio/appl/test_blast.rb +1 -1
- data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
- data/test/unit/bio/test_feature.rb +18 -17
- data/test/unit/bio/test_reference.rb +18 -18
- data/test/unit/bio/test_sequence.rb +1 -1
- metadata +18 -30
- data/lib/bio/io/biosql/biodatabase.rb +0 -64
- data/lib/bio/io/biosql/bioentry.rb +0 -29
- data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
- data/lib/bio/io/biosql/bioentry_path.rb +0 -12
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
- data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
- data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
- data/lib/bio/io/biosql/biosequence.rb +0 -11
- data/lib/bio/io/biosql/comment.rb +0 -7
- data/lib/bio/io/biosql/dbxref.rb +0 -13
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
- data/lib/bio/io/biosql/location.rb +0 -32
- data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
- data/lib/bio/io/biosql/ontology.rb +0 -10
- data/lib/bio/io/biosql/reference.rb +0 -9
- data/lib/bio/io/biosql/seqfeature.rb +0 -32
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
- data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
- data/lib/bio/io/biosql/taxon.rb +0 -12
- data/lib/bio/io/biosql/taxon_name.rb +0 -9
- data/lib/bio/io/biosql/term.rb +0 -27
- data/lib/bio/io/biosql/term_dbxref.rb +0 -11
- data/lib/bio/io/biosql/term_path.rb +0 -12
- data/lib/bio/io/biosql/term_relationship.rb +0 -13
- data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
- data/lib/bio/io/biosql/term_synonym.rb +0 -10
data/lib/bio/db/embl/embl.rb
CHANGED
@@ -158,7 +158,9 @@ module Common
|
|
158
158
|
authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
|
159
159
|
hash['authors'] = authors
|
160
160
|
when /TITLE/
|
161
|
-
hash['title'] = truncate(tag_cut(field))
|
161
|
+
hash['title'] = truncate(tag_cut(field))
|
162
|
+
# CHECK: GenBank does not actually require a dot at the end of TITLE
|
163
|
+
#+ '.'
|
162
164
|
when /JOURNAL/
|
163
165
|
journal = truncate(tag_cut(field))
|
164
166
|
if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
|
@@ -0,0 +1,257 @@
|
|
1
|
+
module Bio
|
2
|
+
class SQL
|
3
|
+
class Biodatabase < DummyBase
|
4
|
+
has_many :bioentries, :class_name =>"Bioentry", :foreign_key => "biodatabase_id"
|
5
|
+
validates_uniqueness_of :name
|
6
|
+
end
|
7
|
+
class BioentryDbxref < DummyBase
|
8
|
+
#delete set_sequence_name nil
|
9
|
+
set_primary_key nil #bioentry_id,dbxref_id
|
10
|
+
belongs_to :bioentry, :class_name => "Bioentry"
|
11
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
12
|
+
end
|
13
|
+
|
14
|
+
class BioentryPath < DummyBase
|
15
|
+
set_primary_key nil
|
16
|
+
#delete set_sequence_name nil
|
17
|
+
belongs_to :term, :class_name => "Term"
|
18
|
+
#to be fixed in order to proceed.
|
19
|
+
belongs_to :object_bioentry, :class_name=>"Bioentry"
|
20
|
+
belongs_to :subject_bioentry, :class_name=>"Bioentry"
|
21
|
+
end #BioentryPath
|
22
|
+
|
23
|
+
class BioentryQualifierValue < DummyBase
|
24
|
+
#NOTE: added rank to primary_keys, now it's finished.
|
25
|
+
set_primary_keys :bioentry_id, :term_id, :rank
|
26
|
+
belongs_to :bioentry, :class_name => "Bioentry"
|
27
|
+
belongs_to :term, :class_name => "Term"
|
28
|
+
end #BioentryQualifierValue
|
29
|
+
|
30
|
+
class Bioentry < DummyBase
|
31
|
+
belongs_to :biodatabase, :class_name => "Biodatabase"
|
32
|
+
belongs_to :taxon, :class_name => "Taxon"
|
33
|
+
has_one :biosequence
|
34
|
+
#, :class_name => "Biosequence", :foreign_key => "bioentry_id"
|
35
|
+
has_many :comments, :class_name =>"Comment", :order =>'rank'
|
36
|
+
has_many :seqfeatures, :class_name => "Seqfeature", :order=>'rank'
|
37
|
+
has_many :bioentry_references, :class_name=>"BioentryReference" #, :foreign_key => "bioentry_id"
|
38
|
+
has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
|
39
|
+
has_many :object_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"object_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
|
40
|
+
has_many :subject_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"subject_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
|
41
|
+
has_many :object_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"object_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
|
42
|
+
has_many :subject_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"subject_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
|
43
|
+
|
44
|
+
has_many :cdsfeatures, :class_name=>"Seqfeature", :foreign_key =>"bioentry_id", :conditions=>["term.name='CDS'"], :include=>"type_term"
|
45
|
+
has_many :references, :through=>:bioentry_references, :class_name => "Reference"
|
46
|
+
has_many :terms, :through=>:bioentry_qualifier_values, :class_name => "Term"
|
47
|
+
#NOTE: added order_by for multiple hit and manage ranks correctly
|
48
|
+
has_many :bioentry_qualifier_values, :order=>"bioentry_id,term_id,rank", :class_name => "BioentryQualifierValue"
|
49
|
+
|
50
|
+
#required for creation:
|
51
|
+
#name, accession, version
|
52
|
+
# validates_uniqueness_of :accession, :scope=>[:biodatabase_id]
|
53
|
+
# validates_uniqueness_of :name, :scope=>[:biodatabase_id]
|
54
|
+
# validates_uniqueness_of :identifier, :scope=>[:biodatabase_id]
|
55
|
+
|
56
|
+
end
|
57
|
+
class BioentryReference < DummyBase
|
58
|
+
set_primary_keys :bioentry_id, :reference_id, :rank
|
59
|
+
belongs_to :bioentry, :class_name => "Bioentry"
|
60
|
+
belongs_to :reference , :class_name => "Reference"
|
61
|
+
end
|
62
|
+
class BioentryRelationship < DummyBase
|
63
|
+
#delete set_primary_key "bioentry_relationship_id"
|
64
|
+
set_sequence_name "bieontry_relationship_pk_seq"
|
65
|
+
belongs_to :object_bioentry, :class_name => "Bioentry"
|
66
|
+
belongs_to :subject_bioentry, :class_name => "Bioentry"
|
67
|
+
belongs_to :term
|
68
|
+
end
|
69
|
+
class Biosequence < DummyBase
|
70
|
+
set_primary_keys :bioentry_id, :version
|
71
|
+
#delete set_sequence_name "biosequence_pk_seq"
|
72
|
+
belongs_to :bioentry, :foreign_key=>"bioentry_id"
|
73
|
+
#has_one :bioentry
|
74
|
+
#, :class_name => "Bioentry"
|
75
|
+
end
|
76
|
+
class Comment < DummyBase
|
77
|
+
belongs_to :bioentry, :class_name => "Bioentry"
|
78
|
+
end
|
79
|
+
class DbxrefQualifierValue < DummyBase
|
80
|
+
#think to use composite primary key
|
81
|
+
set_primary_key nil #dbxref_id, term_id, rank
|
82
|
+
#delete set_sequence_name nil
|
83
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
84
|
+
belongs_to :term, :class_name => "Term"
|
85
|
+
end
|
86
|
+
class Dbxref < DummyBase
|
87
|
+
#set_sequence_name "dbxref_pk_seq"
|
88
|
+
has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
|
89
|
+
has_many :locations, :class_name => "Location"
|
90
|
+
has_many :references, :class_name=>"Reference"
|
91
|
+
has_many :term_dbxrefs, :class_name => "TermDbxref"
|
92
|
+
has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
|
93
|
+
#TODO: check whether there is a has_and_belongs_to_many relationship with bioentry, as specified in the schema overview.
|
94
|
+
end
|
95
|
+
class LocationQualifierValue < DummyBase
|
96
|
+
set_primary_key nil #location_id, term_id
|
97
|
+
#delete set_sequence_name nil
|
98
|
+
belongs_to :location, :class_name => "Location"
|
99
|
+
belongs_to :term, :class_name => "Term"
|
100
|
+
end
|
101
|
+
class Location < DummyBase
|
102
|
+
#set_sequence_name "location_pk_seq"
|
103
|
+
belongs_to :seqfeature, :class_name => "Seqfeature"
|
104
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
105
|
+
belongs_to :term, :class_name => "Term"
|
106
|
+
has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
|
107
|
+
|
108
|
+
def to_s
|
109
|
+
if strand==-1
|
110
|
+
str="complement("+start_pos.to_s+".."+end_pos.to_s+")"
|
111
|
+
else
|
112
|
+
str=start_pos.to_s+".."+end_pos.to_s
|
113
|
+
end
|
114
|
+
return str
|
115
|
+
end
|
116
|
+
|
117
|
+
def sequence
|
118
|
+
seq=""
|
119
|
+
unless self.seqfeature.bioentry.biosequence.seq.nil?
|
120
|
+
seq=Bio::Sequence::NA.new(self.seqfeature.bioentry.biosequence.seq[start_pos-1..end_pos-1])
|
121
|
+
seq.reverse_complement! if strand==-1
|
122
|
+
end
|
123
|
+
return seq
|
124
|
+
end
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
end
|
129
|
+
class Ontology < DummyBase
|
130
|
+
has_many :terms, :class_name => "Term"
|
131
|
+
has_many :term_paths, :class_name => "TermPath"
|
132
|
+
has_many :term_relationships, :class_name => "TermRelationship"
|
133
|
+
end
|
134
|
+
class Reference < DummyBase
|
135
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
136
|
+
has_many :bioentry_references, :class_name=>"BioentryReference"
|
137
|
+
has_many :bioentries, :through=>:bioentry_references
|
138
|
+
end
|
139
|
+
class SeqfeatureDbxref < DummyBase
|
140
|
+
set_primary_keys :seqfeature_id, :dbxref_id
|
141
|
+
#delete set_sequence_name nil
|
142
|
+
belongs_to :seqfeature, :class_name => "Seqfeature", :foreign_key => "seqfeature_id"
|
143
|
+
belongs_to :dbxref, :class_name => "Dbxref", :foreign_key => "dbxref_id"
|
144
|
+
end
|
145
|
+
class SeqfeaturePath < DummyBase
|
146
|
+
set_primary_keys :object_seqfeature_id, :subject_seqfeature_id, :term_id
|
147
|
+
set_sequence_name nil
|
148
|
+
belongs_to :object_seqfeature, :class_name => "Seqfeature", :foreign_key => "object_seqfeature_id"
|
149
|
+
belongs_to :subject_seqfeature, :class_name => "Seqfeature", :foreign_key => "subject_seqfeature_id"
|
150
|
+
belongs_to :term, :class_name => "Term"
|
151
|
+
end
|
152
|
+
class SeqfeatureQualifierValue < DummyBase
|
153
|
+
set_primary_keys :seqfeature_id, :term_id, :rank
|
154
|
+
set_sequence_name nil
|
155
|
+
belongs_to :seqfeature
|
156
|
+
belongs_to :term, :class_name => "Term"
|
157
|
+
end
|
158
|
+
class Seqfeature <DummyBase
|
159
|
+
set_sequence_name "seqfeature_pk_seq"
|
160
|
+
belongs_to :bioentry
|
161
|
+
#, :class_name => "Bioentry"
|
162
|
+
belongs_to :type_term, :class_name => "Term", :foreign_key => "type_term_id"
|
163
|
+
belongs_to :source_term, :class_name => "Term", :foreign_key =>"source_term_id"
|
164
|
+
has_many :seqfeature_dbxrefs, :class_name => "SeqfeatureDbxref", :foreign_key => "seqfeature_id"
|
165
|
+
has_many :seqfeature_qualifier_values, :order=>'rank', :foreign_key => "seqfeature_id"
|
166
|
+
#, :class_name => "SeqfeatureQualifierValue"
|
167
|
+
has_many :locations, :class_name => "Location", :order=>'rank'
|
168
|
+
has_many :object_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "object_seqfeature_id"
|
169
|
+
has_many :subject_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "subject_seqfeature_id"
|
170
|
+
has_many :object_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "object_seqfeature_id"
|
171
|
+
has_many :subject_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "subject_seqfeature_id"
|
172
|
+
|
173
|
+
#get the subsequence described by the locations objects
|
174
|
+
def sequence
|
175
|
+
return self.locations.inject(Bio::Sequence::NA.new("")){|seq, location| seq<<location.sequence}
|
176
|
+
end
|
177
|
+
|
178
|
+
#translate the subsequences represented by the feature and its locations
|
179
|
+
#not considering the qualifiers
|
180
|
+
#Return a Bio::Sequence::AA object
|
181
|
+
def translate(*args)
|
182
|
+
self.sequence.translate(*args)
|
183
|
+
end
|
184
|
+
end
|
185
|
+
class SeqfeatureRelationship <DummyBase
|
186
|
+
set_sequence_name "seqfeatue_relationship_pk_seq"
|
187
|
+
belongs_to :term, :class_name => "Term"
|
188
|
+
belongs_to :object_seqfeature, :class_name => "Seqfeature"
|
189
|
+
belongs_to :subject_seqfeature, :class_name => "Seqfeature"
|
190
|
+
end
|
191
|
+
class TaxonName < DummyBase
|
192
|
+
set_primary_keys :taxon_id, :name, :name_class
|
193
|
+
belongs_to :taxon, :class_name => "Taxon"
|
194
|
+
end
|
195
|
+
class Taxon < DummyBase
|
196
|
+
set_sequence_name "taxon_pk_seq"
|
197
|
+
has_many :taxon_names, :class_name => "TaxonName"
|
198
|
+
has_one :taxon_scientific_name, :class_name => "TaxonName", :conditions=>"name_class = 'scientific name'"
|
199
|
+
has_one :taxon_genbank_common_name, :class_name => "TaxonName", :conditions=>"name_class = 'genbank common name'"
|
200
|
+
has_one :bioentry, :class_name => "Bioentry"
|
201
|
+
end
|
202
|
+
class TermDbxref < DummyBase
|
203
|
+
set_primary_key nil #term_id, dbxref_id
|
204
|
+
#delete set_sequence_name nil
|
205
|
+
belongs_to :term, :class_name => "Term"
|
206
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
207
|
+
end
|
208
|
+
class TermPath < DummyBase
|
209
|
+
set_sequence_name "term_path_pk_seq"
|
210
|
+
belongs_to :ontology, :class_name => "Ontology"
|
211
|
+
belongs_to :subject_term, :class_name => "Term"
|
212
|
+
belongs_to :object_term, :class_name => "Term"
|
213
|
+
belongs_to :predicate_term, :class_name => "Term"
|
214
|
+
end
|
215
|
+
class Term < DummyBase
|
216
|
+
belongs_to :ontology, :class_name => "Ontology"
|
217
|
+
has_many :seqfeature_qualifier_values, :class_name => "SeqfeatureQualifierValue"
|
218
|
+
has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
|
219
|
+
has_many :bioentry_qualifer_values, :class_name => "BioentryQualifierValue"
|
220
|
+
has_many :bioentries, :through=>:bioentry_qualifier_values
|
221
|
+
has_many :locations, :class_name => "Location"
|
222
|
+
has_many :seqfeature_relationships, :class_name => "SeqfeatureRelationship"
|
223
|
+
has_many :term_dbxrefs, :class_name => "TermDbxref"
|
224
|
+
has_many :term_relationship_terms, :class_name => "TermRelationshipTerm"
|
225
|
+
has_many :term_synonyms, :class_name => "TermSynonym"
|
226
|
+
has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
|
227
|
+
has_many :seqfeature_types, :class_name => "Seqfeature", :foreign_key => "type_term_id"
|
228
|
+
has_many :seqfeature_sources, :class_name => "Seqfeature", :foreign_key => "source_term_id"
|
229
|
+
has_many :term_path_subjects, :class_name => "TermPath", :foreign_key => "subject_term_id"
|
230
|
+
has_many :term_path_predicates, :class_name => "TermPath", :foreign_key => "predicate_term_id"
|
231
|
+
has_many :term_path_objects, :class_name => "TermPath", :foreign_key => "object_term_id"
|
232
|
+
has_many :term_relationship_subjects, :class_name => "TermRelationship", :foreign_key =>"subject_term_id"
|
233
|
+
has_many :term_relationship_predicates, :class_name => "TermRelationship", :foreign_key =>"predicate_term_id"
|
234
|
+
has_many :term_relationship_objects, :class_name => "TermRelationship", :foreign_key =>"object_term_id"
|
235
|
+
has_many :seqfeature_paths, :class_name => "SeqfeaturePath"
|
236
|
+
end
|
237
|
+
class TermRelationship < DummyBase
|
238
|
+
set_sequence_name "term_relationship_pk_seq"
|
239
|
+
belongs_to :ontology, :class_name => "Ontology"
|
240
|
+
belongs_to :subject_term, :class_name => "Term"
|
241
|
+
belongs_to :predicate_term, :class_name => "Term"
|
242
|
+
belongs_to :object_term, :class_name => "Term"
|
243
|
+
has_one :term_relationship_term, :class_name => "TermRelationshipTerm"
|
244
|
+
end
|
245
|
+
class TermRelationshipTerm < DummyBase
|
246
|
+
#delete set_sequence_name nil
|
247
|
+
set_primary_key :term_relationship_id
|
248
|
+
belongs_to :term_relationship, :class_name => "TermRelationship"
|
249
|
+
belongs_to :term, :class_name => "Term"
|
250
|
+
end
|
251
|
+
class TermSynonym < DummyBase
|
252
|
+
#delete set_sequence_name nil
|
253
|
+
set_primary_key nil
|
254
|
+
belongs_to :term, :class_name => "Term"
|
255
|
+
end
|
256
|
+
end #SQL
|
257
|
+
end #Bio
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#require 'dm-ar-finders'
|
2
|
+
#require 'dm-core'
|
3
|
+
require 'erb'
|
4
|
+
require 'composite_primary_keys'
|
5
|
+
|
6
|
+
module Bio
|
7
|
+
class SQL
|
8
|
+
class DummyBase < ActiveRecord::Base
|
9
|
+
#NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
|
10
|
+
#NOTE: this class will not establish the connection automatically
|
11
|
+
self.abstract_class = true
|
12
|
+
self.pluralize_table_names = false
|
13
|
+
#prepend table name to the usual id, avoid to specify primary id for every table
|
14
|
+
self.primary_key_prefix_type = :table_name_with_underscore
|
15
|
+
#biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'./config', 'database.yml'))).result)
|
16
|
+
#self.configurations=biosql_configurations
|
17
|
+
#self.establish_connection "development"
|
18
|
+
end #DummyBase
|
19
|
+
|
20
|
+
require 'bio/io/biosql/ar-biosql'
|
21
|
+
|
22
|
+
# #no check is made
|
23
|
+
def self.establish_connection(configurations, env)
|
24
|
+
# #configurations is a hash similar to what YAML returns.
|
25
|
+
|
26
|
+
#configurations.assert_valid_keys('development', 'production','test')
|
27
|
+
#configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
|
28
|
+
DummyBase.configurations = configurations
|
29
|
+
connection = DummyBase.establish_connection "#{env}"
|
30
|
+
#Init of basis terms and ontologies
|
31
|
+
Ontology.first(:conditions => ["name = ?", 'Annotation Tags']) || Ontology.create({:name => 'Annotation Tags'})
|
32
|
+
Ontology.first(:conditions => ["name = ?", 'SeqFeature Keys']) || Ontology.create({:name => 'SeqFeature Keys'})
|
33
|
+
Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources']) ||Ontology.create({:name => 'SeqFeature Sources'})
|
34
|
+
Term.first(:conditions => ["name = ?", 'EMBLGenBankSwit']) || Term.create({:name => 'EMBLGenBankSwit', :ontology => Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources'])})
|
35
|
+
connection
|
36
|
+
end #establish_connection
|
37
|
+
|
38
|
+
end #SQL
|
39
|
+
end #Bio
|
@@ -2,10 +2,11 @@
|
|
2
2
|
#User can configure its db here
|
3
3
|
|
4
4
|
development:
|
5
|
-
adapter:
|
6
|
-
database:
|
7
|
-
username:
|
8
|
-
password:
|
5
|
+
adapter: jdbcmysql
|
6
|
+
database: bioseq
|
7
|
+
username: febo
|
8
|
+
password:
|
9
|
+
hostname: localhost
|
9
10
|
|
10
11
|
test:
|
11
12
|
adapter: postgresql
|
data/lib/bio/io/ncbirest.rb
CHANGED
@@ -26,8 +26,10 @@ class REST
|
|
26
26
|
# weekdays for any series of more than 100 requests.
|
27
27
|
# -> Not implemented yet in BioRuby
|
28
28
|
|
29
|
-
# Make no more than one request every
|
30
|
-
|
29
|
+
# Make no more than one request every 1 second.
|
30
|
+
# (NCBI's restriction is "Make no more than 3 requests every 1 second.",
|
31
|
+
# but limited to 1/sec partly because of keeping the value in integer.)
|
32
|
+
NCBI_INTERVAL = 1
|
31
33
|
@@last_access = nil
|
32
34
|
|
33
35
|
private
|
@@ -127,10 +129,10 @@ class REST
|
|
127
129
|
# * _mindate_: 2001
|
128
130
|
# * _maxdate_: 2002/01/01
|
129
131
|
# * _datetype_: "edat"
|
130
|
-
# * _limit_: maximum number of entries to be returned (0 for unlimited)
|
132
|
+
# * _limit_: maximum number of entries to be returned (0 for unlimited; nil for the "retmax" value in the hash or the internal default value (=100))
|
131
133
|
# * _step_: maximum number of entries retrieved at a time
|
132
134
|
# *Returns*:: array of entry IDs or a number of results
|
133
|
-
def esearch(str, hash = {}, limit =
|
135
|
+
def esearch(str, hash = {}, limit = nil, step = 10000)
|
134
136
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
135
137
|
opts = {
|
136
138
|
"tool" => "bioruby",
|
@@ -143,12 +145,17 @@ class REST
|
|
143
145
|
count = esearch_count(str, opts)
|
144
146
|
return count
|
145
147
|
else
|
148
|
+
retstart = 0
|
149
|
+
retstart = hash["retstart"].to_i if hash["retstart"]
|
150
|
+
|
151
|
+
limit ||= hash["retmax"].to_i if hash["retmax"]
|
152
|
+
limit ||= 100 # default limit is 100
|
146
153
|
limit = esearch_count(str, opts) if limit == 0 # unlimit
|
147
154
|
|
148
155
|
list = []
|
149
156
|
0.step(limit, step) do |i|
|
150
157
|
retmax = [step, limit - i].min
|
151
|
-
opts.update("retmax" => retmax, "retstart" => i)
|
158
|
+
opts.update("retmax" => retmax, "retstart" => i + retstart)
|
152
159
|
ncbi_access_wait
|
153
160
|
response = Bio::Command.post_form(serv, opts)
|
154
161
|
result = response.body
|
data/lib/bio/io/pubmed.rb
CHANGED
@@ -117,7 +117,11 @@ class PubMed < Bio::NCBI::REST
|
|
117
117
|
def efetch(ids, hash = {})
|
118
118
|
opts = { "db" => "pubmed", "rettype" => "medline" }
|
119
119
|
opts.update(hash)
|
120
|
-
super(ids, opts)
|
120
|
+
result = super(ids, opts)
|
121
|
+
if !opts["retmode"] or opts["retmode"] == "text"
|
122
|
+
result = result.split(/\n\n+/)
|
123
|
+
end
|
124
|
+
result
|
121
125
|
end
|
122
126
|
|
123
127
|
# Search the PubMed database by given keywords using entrez query and returns
|
data/lib/bio/io/sql.rb
CHANGED
@@ -1,186 +1,79 @@
|
|
1
|
+
#module Bio
|
2
|
+
# class SQL
|
3
|
+
# #no check is made
|
4
|
+
# def self.establish_connection(configurations, env)
|
5
|
+
# #configurations is an hash similar what YAML returns.
|
6
|
+
# #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
|
7
|
+
# configurations.assert_valid_keys('development', 'production','test')
|
8
|
+
# configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
|
9
|
+
# DummyBase.configurations = configurations
|
10
|
+
# DummyBase.establish_connection "#{env}"
|
11
|
+
#end
|
1
12
|
|
2
|
-
|
3
|
-
require '
|
4
|
-
require 'composite_primary_keys'
|
13
|
+
|
14
|
+
#require 'rubygems'
|
15
|
+
#require 'composite_primary_keys'
|
16
|
+
#require 'erb'
|
5
17
|
# BiosqlPlug
|
6
18
|
|
7
19
|
=begin
|
8
20
|
Ok Hilmar gives to me some clarification
|
9
21
|
1) "EMBL/GenBank/SwissProt" name in term table, is only a convention assuming data loaded by GenBank, EMBL and SwissProt formats.
|
10
|
-
If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
|
22
|
+
If your features come from other sources, for example blast or alignment ... whatever.. the user has to take care of the source.
|
11
23
|
|
12
24
|
|
13
25
|
=end
|
14
26
|
=begin
|
15
27
|
TODO:
|
16
28
|
1) source_term_id => source_term and check before if the source term is present or not and the level, the root should always be something "EMBL/GenBank/SwissProt" or contextualized.
|
17
|
-
2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
|
29
|
+
2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
|
18
30
|
3) Chk Locations in Biofeatures ArSQL
|
19
31
|
=end
|
20
32
|
module Bio
|
21
|
-
class SQL
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
configurations.assert_valid_keys('development', 'production','test')
|
27
|
-
configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
|
28
|
-
DummyBase.configurations = configurations
|
29
|
-
DummyBase.establish_connection "#{env}"
|
30
|
-
end
|
31
|
-
|
33
|
+
class SQL
|
34
|
+
|
35
|
+
require 'bio/io/biosql/biosql'
|
36
|
+
autoload :Sequence, 'bio/db/biosql/sequence'
|
37
|
+
|
32
38
|
def self.fetch_id(id)
|
33
39
|
Bio::SQL::Bioentry.find(id)
|
34
40
|
end
|
35
|
-
|
41
|
+
|
36
42
|
def self.fetch_accession(accession)
|
37
|
-
|
38
|
-
Bio::SQL::
|
43
|
+
# Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
|
44
|
+
Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession.upcase))
|
39
45
|
end
|
40
|
-
|
46
|
+
|
41
47
|
def self.exists_accession(accession)
|
42
|
-
Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
|
48
|
+
# Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
|
49
|
+
!Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil?
|
43
50
|
end
|
44
|
-
|
51
|
+
|
45
52
|
def self.exists_database(name)
|
46
|
-
Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
|
53
|
+
# Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
|
54
|
+
!Bio::SQL::Biodatabase.first(:name=>name).nil?
|
47
55
|
end
|
48
|
-
|
56
|
+
|
49
57
|
def self.list_entries
|
50
|
-
Bio::SQL::Bioentry.
|
58
|
+
Bio::SQL::Bioentry.all.collect do|entry|
|
51
59
|
{:id=>entry.bioentry_id, :accession=>entry.accession}
|
52
|
-
|
60
|
+
end
|
53
61
|
end
|
54
|
-
|
62
|
+
|
55
63
|
def self.list_databases
|
56
|
-
Bio::SQL::Biodatabase.
|
64
|
+
Bio::SQL::Biodatabase.all.collect do|entry|
|
57
65
|
{:id=>entry.biodatabase_id, :name => entry.name}
|
58
|
-
|
66
|
+
end
|
59
67
|
end
|
60
|
-
|
68
|
+
|
61
69
|
def self.delete_entry_id(id)
|
62
|
-
Bioentry.delete(id)
|
70
|
+
Bio::SQL::Bioentry.delete(id)
|
63
71
|
end
|
64
|
-
|
72
|
+
|
65
73
|
def self.delete_entry_accession(accession)
|
66
|
-
Bioentry.
|
74
|
+
Bio::SQL::Bioentry.find_by_accession(accession.upcase).destroy!
|
67
75
|
end
|
68
|
-
|
69
|
-
|
70
|
-
class DummyBase < ActiveRecord::Base
|
71
|
-
#NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
|
72
|
-
#NOTE: this class will not establish the connection automatically
|
73
|
-
self.abstract_class = true
|
74
|
-
self.pluralize_table_names = false
|
75
|
-
#prepend table name to the usual id, avoid to specify primary id for every table
|
76
|
-
self.primary_key_prefix_type = :table_name_with_underscore
|
77
|
-
#biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
|
78
|
-
#self.configurations=biosql_configurations
|
79
|
-
#self.establish_connection "development"
|
80
|
-
end #DummyBase
|
81
|
-
|
82
|
-
autoload :Biodatabase, 'bio/io/biosql/biodatabase'
|
83
|
-
autoload :Bioentry, 'bio/io/biosql/bioentry'
|
84
|
-
autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
|
85
|
-
autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
|
86
|
-
autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
|
87
|
-
autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
|
88
|
-
autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
|
89
|
-
autoload :Biosequence, 'bio/io/biosql/biosequence'
|
90
|
-
autoload :Comment, 'bio/io/biosql/comment'
|
91
|
-
autoload :Dbxref, 'bio/io/biosql/dbxref'
|
92
|
-
autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
|
93
|
-
autoload :Location, 'bio/io/biosql/location'
|
94
|
-
autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
|
95
|
-
autoload :Ontology, 'bio/io/biosql/ontology'
|
96
|
-
autoload :Reference, 'bio/io/biosql/reference'
|
97
|
-
autoload :Seqfeature, 'bio/io/biosql/seqfeature'
|
98
|
-
autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
|
99
|
-
autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
|
100
|
-
autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
|
101
|
-
autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
|
102
|
-
autoload :Taxon, 'bio/io/biosql/taxon'
|
103
|
-
autoload :TaxonName, 'bio/io/biosql/taxon_name'
|
104
|
-
autoload :Term, 'bio/io/biosql/term'
|
105
|
-
autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
|
106
|
-
autoload :TermPath, 'bio/io/biosql/term_path'
|
107
|
-
autoload :TermRelationship, 'bio/io/biosql/term_relationship'
|
108
|
-
autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
|
109
|
-
autoload :Sequence, 'bio/db/biosql/sequence'
|
110
|
-
end #biosql
|
111
|
-
|
112
|
-
end #Bio
|
113
76
|
|
114
|
-
|
115
|
-
require 'rubygems'
|
116
|
-
require 'composite_primary_keys'
|
117
|
-
require 'bio'
|
118
|
-
require 'pp'
|
119
|
-
|
120
|
-
# pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
|
121
|
-
connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
|
122
|
-
#pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
|
123
|
-
if true
|
124
|
-
#Bio::SQL.list_entries
|
125
|
-
|
126
|
-
# biosequence = data.to_biosequence
|
127
|
-
# puts biosequence.output(:genbank)
|
128
|
-
db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
|
129
|
-
db.save!
|
130
|
-
|
131
|
-
puts "### FileFile.auto"
|
132
|
-
if ARGV.size > 0
|
133
|
-
#embl = Bio::FlatFile.auto(ARGF.read)
|
134
|
-
Bio::FlatFile.auto(ARGF) do |ff|
|
135
|
-
ff.each do |data|
|
136
|
-
biosequence=data.to_biosequence
|
137
|
-
puts biosequence.output(:fasta)
|
138
|
-
sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
|
139
|
-
sqlseq.save
|
140
|
-
sqlseq.to_biosequence.output(:fasta)
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
require 'bio/io/fetch'
|
145
|
-
server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
|
146
|
-
data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
|
147
|
-
end
|
77
|
+
end #biosql
|
148
78
|
|
149
|
-
|
150
|
-
# sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
|
151
|
-
# sqlseq.save
|
152
|
-
# sqlseq_bioseq=sqlseq.to_biosequence
|
153
|
-
# puts sqlseq_bioseq.output(:genbank)
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
# bioseq = Bio::SQL.fetch_accession('AJ224122')
|
158
|
-
# pp bioseq
|
159
|
-
# pp bioseq.entry_id
|
160
|
-
#TODO create a test only for tables not sequence here
|
161
|
-
# pp bioseq.molecule_type
|
162
|
-
#pp bioseq.molecule_type.class
|
163
|
-
#bioseq.molecule_type_update('dna', 1)
|
164
|
-
## pp Bio::SQL::Taxon.find(8121).taxon_names
|
165
|
-
|
166
|
-
#sqlseq.to_biosequence
|
167
|
-
|
168
|
-
# sqlseq.delete
|
169
|
-
|
170
|
-
# db.destroy
|
171
|
-
end
|
172
|
-
#pp bioseq.molecule_type
|
173
|
-
#term = Bio::SQL::Term.find_by_name('mol_type')
|
174
|
-
#pp term
|
175
|
-
#pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
|
176
|
-
#pp bioseq.entry.bioentry_qualifier_values.inspect
|
177
|
-
#pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
|
178
|
-
#pp primo.class
|
179
|
-
# pp primo.value='dna'
|
180
|
-
# pp primo.save
|
181
|
-
#pp bioseq.molecule_type= 'prova'
|
182
|
-
|
183
|
-
#Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
|
184
|
-
|
185
|
-
|
186
|
-
end
|
79
|
+
end #Bio
|