bio 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +56 -0
- data/COPYING.ja +51 -0
- data/ChangeLog +540 -0
- data/GPL +340 -0
- data/LEGAL +141 -0
- data/LGPL +504 -0
- data/README.rdoc +4 -2
- data/Rakefile +2 -2
- data/bioruby.gemspec +17 -29
- data/doc/Tutorial.rd +118 -90
- data/doc/Tutorial.rd.html +124 -87
- data/lib/bio/appl/blast.rb +2 -2
- data/lib/bio/appl/blast/format0.rb +1 -1
- data/lib/bio/appl/fasta.rb +5 -12
- data/lib/bio/appl/fasta/format10.rb +96 -6
- data/lib/bio/appl/gcg/msf.rb +11 -14
- data/lib/bio/appl/pts1.rb +0 -4
- data/lib/bio/appl/sim4/report.rb +50 -17
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
- data/lib/bio/db/biosql/sequence.rb +234 -298
- data/lib/bio/db/embl/embl.rb +0 -3
- data/lib/bio/db/genbank/common.rb +3 -1
- data/lib/bio/io/biosql/ar-biosql.rb +257 -0
- data/lib/bio/io/biosql/biosql.rb +39 -0
- data/lib/bio/io/biosql/config/database.yml +5 -4
- data/lib/bio/io/ncbirest.rb +12 -5
- data/lib/bio/io/pubmed.rb +5 -1
- data/lib/bio/io/sql.rb +43 -150
- data/lib/bio/sequence/compat.rb +5 -1
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
- data/lib/bio/version.rb +1 -1
- data/test/data/gcg/pileup-aa.msf +67 -0
- data/test/data/sim4/complement-A4.sim4 +43 -0
- data/test/data/sim4/simple-A4.sim4 +25 -0
- data/test/data/sim4/simple2-A4.sim4 +25 -0
- data/test/functional/bio/io/test_pubmed.rb +129 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
- data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
- data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
- data/test/unit/bio/appl/sim4/test_report.rb +869 -0
- data/test/unit/bio/appl/test_blast.rb +1 -1
- data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
- data/test/unit/bio/test_feature.rb +18 -17
- data/test/unit/bio/test_reference.rb +18 -18
- data/test/unit/bio/test_sequence.rb +1 -1
- metadata +18 -30
- data/lib/bio/io/biosql/biodatabase.rb +0 -64
- data/lib/bio/io/biosql/bioentry.rb +0 -29
- data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
- data/lib/bio/io/biosql/bioentry_path.rb +0 -12
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
- data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
- data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
- data/lib/bio/io/biosql/biosequence.rb +0 -11
- data/lib/bio/io/biosql/comment.rb +0 -7
- data/lib/bio/io/biosql/dbxref.rb +0 -13
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
- data/lib/bio/io/biosql/location.rb +0 -32
- data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
- data/lib/bio/io/biosql/ontology.rb +0 -10
- data/lib/bio/io/biosql/reference.rb +0 -9
- data/lib/bio/io/biosql/seqfeature.rb +0 -32
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
- data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
- data/lib/bio/io/biosql/taxon.rb +0 -12
- data/lib/bio/io/biosql/taxon_name.rb +0 -9
- data/lib/bio/io/biosql/term.rb +0 -27
- data/lib/bio/io/biosql/term_dbxref.rb +0 -11
- data/lib/bio/io/biosql/term_path.rb +0 -12
- data/lib/bio/io/biosql/term_relationship.rb +0 -13
- data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
- data/lib/bio/io/biosql/term_synonym.rb +0 -10
data/lib/bio/db/embl/embl.rb
CHANGED
@@ -158,7 +158,9 @@ module Common
|
|
158
158
|
authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
|
159
159
|
hash['authors'] = authors
|
160
160
|
when /TITLE/
|
161
|
-
hash['title'] = truncate(tag_cut(field))
|
161
|
+
hash['title'] = truncate(tag_cut(field))
|
162
|
+
          # CHECK: GenBank does not actually require a dot at the end of TITLE
|
163
|
+
#+ '.'
|
162
164
|
when /JOURNAL/
|
163
165
|
journal = truncate(tag_cut(field))
|
164
166
|
if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
|
@@ -0,0 +1,257 @@
|
|
1
|
+
module Bio
|
2
|
+
class SQL
|
3
|
+
class Biodatabase < DummyBase
|
4
|
+
has_many :bioentries, :class_name =>"Bioentry", :foreign_key => "biodatabase_id"
|
5
|
+
validates_uniqueness_of :name
|
6
|
+
end
|
7
|
+
class BioentryDbxref < DummyBase
|
8
|
+
#delete set_sequence_name nil
|
9
|
+
set_primary_key nil #bioentry_id,dbxref_id
|
10
|
+
belongs_to :bioentry, :class_name => "Bioentry"
|
11
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
12
|
+
end
|
13
|
+
|
14
|
+
class BioentryPath < DummyBase
|
15
|
+
set_primary_key nil
|
16
|
+
#delete set_sequence_name nil
|
17
|
+
belongs_to :term, :class_name => "Term"
|
18
|
+
      #to be fixed before we can proceed.
|
19
|
+
belongs_to :object_bioentry, :class_name=>"Bioentry"
|
20
|
+
belongs_to :subject_bioentry, :class_name=>"Bioentry"
|
21
|
+
end #BioentryPath
|
22
|
+
|
23
|
+
class BioentryQualifierValue < DummyBase
|
24
|
+
#NOTE: added rank to primary_keys, now it's finished.
|
25
|
+
set_primary_keys :bioentry_id, :term_id, :rank
|
26
|
+
belongs_to :bioentry, :class_name => "Bioentry"
|
27
|
+
belongs_to :term, :class_name => "Term"
|
28
|
+
end #BioentryQualifierValue
|
29
|
+
|
30
|
+
class Bioentry < DummyBase
|
31
|
+
belongs_to :biodatabase, :class_name => "Biodatabase"
|
32
|
+
belongs_to :taxon, :class_name => "Taxon"
|
33
|
+
has_one :biosequence
|
34
|
+
#, :class_name => "Biosequence", :foreign_key => "bioentry_id"
|
35
|
+
has_many :comments, :class_name =>"Comment", :order =>'rank'
|
36
|
+
has_many :seqfeatures, :class_name => "Seqfeature", :order=>'rank'
|
37
|
+
has_many :bioentry_references, :class_name=>"BioentryReference" #, :foreign_key => "bioentry_id"
|
38
|
+
has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
|
39
|
+
has_many :object_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"object_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
|
40
|
+
has_many :subject_bioentry_relationships, :class_name=>"BioentryRelationship", :foreign_key=>"subject_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
|
41
|
+
has_many :object_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"object_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
|
42
|
+
has_many :subject_bioentry_paths, :class_name=>"BioentryPath", :foreign_key=>"subject_bioentry_id" #non mi convince molto credo non funzioni nel modo corretto
|
43
|
+
|
44
|
+
has_many :cdsfeatures, :class_name=>"Seqfeature", :foreign_key =>"bioentry_id", :conditions=>["term.name='CDS'"], :include=>"type_term"
|
45
|
+
has_many :references, :through=>:bioentry_references, :class_name => "Reference"
|
46
|
+
has_many :terms, :through=>:bioentry_qualifier_values, :class_name => "Term"
|
47
|
+
#NOTE: added order_by for multiple hit and manage ranks correctly
|
48
|
+
has_many :bioentry_qualifier_values, :order=>"bioentry_id,term_id,rank", :class_name => "BioentryQualifierValue"
|
49
|
+
|
50
|
+
      #required for creation:
|
51
|
+
#name, accession, version
|
52
|
+
# validates_uniqueness_of :accession, :scope=>[:biodatabase_id]
|
53
|
+
# validates_uniqueness_of :name, :scope=>[:biodatabase_id]
|
54
|
+
# validates_uniqueness_of :identifier, :scope=>[:biodatabase_id]
|
55
|
+
|
56
|
+
end
|
57
|
+
class BioentryReference < DummyBase
|
58
|
+
set_primary_keys :bioentry_id, :reference_id, :rank
|
59
|
+
belongs_to :bioentry, :class_name => "Bioentry"
|
60
|
+
belongs_to :reference , :class_name => "Reference"
|
61
|
+
end
|
62
|
+
class BioentryRelationship < DummyBase
|
63
|
+
#delete set_primary_key "bioentry_relationship_id"
|
64
|
+
set_sequence_name "bieontry_relationship_pk_seq"
|
65
|
+
belongs_to :object_bioentry, :class_name => "Bioentry"
|
66
|
+
belongs_to :subject_bioentry, :class_name => "Bioentry"
|
67
|
+
belongs_to :term
|
68
|
+
end
|
69
|
+
class Biosequence < DummyBase
|
70
|
+
set_primary_keys :bioentry_id, :version
|
71
|
+
#delete set_sequence_name "biosequence_pk_seq"
|
72
|
+
belongs_to :bioentry, :foreign_key=>"bioentry_id"
|
73
|
+
#has_one :bioentry
|
74
|
+
#, :class_name => "Bioentry"
|
75
|
+
end
|
76
|
+
class Comment < DummyBase
|
77
|
+
belongs_to :bioentry, :class_name => "Bioentry"
|
78
|
+
end
|
79
|
+
class DbxrefQualifierValue < DummyBase
|
80
|
+
#think to use composite primary key
|
81
|
+
set_primary_key nil #dbxref_id, term_id, rank
|
82
|
+
#delete set_sequence_name nil
|
83
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
84
|
+
belongs_to :term, :class_name => "Term"
|
85
|
+
end
|
86
|
+
class Dbxref < DummyBase
|
87
|
+
#set_sequence_name "dbxref_pk_seq"
|
88
|
+
has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
|
89
|
+
has_many :locations, :class_name => "Location"
|
90
|
+
has_many :references, :class_name=>"Reference"
|
91
|
+
has_many :term_dbxrefs, :class_name => "TermDbxref"
|
92
|
+
has_many :bioentry_dbxrefs, :class_name => "BioentryDbxref"
|
93
|
+
      #TODO: check whether there is a has_and_belongs_to_many relationship with bioentry, as specified in the schema overview.
|
94
|
+
end
|
95
|
+
class LocationQualifierValue < DummyBase
|
96
|
+
set_primary_key nil #location_id, term_id
|
97
|
+
#delete set_sequence_name nil
|
98
|
+
belongs_to :location, :class_name => "Location"
|
99
|
+
belongs_to :term, :class_name => "Term"
|
100
|
+
end
|
101
|
+
class Location < DummyBase
|
102
|
+
#set_sequence_name "location_pk_seq"
|
103
|
+
belongs_to :seqfeature, :class_name => "Seqfeature"
|
104
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
105
|
+
belongs_to :term, :class_name => "Term"
|
106
|
+
has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
|
107
|
+
|
108
|
+
def to_s
|
109
|
+
if strand==-1
|
110
|
+
str="complement("+start_pos.to_s+".."+end_pos.to_s+")"
|
111
|
+
else
|
112
|
+
str=start_pos.to_s+".."+end_pos.to_s
|
113
|
+
end
|
114
|
+
return str
|
115
|
+
end
|
116
|
+
|
117
|
+
def sequence
|
118
|
+
seq=""
|
119
|
+
unless self.seqfeature.bioentry.biosequence.seq.nil?
|
120
|
+
seq=Bio::Sequence::NA.new(self.seqfeature.bioentry.biosequence.seq[start_pos-1..end_pos-1])
|
121
|
+
seq.reverse_complement! if strand==-1
|
122
|
+
end
|
123
|
+
return seq
|
124
|
+
end
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
end
|
129
|
+
class Ontology < DummyBase
|
130
|
+
has_many :terms, :class_name => "Term"
|
131
|
+
has_many :term_paths, :class_name => "TermPath"
|
132
|
+
has_many :term_relationships, :class_name => "TermRelationship"
|
133
|
+
end
|
134
|
+
class Reference < DummyBase
|
135
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
136
|
+
has_many :bioentry_references, :class_name=>"BioentryReference"
|
137
|
+
has_many :bioentries, :through=>:bioentry_references
|
138
|
+
end
|
139
|
+
class SeqfeatureDbxref < DummyBase
|
140
|
+
set_primary_keys :seqfeature_id, :dbxref_id
|
141
|
+
#delete set_sequence_name nil
|
142
|
+
belongs_to :seqfeature, :class_name => "Seqfeature", :foreign_key => "seqfeature_id"
|
143
|
+
belongs_to :dbxref, :class_name => "Dbxref", :foreign_key => "dbxref_id"
|
144
|
+
end
|
145
|
+
class SeqfeaturePath < DummyBase
|
146
|
+
set_primary_keys :object_seqfeature_id, :subject_seqfeature_id, :term_id
|
147
|
+
set_sequence_name nil
|
148
|
+
belongs_to :object_seqfeature, :class_name => "Seqfeature", :foreign_key => "object_seqfeature_id"
|
149
|
+
belongs_to :subject_seqfeature, :class_name => "Seqfeature", :foreign_key => "subject_seqfeature_id"
|
150
|
+
belongs_to :term, :class_name => "Term"
|
151
|
+
end
|
152
|
+
class SeqfeatureQualifierValue < DummyBase
|
153
|
+
set_primary_keys :seqfeature_id, :term_id, :rank
|
154
|
+
set_sequence_name nil
|
155
|
+
belongs_to :seqfeature
|
156
|
+
belongs_to :term, :class_name => "Term"
|
157
|
+
end
|
158
|
+
class Seqfeature <DummyBase
|
159
|
+
set_sequence_name "seqfeature_pk_seq"
|
160
|
+
belongs_to :bioentry
|
161
|
+
#, :class_name => "Bioentry"
|
162
|
+
belongs_to :type_term, :class_name => "Term", :foreign_key => "type_term_id"
|
163
|
+
belongs_to :source_term, :class_name => "Term", :foreign_key =>"source_term_id"
|
164
|
+
has_many :seqfeature_dbxrefs, :class_name => "SeqfeatureDbxref", :foreign_key => "seqfeature_id"
|
165
|
+
has_many :seqfeature_qualifier_values, :order=>'rank', :foreign_key => "seqfeature_id"
|
166
|
+
#, :class_name => "SeqfeatureQualifierValue"
|
167
|
+
has_many :locations, :class_name => "Location", :order=>'rank'
|
168
|
+
has_many :object_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "object_seqfeature_id"
|
169
|
+
has_many :subject_seqfeature_paths, :class_name => "SeqfeaturePath", :foreign_key => "subject_seqfeature_id"
|
170
|
+
has_many :object_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "object_seqfeature_id"
|
171
|
+
has_many :subject_seqfeature_relationships, :class_name => "SeqfeatureRelationship", :foreign_key => "subject_seqfeature_id"
|
172
|
+
|
173
|
+
#get the subsequence described by the locations objects
|
174
|
+
def sequence
|
175
|
+
return self.locations.inject(Bio::Sequence::NA.new("")){|seq, location| seq<<location.sequence}
|
176
|
+
end
|
177
|
+
|
178
|
+
#translate the subsequences represented by the feature and its locations
|
179
|
+
#not considering the qualifiers
|
180
|
+
#Return a Bio::Sequence::AA object
|
181
|
+
def translate(*args)
|
182
|
+
self.sequence.translate(*args)
|
183
|
+
end
|
184
|
+
end
|
185
|
+
class SeqfeatureRelationship <DummyBase
|
186
|
+
set_sequence_name "seqfeatue_relationship_pk_seq"
|
187
|
+
belongs_to :term, :class_name => "Term"
|
188
|
+
belongs_to :object_seqfeature, :class_name => "Seqfeature"
|
189
|
+
belongs_to :subject_seqfeature, :class_name => "Seqfeature"
|
190
|
+
end
|
191
|
+
class TaxonName < DummyBase
|
192
|
+
set_primary_keys :taxon_id, :name, :name_class
|
193
|
+
belongs_to :taxon, :class_name => "Taxon"
|
194
|
+
end
|
195
|
+
class Taxon < DummyBase
|
196
|
+
set_sequence_name "taxon_pk_seq"
|
197
|
+
has_many :taxon_names, :class_name => "TaxonName"
|
198
|
+
has_one :taxon_scientific_name, :class_name => "TaxonName", :conditions=>"name_class = 'scientific name'"
|
199
|
+
has_one :taxon_genbank_common_name, :class_name => "TaxonName", :conditions=>"name_class = 'genbank common name'"
|
200
|
+
has_one :bioentry, :class_name => "Bioentry"
|
201
|
+
end
|
202
|
+
class TermDbxref < DummyBase
|
203
|
+
set_primary_key nil #term_id, dbxref_id
|
204
|
+
#delete set_sequence_name nil
|
205
|
+
belongs_to :term, :class_name => "Term"
|
206
|
+
belongs_to :dbxref, :class_name => "Dbxref"
|
207
|
+
end
|
208
|
+
class TermPath < DummyBase
|
209
|
+
set_sequence_name "term_path_pk_seq"
|
210
|
+
belongs_to :ontology, :class_name => "Ontology"
|
211
|
+
belongs_to :subject_term, :class_name => "Term"
|
212
|
+
belongs_to :object_term, :class_name => "Term"
|
213
|
+
belongs_to :predicate_term, :class_name => "Term"
|
214
|
+
end
|
215
|
+
class Term < DummyBase
|
216
|
+
belongs_to :ontology, :class_name => "Ontology"
|
217
|
+
has_many :seqfeature_qualifier_values, :class_name => "SeqfeatureQualifierValue"
|
218
|
+
has_many :dbxref_qualifier_values, :class_name => "DbxrefQualifierValue"
|
219
|
+
has_many :bioentry_qualifer_values, :class_name => "BioentryQualifierValue"
|
220
|
+
has_many :bioentries, :through=>:bioentry_qualifier_values
|
221
|
+
has_many :locations, :class_name => "Location"
|
222
|
+
has_many :seqfeature_relationships, :class_name => "SeqfeatureRelationship"
|
223
|
+
has_many :term_dbxrefs, :class_name => "TermDbxref"
|
224
|
+
has_many :term_relationship_terms, :class_name => "TermRelationshipTerm"
|
225
|
+
has_many :term_synonyms, :class_name => "TermSynonym"
|
226
|
+
has_many :location_qualifier_values, :class_name => "LocationQualifierValue"
|
227
|
+
has_many :seqfeature_types, :class_name => "Seqfeature", :foreign_key => "type_term_id"
|
228
|
+
has_many :seqfeature_sources, :class_name => "Seqfeature", :foreign_key => "source_term_id"
|
229
|
+
has_many :term_path_subjects, :class_name => "TermPath", :foreign_key => "subject_term_id"
|
230
|
+
has_many :term_path_predicates, :class_name => "TermPath", :foreign_key => "predicate_term_id"
|
231
|
+
has_many :term_path_objects, :class_name => "TermPath", :foreign_key => "object_term_id"
|
232
|
+
has_many :term_relationship_subjects, :class_name => "TermRelationship", :foreign_key =>"subject_term_id"
|
233
|
+
has_many :term_relationship_predicates, :class_name => "TermRelationship", :foreign_key =>"predicate_term_id"
|
234
|
+
has_many :term_relationship_objects, :class_name => "TermRelationship", :foreign_key =>"object_term_id"
|
235
|
+
has_many :seqfeature_paths, :class_name => "SeqfeaturePath"
|
236
|
+
end
|
237
|
+
class TermRelationship < DummyBase
|
238
|
+
set_sequence_name "term_relationship_pk_seq"
|
239
|
+
belongs_to :ontology, :class_name => "Ontology"
|
240
|
+
belongs_to :subject_term, :class_name => "Term"
|
241
|
+
belongs_to :predicate_term, :class_name => "Term"
|
242
|
+
belongs_to :object_term, :class_name => "Term"
|
243
|
+
has_one :term_relationship_term, :class_name => "TermRelationshipTerm"
|
244
|
+
end
|
245
|
+
class TermRelationshipTerm < DummyBase
|
246
|
+
#delete set_sequence_name nil
|
247
|
+
set_primary_key :term_relationship_id
|
248
|
+
belongs_to :term_relationship, :class_name => "TermRelationship"
|
249
|
+
belongs_to :term, :class_name => "Term"
|
250
|
+
end
|
251
|
+
class TermSynonym < DummyBase
|
252
|
+
#delete set_sequence_name nil
|
253
|
+
set_primary_key nil
|
254
|
+
belongs_to :term, :class_name => "Term"
|
255
|
+
end
|
256
|
+
end #SQL
|
257
|
+
end #Bio
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#require 'dm-ar-finders'
|
2
|
+
#require 'dm-core'
|
3
|
+
require 'erb'
|
4
|
+
require 'composite_primary_keys'
|
5
|
+
|
6
|
+
module Bio
|
7
|
+
class SQL
|
8
|
+
class DummyBase < ActiveRecord::Base
|
9
|
+
#NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
|
10
|
+
#NOTE: this class will not establish the connection automatically
|
11
|
+
self.abstract_class = true
|
12
|
+
self.pluralize_table_names = false
|
13
|
+
      #prepend the table name to the usual id, to avoid specifying the primary key for every table
|
14
|
+
self.primary_key_prefix_type = :table_name_with_underscore
|
15
|
+
#biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'./config', 'database.yml'))).result)
|
16
|
+
#self.configurations=biosql_configurations
|
17
|
+
#self.establish_connection "development"
|
18
|
+
end #DummyBase
|
19
|
+
|
20
|
+
require 'bio/io/biosql/ar-biosql'
|
21
|
+
|
22
|
+
# #no check is made
|
23
|
+
def self.establish_connection(configurations, env)
|
24
|
+
      # #configurations is a hash similar to what YAML returns.
|
25
|
+
|
26
|
+
#configurations.assert_valid_keys('development', 'production','test')
|
27
|
+
#configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
|
28
|
+
DummyBase.configurations = configurations
|
29
|
+
connection = DummyBase.establish_connection "#{env}"
|
30
|
+
#Init of basis terms and ontologies
|
31
|
+
Ontology.first(:conditions => ["name = ?", 'Annotation Tags']) || Ontology.create({:name => 'Annotation Tags'})
|
32
|
+
Ontology.first(:conditions => ["name = ?", 'SeqFeature Keys']) || Ontology.create({:name => 'SeqFeature Keys'})
|
33
|
+
Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources']) ||Ontology.create({:name => 'SeqFeature Sources'})
|
34
|
+
Term.first(:conditions => ["name = ?", 'EMBLGenBankSwit']) || Term.create({:name => 'EMBLGenBankSwit', :ontology => Ontology.first(:conditions => ["name = ?", 'SeqFeature Sources'])})
|
35
|
+
connection
|
36
|
+
end #establish_connection
|
37
|
+
|
38
|
+
end #SQL
|
39
|
+
end #Bio
|
@@ -2,10 +2,11 @@
|
|
2
2
|
#User can configure its db here
|
3
3
|
|
4
4
|
development:
|
5
|
-
adapter:
|
6
|
-
database:
|
7
|
-
username:
|
8
|
-
password:
|
5
|
+
adapter: jdbcmysql
|
6
|
+
database: bioseq
|
7
|
+
username: febo
|
8
|
+
password:
|
9
|
+
hostname: localhost
|
9
10
|
|
10
11
|
test:
|
11
12
|
adapter: postgresql
|
data/lib/bio/io/ncbirest.rb
CHANGED
@@ -26,8 +26,10 @@ class REST
|
|
26
26
|
# weekdays for any series of more than 100 requests.
|
27
27
|
# -> Not implemented yet in BioRuby
|
28
28
|
|
29
|
-
# Make no more than one request every
|
30
|
-
|
29
|
+
      # Make no more than one request every 1 second.
|
30
|
+
# (NCBI's restriction is "Make no more than 3 requests every 1 second.",
|
31
|
+
      # but limited to 1/sec partly to keep the value an integer.)
|
32
|
+
NCBI_INTERVAL = 1
|
31
33
|
@@last_access = nil
|
32
34
|
|
33
35
|
private
|
@@ -127,10 +129,10 @@ class REST
|
|
127
129
|
# * _mindate_: 2001
|
128
130
|
# * _maxdate_: 2002/01/01
|
129
131
|
# * _datetype_: "edat"
|
130
|
-
# * _limit_: maximum number of entries to be returned (0 for unlimited)
|
132
|
+
# * _limit_: maximum number of entries to be returned (0 for unlimited; nil for the "retmax" value in the hash or the internal default value (=100))
|
131
133
|
# * _step_: maximum number of entries retrieved at a time
|
132
134
|
# *Returns*:: array of entry IDs or a number of results
|
133
|
-
def esearch(str, hash = {}, limit =
|
135
|
+
def esearch(str, hash = {}, limit = nil, step = 10000)
|
134
136
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
135
137
|
opts = {
|
136
138
|
"tool" => "bioruby",
|
@@ -143,12 +145,17 @@ class REST
|
|
143
145
|
count = esearch_count(str, opts)
|
144
146
|
return count
|
145
147
|
else
|
148
|
+
retstart = 0
|
149
|
+
retstart = hash["retstart"].to_i if hash["retstart"]
|
150
|
+
|
151
|
+
limit ||= hash["retmax"].to_i if hash["retmax"]
|
152
|
+
limit ||= 100 # default limit is 100
|
146
153
|
limit = esearch_count(str, opts) if limit == 0 # unlimit
|
147
154
|
|
148
155
|
list = []
|
149
156
|
0.step(limit, step) do |i|
|
150
157
|
retmax = [step, limit - i].min
|
151
|
-
opts.update("retmax" => retmax, "retstart" => i)
|
158
|
+
opts.update("retmax" => retmax, "retstart" => i + retstart)
|
152
159
|
ncbi_access_wait
|
153
160
|
response = Bio::Command.post_form(serv, opts)
|
154
161
|
result = response.body
|
data/lib/bio/io/pubmed.rb
CHANGED
@@ -117,7 +117,11 @@ class PubMed < Bio::NCBI::REST
|
|
117
117
|
def efetch(ids, hash = {})
|
118
118
|
opts = { "db" => "pubmed", "rettype" => "medline" }
|
119
119
|
opts.update(hash)
|
120
|
-
super(ids, opts)
|
120
|
+
result = super(ids, opts)
|
121
|
+
if !opts["retmode"] or opts["retmode"] == "text"
|
122
|
+
result = result.split(/\n\n+/)
|
123
|
+
end
|
124
|
+
result
|
121
125
|
end
|
122
126
|
|
123
127
|
# Search the PubMed database by given keywords using entrez query and returns
|
data/lib/bio/io/sql.rb
CHANGED
@@ -1,186 +1,79 @@
|
|
1
|
+
#module Bio
|
2
|
+
# class SQL
|
3
|
+
# #no check is made
|
4
|
+
# def self.establish_connection(configurations, env)
|
5
|
+
# #configurations is an hash similar what YAML returns.
|
6
|
+
# #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
|
7
|
+
# configurations.assert_valid_keys('development', 'production','test')
|
8
|
+
# configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
|
9
|
+
# DummyBase.configurations = configurations
|
10
|
+
# DummyBase.establish_connection "#{env}"
|
11
|
+
#end
|
1
12
|
|
2
|
-
|
3
|
-
require '
|
4
|
-
require 'composite_primary_keys'
|
13
|
+
|
14
|
+
#require 'rubygems'
|
15
|
+
#require 'composite_primary_keys'
|
16
|
+
#require 'erb'
|
5
17
|
# BiosqlPlug
|
6
18
|
|
7
19
|
=begin
|
8
20
|
Ok Hilmar gives to me some clarification
|
9
21
|
1) The "EMBL/GenBank/SwissProt" name in the term table is only a convention, assuming data loaded from GenBank, EMBL and SwissProt formats.
|
10
|
-
If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
|
22
|
+
If your features come from other sources, for example blast or alignment, the user has to take care of the source.
|
11
23
|
|
12
24
|
|
13
25
|
=end
|
14
26
|
=begin
|
15
27
|
TODO:
|
16
28
|
1) source_term_id => surce_term and check before if the source term is present or not and the level, the root should always be something "EMBL/GenBank/SwissProt" or contestualized.
|
17
|
-
2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
|
29
|
+
2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
|
18
30
|
3) Chk Locations in Biofeatures ArSQL
|
19
31
|
=end
|
20
32
|
module Bio
|
21
|
-
class SQL
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
configurations.assert_valid_keys('development', 'production','test')
|
27
|
-
configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
|
28
|
-
DummyBase.configurations = configurations
|
29
|
-
DummyBase.establish_connection "#{env}"
|
30
|
-
end
|
31
|
-
|
33
|
+
class SQL
|
34
|
+
|
35
|
+
require 'bio/io/biosql/biosql'
|
36
|
+
autoload :Sequence, 'bio/db/biosql/sequence'
|
37
|
+
|
32
38
|
def self.fetch_id(id)
|
33
39
|
Bio::SQL::Bioentry.find(id)
|
34
40
|
end
|
35
|
-
|
41
|
+
|
36
42
|
def self.fetch_accession(accession)
|
37
|
-
|
38
|
-
Bio::SQL::
|
43
|
+
# Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
|
44
|
+
Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession.upcase))
|
39
45
|
end
|
40
|
-
|
46
|
+
|
41
47
|
def self.exists_accession(accession)
|
42
|
-
Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
|
48
|
+
# Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
|
49
|
+
!Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil?
|
43
50
|
end
|
44
|
-
|
51
|
+
|
45
52
|
def self.exists_database(name)
|
46
|
-
Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
|
53
|
+
# Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
|
54
|
+
!Bio::SQL::Biodatabase.first(:name=>name).nil?
|
47
55
|
end
|
48
|
-
|
56
|
+
|
49
57
|
def self.list_entries
|
50
|
-
Bio::SQL::Bioentry.
|
58
|
+
Bio::SQL::Bioentry.all.collect do|entry|
|
51
59
|
{:id=>entry.bioentry_id, :accession=>entry.accession}
|
52
|
-
|
60
|
+
end
|
53
61
|
end
|
54
|
-
|
62
|
+
|
55
63
|
def self.list_databases
|
56
|
-
Bio::SQL::Biodatabase.
|
64
|
+
Bio::SQL::Biodatabase.all.collect do|entry|
|
57
65
|
{:id=>entry.biodatabase_id, :name => entry.name}
|
58
|
-
|
66
|
+
end
|
59
67
|
end
|
60
|
-
|
68
|
+
|
61
69
|
def self.delete_entry_id(id)
|
62
|
-
Bioentry.delete(id)
|
70
|
+
Bio::SQL::Bioentry.delete(id)
|
63
71
|
end
|
64
|
-
|
72
|
+
|
65
73
|
def self.delete_entry_accession(accession)
|
66
|
-
Bioentry.
|
74
|
+
Bio::SQL::Bioentry.find_by_accession(accession.upcase).destroy!
|
67
75
|
end
|
68
|
-
|
69
|
-
|
70
|
-
class DummyBase < ActiveRecord::Base
|
71
|
-
#NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
|
72
|
-
#NOTE: this class will not establish the connection automatically
|
73
|
-
self.abstract_class = true
|
74
|
-
self.pluralize_table_names = false
|
75
|
-
#prepend table name to the usual id, avoid to specify primary id for every table
|
76
|
-
self.primary_key_prefix_type = :table_name_with_underscore
|
77
|
-
#biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
|
78
|
-
#self.configurations=biosql_configurations
|
79
|
-
#self.establish_connection "development"
|
80
|
-
end #DummyBase
|
81
|
-
|
82
|
-
autoload :Biodatabase, 'bio/io/biosql/biodatabase'
|
83
|
-
autoload :Bioentry, 'bio/io/biosql/bioentry'
|
84
|
-
autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
|
85
|
-
autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
|
86
|
-
autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
|
87
|
-
autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
|
88
|
-
autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
|
89
|
-
autoload :Biosequence, 'bio/io/biosql/biosequence'
|
90
|
-
autoload :Comment, 'bio/io/biosql/comment'
|
91
|
-
autoload :Dbxref, 'bio/io/biosql/dbxref'
|
92
|
-
autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
|
93
|
-
autoload :Location, 'bio/io/biosql/location'
|
94
|
-
autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
|
95
|
-
autoload :Ontology, 'bio/io/biosql/ontology'
|
96
|
-
autoload :Reference, 'bio/io/biosql/reference'
|
97
|
-
autoload :Seqfeature, 'bio/io/biosql/seqfeature'
|
98
|
-
autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
|
99
|
-
autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
|
100
|
-
autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
|
101
|
-
autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
|
102
|
-
autoload :Taxon, 'bio/io/biosql/taxon'
|
103
|
-
autoload :TaxonName, 'bio/io/biosql/taxon_name'
|
104
|
-
autoload :Term, 'bio/io/biosql/term'
|
105
|
-
autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
|
106
|
-
autoload :TermPath, 'bio/io/biosql/term_path'
|
107
|
-
autoload :TermRelationship, 'bio/io/biosql/term_relationship'
|
108
|
-
autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
|
109
|
-
autoload :Sequence, 'bio/db/biosql/sequence'
|
110
|
-
end #biosql
|
111
|
-
|
112
|
-
end #Bio
|
113
76
|
|
114
|
-
|
115
|
-
require 'rubygems'
|
116
|
-
require 'composite_primary_keys'
|
117
|
-
require 'bio'
|
118
|
-
require 'pp'
|
119
|
-
|
120
|
-
# pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
|
121
|
-
connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
|
122
|
-
#pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
|
123
|
-
if true
|
124
|
-
#Bio::SQL.list_entries
|
125
|
-
|
126
|
-
# biosequence = data.to_biosequence
|
127
|
-
# puts biosequence.output(:genbank)
|
128
|
-
db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
|
129
|
-
db.save!
|
130
|
-
|
131
|
-
puts "### FileFile.auto"
|
132
|
-
if ARGV.size > 0
|
133
|
-
#embl = Bio::FlatFile.auto(ARGF.read)
|
134
|
-
Bio::FlatFile.auto(ARGF) do |ff|
|
135
|
-
ff.each do |data|
|
136
|
-
biosequence=data.to_biosequence
|
137
|
-
puts biosequence.output(:fasta)
|
138
|
-
sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
|
139
|
-
sqlseq.save
|
140
|
-
sqlseq.to_biosequence.output(:fasta)
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
require 'bio/io/fetch'
|
145
|
-
server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
|
146
|
-
data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
|
147
|
-
end
|
77
|
+
end #biosql
|
148
78
|
|
149
|
-
|
150
|
-
# sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
|
151
|
-
# sqlseq.save
|
152
|
-
# sqlseq_bioseq=sqlseq.to_biosequence
|
153
|
-
# puts sqlseq_bioseq.output(:genbank)
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
# bioseq = Bio::SQL.fetch_accession('AJ224122')
|
158
|
-
# pp bioseq
|
159
|
-
# pp bioseq.entry_id
|
160
|
-
#TODO create a test only for tables not sequence here
|
161
|
-
# pp bioseq.molecule_type
|
162
|
-
#pp bioseq.molecule_type.class
|
163
|
-
#bioseq.molecule_type_update('dna', 1)
|
164
|
-
## pp Bio::SQL::Taxon.find(8121).taxon_names
|
165
|
-
|
166
|
-
#sqlseq.to_biosequence
|
167
|
-
|
168
|
-
# sqlseq.delete
|
169
|
-
|
170
|
-
# db.destroy
|
171
|
-
end
|
172
|
-
#pp bioseq.molecule_type
|
173
|
-
#term = Bio::SQL::Term.find_by_name('mol_type')
|
174
|
-
#pp term
|
175
|
-
#pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
|
176
|
-
#pp bioseq.entry.bioentry_qualifier_values.inspect
|
177
|
-
#pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
|
178
|
-
#pp primo.class
|
179
|
-
# pp primo.value='dna'
|
180
|
-
# pp primo.save
|
181
|
-
#pp bioseq.molecule_type= 'prova'
|
182
|
-
|
183
|
-
#Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
|
184
|
-
|
185
|
-
|
186
|
-
end
|
79
|
+
end #Bio
|