bio 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
|
@@ -1,28 +1,68 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/io/flatfile/index.rb - OBDA flatfile index
|
|
3
|
-
#
|
|
4
|
-
# Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
-
#
|
|
6
|
-
# This library is free software; you can redistribute it and/or
|
|
7
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
-
# License as published by the Free Software Foundation; either
|
|
9
|
-
# version 2 of the License, or (at your option) any later version.
|
|
10
|
-
#
|
|
11
|
-
# This library is distributed in the hope that it will be useful,
|
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
-
# Lesser General Public License for more details.
|
|
15
|
-
#
|
|
16
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
-
# License along with this library; if not, write to the Free Software
|
|
18
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
-
#
|
|
20
|
-
# $Id: index.rb,v 1.15 2005/11/28 05:08:26 k Exp $
|
|
2
|
+
# = bio/io/flatfile/index.rb - OBDA flatfile index
|
|
21
3
|
#
|
|
4
|
+
# Copyright:: Copyright (C) 2002
|
|
5
|
+
# GOTO Naohisa <ng@bioruby.org>
|
|
6
|
+
# License:: Ruby's
|
|
7
|
+
#
|
|
8
|
+
# $Id: index.rb,v 1.18 2006/02/22 08:40:31 ngoto Exp $
|
|
9
|
+
#
|
|
10
|
+
# = About Bio::FlatFileIndex
|
|
11
|
+
#
|
|
12
|
+
# Please refer to the documentation of the following classes.
|
|
13
|
+
# Classes/modules marked '#' are internal use only.
|
|
14
|
+
#
|
|
15
|
+
# == Classes/modules in index.rb
|
|
16
|
+
# * class Bio::FlatFileIndex
|
|
17
|
+
# * class Bio::FlatFileIndex::Results
|
|
18
|
+
# * module Bio::FlatFileIndex::DEBUG
|
|
19
|
+
# * #module Bio::FlatFileIndex::Template
|
|
20
|
+
# * #class Bio::FlatFileIndex::Template::NameSpace
|
|
21
|
+
# * #class Bio::FlatFileIndex::FileID
|
|
22
|
+
# * #class Bio::FlatFileIndex::FileIDs
|
|
23
|
+
# * #module Bio::FlatFileIndex::Flat_1
|
|
24
|
+
# * #class Bio::FlatFileIndex::Flat_1::Record
|
|
25
|
+
# * #class Bio::FlatFileIndex::Flat_1::FlatMappingFile
|
|
26
|
+
# * #class Bio::FlatFileIndex::Flat_1::PrimaryNameSpace
|
|
27
|
+
# * #class Bio::FlatFileIndex::Flat_1::SecondaryNameSpace
|
|
28
|
+
# * #class Bio::FlatFileIndex::NameSpaces
|
|
29
|
+
# * #class Bio::FlatFileIndex::DataBank
|
|
30
|
+
#
|
|
31
|
+
# == Classes/modules in indexer.rb
|
|
32
|
+
# * module Bio::FlatFileIndex::Indexer
|
|
33
|
+
# * #class Bio::FlatFileIndex::Indexer::NameSpace
|
|
34
|
+
# * #class Bio::FlatFileIndex::Indexer::NameSpaces
|
|
35
|
+
# * #module Bio::FlatFileIndex::Indexer::Parser
|
|
36
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::TemplateParser
|
|
37
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::GenBankParser
|
|
38
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::GenPeptParser
|
|
39
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::EMBLParser
|
|
40
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::SPTRParser
|
|
41
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::FastaFormatParser
|
|
42
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::MaXMLSequenceParser
|
|
43
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::MaXMLClusterParser
|
|
44
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::BlastDefaultParser
|
|
45
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::PDBChemicalComponentParser
|
|
46
|
+
#
|
|
47
|
+
# == Classes/modules in bdb.rb
|
|
48
|
+
# * #module Bio::FlatFileIndex::BDBDefault
|
|
49
|
+
# * #class Bio::FlatFileIndex::BDBWrapper
|
|
50
|
+
# * #module Bio::FlatFileIndex::BDB_1
|
|
51
|
+
# * #class Bio::FlatFileIndex::BDB_1::BDBMappingFile
|
|
52
|
+
# * #class Bio::FlatFileIndex::BDB_1::PrimaryNameSpace
|
|
53
|
+
# * #class Bio::FlatFileIndex::BDB_1::SecondaryNameSpace
|
|
54
|
+
#
|
|
55
|
+
# = References
|
|
56
|
+
# * ((<URL:http://obda.open-bio.org/>))
|
|
57
|
+
# * ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
|
|
58
|
+
#
|
|
22
59
|
|
|
23
60
|
require 'bio/io/flatfile/indexer'
|
|
24
61
|
|
|
25
62
|
module Bio
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Bio::FlatFileIndex is a class for OBDA flatfile index.
|
|
26
66
|
class FlatFileIndex
|
|
27
67
|
|
|
28
68
|
autoload :Indexer, 'bio/io/flatfile/indexer'
|
|
@@ -30,10 +70,21 @@ module Bio
|
|
|
30
70
|
autoload :BDBwrapper, 'bio/io/flatfile/bdb'
|
|
31
71
|
autoload :BDB_1, 'bio/io/flatfile/bdb'
|
|
32
72
|
|
|
73
|
+
# magic string for flat/1 index
|
|
33
74
|
MAGIC_FLAT = 'flat/1'
|
|
75
|
+
|
|
76
|
+
# magic string for BerkeleyDB/1 index
|
|
34
77
|
MAGIC_BDB = 'BerkeleyDB/1'
|
|
35
78
|
|
|
36
79
|
#########################################################
|
|
80
|
+
|
|
81
|
+
# Opens existing databank. Databank is a directory which contains
|
|
82
|
+
# indexed files and configuration files. The type of the databank
|
|
83
|
+
# (flat or BerkeleyDB) is determined automatically.
|
|
84
|
+
#
|
|
85
|
+
# If block is given, the databank object is passed to the block.
|
|
86
|
+
# The databank will be automatically closed when the block terminates.
|
|
87
|
+
#
|
|
37
88
|
def self.open(name)
|
|
38
89
|
if block_given? then
|
|
39
90
|
begin
|
|
@@ -53,22 +104,38 @@ module Bio
|
|
|
53
104
|
r
|
|
54
105
|
end
|
|
55
106
|
|
|
107
|
+
# Opens existing databank. Databank is a directory which contains
|
|
108
|
+
# indexed files and configuration files. The type of the databank
|
|
109
|
+
# (flat or BerkeleyDB) is determined automatically.
|
|
110
|
+
#
|
|
111
|
+
# Unlike +FlatFileIndex.open+, block is not allowed.
|
|
112
|
+
#
|
|
56
113
|
def initialize(name)
|
|
57
114
|
@db = DataBank.open(name)
|
|
58
115
|
end
|
|
59
116
|
|
|
60
117
|
# common interface defined in registry.rb
|
|
118
|
+
# Searching databank and returns entry (or entries) as a string.
|
|
119
|
+
# Multiple entries (concatenated to one string) may be returned.
|
|
120
|
+
# Returns empty string if not found.
|
|
121
|
+
#
|
|
61
122
|
def get_by_id(key)
|
|
62
123
|
search(key).to_s
|
|
63
124
|
end
|
|
64
125
|
|
|
126
|
+
#--
|
|
65
127
|
# original methods
|
|
128
|
+
#++
|
|
129
|
+
|
|
130
|
+
# Closes the databank.
|
|
131
|
+
# Returns nil.
|
|
66
132
|
def close
|
|
67
133
|
check_closed?
|
|
68
134
|
@db.close
|
|
69
135
|
@db = nil
|
|
70
136
|
end
|
|
71
137
|
|
|
138
|
+
# Returns true if already closed. Otherwise, returns false.
|
|
72
139
|
def closed?
|
|
73
140
|
if @db then
|
|
74
141
|
false
|
|
@@ -77,6 +144,19 @@ module Bio
|
|
|
77
144
|
end
|
|
78
145
|
end
|
|
79
146
|
|
|
147
|
+
# Set default namespaces.
|
|
148
|
+
# <code>default_namespaces = nil</code>
|
|
149
|
+
# means all namespaces in the databank.
|
|
150
|
+
#
|
|
151
|
+
# <code>default_namespaces= [ str1, str2, ... ]</code>
|
|
152
|
+
# means set default namespaces to str1, str2, ...
|
|
153
|
+
#
|
|
154
|
+
# Default namespaces specified in this method only affect
|
|
155
|
+
# #get_by_id, #search, and #include? methods.
|
|
156
|
+
#
|
|
157
|
+
# Default of default namespaces is nil (that is, all namespaces
|
|
158
|
+
# are search destinations by default).
|
|
159
|
+
#
|
|
80
160
|
def default_namespaces=(names)
|
|
81
161
|
if names then
|
|
82
162
|
@names = []
|
|
@@ -86,10 +166,14 @@ module Bio
|
|
|
86
166
|
end
|
|
87
167
|
end
|
|
88
168
|
|
|
169
|
+
# Returns default namespaces.
|
|
170
|
+
# Returns an array of strings or nil.
|
|
171
|
+
# nil means all namespaces.
|
|
89
172
|
def default_namespaces
|
|
90
173
|
@names
|
|
91
174
|
end
|
|
92
175
|
|
|
176
|
+
# Searching databank and returns a Bio::FlatFileIndex::Results object.
|
|
93
177
|
def search(key)
|
|
94
178
|
check_closed?
|
|
95
179
|
if @names then
|
|
@@ -99,16 +183,30 @@ module Bio
|
|
|
99
183
|
end
|
|
100
184
|
end
|
|
101
185
|
|
|
186
|
+
# Searching only specified namespaces.
|
|
187
|
+
# Returns a Bio::FlatFileIndex::Results object.
|
|
188
|
+
#
|
|
102
189
|
def search_namespaces(key, *names)
|
|
103
190
|
check_closed?
|
|
104
191
|
@db.search_namespaces(key, *names)
|
|
105
192
|
end
|
|
106
193
|
|
|
194
|
+
# Searching only primary namespace.
|
|
195
|
+
# Returns a Bio::FlatFileIndex::Results object.
|
|
196
|
+
#
|
|
107
197
|
def search_primary(key)
|
|
108
198
|
check_closed?
|
|
109
199
|
@db.search_primary(key)
|
|
110
200
|
end
|
|
111
201
|
|
|
202
|
+
# Searching databank.
|
|
203
|
+
# If some entries are found, returns an array of
|
|
204
|
+
# unique IDs (primary identifiers).
|
|
205
|
+
# If not found anything, returns nil.
|
|
206
|
+
#
|
|
207
|
+
# This method is useful when search result is very large and
|
|
208
|
+
# #search method is very slow.
|
|
209
|
+
#
|
|
112
210
|
def include?(key)
|
|
113
211
|
check_closed?
|
|
114
212
|
if @names then
|
|
@@ -123,6 +221,8 @@ module Bio
|
|
|
123
221
|
end
|
|
124
222
|
end
|
|
125
223
|
|
|
224
|
+
# Same as #include?, but searching only specified namespaces.
|
|
225
|
+
#
|
|
126
226
|
def include_in_namespaces?(key, *names)
|
|
127
227
|
check_closed?
|
|
128
228
|
r = @db.search_namespaces_get_unique_id(key, *names)
|
|
@@ -133,6 +233,8 @@ module Bio
|
|
|
133
233
|
end
|
|
134
234
|
end
|
|
135
235
|
|
|
236
|
+
# Same as #include?, but searching only primary namespace.
|
|
237
|
+
#
|
|
136
238
|
def include_in_primary?(key)
|
|
137
239
|
check_closed?
|
|
138
240
|
r = @db.search_primary_get_unique_id(key)
|
|
@@ -143,6 +245,9 @@ module Bio
|
|
|
143
245
|
end
|
|
144
246
|
end
|
|
145
247
|
|
|
248
|
+
# Returns names of namespaces defined in the databank.
|
|
249
|
+
# (example: [ 'LOCUS', 'ACCESSION', 'VERSION' ] )
|
|
250
|
+
#
|
|
146
251
|
def namespaces
|
|
147
252
|
check_closed?
|
|
148
253
|
r = secondary_namespaces
|
|
@@ -150,38 +255,82 @@ module Bio
|
|
|
150
255
|
r
|
|
151
256
|
end
|
|
152
257
|
|
|
258
|
+
# Returns name of primary namespace as a string.
|
|
153
259
|
def primary_namespace
|
|
154
260
|
check_closed?
|
|
155
261
|
@db.primary.name
|
|
156
262
|
end
|
|
157
263
|
|
|
264
|
+
# Returns names of secondary namespaces as an array of strings.
|
|
158
265
|
def secondary_namespaces
|
|
159
266
|
check_closed?
|
|
160
267
|
@db.secondary.names
|
|
161
268
|
end
|
|
162
269
|
|
|
270
|
+
# Check consistency between the databank(index) and original flat files.
|
|
271
|
+
#
|
|
272
|
+
# If the original flat files are changed after creating
|
|
273
|
+
# the databank, raises RuntimeError.
|
|
274
|
+
#
|
|
275
|
+
# Note that this check only compares file sizes as
|
|
276
|
+
# described in the OBDA specification.
|
|
277
|
+
#
|
|
163
278
|
def check_consistency
|
|
164
279
|
check_closed?
|
|
165
280
|
@db.check_consistency
|
|
166
281
|
end
|
|
167
282
|
|
|
283
|
+
# If true is given, consistency checks will be performed every time
|
|
284
|
+
# accessing flatfiles. If nil/false, no checks are performed.
|
|
285
|
+
#
|
|
286
|
+
# By default, always_check_consistency is true.
|
|
287
|
+
#
|
|
168
288
|
def always_check_consistency=(bool)
|
|
169
289
|
@db.always_check=(bool)
|
|
170
290
|
end
|
|
291
|
+
|
|
292
|
+
# If true, consistency checks will be performed every time
|
|
293
|
+
# accessing flatfiles. If nil/false, no checks are performed.
|
|
294
|
+
#
|
|
295
|
+
# By default, always_check_consistency is true.
|
|
296
|
+
#
|
|
171
297
|
def always_check_consistency(bool)
|
|
172
298
|
@db.always_check
|
|
173
299
|
end
|
|
174
300
|
|
|
301
|
+
#--
|
|
175
302
|
# private methods
|
|
303
|
+
#++
|
|
304
|
+
|
|
305
|
+
# If the databank is closed, raises IOError.
|
|
176
306
|
def check_closed?
|
|
177
307
|
@db or raise IOError, 'closed databank'
|
|
178
308
|
end
|
|
179
309
|
private :check_closed?
|
|
180
310
|
|
|
311
|
+
#--
|
|
181
312
|
#########################################################
|
|
182
|
-
|
|
313
|
+
#++
|
|
314
|
+
|
|
315
|
+
# <code>Results</code> stores search results created by
|
|
316
|
+
# <code>Bio::FlatFileIndex</code> methods.
|
|
317
|
+
#
|
|
318
|
+
# Currently, this class inherits Hash, but internal
|
|
319
|
+
# structure of this class may be changed anytime.
|
|
320
|
+
# Using only the methods described below is strongly recommended.
|
|
321
|
+
#
|
|
183
322
|
class Results < Hash
|
|
184
323
|
|
|
324
|
+
# Add search results.
|
|
325
|
+
# "a + b" means "a OR b".
|
|
326
|
+
# * Example
|
|
327
|
+
# # I want to search 'ADH_IRON_1' OR 'ADH_IRON_2'
|
|
328
|
+
# db = Bio::FlatFileIndex.new(location)
|
|
329
|
+
# a1 = db.search('ADH_IRON_1')
|
|
330
|
+
# a2 = db.search('ADH_IRON_2')
|
|
331
|
+
# # a1 and a2 are Bio::FlatFileIndex::Results objects.
|
|
332
|
+
# print a1 + a2
|
|
333
|
+
#
|
|
185
334
|
def +(a)
|
|
186
335
|
raise 'argument must be Results class' unless a.is_a?(self.class)
|
|
187
336
|
res = self.dup
|
|
@@ -189,6 +338,16 @@ module Bio
|
|
|
189
338
|
res
|
|
190
339
|
end
|
|
191
340
|
|
|
341
|
+
# Returns set intersection of results.
|
|
342
|
+
# "a * b" means "a AND b".
|
|
343
|
+
# * Example
|
|
344
|
+
# # I want to search 'HIS_KIN' AND 'human'
|
|
345
|
+
# db = Bio::FlatFileIndex.new(location)
|
|
346
|
+
# hk = db.search('HIS_KIN')
|
|
347
|
+
# hu = db.search('human')
|
|
348
|
+
# # hk and hu are Bio::FlatFileIndex::Results objects.
|
|
349
|
+
# print hk * hu
|
|
350
|
+
#
|
|
192
351
|
def *(a)
|
|
193
352
|
raise 'argument must be Results class' unless a.is_a?(self.class)
|
|
194
353
|
res = self.class.new
|
|
@@ -196,26 +355,63 @@ module Bio
|
|
|
196
355
|
res
|
|
197
356
|
end
|
|
198
357
|
|
|
358
|
+
# Returns a string (concatenated if multiple results exist).
|
|
359
|
+
# Same as <code>to_a.join('')</code>.
|
|
360
|
+
#
|
|
199
361
|
def to_s
|
|
200
362
|
self.values.join
|
|
201
363
|
end
|
|
202
364
|
|
|
365
|
+
#--
|
|
203
366
|
#alias each_orig each
|
|
367
|
+
#++
|
|
368
|
+
|
|
369
|
+
# alias for each_value.
|
|
204
370
|
alias each each_value
|
|
371
|
+
|
|
372
|
+
# Iterates over each result (string).
|
|
373
|
+
# Same as to_a.each.
|
|
374
|
+
def each(&x) #:yields: str
|
|
375
|
+
each_value(&x)
|
|
376
|
+
end if false #dummy for RDoc
|
|
377
|
+
|
|
378
|
+
#--
|
|
205
379
|
#alias to_a_orig to_a
|
|
380
|
+
#++
|
|
381
|
+
|
|
382
|
+
# alias for values.
|
|
206
383
|
alias to_a values
|
|
207
384
|
|
|
385
|
+
# Returns an array of strings.
|
|
386
|
+
# If no search results exist, returns an empty array.
|
|
387
|
+
#
|
|
388
|
+
def to_a; values; end if false #dummy for RDoc
|
|
389
|
+
|
|
390
|
+
# Returns number of results.
|
|
391
|
+
# Same as to_a.size.
|
|
392
|
+
def size; end if false #dummy for RDoc
|
|
393
|
+
|
|
208
394
|
end #class Results
|
|
209
395
|
|
|
210
396
|
#########################################################
|
|
211
397
|
|
|
398
|
+
# Module for output debug messages.
|
|
399
|
+
# Default setting: If $DEBUG or $VERBOSE is true, output debug
|
|
400
|
+
# messages to $stderr; Otherwise, don't output messages.
|
|
401
|
+
#
|
|
212
402
|
module DEBUG
|
|
213
|
-
@@out =
|
|
403
|
+
@@out = $stderr
|
|
214
404
|
@@flag = nil
|
|
405
|
+
|
|
406
|
+
# Set debug messages output destination.
|
|
407
|
+
# If true is given, outputs to $stderr.
|
|
408
|
+
# If nil is given, outputs nothing.
|
|
409
|
+
# This method affects ALL of FlatFileIndex related objects/methods.
|
|
410
|
+
#
|
|
215
411
|
def self.out=(io)
|
|
216
412
|
if io then
|
|
217
413
|
@@out = io
|
|
218
|
-
@@out =
|
|
414
|
+
@@out = $stderr if io == true
|
|
219
415
|
@@flag = true
|
|
220
416
|
else
|
|
221
417
|
@@out = nil
|
|
@@ -223,9 +419,13 @@ module Bio
|
|
|
223
419
|
end
|
|
224
420
|
@@out
|
|
225
421
|
end
|
|
422
|
+
|
|
423
|
+
# get current debug messages output destination
|
|
226
424
|
def self.out
|
|
227
425
|
@@out
|
|
228
426
|
end
|
|
427
|
+
|
|
428
|
+
# prints debug messages
|
|
229
429
|
def self.print(*arg)
|
|
230
430
|
@@flag = true if $DEBUG or $VERBOSE
|
|
231
431
|
@@out.print(*arg) if @@out and @@flag
|
|
@@ -234,7 +434,14 @@ module Bio
|
|
|
234
434
|
|
|
235
435
|
#########################################################
|
|
236
436
|
|
|
437
|
+
# Templates
|
|
438
|
+
#
|
|
439
|
+
# Internal use only.
|
|
237
440
|
module Template
|
|
441
|
+
|
|
442
|
+
# templates of namespace
|
|
443
|
+
#
|
|
444
|
+
# Internal use only.
|
|
238
445
|
class NameSpace
|
|
239
446
|
def filename
|
|
240
447
|
# should be redefined in child class
|
|
@@ -275,6 +482,9 @@ module Bio
|
|
|
275
482
|
end #class NameSpace
|
|
276
483
|
end #module Template
|
|
277
484
|
|
|
485
|
+
# FileID class.
|
|
486
|
+
#
|
|
487
|
+
# Internal use only.
|
|
278
488
|
class FileID
|
|
279
489
|
def self.new_from_string(str)
|
|
280
490
|
a = str.split("\t", 2)
|
|
@@ -355,6 +565,9 @@ module Bio
|
|
|
355
565
|
end
|
|
356
566
|
end #class FileID
|
|
357
567
|
|
|
568
|
+
# FileIDs class.
|
|
569
|
+
#
|
|
570
|
+
# Internal use only.
|
|
358
571
|
class FileIDs < Array
|
|
359
572
|
def initialize(prefix, hash)
|
|
360
573
|
@hash = hash
|
|
@@ -471,7 +684,14 @@ module Bio
|
|
|
471
684
|
|
|
472
685
|
end #class FileIDs
|
|
473
686
|
|
|
687
|
+
# module for flat/1 databank
|
|
688
|
+
#
|
|
689
|
+
# Internal use only.
|
|
474
690
|
module Flat_1
|
|
691
|
+
|
|
692
|
+
# Record class.
|
|
693
|
+
#
|
|
694
|
+
# Internal use only.
|
|
475
695
|
class Record
|
|
476
696
|
def initialize(str, size = nil)
|
|
477
697
|
a = str.split("\t")
|
|
@@ -500,6 +720,9 @@ module Bio
|
|
|
500
720
|
end
|
|
501
721
|
end #class Record
|
|
502
722
|
|
|
723
|
+
# FlatMappingFile class.
|
|
724
|
+
#
|
|
725
|
+
# Internal use only.
|
|
503
726
|
class FlatMappingFile
|
|
504
727
|
@@recsize_width = 4
|
|
505
728
|
@@recsize_regex = /\A\d{4}\z/
|
|
@@ -785,6 +1008,9 @@ module Bio
|
|
|
785
1008
|
end
|
|
786
1009
|
end #class FlatMappingFile
|
|
787
1010
|
|
|
1011
|
+
# primary name space
|
|
1012
|
+
#
|
|
1013
|
+
# Internal use only.
|
|
788
1014
|
class PrimaryNameSpace < Template::NameSpace
|
|
789
1015
|
def mapping(filename)
|
|
790
1016
|
FlatMappingFile.new(filename)
|
|
@@ -794,6 +1020,9 @@ module Bio
|
|
|
794
1020
|
end
|
|
795
1021
|
end #class PrimaryNameSpace
|
|
796
1022
|
|
|
1023
|
+
# secondary name space
|
|
1024
|
+
#
|
|
1025
|
+
# Internal use only.
|
|
797
1026
|
class SecondaryNameSpace < Template::NameSpace
|
|
798
1027
|
def mapping(filename)
|
|
799
1028
|
FlatMappingFile.new(filename)
|
|
@@ -810,7 +1039,9 @@ module Bio
|
|
|
810
1039
|
end #class SecondaryNameSpace
|
|
811
1040
|
end #module Flat_1
|
|
812
1041
|
|
|
813
|
-
|
|
1042
|
+
# namespaces
|
|
1043
|
+
#
|
|
1044
|
+
# Internal use only.
|
|
814
1045
|
class NameSpaces < Hash
|
|
815
1046
|
def initialize(dbname, nsclass, arg)
|
|
816
1047
|
@dbname = dbname
|
|
@@ -872,6 +1103,9 @@ module Bio
|
|
|
872
1103
|
end
|
|
873
1104
|
end #class NameSpaces
|
|
874
1105
|
|
|
1106
|
+
# databank
|
|
1107
|
+
#
|
|
1108
|
+
# Internal use only.
|
|
875
1109
|
class DataBank
|
|
876
1110
|
def self.file2hash(fileobj)
|
|
877
1111
|
hash = {}
|
|
@@ -1135,174 +1369,3 @@ module Bio
|
|
|
1135
1369
|
end #class FlatFileIndex
|
|
1136
1370
|
end #module Bio
|
|
1137
1371
|
|
|
1138
|
-
######################################################################
|
|
1139
|
-
|
|
1140
|
-
=begin
|
|
1141
|
-
|
|
1142
|
-
= Bio::FlatFileIndex
|
|
1143
|
-
|
|
1144
|
-
--- Bio::FlatFileIndex.new(dbname)
|
|
1145
|
-
--- Bio::FlatFileIndex.open(dbname)
|
|
1146
|
-
|
|
1147
|
-
Opens existing databank. Databank is a directory which contains
|
|
1148
|
-
indexed files and configuration files. The type of the databank
|
|
1149
|
-
(flat or BerkeleyDB) are determined automatically.
|
|
1150
|
-
|
|
1151
|
-
--- Bio::FlatFileIndex#close
|
|
1152
|
-
|
|
1153
|
-
Closes opened databank.
|
|
1154
|
-
|
|
1155
|
-
--- Bio::FlatFileIndex#closed?
|
|
1156
|
-
|
|
1157
|
-
Returns true if already closed. Otherwise, returns false.
|
|
1158
|
-
|
|
1159
|
-
--- Bio::FlatFileIndex#get_by_id(key)
|
|
1160
|
-
|
|
1161
|
-
Common interface defined in registry.rb.
|
|
1162
|
-
Searching databank and returns entry (or entries) as a string.
|
|
1163
|
-
Multiple entries (contatinated to one string) may be returned.
|
|
1164
|
-
Returns empty string If not found.
|
|
1165
|
-
|
|
1166
|
-
--- Bio::FlatFileIndex#search(key)
|
|
1167
|
-
|
|
1168
|
-
Searching databank and returns a Bio::FlatFileIndex::Results object.
|
|
1169
|
-
|
|
1170
|
-
--- Bio::FlatFileIndex#include?(key)
|
|
1171
|
-
|
|
1172
|
-
Searching databank.
|
|
1173
|
-
If found, returns an array of unique IDs (primary identifiers).
|
|
1174
|
-
If not found, returns nil.
|
|
1175
|
-
|
|
1176
|
-
--- Bio::FlatFileIndex#search_primary(key)
|
|
1177
|
-
|
|
1178
|
-
Searching only primary namespece.
|
|
1179
|
-
Returns a Bio::FlatFileIndex::Results object.
|
|
1180
|
-
|
|
1181
|
-
--- Bio::FlatFileIndex#search_namespaces(key, name1, name2, ...)
|
|
1182
|
-
|
|
1183
|
-
Searching only specific namespeces.
|
|
1184
|
-
Returns a Bio::FlatFileIndex::Results object.
|
|
1185
|
-
|
|
1186
|
-
--- Bio::FlatFileIndex#include_in_primary?(key)
|
|
1187
|
-
|
|
1188
|
-
Same as #include?, but serching only primary namespace.
|
|
1189
|
-
|
|
1190
|
-
--- Bio::FlatFileIndex#include_in_namespaces?(key, name1, name2, ...)
|
|
1191
|
-
|
|
1192
|
-
Same as #include?, but serching only specific namespaces.
|
|
1193
|
-
|
|
1194
|
-
--- Bio::FlatFileIndex#namespaces
|
|
1195
|
-
|
|
1196
|
-
Returns names of namespaces defined in the databank.
|
|
1197
|
-
(example: [ 'LOCUS', 'ACCESSION', 'VERSION' ] )
|
|
1198
|
-
|
|
1199
|
-
--- Bio::FlatFileIndex#primary_namespace
|
|
1200
|
-
|
|
1201
|
-
Returns name of primary namespace.
|
|
1202
|
-
|
|
1203
|
-
--- Bio::FlatFileIndex#secondary_namespaces
|
|
1204
|
-
|
|
1205
|
-
Returns names of secondary namespaces.
|
|
1206
|
-
|
|
1207
|
-
--- Bio::FlatFileIndex#default_namespaces= [ str1, str2, ... ]
|
|
1208
|
-
--- Bio::FlatFileIndex#default_namespaces= nil
|
|
1209
|
-
|
|
1210
|
-
Set default namespaces.
|
|
1211
|
-
nil means all namespaces in the databank.
|
|
1212
|
-
Default namespaces specified in this method only affect
|
|
1213
|
-
#get_by_id, #search, and #include? methods.
|
|
1214
|
-
Default of default namespaces is nil (that is, all namespaces
|
|
1215
|
-
are search destinations by default).
|
|
1216
|
-
|
|
1217
|
-
--- Bio::FlatFileIndex#default_namespaces
|
|
1218
|
-
|
|
1219
|
-
Returns default namespaces.
|
|
1220
|
-
nil means all namespaces.
|
|
1221
|
-
|
|
1222
|
-
--- Bio::FlatFileIndex#check_consistency
|
|
1223
|
-
|
|
1224
|
-
Raise RuntimeError if flatfiles are changed after creating
|
|
1225
|
-
the databank. (This check only compare file sizes as
|
|
1226
|
-
described in the OBDA specification.)
|
|
1227
|
-
|
|
1228
|
-
--- Bio::FlatFileIndex#always_check_consistency=(bool)
|
|
1229
|
-
--- Bio::FlatFileIndex#always_check_consistency
|
|
1230
|
-
|
|
1231
|
-
If true, consistency checks are performed every time
|
|
1232
|
-
accessing flatfiles. If nil/false, no checks are performed.
|
|
1233
|
-
Default of always_check_consistency is true.
|
|
1234
|
-
|
|
1235
|
-
== Bio::FlatFileIndex::Results
|
|
1236
|
-
|
|
1237
|
-
This object is made by Bio::FlatFileIndex methods.
|
|
1238
|
-
Currently, this class inherits Hash, but internal
|
|
1239
|
-
structure of this class may be changed anytime.
|
|
1240
|
-
Only using methods described below are strongly recomended.
|
|
1241
|
-
|
|
1242
|
-
--- Bio::FlatFileIndex::Results#to_a
|
|
1243
|
-
|
|
1244
|
-
Returns an array of strings.
|
|
1245
|
-
If no search results are exist, returns an empty array.
|
|
1246
|
-
|
|
1247
|
-
--- Bio::FlatFileIndex::Results#each
|
|
1248
|
-
|
|
1249
|
-
Iterates over each result(string).
|
|
1250
|
-
Same as to_a.each.
|
|
1251
|
-
|
|
1252
|
-
--- Bio::FlatFileIndex::Results#to_s
|
|
1253
|
-
|
|
1254
|
-
Returns a string. (concatinated if multiple results exists).
|
|
1255
|
-
Same as to_a.join('').
|
|
1256
|
-
|
|
1257
|
-
--- Bio::FlatFileIndex::Results#size
|
|
1258
|
-
|
|
1259
|
-
Returns number of results.
|
|
1260
|
-
Same as to_a.size.
|
|
1261
|
-
|
|
1262
|
-
--- Bio::FlatFileIndex::Results#+(res)
|
|
1263
|
-
|
|
1264
|
-
Add search results.
|
|
1265
|
-
"a + b" means "a OR b".
|
|
1266
|
-
* Example
|
|
1267
|
-
# I want to search 'ADH_IRON_1' OR 'ADH_IRON_2'
|
|
1268
|
-
db = Bio::FlatFIleIndex.new(location)
|
|
1269
|
-
a1 = db.search('ADH_IRON_1')
|
|
1270
|
-
a2 = db.search('ADH_IRON_2')
|
|
1271
|
-
# a1 and a2 are Bio::FlatFileIndex::Results objects.
|
|
1272
|
-
print a1 + a2
|
|
1273
|
-
|
|
1274
|
-
--- Bio::FlatFileIndex::Results#*(res)
|
|
1275
|
-
|
|
1276
|
-
Returns set intersection of results.
|
|
1277
|
-
"a * b" means "a AND b".
|
|
1278
|
-
* Example
|
|
1279
|
-
# I want to search 'HIS_KIN' AND 'human'
|
|
1280
|
-
db = Bio::FlatFIleIndex.new(location)
|
|
1281
|
-
hk = db.search('HIS_KIN')
|
|
1282
|
-
hu = db.search('human')
|
|
1283
|
-
# hk and hu are Bio::FlatFileIndex::Results objects.
|
|
1284
|
-
print hk * hu
|
|
1285
|
-
|
|
1286
|
-
== Bio::FlatFileIndex::DEBUG
|
|
1287
|
-
|
|
1288
|
-
Module for output debug messages.
|
|
1289
|
-
Default setting: If $DEBUG or $VERBOSE is true, output debug
|
|
1290
|
-
messages to STDERR; Otherwise, don't output messages.
|
|
1291
|
-
|
|
1292
|
-
--- Bio::FlatFileIndex::DEBUG.out=(io)
|
|
1293
|
-
|
|
1294
|
-
Set debug messages output destination.
|
|
1295
|
-
If true is given, outputs to STDERR.
|
|
1296
|
-
If nil is given, outputs nothing.
|
|
1297
|
-
This method affects ALL of FlatFileIndex related objects/methods.
|
|
1298
|
-
|
|
1299
|
-
== Other classes/modules
|
|
1300
|
-
|
|
1301
|
-
Classes/modules not described in this file are internal use only.
|
|
1302
|
-
|
|
1303
|
-
== SEE ALSO
|
|
1304
|
-
|
|
1305
|
-
* ((<URL:http://obda.open-bio.org/>))
|
|
1306
|
-
* ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
|
|
1307
|
-
|
|
1308
|
-
=end
|