bio 0.7.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
@@ -1,28 +1,68 @@
|
|
1
1
|
#
|
2
|
-
# bio/io/flatfile/index.rb - OBDA flatfile index
|
3
|
-
#
|
4
|
-
# Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
5
|
-
#
|
6
|
-
# This library is free software; you can redistribute it and/or
|
7
|
-
# modify it under the terms of the GNU Lesser General Public
|
8
|
-
# License as published by the Free Software Foundation; either
|
9
|
-
# version 2 of the License, or (at your option) any later version.
|
10
|
-
#
|
11
|
-
# This library is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
-
# Lesser General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU Lesser General Public
|
17
|
-
# License along with this library; if not, write to the Free Software
|
18
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
-
#
|
20
|
-
# $Id: index.rb,v 1.15 2005/11/28 05:08:26 k Exp $
|
2
|
+
# = bio/io/flatfile/index.rb - OBDA flatfile index
|
21
3
|
#
|
4
|
+
# Copyright:: Copyright (C) 2002
|
5
|
+
# GOTO Naohisa <ng@bioruby.org>
|
6
|
+
# License:: Ruby's
|
7
|
+
#
|
8
|
+
# $Id: index.rb,v 1.18 2006/02/22 08:40:31 ngoto Exp $
|
9
|
+
#
|
10
|
+
# = About Bio::FlatFileIndex
|
11
|
+
#
|
12
|
+
# Please refer to the documentation of the following classes.
|
13
|
+
# Classes/modules marked '#' are for internal use only.
|
14
|
+
#
|
15
|
+
# == Classes/modules in index.rb
|
16
|
+
# * class Bio::FlatFileIndex
|
17
|
+
# * class Bio::FlatFileIndex::Results
|
18
|
+
# * module Bio::FlatFileIndex::DEBUG
|
19
|
+
# * #module Bio::FlatFileIndex::Template
|
20
|
+
# * #class Bio::FlatFileIndex::Template::NameSpace
|
21
|
+
# * #class Bio::FlatFileIndex::FileID
|
22
|
+
# * #class Bio::FlatFileIndex::FileIDs
|
23
|
+
# * #module Bio::FlatFileIndex::Flat_1
|
24
|
+
# * #class Bio::FlatFileIndex::Flat_1::Record
|
25
|
+
# * #class Bio::FlatFileIndex::Flat_1::FlatMappingFile
|
26
|
+
# * #class Bio::FlatFileIndex::Flat_1::PrimaryNameSpace
|
27
|
+
# * #class Bio::FlatFileIndex::Flat_1::SecondaryNameSpace
|
28
|
+
# * #class Bio::FlatFileIndex::NameSpaces
|
29
|
+
# * #class Bio::FlatFileIndex::DataBank
|
30
|
+
#
|
31
|
+
# == Classes/modules in indexer.rb
|
32
|
+
# * module Bio::FlatFileIndex::Indexer
|
33
|
+
# * #class Bio::FlatFileIndex::Indexer::NameSpace
|
34
|
+
# * #class Bio::FlatFileIndex::Indexer::NameSpaces
|
35
|
+
# * #module Bio::FlatFileIndex::Indexer::Parser
|
36
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::TemplateParser
|
37
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::GenBankParser
|
38
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::GenPeptParser
|
39
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::EMBLParser
|
40
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::SPTRParser
|
41
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::FastaFormatParser
|
42
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::MaXMLSequenceParser
|
43
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::MaXMLClusterParser
|
44
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::BlastDefaultParser
|
45
|
+
# * #class Bio::FlatFileIndex::Indexer::Parser::PDBChemicalComponentParser
|
46
|
+
#
|
47
|
+
# == Classes/modules in bdb.rb
|
48
|
+
# * #module Bio::FlatFileIndex::BDBDefault
|
49
|
+
# * #class Bio::FlatFileIndex::BDBWrapper
|
50
|
+
# * #module Bio::FlatFileIndex::BDB_1
|
51
|
+
# * #class Bio::FlatFileIndex::BDB_1::BDBMappingFile
|
52
|
+
# * #class Bio::FlatFileIndex::BDB_1::PrimaryNameSpace
|
53
|
+
# * #class Bio::FlatFileIndex::BDB_1::SecondaryNameSpace
|
54
|
+
#
|
55
|
+
# = References
|
56
|
+
# * ((<URL:http://obda.open-bio.org/>))
|
57
|
+
# * ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
|
58
|
+
#
|
22
59
|
|
23
60
|
require 'bio/io/flatfile/indexer'
|
24
61
|
|
25
62
|
module Bio
|
63
|
+
|
64
|
+
|
65
|
+
# Bio::FlatFileIndex is a class for OBDA flatfile index.
|
26
66
|
class FlatFileIndex
|
27
67
|
|
28
68
|
autoload :Indexer, 'bio/io/flatfile/indexer'
|
@@ -30,10 +70,21 @@ module Bio
|
|
30
70
|
autoload :BDBwrapper, 'bio/io/flatfile/bdb'
|
31
71
|
autoload :BDB_1, 'bio/io/flatfile/bdb'
|
32
72
|
|
73
|
+
# magic string for flat/1 index
|
33
74
|
MAGIC_FLAT = 'flat/1'
|
75
|
+
|
76
|
+
# magic string for BerkeleyDB/1 index
|
34
77
|
MAGIC_BDB = 'BerkeleyDB/1'
|
35
78
|
|
36
79
|
#########################################################
|
80
|
+
|
81
|
+
# Opens existing databank. Databank is a directory which contains
|
82
|
+
# indexed files and configuration files. The type of the databank
|
83
|
+
# (flat or BerkeleyDB) is determined automatically.
|
84
|
+
#
|
85
|
+
# If block is given, the databank object is passed to the block.
|
86
|
+
# The databank will be automatically closed when the block terminates.
|
87
|
+
#
|
37
88
|
def self.open(name)
|
38
89
|
if block_given? then
|
39
90
|
begin
|
@@ -53,22 +104,38 @@ module Bio
|
|
53
104
|
r
|
54
105
|
end
|
55
106
|
|
107
|
+
# Opens existing databank. Databank is a directory which contains
|
108
|
+
# indexed files and configuration files. The type of the databank
|
109
|
+
# (flat or BerkeleyDB) is determined automatically.
|
110
|
+
#
|
111
|
+
# Unlike +FlatFileIndex.open+, block is not allowed.
|
112
|
+
#
|
56
113
|
def initialize(name)
|
57
114
|
@db = DataBank.open(name)
|
58
115
|
end
|
59
116
|
|
60
117
|
# common interface defined in registry.rb
|
118
|
+
# Searches the databank and returns the entry (or entries) as a string.
|
119
|
+
# Multiple entries (concatenated into one string) may be returned.
|
120
|
+
# Returns empty string if not found.
|
121
|
+
#
|
61
122
|
def get_by_id(key)
|
62
123
|
search(key).to_s
|
63
124
|
end
|
64
125
|
|
126
|
+
#--
|
65
127
|
# original methods
|
128
|
+
#++
|
129
|
+
|
130
|
+
# Closes the databank.
|
131
|
+
# Returns nil.
|
66
132
|
def close
|
67
133
|
check_closed?
|
68
134
|
@db.close
|
69
135
|
@db = nil
|
70
136
|
end
|
71
137
|
|
138
|
+
# Returns true if already closed. Otherwise, returns false.
|
72
139
|
def closed?
|
73
140
|
if @db then
|
74
141
|
false
|
@@ -77,6 +144,19 @@ module Bio
|
|
77
144
|
end
|
78
145
|
end
|
79
146
|
|
147
|
+
# Set default namespaces.
|
148
|
+
# <code>default_namespaces = nil</code>
|
149
|
+
# means all namespaces in the databank.
|
150
|
+
#
|
151
|
+
# <code>default_namespaces= [ str1, str2, ... ]</code>
|
152
|
+
# means set default namespaces to str1, str2, ...
|
153
|
+
#
|
154
|
+
# Default namespaces specified in this method only affect
|
155
|
+
# #get_by_id, #search, and #include? methods.
|
156
|
+
#
|
157
|
+
# Default of default namespaces is nil (that is, all namespaces
|
158
|
+
# are search destinations by default).
|
159
|
+
#
|
80
160
|
def default_namespaces=(names)
|
81
161
|
if names then
|
82
162
|
@names = []
|
@@ -86,10 +166,14 @@ module Bio
|
|
86
166
|
end
|
87
167
|
end
|
88
168
|
|
169
|
+
# Returns default namespaces.
|
170
|
+
# Returns an array of strings or nil.
|
171
|
+
# nil means all namespaces.
|
89
172
|
def default_namespaces
|
90
173
|
@names
|
91
174
|
end
|
92
175
|
|
176
|
+
# Searching databank and returns a Bio::FlatFileIndex::Results object.
|
93
177
|
def search(key)
|
94
178
|
check_closed?
|
95
179
|
if @names then
|
@@ -99,16 +183,30 @@ module Bio
|
|
99
183
|
end
|
100
184
|
end
|
101
185
|
|
186
|
+
# Searches only the specified namespaces.
|
187
|
+
# Returns a Bio::FlatFileIndex::Results object.
|
188
|
+
#
|
102
189
|
def search_namespaces(key, *names)
|
103
190
|
check_closed?
|
104
191
|
@db.search_namespaces(key, *names)
|
105
192
|
end
|
106
193
|
|
194
|
+
# Searches only the primary namespace.
|
195
|
+
# Returns a Bio::FlatFileIndex::Results object.
|
196
|
+
#
|
107
197
|
def search_primary(key)
|
108
198
|
check_closed?
|
109
199
|
@db.search_primary(key)
|
110
200
|
end
|
111
201
|
|
202
|
+
# Searching databank.
|
203
|
+
# If some entries are found, returns an array of
|
204
|
+
# unique IDs (primary identifiers).
|
205
|
+
# If not found anything, returns nil.
|
206
|
+
#
|
207
|
+
# This method is useful when search result is very large and
|
208
|
+
# #search method is very slow.
|
209
|
+
#
|
112
210
|
def include?(key)
|
113
211
|
check_closed?
|
114
212
|
if @names then
|
@@ -123,6 +221,8 @@ module Bio
|
|
123
221
|
end
|
124
222
|
end
|
125
223
|
|
224
|
+
# Same as #include?, but searching only the specified namespaces.
|
225
|
+
#
|
126
226
|
def include_in_namespaces?(key, *names)
|
127
227
|
check_closed?
|
128
228
|
r = @db.search_namespaces_get_unique_id(key, *names)
|
@@ -133,6 +233,8 @@ module Bio
|
|
133
233
|
end
|
134
234
|
end
|
135
235
|
|
236
|
+
# Same as #include?, but searching only the primary namespace.
|
237
|
+
#
|
136
238
|
def include_in_primary?(key)
|
137
239
|
check_closed?
|
138
240
|
r = @db.search_primary_get_unique_id(key)
|
@@ -143,6 +245,9 @@ module Bio
|
|
143
245
|
end
|
144
246
|
end
|
145
247
|
|
248
|
+
# Returns names of namespaces defined in the databank.
|
249
|
+
# (example: [ 'LOCUS', 'ACCESSION', 'VERSION' ] )
|
250
|
+
#
|
146
251
|
def namespaces
|
147
252
|
check_closed?
|
148
253
|
r = secondary_namespaces
|
@@ -150,38 +255,82 @@ module Bio
|
|
150
255
|
r
|
151
256
|
end
|
152
257
|
|
258
|
+
# Returns name of primary namespace as a string.
|
153
259
|
def primary_namespace
|
154
260
|
check_closed?
|
155
261
|
@db.primary.name
|
156
262
|
end
|
157
263
|
|
264
|
+
# Returns names of secondary namespaces as an array of strings.
|
158
265
|
def secondary_namespaces
|
159
266
|
check_closed?
|
160
267
|
@db.secondary.names
|
161
268
|
end
|
162
269
|
|
270
|
+
# Check consistency between the databank(index) and original flat files.
|
271
|
+
#
|
272
|
+
# If the original flat files are changed after creating
|
273
|
+
# the databank, raises RuntimeError.
|
274
|
+
#
|
275
|
+
# Note that this check only compares file sizes as
|
276
|
+
# described in the OBDA specification.
|
277
|
+
#
|
163
278
|
def check_consistency
|
164
279
|
check_closed?
|
165
280
|
@db.check_consistency
|
166
281
|
end
|
167
282
|
|
283
|
+
# If true is given, consistency checks will be performed every time
|
284
|
+
# accessing flatfiles. If nil/false, no checks are performed.
|
285
|
+
#
|
286
|
+
# By default, always_check_consistency is true.
|
287
|
+
#
|
168
288
|
def always_check_consistency=(bool)
|
169
289
|
@db.always_check=(bool)
|
170
290
|
end
|
291
|
+
|
292
|
+
# If true, consistency checks will be performed every time
|
293
|
+
# accessing flatfiles. If nil/false, no checks are performed.
|
294
|
+
#
|
295
|
+
# By default, always_check_consistency is true.
|
296
|
+
#
|
171
297
|
def always_check_consistency(bool)
|
172
298
|
@db.always_check
|
173
299
|
end
|
174
300
|
|
301
|
+
#--
|
175
302
|
# private methods
|
303
|
+
#++
|
304
|
+
|
305
|
+
# If the databank is closed, raises IOError.
|
176
306
|
def check_closed?
|
177
307
|
@db or raise IOError, 'closed databank'
|
178
308
|
end
|
179
309
|
private :check_closed?
|
180
310
|
|
311
|
+
#--
|
181
312
|
#########################################################
|
182
|
-
|
313
|
+
#++
|
314
|
+
|
315
|
+
# <code>Results</code> stores search results created by
|
316
|
+
# <code>Bio::FlatFileIndex</code> methods.
|
317
|
+
#
|
318
|
+
# Currently, this class inherits Hash, but internal
|
319
|
+
# structure of this class may be changed anytime.
|
320
|
+
# Using only the methods described below is strongly recommended.
|
321
|
+
#
|
183
322
|
class Results < Hash
|
184
323
|
|
324
|
+
# Add search results.
|
325
|
+
# "a + b" means "a OR b".
|
326
|
+
# * Example
|
327
|
+
# # I want to search 'ADH_IRON_1' OR 'ADH_IRON_2'
|
328
|
+
# db = Bio::FlatFileIndex.new(location)
|
329
|
+
# a1 = db.search('ADH_IRON_1')
|
330
|
+
# a2 = db.search('ADH_IRON_2')
|
331
|
+
# # a1 and a2 are Bio::FlatFileIndex::Results objects.
|
332
|
+
# print a1 + a2
|
333
|
+
#
|
185
334
|
def +(a)
|
186
335
|
raise 'argument must be Results class' unless a.is_a?(self.class)
|
187
336
|
res = self.dup
|
@@ -189,6 +338,16 @@ module Bio
|
|
189
338
|
res
|
190
339
|
end
|
191
340
|
|
341
|
+
# Returns set intersection of results.
|
342
|
+
# "a * b" means "a AND b".
|
343
|
+
# * Example
|
344
|
+
# # I want to search 'HIS_KIN' AND 'human'
|
345
|
+
# db = Bio::FlatFileIndex.new(location)
|
346
|
+
# hk = db.search('HIS_KIN')
|
347
|
+
# hu = db.search('human')
|
348
|
+
# # hk and hu are Bio::FlatFileIndex::Results objects.
|
349
|
+
# print hk * hu
|
350
|
+
#
|
192
351
|
def *(a)
|
193
352
|
raise 'argument must be Results class' unless a.is_a?(self.class)
|
194
353
|
res = self.class.new
|
@@ -196,26 +355,63 @@ module Bio
|
|
196
355
|
res
|
197
356
|
end
|
198
357
|
|
358
|
+
# Returns a string (concatenated if multiple results exist).
|
359
|
+
# Same as <code>to_a.join('')</code>.
|
360
|
+
#
|
199
361
|
def to_s
|
200
362
|
self.values.join
|
201
363
|
end
|
202
364
|
|
365
|
+
#--
|
203
366
|
#alias each_orig each
|
367
|
+
#++
|
368
|
+
|
369
|
+
# alias for each_value.
|
204
370
|
alias each each_value
|
371
|
+
|
372
|
+
# Iterates over each result (string).
|
373
|
+
# Same as to_a.each.
|
374
|
+
def each(&x) #:yields: str
|
375
|
+
each_value(&x)
|
376
|
+
end if false #dummy for RDoc
|
377
|
+
|
378
|
+
#--
|
205
379
|
#alias to_a_orig to_a
|
380
|
+
#++
|
381
|
+
|
382
|
+
# alias for to_a.
|
206
383
|
alias to_a values
|
207
384
|
|
385
|
+
# Returns an array of strings.
|
386
|
+
# If no search results exist, returns an empty array.
|
387
|
+
#
|
388
|
+
def to_a; values; end if false #dummy for RDoc
|
389
|
+
|
390
|
+
# Returns number of results.
|
391
|
+
# Same as to_a.size.
|
392
|
+
def size; end if false #dummy for RDoc
|
393
|
+
|
208
394
|
end #class Results
|
209
395
|
|
210
396
|
#########################################################
|
211
397
|
|
398
|
+
# Module for output debug messages.
|
399
|
+
# Default setting: If $DEBUG or $VERBOSE is true, output debug
|
400
|
+
# messages to $stderr; Otherwise, don't output messages.
|
401
|
+
#
|
212
402
|
module DEBUG
|
213
|
-
@@out =
|
403
|
+
@@out = $stderr
|
214
404
|
@@flag = nil
|
405
|
+
|
406
|
+
# Set debug messages output destination.
|
407
|
+
# If true is given, outputs to $stderr.
|
408
|
+
# If nil is given, outputs nothing.
|
409
|
+
# This method affects ALL of FlatFileIndex related objects/methods.
|
410
|
+
#
|
215
411
|
def self.out=(io)
|
216
412
|
if io then
|
217
413
|
@@out = io
|
218
|
-
@@out =
|
414
|
+
@@out = $stderr if io == true
|
219
415
|
@@flag = true
|
220
416
|
else
|
221
417
|
@@out = nil
|
@@ -223,9 +419,13 @@ module Bio
|
|
223
419
|
end
|
224
420
|
@@out
|
225
421
|
end
|
422
|
+
|
423
|
+
# get current debug messages output destination
|
226
424
|
def self.out
|
227
425
|
@@out
|
228
426
|
end
|
427
|
+
|
428
|
+
# prints debug messages
|
229
429
|
def self.print(*arg)
|
230
430
|
@@flag = true if $DEBUG or $VERBOSE
|
231
431
|
@@out.print(*arg) if @@out and @@flag
|
@@ -234,7 +434,14 @@ module Bio
|
|
234
434
|
|
235
435
|
#########################################################
|
236
436
|
|
437
|
+
# Templates
|
438
|
+
#
|
439
|
+
# Internal use only.
|
237
440
|
module Template
|
441
|
+
|
442
|
+
# templates of namespace
|
443
|
+
#
|
444
|
+
# Internal use only.
|
238
445
|
class NameSpace
|
239
446
|
def filename
|
240
447
|
# should be redefined in child class
|
@@ -275,6 +482,9 @@ module Bio
|
|
275
482
|
end #class NameSpace
|
276
483
|
end #module Template
|
277
484
|
|
485
|
+
# FileID class.
|
486
|
+
#
|
487
|
+
# Internal use only.
|
278
488
|
class FileID
|
279
489
|
def self.new_from_string(str)
|
280
490
|
a = str.split("\t", 2)
|
@@ -355,6 +565,9 @@ module Bio
|
|
355
565
|
end
|
356
566
|
end #class FileID
|
357
567
|
|
568
|
+
# FileIDs class.
|
569
|
+
#
|
570
|
+
# Internal use only.
|
358
571
|
class FileIDs < Array
|
359
572
|
def initialize(prefix, hash)
|
360
573
|
@hash = hash
|
@@ -471,7 +684,14 @@ module Bio
|
|
471
684
|
|
472
685
|
end #class FileIDs
|
473
686
|
|
687
|
+
# module for flat/1 databank
|
688
|
+
#
|
689
|
+
# Internal use only.
|
474
690
|
module Flat_1
|
691
|
+
|
692
|
+
# Record class.
|
693
|
+
#
|
694
|
+
# Internal use only.
|
475
695
|
class Record
|
476
696
|
def initialize(str, size = nil)
|
477
697
|
a = str.split("\t")
|
@@ -500,6 +720,9 @@ module Bio
|
|
500
720
|
end
|
501
721
|
end #class Record
|
502
722
|
|
723
|
+
# FlatMappingFile class.
|
724
|
+
#
|
725
|
+
# Internal use only.
|
503
726
|
class FlatMappingFile
|
504
727
|
@@recsize_width = 4
|
505
728
|
@@recsize_regex = /\A\d{4}\z/
|
@@ -785,6 +1008,9 @@ module Bio
|
|
785
1008
|
end
|
786
1009
|
end #class FlatMappingFile
|
787
1010
|
|
1011
|
+
# primary name space
|
1012
|
+
#
|
1013
|
+
# Internal use only.
|
788
1014
|
class PrimaryNameSpace < Template::NameSpace
|
789
1015
|
def mapping(filename)
|
790
1016
|
FlatMappingFile.new(filename)
|
@@ -794,6 +1020,9 @@ module Bio
|
|
794
1020
|
end
|
795
1021
|
end #class PrimaryNameSpace
|
796
1022
|
|
1023
|
+
# secondary name space
|
1024
|
+
#
|
1025
|
+
# Internal use only.
|
797
1026
|
class SecondaryNameSpace < Template::NameSpace
|
798
1027
|
def mapping(filename)
|
799
1028
|
FlatMappingFile.new(filename)
|
@@ -810,7 +1039,9 @@ module Bio
|
|
810
1039
|
end #class SecondaryNameSpace
|
811
1040
|
end #module Flat_1
|
812
1041
|
|
813
|
-
|
1042
|
+
# namespaces
|
1043
|
+
#
|
1044
|
+
# Internal use only.
|
814
1045
|
class NameSpaces < Hash
|
815
1046
|
def initialize(dbname, nsclass, arg)
|
816
1047
|
@dbname = dbname
|
@@ -872,6 +1103,9 @@ module Bio
|
|
872
1103
|
end
|
873
1104
|
end #class NameSpaces
|
874
1105
|
|
1106
|
+
# databank
|
1107
|
+
#
|
1108
|
+
# Internal use only.
|
875
1109
|
class DataBank
|
876
1110
|
def self.file2hash(fileobj)
|
877
1111
|
hash = {}
|
@@ -1135,174 +1369,3 @@ module Bio
|
|
1135
1369
|
end #class FlatFileIndex
|
1136
1370
|
end #module Bio
|
1137
1371
|
|
1138
|
-
######################################################################
|
1139
|
-
|
1140
|
-
=begin
|
1141
|
-
|
1142
|
-
= Bio::FlatFileIndex
|
1143
|
-
|
1144
|
-
--- Bio::FlatFileIndex.new(dbname)
|
1145
|
-
--- Bio::FlatFileIndex.open(dbname)
|
1146
|
-
|
1147
|
-
Opens existing databank. Databank is a directory which contains
|
1148
|
-
indexed files and configuration files. The type of the databank
|
1149
|
-
(flat or BerkeleyDB) are determined automatically.
|
1150
|
-
|
1151
|
-
--- Bio::FlatFileIndex#close
|
1152
|
-
|
1153
|
-
Closes opened databank.
|
1154
|
-
|
1155
|
-
--- Bio::FlatFileIndex#closed?
|
1156
|
-
|
1157
|
-
Returns true if already closed. Otherwise, returns false.
|
1158
|
-
|
1159
|
-
--- Bio::FlatFileIndex#get_by_id(key)
|
1160
|
-
|
1161
|
-
Common interface defined in registry.rb.
|
1162
|
-
Searching databank and returns entry (or entries) as a string.
|
1163
|
-
Multiple entries (contatinated to one string) may be returned.
|
1164
|
-
Returns empty string If not found.
|
1165
|
-
|
1166
|
-
--- Bio::FlatFileIndex#search(key)
|
1167
|
-
|
1168
|
-
Searching databank and returns a Bio::FlatFileIndex::Results object.
|
1169
|
-
|
1170
|
-
--- Bio::FlatFileIndex#include?(key)
|
1171
|
-
|
1172
|
-
Searching databank.
|
1173
|
-
If found, returns an array of unique IDs (primary identifiers).
|
1174
|
-
If not found, returns nil.
|
1175
|
-
|
1176
|
-
--- Bio::FlatFileIndex#search_primary(key)
|
1177
|
-
|
1178
|
-
Searching only primary namespece.
|
1179
|
-
Returns a Bio::FlatFileIndex::Results object.
|
1180
|
-
|
1181
|
-
--- Bio::FlatFileIndex#search_namespaces(key, name1, name2, ...)
|
1182
|
-
|
1183
|
-
Searching only specific namespeces.
|
1184
|
-
Returns a Bio::FlatFileIndex::Results object.
|
1185
|
-
|
1186
|
-
--- Bio::FlatFileIndex#include_in_primary?(key)
|
1187
|
-
|
1188
|
-
Same as #include?, but serching only primary namespace.
|
1189
|
-
|
1190
|
-
--- Bio::FlatFileIndex#include_in_namespaces?(key, name1, name2, ...)
|
1191
|
-
|
1192
|
-
Same as #include?, but serching only specific namespaces.
|
1193
|
-
|
1194
|
-
--- Bio::FlatFileIndex#namespaces
|
1195
|
-
|
1196
|
-
Returns names of namespaces defined in the databank.
|
1197
|
-
(example: [ 'LOCUS', 'ACCESSION', 'VERSION' ] )
|
1198
|
-
|
1199
|
-
--- Bio::FlatFileIndex#primary_namespace
|
1200
|
-
|
1201
|
-
Returns name of primary namespace.
|
1202
|
-
|
1203
|
-
--- Bio::FlatFileIndex#secondary_namespaces
|
1204
|
-
|
1205
|
-
Returns names of secondary namespaces.
|
1206
|
-
|
1207
|
-
--- Bio::FlatFileIndex#default_namespaces= [ str1, str2, ... ]
|
1208
|
-
--- Bio::FlatFileIndex#default_namespaces= nil
|
1209
|
-
|
1210
|
-
Set default namespaces.
|
1211
|
-
nil means all namespaces in the databank.
|
1212
|
-
Default namespaces specified in this method only affect
|
1213
|
-
#get_by_id, #search, and #include? methods.
|
1214
|
-
Default of default namespaces is nil (that is, all namespaces
|
1215
|
-
are search destinations by default).
|
1216
|
-
|
1217
|
-
--- Bio::FlatFileIndex#default_namespaces
|
1218
|
-
|
1219
|
-
Returns default namespaces.
|
1220
|
-
nil means all namespaces.
|
1221
|
-
|
1222
|
-
--- Bio::FlatFileIndex#check_consistency
|
1223
|
-
|
1224
|
-
Raise RuntimeError if flatfiles are changed after creating
|
1225
|
-
the databank. (This check only compare file sizes as
|
1226
|
-
described in the OBDA specification.)
|
1227
|
-
|
1228
|
-
--- Bio::FlatFileIndex#always_check_consistency=(bool)
|
1229
|
-
--- Bio::FlatFileIndex#always_check_consistency
|
1230
|
-
|
1231
|
-
If true, consistency checks are performed every time
|
1232
|
-
accessing flatfiles. If nil/false, no checks are performed.
|
1233
|
-
Default of always_check_consistency is true.
|
1234
|
-
|
1235
|
-
== Bio::FlatFileIndex::Results
|
1236
|
-
|
1237
|
-
This object is made by Bio::FlatFileIndex methods.
|
1238
|
-
Currently, this class inherits Hash, but internal
|
1239
|
-
structure of this class may be changed anytime.
|
1240
|
-
Only using methods described below are strongly recomended.
|
1241
|
-
|
1242
|
-
--- Bio::FlatFileIndex::Results#to_a
|
1243
|
-
|
1244
|
-
Returns an array of strings.
|
1245
|
-
If no search results are exist, returns an empty array.
|
1246
|
-
|
1247
|
-
--- Bio::FlatFileIndex::Results#each
|
1248
|
-
|
1249
|
-
Iterates over each result(string).
|
1250
|
-
Same as to_a.each.
|
1251
|
-
|
1252
|
-
--- Bio::FlatFileIndex::Results#to_s
|
1253
|
-
|
1254
|
-
Returns a string. (concatinated if multiple results exists).
|
1255
|
-
Same as to_a.join('').
|
1256
|
-
|
1257
|
-
--- Bio::FlatFileIndex::Results#size
|
1258
|
-
|
1259
|
-
Returns number of results.
|
1260
|
-
Same as to_a.size.
|
1261
|
-
|
1262
|
-
--- Bio::FlatFileIndex::Results#+(res)
|
1263
|
-
|
1264
|
-
Add search results.
|
1265
|
-
"a + b" means "a OR b".
|
1266
|
-
* Example
|
1267
|
-
# I want to search 'ADH_IRON_1' OR 'ADH_IRON_2'
|
1268
|
-
db = Bio::FlatFIleIndex.new(location)
|
1269
|
-
a1 = db.search('ADH_IRON_1')
|
1270
|
-
a2 = db.search('ADH_IRON_2')
|
1271
|
-
# a1 and a2 are Bio::FlatFileIndex::Results objects.
|
1272
|
-
print a1 + a2
|
1273
|
-
|
1274
|
-
--- Bio::FlatFileIndex::Results#*(res)
|
1275
|
-
|
1276
|
-
Returns set intersection of results.
|
1277
|
-
"a * b" means "a AND b".
|
1278
|
-
* Example
|
1279
|
-
# I want to search 'HIS_KIN' AND 'human'
|
1280
|
-
db = Bio::FlatFIleIndex.new(location)
|
1281
|
-
hk = db.search('HIS_KIN')
|
1282
|
-
hu = db.search('human')
|
1283
|
-
# hk and hu are Bio::FlatFileIndex::Results objects.
|
1284
|
-
print hk * hu
|
1285
|
-
|
1286
|
-
== Bio::FlatFileIndex::DEBUG
|
1287
|
-
|
1288
|
-
Module for output debug messages.
|
1289
|
-
Default setting: If $DEBUG or $VERBOSE is true, output debug
|
1290
|
-
messages to STDERR; Otherwise, don't output messages.
|
1291
|
-
|
1292
|
-
--- Bio::FlatFileIndex::DEBUG.out=(io)
|
1293
|
-
|
1294
|
-
Set debug messages output destination.
|
1295
|
-
If true is given, outputs to STDERR.
|
1296
|
-
If nil is given, outputs nothing.
|
1297
|
-
This method affects ALL of FlatFileIndex related objects/methods.
|
1298
|
-
|
1299
|
-
== Other classes/modules
|
1300
|
-
|
1301
|
-
Classes/modules not described in this file are internal use only.
|
1302
|
-
|
1303
|
-
== SEE ALSO
|
1304
|
-
|
1305
|
-
* ((<URL:http://obda.open-bio.org/>))
|
1306
|
-
* ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
|
1307
|
-
|
1308
|
-
=end
|