bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,778 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/io/flatfile/indexer.rb - OBDA flatfile indexer
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: indexer.rb,v 1.21 2005/09/26 13:00:08 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/io/flatfile/index'
|
|
24
|
+
|
|
25
|
+
module Bio
|
|
26
|
+
class FlatFileIndex
|
|
27
|
+
|
|
28
|
+
module Indexer
|
|
29
|
+
|
|
30
|
+
# A named index namespace: pairs the namespace's name with the object
# (a Proc, or nil) that is later called with an entry to extract the
# key(s) for that namespace.
class NameSpace
  attr_reader :name, :proc

  # name::   namespace name
  # method:: extractor stored as-is and exposed through #proc
  def initialize(name, method)
    @name, @proc = name, method
  end
end #class NameSpace
|
|
37
|
+
|
|
38
|
+
# Hash of namespace objects keyed by their #name.
#
# Note that #each is redefined to iterate over the stored objects only
# (it is an alias of Hash#each_value), not over key/value pairs.
class NameSpaces < Hash
  # Builds the collection from any number of objects responding to #name.
  def initialize(*arg)
    super()
    arg.each { |ns| store(ns.name, ns) }
  end

  # Names of all registered namespaces (the hash keys).
  def names
    keys
  end

  # Registers +x+ under its own name; returns +x+.
  def <<(x)
    store(x.name, x)
  end

  # Same as #<<.
  alias add <<

  #alias each_orig each
  alias each each_value
end
|
|
57
|
+
|
|
58
|
+
module Parser
|
|
59
|
+
# Factory: returns the index parser matching +format+.
#
# +format+ is compared through its to_s, so either a format name
# ('embl', 'swiss', 'genbank', 'fasta') or one of the database classes
# listed below may be given.  The remaining arguments are passed to the
# chosen parser's constructor.  The BLAST parsers additionally receive
# the report class as their first argument.
#
# Raises RuntimeError for any format not listed.
def self.new(format, *arg)
  key = format.to_s
  case key
  when 'embl', 'Bio::EMBL'
    then EMBLParser.new(*arg)
  when 'swiss', 'Bio::SPTR', 'Bio::TrEMBL', 'Bio::SwissProt'
    then SPTRParser.new(*arg)
  when 'genbank', 'Bio::GenBank', 'Bio::RefSeq', 'Bio::DDBJ'
    then GenBankParser.new(*arg)
  when 'Bio::GenPept'
    then GenPeptParser.new(*arg)
  when 'fasta', 'Bio::FastaFormat'
    then FastaFormatParser.new(*arg)
  when 'Bio::FANTOM::MaXML::Sequence'
    then MaXMLSequenceParser.new(*arg)
  when 'Bio::FANTOM::MaXML::Cluster'
    then MaXMLClusterParser.new(*arg)
  when 'Bio::Blast::Default::Report'
    then BlastDefaultParser.new(Bio::Blast::Default::Report, *arg)
  when 'Bio::Blast::Default::Report_TBlast'
    then BlastDefaultParser.new(Bio::Blast::Default::Report_TBlast, *arg)
  when 'Bio::Blast::WU::Report'
    then BlastDefaultParser.new(Bio::Blast::WU::Report, *arg)
  when 'Bio::Blast::WU::Report_TBlast'
    then BlastDefaultParser.new(Bio::Blast::WU::Report_TBlast, *arg)
  else
    raise 'unknown or unsupported format'
  end #case key
end
|
|
87
|
+
|
|
88
|
+
# Common machinery shared by the format-specific index parsers.
#
# A subclass supplies its own NAMESTYLE (the NameSpace objects it knows),
# sets +format+ and +dbclass+, and selects one primary and any number of
# secondary namespaces.  The parser then handles one flatfile at a time:
# #open_flatfile, #each (which walks the entries), #parse_primary and
# #parse_secondary (which extract keys from the current entry), and
# #close_flatfile.
class TemplateParser
  NAMESTYLE = NameSpaces.new

  def initialize
    # self.class:: so that a subclass's own NAMESTYLE constant is picked up.
    @namestyle = self.class::NAMESTYLE
    @secondary = NameSpaces.new
    @errorlog = []
  end
  attr_reader :primary, :secondary, :format, :dbclass
  # Array of [exception, flatfile name, position] triples recorded by #each.
  attr_reader :errorlog

  # Selects the primary namespace.  +name+ is either a NameSpace object or
  # a key of this parser's NAMESTYLE.  Returns the selected NameSpace;
  # raises RuntimeError if the name is unknown.
  def set_primary_namespace(name)
    DEBUG.print "set_primary_namespace: #{name.inspect}\n"
    @primary = name.is_a?(NameSpace) ? name : @namestyle[name]
    raise 'unknown primary namespace' unless @primary
    @primary
  end

  # Adds secondary namespaces.  Each argument is either a NameSpace object
  # or a key of this parser's NAMESTYLE.  Returns true; raises
  # RuntimeError on an unknown name.
  def add_secondary_namespaces(*names)
    DEBUG.print "add_secondary_namespaces: #{names.inspect}\n"
    names.each do |x|
      if x.is_a?(NameSpace) then
        # NameSpace objects are registered directly, mirroring
        # set_primary_namespace (they used to be silently ignored).
        @secondary << x
      else
        y = @namestyle[x]
        raise 'unknown secondary namespace' unless y
        @secondary << y
      end
    end
    true
  end

  # administration of a single flatfile
  #
  # Opens +file+ with Bio::FlatFile using this parser's dbclass and
  # remembers +fileid+ and the file name for later messages.
  def open_flatfile(fileid, file)
    @fileid = fileid
    @flatfilename = file
    DEBUG.print "fileid=#{fileid} file=#{@flatfilename.inspect}\n"
    @flatfile = Bio::FlatFile.open(@dbclass, file, 'rb')
    @flatfile.raw = nil
    @entry = nil
  end
  attr_reader :fileid

  # Walks all entries of the open flatfile.  For each entry the current
  # entry is stored in @entry and the block receives (pos, len): the
  # flatfile position recorded before the entry was read and the length
  # of the entry's raw text.
  #
  # A RuntimeError or NameError raised inside the block is printed to
  # DEBUG and appended to #errorlog.  It is re-raised only when @fatal is
  # set; nothing in this class assigns @fatal (the assignments in the
  # parse methods are commented out), so iteration normally continues
  # with the next entry.
  def each
    pos = @flatfile.pos
    @flatfile.each do |x|
      @entry = x
      len = @flatfile.entry_raw.length
      begin
        yield pos, len
      rescue RuntimeError, NameError => evar
        DEBUG.print "Caught error: #{evar.inspect}\n"
        DEBUG.print "in #{@flatfilename.inspect} position #{pos}\n"
        DEBUG.print "===begin===\n"
        DEBUG.print @flatfile.entry_raw.to_s.chomp
        DEBUG.print "\n===end===\n"
        @errorlog << [ evar, @flatfilename, pos ]
        if @fatal then
          DEBUG.print "Fatal error occurred, stop creating index...\n"
          raise evar
        else
          DEBUG.print "This entry shall be incorrectly indexed.\n"
        end
      end #rescue
      pos = @flatfile.pos
    end
  end

  # Returns the primary key of the current entry, obtained by calling the
  # primary namespace's proc.  Raises RuntimeError unless the result is a
  # non-empty String.
  def parse_primary
    r = self.primary.proc.call(@entry)
    unless r.is_a?(String) and r.length > 0
      #@fatal = true
      raise 'primary id must be a non-void string (skipped this entry)'
    end
    r
  end

  # Yields (namespace name, key) for every non-empty secondary key of the
  # current entry.
  def parse_secondary
    self.secondary.each do |x|
      p = x.proc.call(@entry)
      # Array() lets an extractor return a single String or nil as well as
      # a list; a bare String has no #each on Ruby 1.9 and later.
      Array(p).each do |y|
        yield x.name, y if y.length > 0
      end
    end
  end

  # Closes the flatfile opened by #open_flatfile.
  def close_flatfile
    DEBUG.print "close flatfile #{@flatfilename.inspect}\n"
    @flatfile.close
  end

  protected
  attr_writer :format, :dbclass
end #class TemplateParser
|
|
183
|
+
|
|
184
|
+
# Index parser for GenBank-format flatfiles (dbclass Bio::GenBank).
# Unless told otherwise, 'VERSION' is the primary namespace and every
# other known namespace becomes a secondary one.
class GenBankParser < TemplateParser
  NAMESTYLE = NameSpaces.new(
    NameSpace.new('VERSION',   Proc.new { |entry| entry.acc_version }),
    NameSpace.new('LOCUS',     Proc.new { |entry| entry.entry_id }),
    NameSpace.new('ACCESSION', Proc.new { |entry| entry.accessions }),
    # The GI value is stringified and a leading "GI:" is removed.
    NameSpace.new('GI',        Proc.new { |entry|
                                 entry.gi.to_s.gsub(/\AGI\:/, '')
                               })
  )
  PRIMARY = 'VERSION'

  # pri_name::  primary namespace name (defaults to PRIMARY)
  # sec_names:: secondary namespace names (defaults to all namespaces in
  #             NAMESTYLE except the primary one)
  def initialize(pri_name = nil, sec_names = nil)
    super()
    self.format = 'genbank'
    self.dbclass = Bio::GenBank
    self.set_primary_namespace(pri_name || PRIMARY)
    if !sec_names then
      known = @namestyle.values.collect { |ns| ns.name }
      sec_names = known.reject { |n| n == self.primary.name }
    end
    self.add_secondary_namespaces(*sec_names)
  end

  # Opens the file, then leaves the read position at the start of the
  # first line beginning with "LOCUS " (or at the position reached when
  # the input ran out).
  def open_flatfile(fileid, file)
    super
    @flatfile.pos = 0
    pos = @flatfile.pos
    while (line = @flatfile.gets) and line !~ /^LOCUS /
      pos = @flatfile.pos
    end
    @flatfile.pos = pos
  end
end #class GenBankParser
|
|
217
|
+
|
|
218
|
+
# GenBankParser variant whose dbclass is Bio::GenPept; everything else
# (format name, namespaces) is inherited unchanged.
class GenPeptParser < GenBankParser
  def initialize(*arg)
    super
    self.dbclass = Bio::GenPept
  end
end #class GenPeptParser
|
|
224
|
+
|
|
225
|
+
# Index parser for EMBL-format flatfiles (dbclass Bio::EMBL).
# 'ID' is the default primary namespace; the default secondary
# namespaces come from the SECONDARY constant of the concrete class.
class EMBLParser < TemplateParser
  NAMESTYLE = NameSpaces.new(
    NameSpace.new('ID', Proc.new { |entry| entry.entry_id }),
    NameSpace.new('AC', Proc.new { |entry| entry.accessions }),
    NameSpace.new('SV', Proc.new { |entry| entry.sv }),
    # All values of entry.dr, flattened, keeping items longer than 1.
    NameSpace.new('DR', Proc.new { |entry|
                          refs = []
                          entry.dr.each_value { |v| refs.push(v) }
                          refs.flatten.find_all { |r| r.length > 1 }
                        })
  )
  PRIMARY = 'ID'
  SECONDARY = [ 'AC', 'SV' ]

  # pri_name::  primary namespace name (defaults to PRIMARY)
  # sec_names:: secondary namespace names (defaults to self.class::SECONDARY)
  def initialize(pri_name = nil, sec_names = nil)
    super()
    self.format = 'embl'
    self.dbclass = Bio::EMBL
    self.set_primary_namespace(pri_name || PRIMARY)
    sec_names ||= self.class::SECONDARY
    self.add_secondary_namespaces(*sec_names)
  end
end #class EMBLParser
|
|
250
|
+
|
|
251
|
+
# EMBLParser variant for format 'swiss' (dbclass Bio::SPTR).  Only 'AC'
# is a default secondary namespace.
class SPTRParser < EMBLParser
  SECONDARY = [ 'AC' ]

  def initialize(*arg)
    super
    self.format = 'swiss'
    self.dbclass = Bio::SPTR
  end
end #class SPTRParser
|
|
259
|
+
|
|
260
|
+
# Index parser for FASTA flatfiles (dbclass Bio::FastaFormat).
#
# The default primary namespace 'UNIQUE' has no extractor proc; its keys
# are generated as "<flatfile name>:<running number>", the number being
# restarted at 1 for every opened file.
class FastaFormatParser < TemplateParser
  NAMESTYLE = NameSpaces.new(
    NameSpace.new('UNIQUE',    nil),
    NameSpace.new('entry_id',  Proc.new { |entry| entry.entry_id }),
    NameSpace.new('accession', Proc.new { |entry| entry.accessions }),
    NameSpace.new('id_string', Proc.new { |entry|
                                 entry.identifiers.id_strings
                               }),
    NameSpace.new('word',      Proc.new { |entry|
                                 entry.identifiers.words
                               })
  )
  PRIMARY = 'UNIQUE'
  SECONDARY = [ 'entry_id', 'accession', 'id_string', 'word' ]

  # pri_name::  primary namespace name (defaults to PRIMARY)
  # sec_names:: secondary namespace names (defaults to self.class::SECONDARY)
  def initialize(pri_name = nil, sec_names = nil)
    super()
    self.format = 'fasta'
    self.dbclass = Bio::FastaFormat
    self.set_primary_namespace(pri_name || PRIMARY)
    sec_names ||= self.class::SECONDARY
    self.add_secondary_namespaces(*sec_names)
  end

  # Opens the file, resets the entry counter, and leaves the read position
  # at the start of the first line beginning with ">" (or at the position
  # reached when the input ran out).
  def open_flatfile(fileid, file)
    super
    @count = 1
    # NOTE(review): @flatfilename_base is assigned here but not read
    # anywhere in this class -- confirm whether unique_primary_key was
    # meant to use it.
    @flatfilename_base = File.basename(@flatfilename)
    @flatfile.pos = 0
    pos = @flatfile.pos
    while (line = @flatfile.gets) and line !~ /^\>/
      pos = @flatfile.pos
    end
    @flatfile.pos = pos
  end

  # Primary key of the current entry: the generated unique key when the
  # primary namespace has no proc, otherwise the proc's result, which
  # must be a non-empty String (RuntimeError if not).
  def parse_primary
    extractor = self.primary.proc
    return unique_primary_key unless extractor
    key = extractor.call(@entry)
    if !key.is_a?(String) or key.empty?
      #@fatal = true
      raise 'primary id must be a non-void string (skipped this entry)'
    end
    key
  end

  private

  # Returns "<flatfile name>:<count>" and advances the counter.
  def unique_primary_key
    key = "#{@flatfilename}:#{@count}"
    @count += 1
    key
  end
end #class FastaFormatParser
|
|
317
|
+
|
|
318
|
+
# Index parser for Bio::FANTOM::MaXML::Sequence entries (format 'raw').
class MaXMLSequenceParser < TemplateParser
  NAMESTYLE = NameSpaces.new(
    NameSpace.new('id',    Proc.new { |entry| entry.entry_id }),
    NameSpace.new('altid', Proc.new { |entry| entry.id_strings }),
    # anntext of every annotation whose qualifier is 'gene_ontology'
    NameSpace.new('gene_ontology', Proc.new { |entry|
      hits = entry.annotations.get_all_by_qualifier('gene_ontology')
      hits.map { |ann| ann.anntext }
    }),
    # For every datasrc string of every annotation: the part after the
    # first '|' (the whole string if there is none) plus the string
    # itself; the result is sorted with duplicates removed.
    NameSpace.new('datasrc', Proc.new { |entry|
      found = []
      entry.annotations.each do |ann|
        ann.datasrc.each do |src|
          found.push(src.split('|', 2)[-1], src)
        end
      end
      found.sort.uniq
    })
  )
  PRIMARY = 'id'
  SECONDARY = [ 'altid', 'gene_ontology', 'datasrc' ]

  # pri_name::  primary namespace name (defaults to PRIMARY)
  # sec_names:: secondary namespace names (defaults to self.class::SECONDARY)
  def initialize(pri_name = nil, sec_names = nil)
    super()
    self.format = 'raw'
    self.dbclass = Bio::FANTOM::MaXML::Sequence
    self.set_primary_namespace(pri_name || PRIMARY)
    sec_names ||= self.class::SECONDARY
    self.add_secondary_namespaces(*sec_names)
  end
end #class MaXMLSequenceParser
|
|
353
|
+
|
|
354
|
+
# Index parser for Bio::FANTOM::MaXML::Cluster entries (format 'raw').
# The 'datasrc' and 'gene_ontology' keys of a cluster are gathered by
# running MaXMLSequenceParser's extractors over each of its sequences.
class MaXMLClusterParser < TemplateParser
  NAMESTYLE = NameSpaces.new(
    NameSpace.new('id',    Proc.new { |entry| entry.entry_id }),
    NameSpace.new('altid', Proc.new { |entry| entry.sequences.id_strings }),
    NameSpace.new('datasrc', Proc.new { |entry|
      per_seq = entry.sequences.collect do |seq|
        MaXMLSequenceParser::NAMESTYLE['datasrc'].proc.call(seq)
      end
      per_seq.flatten.sort.uniq
    }),
    NameSpace.new('gene_ontology', Proc.new { |entry|
      per_seq = entry.sequences.collect do |seq|
        MaXMLSequenceParser::NAMESTYLE['gene_ontology'].proc.call(seq)
      end
      per_seq.flatten.sort.uniq
    })
  )
  PRIMARY = 'id'
  SECONDARY = [ 'altid', 'gene_ontology', 'datasrc' ]

  # pri_name::  primary namespace name (defaults to PRIMARY)
  # sec_names:: secondary namespace names (defaults to self.class::SECONDARY)
  def initialize(pri_name = nil, sec_names = nil)
    super()
    self.format = 'raw'
    self.dbclass = Bio::FANTOM::MaXML::Cluster
    self.set_primary_namespace(pri_name || PRIMARY)
    sec_names ||= self.class::SECONDARY
    self.add_secondary_namespaces(*sec_names)
  end
end #class MaXMLClusterParser
|
|
390
|
+
|
|
391
|
+
# Index parser for BLAST report files (format 'raw').  The report class
# to parse with is passed to the constructor.  'QUERY' is the default
# primary namespace; by default every other known namespace is secondary.
class BlastDefaultParser < TemplateParser
  NAMESTYLE = NameSpaces.new(
    NameSpace.new('QUERY', Proc.new { |report| report.query_def }),
    # id strings found in the query definition line, followed by the
    # definition's first whitespace-delimited token
    NameSpace.new('query_id', Proc.new { |report|
      ids = Bio::FastaDefline.new(report.query_def.to_s).id_strings
      ids.push(report.query_def.to_s.split(/\s+/, 2)[0])
      ids
    }),
    # for every hit: id strings of its definition, the definition
    # itself, and the definition's first whitespace-delimited token
    NameSpace.new('hit', Proc.new { |report|
      per_hit = report.hits.collect do |hit|
        keys = Bio::FastaDefline.new(hit.definition.to_s).id_strings
        keys.push(hit.definition)
        keys.push(hit.definition.to_s.split(/\s+/, 2)[0])
        keys
      end
      per_hit.flatten
    })
  )
  PRIMARY = 'QUERY'
  SECONDARY = [ 'query_id', 'hit' ]

  # klass::     report class stored as dbclass
  # pri_name::  primary namespace name (defaults to PRIMARY)
  # sec_names:: secondary namespace names (defaults to all namespaces in
  #             NAMESTYLE except the primary one)
  def initialize(klass, pri_name = nil, sec_names = nil)
    super()
    self.format = 'raw'
    self.dbclass = klass
    self.set_primary_namespace(pri_name || PRIMARY)
    if !sec_names then
      known = @namestyle.values.collect { |ns| ns.name }
      sec_names = known.reject { |n| n == self.primary.name }
    end
    self.add_secondary_namespaces(*sec_names)
  end

  # Opens the file, lets the flatfile autodetect its class (falling back
  # to this parser's dbclass when detection yields nothing), and leaves
  # the read position at the start of the first line beginning with
  # "BLAST" or "TBLAST" (or at the position reached when the input ran
  # out).
  def open_flatfile(fileid, file)
    super
    @flatfile.rewind
    @flatfile.dbclass = nil
    @flatfile.autodetect
    @flatfile.dbclass = self.dbclass unless @flatfile.dbclass
    @flatfile.rewind
    pos = @flatfile.pos
    while (line = @flatfile.gets) and line !~ /^T?BLAST/
      pos = @flatfile.pos
    end
    @flatfile.pos = pos
  end
end #class BlastDefaultParser
|
|
439
|
+
|
|
440
|
+
end #module Parser
|
|
441
|
+
|
|
442
|
+
# Creates a new Berkeley DB based databank called +name+ from +files+,
# using +parser+ to extract the keys.  +options+ is only passed on to
# addindex_bdb.  Returns true.
#
# Raises RuntimeError when the BDB constant is not defined.
def self.makeindexBDB(name, parser, options, *files)
  # options are not used in this method
  raise RuntimeError, "Berkeley DB support not found" unless defined?(BDB)

  DEBUG.print "makeing BDB DataBank...\n"
  db = DataBank.new(name, MAGIC_BDB)
  db.format = parser.format
  db.fileids.add(*files)
  db.fileids.recalc

  db.primary = parser.primary.name
  db.secondary = parser.secondary.names

  DEBUG.print "writing config.dat, config, fileids ...\n"
  db.write('wb', BDBdefault::flag_write)

  DEBUG.print "reading files...\n"

  # every file id, 0 up to (but excluding) the number of files
  all_ids = 0...files.size
  addindex_bdb(db, BDBdefault::flag_write, all_ids, parser, options)
  db.close
  true
end #def
|
|
466
|
+
|
|
467
|
+
# Indexes the flatfiles whose ids are listed in +need_update+ into the
# BDB databank +db+.  +flag+ is assigned to the primary and to every
# secondary index file before entries are written.  Returns true.
def self.addindex_bdb(db, flag, need_update, parser, options)
  DEBUG.print "reading files...\n"

  primary_ns = db.primary
  primary_ns.file.close
  primary_ns.file.flag = flag

  db.secondary.each_files do |sec|
    sec.file.close
    sec.file.flag = flag
    sec.file.open
    sec.file.close
  end

  need_update.each do |fileid|
    parser.open_flatfile(fileid, db.fileids[fileid].filename)
    parser.each do |pos, len|
      pkey = parser.parse_primary
      #primary_ns.file.add_exclusive(pkey, [ fileid, pos, len ])
      primary_ns.file.add_overwrite(pkey, [ fileid, pos, len ])
      #DEBUG.print "#{pkey} #{fileid} #{pos} #{len}\n"
      parser.parse_secondary do |sec_name, sec_key|
        # secondary indexes map a secondary key to the primary key
        db.secondary[sec_name].file.add_nr(sec_key, pkey)
        #DEBUG.print "#{sec_key} #{pkey}\n"
      end
    end
    parser.close_flatfile
  end
  true
end #def
|
|
498
|
+
|
|
499
|
+
# Creates a new flat-file databank called +name+ from +files+, using
# +parser+ to extract the keys.  +options+ is passed on to
# addindex_flat.  Returns true.
def self.makeindexFlat(name, parser, options, *files)
  DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

  db = DataBank.new(name, nil)
  db.format = parser.format
  db.fileids.add(*files)
  db.primary = parser.primary.name
  db.secondary = parser.secondary.names
  db.fileids.recalc
  DEBUG.print "writing DabaBank...\n"
  db.write('wb')

  # every file id, 0 up to (but excluding) the number of files
  all_ids = 0...files.size
  addindex_flat(db, :new, all_ids, parser, options)
  db.close
  true
end #def
|
|
515
|
+
|
|
516
|
+
# Indexes the flatfiles whose ids are listed in +need_update+ into the
# flat databank +db+.
#
# Keys are first written as tab-separated lines to temporary files (one
# for the primary namespace, one per secondary namespace) and then
# imported into the databank's index files through a sort procedure
# chosen by chose_sort_proc from options['sort_program'] and +mode+.
#
# Returns false without doing anything when +need_update+ is empty,
# true otherwise.
def self.addindex_flat(db, mode, need_update, parser, options)
  require 'tempfile'
  prog = options['sort_program']

  return false if need_update.to_a.size == 0

  DEBUG.print "prepare temporary files...\n"
  tempbase = "bioflat#{rand(10000)}-"
  pfile = Tempfile.open(tempbase + 'primary-')
  DEBUG.print "open temporary file #{pfile.path.inspect}\n"
  sfiles = {}
  parser.secondary.names.each do |x|
    sfiles[x] = Tempfile.open(tempbase + 'secondary-')
    DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
  end

  DEBUG.print "reading files...\n"
  need_update.each do |fileid|
    filename = db.fileids[fileid].filename
    parser.open_flatfile(fileid, filename)
    parser.each do |pos, len|
      p = parser.parse_primary
      # primary line: key, file id, position, length
      pfile << "#{p}\t#{fileid}\t#{pos}\t#{len}\n"
      #DEBUG.print "#{p} #{fileid} #{pos} #{len}\n"
      parser.parse_secondary do |sn, sp|
        # secondary line: secondary key, primary key
        sfiles[sn] << "#{sp}\t#{p}\n"
        #DEBUG.print "#{sp} #{p}\n"
      end
    end
    parser.close_flatfile
    # (a trailing "fileid += 1" used to sit here; it only reassigned the
    # block parameter and had no effect, so it was removed)
  end

  sort_proc = chose_sort_proc(prog, mode)
  # close(false) closes the handle but keeps the file for the import;
  # close(true) afterwards also removes it.
  pfile.close(false)
  DEBUG.print "sorting primary (#{parser.primary.name})...\n"
  db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)
  pfile.close(true)

  parser.secondary.names.each do |x|
    DEBUG.print "sorting secondary (#{x})...\n"
    sfiles[x].close(false)
    db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                          sfiles[x].path)
    sfiles[x].close(true)
  end
  true
end #def
|
|
564
|
+
|
|
565
|
+
# External sort program used when none is named explicitly.
DEFAULT_SORT = '/usr/bin/sort'

# Picks the sort procedure used when importing keys into a flat index.
#
# prog:: 'builtin', 'hs' or 'lm' (any case) selects the internal sort
#        routine; nil or '' selects DEFAULT_SORT if it is executable and
#        the internal routine otherwise; anything else is taken as the
#        external sort program to run.
# mode:: :new selects the external sort procedure, any other value the
#        external merge-sort procedure (only relevant for external
#        programs).
#
# Returns the object obtained from Flat_1::FlatMappingFile.
def self.chose_sort_proc(prog, mode = :new)
  case prog
  when /^builtin$/i, /^hs$/i, /^lm$/i
    DEBUG.print "sort: internal sort routine\n"
    # This branch used to call an undefined local "mapfile" and raised
    # NameError; it now uses the same class method as the fallback below.
    sort_proc = Flat_1::FlatMappingFile::internal_sort_proc
  when nil, ''
    if FileTest.executable?(DEFAULT_SORT)
      DEBUG.print "sort: #{DEFAULT_SORT}\n"
      if mode == :new then
        sort_proc = Flat_1::FlatMappingFile::external_sort_proc(DEFAULT_SORT)
      else
        sort_proc = Flat_1::FlatMappingFile::external_merge_sort_proc(DEFAULT_SORT)
      end
    else
      DEBUG.print "sort: internal sort routine\n"
      sort_proc = Flat_1::FlatMappingFile::internal_sort_proc
    end
  else
    DEBUG.print "sort: #{prog}\n"
    if mode == :new then
      sort_proc = Flat_1::FlatMappingFile::external_sort_proc(prog)
    else
      sort_proc = Flat_1::FlatMappingFile::external_merge_sort_proc(prog)
    end
  end
  sort_proc
end
|
|
593
|
+
|
|
594
|
+
# Updates the existing databank +name+ with +files+.
#
# parser::  parser to use; when nil, one is chosen from the databank's
#           stored format (only 'swiss', 'embl' and 'genbank' are
#           supported here), with the classes autodetected from the
#           databank's first file and from files[0] used as a check.
# options:: Hash (nil is treated as empty); options['renew'] rebuilds
#           the whole index from the still-existing old files plus
#           +files+ instead of adding to it.
#
# Returns true.  Raises RuntimeError on a format mismatch, an
# undeterminable or unsupported format, or an unsupported index type.
def self.update_index(name, parser, options, *files)
  options ||= {}
  db = DataBank.open(name)

  if parser then
    raise 'file format mismatch' if db.format != parser.format
  else
    # Autodetection is best effort: a missing or undetectable file simply
    # leaves the corresponding class nil.
    dbclass_orig = begin
      Bio::FlatFile.autodetect_file(db.fileids[0].filename)
    rescue TypeError, Errno::ENOENT
      nil
    end
    dbclass_new = begin
      Bio::FlatFile.autodetect_file(files[0])
    rescue TypeError, Errno::ENOENT
      nil
    end

    case db.format
    when 'swiss', 'embl'
      parser = Parser.new(db.format)
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    when 'genbank'
      # "||" rather than "or": with "or" the assignment bound tighter and
      # dbclass_new was never used as the fallback.
      dbclass = dbclass_orig || dbclass_new
      if dbclass == Bio::GenBank or dbclass == Bio::GenPept
        parser = Parser.new(dbclass)
      elsif !dbclass then
        raise 'cannnot determine format. please specify manually.'
      else
        raise 'file format mismatch'
      end
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    else
      raise 'unsupported format'
    end
  end

  parser.set_primary_namespace(db.primary.name)
  parser.add_secondary_namespaces(*db.secondary.names)

  if options['renew'] then
    # Rebuild from scratch: old files that still exist, then the given
    # files, duplicates dropped while keeping first-seen order.
    newfiles = db.fileids.filenames.find_all do |x|
      FileTest.exist?(x)
    end
    newfiles.concat(files)
    newfiles = newfiles.uniq
    t = db.index_type
    db.close
    case t
    when MAGIC_BDB
      Indexer::makeindexBDB(name, parser, options, *newfiles)
    when MAGIC_FLAT
      Indexer::makeindexFlat(name, parser, options, *newfiles)
    else
      raise 'Unsupported index type'
    end
    return true
  end

  # Ids of already-registered files whose check fails need re-indexing;
  # files already registered are not added again.
  need_update = []
  newfiles = files.dup
  db.fileids.cache_all
  db.fileids.each_with_index do |f, i|
    need_update << i unless f.check
    newfiles.delete(f.filename)
  end

  b = db.fileids.size
  begin
    db.fileids.recalc
  rescue Errno::ENOENT => evar
    # A registered file has disappeared: fall back to a full rebuild.
    DEBUG.print "Error: #{evar}\n"
    DEBUG.print "assumed --renew option\n"
    db.close
    options = options.dup
    options['renew'] = true
    update_index(name, parser, options, *files)
    return true
  end
  # add new files
  db.fileids.add(*newfiles)
  db.fileids.recalc

  # the newly added files received the ids following the old ones
  need_update.concat((b...(b + newfiles.size)).to_a)

  DEBUG.print "writing DabaBank...\n"
  db.write('wb', BDBdefault::flag_append)

  case db.index_type
  when MAGIC_BDB
    addindex_bdb(db, BDBdefault::flag_append,
                 need_update, parser, options)
  when MAGIC_FLAT
    addindex_flat(db, :add, need_update, parser, options)
  else
    raise 'Unsupported index type'
  end

  db.close
  true
end #def
|
|
704
|
+
end #module Indexer
|
|
705
|
+
|
|
706
|
+
##############################################################
|
|
707
|
+
# Maps a format name to the corresponding database class.
#
# format_string:: matched case-insensitively, as a substring, against
#                 genbank, genpept, embl, sptr and fasta (in that order)
#
# Returns Bio::GenBank, Bio::GenPept, Bio::EMBL, Bio::SPTR or
# Bio::FastaFormat.  Raises RuntimeError for any other value.
def self.formatstring2class(format_string)
  # The parameter must be used here: a bare "format" is Kernel#format
  # called without arguments, which raises ArgumentError.
  case format_string
  when /genbank/i
    dbclass = Bio::GenBank
  when /genpept/i
    dbclass = Bio::GenPept
  when /embl/i
    dbclass = Bio::EMBL
  when /sptr/i
    dbclass = Bio::SPTR
  when /fasta/i
    dbclass = Bio::FastaFormat
  else
    raise "Unsupported format : #{format_string}"
  end
end
|
|
723
|
+
|
|
724
|
+
# Creates a databank called +dbname+ from +files+.
#
# is_bdb::  true builds a Berkeley DB index, false a flat index
# format::  format name handed to formatstring2class; when nil the
#           class is autodetected from files[0]
# options:: Hash or nil; the keys read here are 'primary_namespace',
#           'secondary_namespaces' and 'additional_secondary_namespaces'
#
# Raises RuntimeError when the format cannot be autodetected.
def self.makeindex(is_bdb, dbname, format, options, *files)
  dbclass =
    if format then
      formatstring2class(format)
    else
      detected = Bio::FlatFile.autodetect_file(files[0])
      raise "Cannot determine format" unless detected
      DEBUG.print "file format is #{detected}\n"
      detected
    end

  options ||= {}
  parser = Indexer::Parser.new(dbclass,
                               options['primary_namespace'],
                               options['secondary_namespaces'])

  #if /(EMBL|SPTR)/ =~ dbclass.to_s then
  #a = [ 'DR' ]
  #parser.add_secondary_namespaces(*a)
  #end
  extra = options['additional_secondary_namespaces']
  parser.add_secondary_namespaces(*extra) if extra

  if is_bdb then
    Indexer.makeindexBDB(dbname, parser, options, *files)
  else
    Indexer.makeindexFlat(dbname, parser, options, *files)
  end
end #def makeindex
|
|
753
|
+
|
|
754
|
+
# Adds +files+ to the existing databank +dbname+.
#
# format::  format name; when given it is resolved with
#           formatstring2class (as makeindex does) and a matching parser
#           is created.  When nil, Indexer::update_index is left to
#           choose the parser itself.
# options:: passed through to Indexer::update_index
def self.update_index(dbname, format, options, *files)
  if format then
    # An undefined local "dbclass" used to be referenced here, which
    # raised NameError whenever a format was given.
    dbclass = formatstring2class(format)
    parser = Indexer::Parser.new(dbclass)
  else
    parser = nil
  end
  Indexer::update_index(dbname, parser, options, *files)
end #def update_index
|
|
762
|
+
|
|
763
|
+
end #class FlatFileIndex
|
|
764
|
+
end #module Bio
|
|
765
|
+
|
|
766
|
+
=begin
|
|
767
|
+
|
|
768
|
+
= Bio::FlatFileIndex
|
|
769
|
+
|
|
770
|
+
--- Bio::FlatFileIndex.makeindex(is_bdb, dbname, format, options, *files)
|
|
771
|
+
|
|
772
|
+
Create index files (called a databank) of given files.
|
|
773
|
+
|
|
774
|
+
--- Bio::FlatFileIndex.update_index(dbname, format, options, *files)
|
|
775
|
+
|
|
776
|
+
Add entries to databank.
|
|
777
|
+
|
|
778
|
+
=end
|