bio 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/lib/bio/io/ddbjxml.rb
CHANGED
@@ -333,6 +333,63 @@ class XML < Bio::SOAPWSDL
|
|
333
333
|
SERVER_URI = BASE_URI + "PML.wsdl"
|
334
334
|
end
|
335
335
|
|
336
|
+
# === RequestManager
|
337
|
+
#
|
338
|
+
# Sequence Retrieving System
|
339
|
+
#
|
340
|
+
# * http://xml.nig.ac.jp/doc/RequestManager.txt
|
341
|
+
#
|
342
|
+
# === Examples
|
343
|
+
#
|
344
|
+
# serv = Bio::DDBJ::XML::RequestManager.new
|
345
|
+
# puts serv.getAsyncResult('20070420102828140')
|
346
|
+
#
|
347
|
+
# === WSDL Methods
|
348
|
+
#
|
349
|
+
# * getAsyncResult( requestId )
|
350
|
+
# * getAsyncResultMime( requestId )
|
351
|
+
#
|
352
|
+
# === Examples
|
353
|
+
#
|
354
|
+
# * http://xml.nig.ac.jp/doc/RequestManager.txt
|
355
|
+
#
|
356
|
+
class RequestManager < XML
|
357
|
+
SERVER_URI = BASE_URI + "RequestManager.wsdl"
|
358
|
+
|
359
|
+
# RequestManager using DDBJ REST interface
|
360
|
+
class REST
|
361
|
+
require 'bio/command'
|
362
|
+
|
363
|
+
Uri = 'http://xml.nig.ac.jp/rest/Invoke'
|
364
|
+
Service = 'RequestManager'
|
365
|
+
|
366
|
+
def getAsyncResult(requestId)
|
367
|
+
params = {
|
368
|
+
'service' => Service,
|
369
|
+
'method' => 'getAsyncResult',
|
370
|
+
'requestId' => requestId.to_s
|
371
|
+
}
|
372
|
+
r = Bio::Command.post_form(Uri, params)
|
373
|
+
r.body
|
374
|
+
end
|
375
|
+
end #class REST
|
376
|
+
|
377
|
+
unless defined? new_orig then
|
378
|
+
class << RequestManager
|
379
|
+
alias new_orig new
|
380
|
+
private :new_orig
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
384
|
+
# creates a new driver
|
385
|
+
def self.new(wsdl = nil)
|
386
|
+
begin
|
387
|
+
new_orig(wsdl)
|
388
|
+
rescue RuntimeError
|
389
|
+
REST.new
|
390
|
+
end
|
391
|
+
end
|
392
|
+
end #class RequestManager
|
336
393
|
|
337
394
|
# === SRS
|
338
395
|
#
|
data/lib/bio/io/ensembl.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id
|
8
|
+
# $Id:$
|
9
9
|
#
|
10
10
|
# == Description
|
11
11
|
#
|
@@ -185,7 +185,7 @@ class Ensembl
|
|
185
185
|
|
186
186
|
params = defaults.update(options)
|
187
187
|
|
188
|
-
result
|
188
|
+
result = Bio::Command.post_form("#{@uri}/exportview", params)
|
189
189
|
|
190
190
|
return result.body
|
191
191
|
end
|
data/lib/bio/io/fetch.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
# Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id
|
8
|
+
# $Id:$
|
9
9
|
#
|
10
10
|
# == DESCRIPTION
|
11
11
|
#
|
@@ -26,6 +26,7 @@
|
|
26
26
|
#
|
27
27
|
|
28
28
|
require 'uri'
|
29
|
+
require 'cgi'
|
29
30
|
require 'bio/command'
|
30
31
|
|
31
32
|
module Bio
|
@@ -102,11 +103,12 @@ module Bio
|
|
102
103
|
# * _style_: [raw|html] (default = 'raw')
|
103
104
|
# * _format_: name of output format (see Bio::Fetch#formats)
|
104
105
|
def fetch(db, id, style = 'raw', format = nil)
|
105
|
-
query = [
|
106
|
-
|
107
|
-
|
106
|
+
query = [ [ 'db', db ],
|
107
|
+
[ 'id', id ],
|
108
|
+
[ 'style', style ] ]
|
109
|
+
query.push([ 'format', format ]) if format
|
108
110
|
|
109
|
-
|
111
|
+
_get(query)
|
110
112
|
end
|
111
113
|
|
112
114
|
# Shortcut for using BioRuby's BioFetch server. You can fetch an entry
|
@@ -139,9 +141,7 @@ module Bio
|
|
139
141
|
# ---
|
140
142
|
# *Returns*:: array of database names
|
141
143
|
def databases
|
142
|
-
|
143
|
-
|
144
|
-
Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
|
144
|
+
_get_single('info', 'dbs').strip.split(/\s+/)
|
145
145
|
end
|
146
146
|
|
147
147
|
# Lists the formats that are available for a given database. Like the
|
@@ -156,9 +156,9 @@ module Bio
|
|
156
156
|
# *Returns*:: array of formats
|
157
157
|
def formats(database = @database)
|
158
158
|
if database
|
159
|
-
query =
|
160
|
-
|
161
|
-
|
159
|
+
query = [ [ 'info', 'formats' ],
|
160
|
+
[ 'db', database ] ]
|
161
|
+
_get(query).strip.split(/\s+/)
|
162
162
|
end
|
163
163
|
end
|
164
164
|
|
@@ -170,11 +170,25 @@ module Bio
|
|
170
170
|
# *Arguments*: none
|
171
171
|
# *Returns*:: number
|
172
172
|
def maxids
|
173
|
-
|
173
|
+
_get_single('info', 'maxids').to_i
|
174
|
+
end
|
174
175
|
|
175
|
-
|
176
|
+
private
|
177
|
+
# (private) query to the server.
|
178
|
+
# ary must be nested array, e.g. [ [ key0, val0 ], [ key1, val1 ], ... ]
|
179
|
+
def _get(ary)
|
180
|
+
query = ary.collect do |a|
|
181
|
+
"#{CGI.escape(a[0])}=#{CGI.escape(a[1])}"
|
182
|
+
end.join('&')
|
183
|
+
Bio::Command.read_uri(@url + '?' + query)
|
176
184
|
end
|
177
|
-
|
185
|
+
|
186
|
+
# (private) query with single parameter
|
187
|
+
def _get_single(key, val)
|
188
|
+
query = "#{CGI.escape(key)}=#{CGI.escape(val)}"
|
189
|
+
Bio::Command.read_uri(@url + '?' + query)
|
190
|
+
end
|
191
|
+
|
178
192
|
end
|
179
193
|
|
180
194
|
end # module Bio
|
data/lib/bio/io/flatfile.rb
CHANGED
@@ -13,7 +13,6 @@
|
|
13
13
|
# It can automatically detect data format, and users do not need to tell
|
14
14
|
# the class what the data is.
|
15
15
|
#
|
16
|
-
require 'tsort'
|
17
16
|
|
18
17
|
module Bio
|
19
18
|
|
@@ -23,333 +22,11 @@ module Bio
|
|
23
22
|
# the class what the data is.
|
24
23
|
class FlatFile
|
25
24
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
# It can input with a buffer.
|
30
|
-
class BufferedInputStream
|
31
|
-
# Creates a new input stream wrapper
|
32
|
-
def initialize(io, path)
|
33
|
-
@io = io
|
34
|
-
@path = path
|
35
|
-
# initialize prefetch buffer
|
36
|
-
@buffer = ''
|
37
|
-
end
|
38
|
-
|
39
|
-
# Creates a new input stream wrapper from the given IO object.
|
40
|
-
def self.for_io(io)
|
41
|
-
begin
|
42
|
-
path = io.path
|
43
|
-
rescue NameError
|
44
|
-
path = nil
|
45
|
-
end
|
46
|
-
self.new(io, path)
|
47
|
-
end
|
48
|
-
|
49
|
-
# Creates a new input stream wrapper to open file _filename_
|
50
|
-
# by using File.open.
|
51
|
-
# *arg is passed to File.open.
|
52
|
-
#
|
53
|
-
# Like File.open, a block can be accepted.
|
54
|
-
def self.open_file(filename, *arg)
|
55
|
-
if block_given? then
|
56
|
-
File.open(filename, *arg) do |fobj|
|
57
|
-
yield self.new(fobj, filename)
|
58
|
-
end
|
59
|
-
else
|
60
|
-
fobj = File.open(filename, *arg)
|
61
|
-
self.new(fobj, filename)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# Creates a new input stream wrapper from URI specified as _uri_.
|
66
|
-
# by using OpenURI.open_uri or URI#open.
|
67
|
-
# _uri_ must be a String or URI object.
|
68
|
-
# *arg is passed to OpenURI.open_uri or URI#open.
|
69
|
-
#
|
70
|
-
# Like OpenURI.open_uri, it can accept a block.
|
71
|
-
def self.open_uri(uri, *arg)
|
72
|
-
if uri.kind_of?(URI)
|
73
|
-
if block_given?
|
74
|
-
uri.open(*arg) do |fobj|
|
75
|
-
yield self.new(fobj, uri.to_s)
|
76
|
-
end
|
77
|
-
else
|
78
|
-
fobj = uri.open(*arg)
|
79
|
-
self.new(fobj, uri.to_s)
|
80
|
-
end
|
81
|
-
else
|
82
|
-
if block_given?
|
83
|
-
OpenURI.open_uri(uri, *arg) do |fobj|
|
84
|
-
yield self.new(fobj, uri)
|
85
|
-
end
|
86
|
-
else
|
87
|
-
fobj = OpenURI.open_uri(uri, *arg)
|
88
|
-
self.new(fobj, uri)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Pathname, filename or URI to open the object.
|
94
|
-
# Like File#path, returned value isn't normalized.
|
95
|
-
attr_reader :path
|
96
|
-
|
97
|
-
# Converts to IO object if possible
|
98
|
-
def to_io
|
99
|
-
@io.to_io
|
100
|
-
end
|
101
|
-
|
102
|
-
# Closes the IO object if possible
|
103
|
-
def close
|
104
|
-
@io.close
|
105
|
-
end
|
106
|
-
|
107
|
-
# Rewinds the IO object if possible
|
108
|
-
# Internal buffer in this wrapper is cleared.
|
109
|
-
def rewind
|
110
|
-
r = @io.rewind
|
111
|
-
@buffer = ''
|
112
|
-
r
|
113
|
-
end
|
114
|
-
|
115
|
-
# Returns current file position
|
116
|
-
def pos
|
117
|
-
@io.pos - @buffer.size
|
118
|
-
end
|
119
|
-
|
120
|
-
# Sets current file position if possible
|
121
|
-
# Internal buffer in this wrapper is cleared.
|
122
|
-
def pos=(p)
|
123
|
-
r = (@io.pos = p)
|
124
|
-
@buffer = ''
|
125
|
-
r
|
126
|
-
end
|
127
|
-
|
128
|
-
# Returns true if end-of-file. Otherwise, returns false.
|
129
|
-
#
|
130
|
-
# Note that it returns false if internal buffer is this wrapper
|
131
|
-
# is not empty,
|
132
|
-
def eof?
|
133
|
-
if @buffer.size > 0
|
134
|
-
false
|
135
|
-
else
|
136
|
-
@io.eof?
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
# Same as IO#gets.
|
141
|
-
def gets(io_rs = $/)
|
142
|
-
if @buffer.size > 0
|
143
|
-
if io_rs == nil then
|
144
|
-
r = @buffer + @io.gets(nil).to_s
|
145
|
-
@buffer = ''
|
146
|
-
else
|
147
|
-
if io_rs == '' then
|
148
|
-
sp_rs = /\n\n/n
|
149
|
-
sp_rs_orig = "\n\n"
|
150
|
-
else
|
151
|
-
sp_rs = Regexp.new(Regexp.escape(io_rs, 'n'), 0, 'n')
|
152
|
-
sp_rs_orig = io_rs
|
153
|
-
end
|
154
|
-
a = @buffer.split(sp_rs, 2)
|
155
|
-
if a.size > 1 then
|
156
|
-
r = a[0] + sp_rs_orig
|
157
|
-
@buffer = a[1]
|
158
|
-
else
|
159
|
-
@buffer << @io.gets(io_rs).to_s
|
160
|
-
a = @buffer.split(sp_rs, 2)
|
161
|
-
if a.size > 1 then
|
162
|
-
r = a[0] + sp_rs_orig
|
163
|
-
@buffer = a[1].to_s
|
164
|
-
else
|
165
|
-
r = @buffer
|
166
|
-
@buffer = ''
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
170
|
-
r
|
171
|
-
else
|
172
|
-
@io.gets(io_rs)
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
|
-
# Pushes back given str to the internal buffer.
|
177
|
-
# Returns nil.
|
178
|
-
# str must be read previously with the wrapper object.
|
179
|
-
#
|
180
|
-
# Note that in current implementation, the str can be everything,
|
181
|
-
# but please don't depend on it.
|
182
|
-
#
|
183
|
-
def ungets(str)
|
184
|
-
@buffer = str + @buffer
|
185
|
-
nil
|
186
|
-
end
|
187
|
-
|
188
|
-
# Same as IO#getc.
|
189
|
-
def getc
|
190
|
-
if @buffer.size > 0 then
|
191
|
-
r = @buffer[0]
|
192
|
-
@buffer = @buffer[1..-1]
|
193
|
-
else
|
194
|
-
r = @io.getc
|
195
|
-
end
|
196
|
-
r
|
197
|
-
end
|
198
|
-
|
199
|
-
# Pushes back one character into the internal buffer.
|
200
|
-
# Unlike IO#getc, it can be called more than one time.
|
201
|
-
def ungetc(c)
|
202
|
-
@buffer = sprintf("%c", c) + @buffer
|
203
|
-
nil
|
204
|
-
end
|
25
|
+
autoload :AutoDetect, 'bio/io/flatfile/autodetection'
|
26
|
+
autoload :Splitter, 'bio/io/flatfile/splitter'
|
27
|
+
autoload :BufferedInputStream, 'bio/io/flatfile/buffer'
|
205
28
|
|
206
|
-
|
207
|
-
def prefetch_buffer
|
208
|
-
@buffer
|
209
|
-
end
|
210
|
-
|
211
|
-
# It does @io.gets, and addes returned string
|
212
|
-
# to the internal buffer, and returns the string.
|
213
|
-
def prefetch_gets(*arg)
|
214
|
-
r = @io.gets(*arg)
|
215
|
-
@buffer << r if r
|
216
|
-
r
|
217
|
-
end
|
218
|
-
|
219
|
-
# It does @io.readpartial, and addes returned string
|
220
|
-
# to the internal buffer, and returns the string.
|
221
|
-
def prefetch_readpartial(*arg)
|
222
|
-
r = @io.readpartial(*arg)
|
223
|
-
@buffer << r if r
|
224
|
-
r
|
225
|
-
end
|
226
|
-
|
227
|
-
# Skips space characters in the stream.
|
228
|
-
# returns nil.
|
229
|
-
def skip_spaces
|
230
|
-
ws = { ?\s => true, ?\n => true, ?\r => true, ?\t => true }
|
231
|
-
while r = self.getc
|
232
|
-
unless ws[r] then
|
233
|
-
self.ungetc(r)
|
234
|
-
break
|
235
|
-
end
|
236
|
-
end
|
237
|
-
nil
|
238
|
-
end
|
239
|
-
end #class BufferedInputStream
|
240
|
-
|
241
|
-
# Splitter is a class to get entries from a buffered input stream.
|
242
|
-
module Splitter
|
243
|
-
# This is a template of splitter.
|
244
|
-
class Template
|
245
|
-
# Creates a new splitter.
|
246
|
-
def initialize(klass, bstream)
|
247
|
-
@stream = bstream
|
248
|
-
raise NotImplementedError
|
249
|
-
end
|
250
|
-
|
251
|
-
# skips leader of the entry.
|
252
|
-
def skip_leader
|
253
|
-
raise NotImplementedError
|
254
|
-
end
|
255
|
-
|
256
|
-
# Gets entry as a string
|
257
|
-
def get_entry
|
258
|
-
raise NotImplementedError
|
259
|
-
end
|
260
|
-
|
261
|
-
# the last entry read from the stream
|
262
|
-
attr_reader :entry
|
263
|
-
|
264
|
-
# a flag to write down entry start and end positions
|
265
|
-
attr_accessor :entry_pos_flag
|
266
|
-
|
267
|
-
# start position of the entry
|
268
|
-
attr_reader :entry_start_pos
|
269
|
-
|
270
|
-
# (end position of the entry) + 1
|
271
|
-
attr_reader :entry_ended_pos
|
272
|
-
end
|
273
|
-
|
274
|
-
# Default splitter.
|
275
|
-
# It sees following constants in the given class.
|
276
|
-
# DELIMITER:: (String) delimiter indicates the end of a entry.
|
277
|
-
# FLATFILE_HEADER:: (String) start of a entry, located on head of a line.
|
278
|
-
# DELIMITER_OVERRUN:: (Integer) excess read size included in DELIMITER.
|
279
|
-
#
|
280
|
-
class Default < Template
|
281
|
-
# Creates a new splitter.
|
282
|
-
# klass:: database class
|
283
|
-
# bstream:: input stream. It must be a BufferedInputStream object.
|
284
|
-
def initialize(klass, bstream)
|
285
|
-
@stream = bstream
|
286
|
-
@delimiter = klass::DELIMITER rescue nil
|
287
|
-
@header = klass::FLATFILE_HEADER rescue nil
|
288
|
-
# for specific classes' benefit
|
289
|
-
unless header
|
290
|
-
if klass == Bio::GenBank or klass == Bio::GenPept
|
291
|
-
@header = 'LOCUS '
|
292
|
-
end
|
293
|
-
end
|
294
|
-
@delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
|
295
|
-
@entry_pos_flag = nil
|
296
|
-
end
|
297
|
-
|
298
|
-
# (String) delimiter indicates the end of a entry.
|
299
|
-
attr_accessor :delimiter
|
300
|
-
|
301
|
-
# (String) start of a entry, located on head of a line.
|
302
|
-
attr_accessor :header
|
303
|
-
|
304
|
-
# (Integer) excess read data size included in delimiter.
|
305
|
-
attr_accessor :delimiter_overrun
|
306
|
-
|
307
|
-
# Skips leader of the entry.
|
308
|
-
#
|
309
|
-
# If @header is not nil, it reads till the contents of @header
|
310
|
-
# comes at the head of a line.
|
311
|
-
# If correct FLATFILE_HEADER is found, returns true.
|
312
|
-
# Otherwise, returns nil.
|
313
|
-
def skip_leader
|
314
|
-
if @header then
|
315
|
-
data = ''
|
316
|
-
while s = @stream.gets(@header)
|
317
|
-
data << s
|
318
|
-
if data.split(/[\r\n]+/)[-1] == @header then
|
319
|
-
@stream.ungets(@header)
|
320
|
-
return true
|
321
|
-
end
|
322
|
-
end
|
323
|
-
# @header was not found. For safety,
|
324
|
-
# pushes back data with removing white spaces in the head.
|
325
|
-
data.sub(/\A\s+/, '')
|
326
|
-
@stream.ungets(data)
|
327
|
-
return nil
|
328
|
-
else
|
329
|
-
@stream.skip_spaces
|
330
|
-
return nil
|
331
|
-
end
|
332
|
-
end
|
333
|
-
|
334
|
-
# gets a entry
|
335
|
-
def get_entry
|
336
|
-
p0 = @entry_pos_flag ? @stream.pos : nil
|
337
|
-
e = @stream.gets(@delimiter)
|
338
|
-
if e and @delimiter_overrun then
|
339
|
-
if e[-@delimiter.size, @delimiter.size ] == @delimiter then
|
340
|
-
overrun = e[-@delimiter_overrun, @delimiter_overrun]
|
341
|
-
e[-@delimiter_overrun, @delimiter_overrun] = ''
|
342
|
-
@stream.ungets(overrun)
|
343
|
-
end
|
344
|
-
end
|
345
|
-
p1 = @entry_pos_flag ? @stream.pos : nil
|
346
|
-
@entry_start_pos = p0
|
347
|
-
@entry = e
|
348
|
-
@entry_ended_pos = p1
|
349
|
-
@entry
|
350
|
-
end
|
351
|
-
end #class Defalult
|
352
|
-
end #module Splitter
|
29
|
+
include Enumerable
|
353
30
|
|
354
31
|
#
|
355
32
|
# Bio::FlatFile.open(file, *arg)
|
@@ -605,13 +282,17 @@ module Bio
|
|
605
282
|
@skip_leader_mode == :everytime)
|
606
283
|
@splitter.skip_leader
|
607
284
|
end
|
608
|
-
|
285
|
+
if raw then
|
286
|
+
r = @splitter.get_entry
|
287
|
+
else
|
288
|
+
r = @splitter.get_parsed_entry
|
289
|
+
end
|
609
290
|
@firsttime_flag = false
|
610
291
|
return nil unless r
|
611
292
|
if raw then
|
612
293
|
r
|
613
294
|
else
|
614
|
-
|
295
|
+
@entry = r
|
615
296
|
@entry
|
616
297
|
end
|
617
298
|
end
|
@@ -660,7 +341,7 @@ module Bio
|
|
660
341
|
# Resets file pointer to the start of the flatfile.
|
661
342
|
# (similar to IO#rewind)
|
662
343
|
def rewind
|
663
|
-
r = @stream.rewind
|
344
|
+
r = (@splitter || @stream).rewind
|
664
345
|
@firsttime_flag = true
|
665
346
|
r
|
666
347
|
end
|
@@ -722,7 +403,12 @@ module Bio
|
|
722
403
|
begin
|
723
404
|
@splitter = @dbclass.flatfile_splitter(@dbclass, @stream)
|
724
405
|
rescue NameError, NoMethodError
|
725
|
-
|
406
|
+
begin
|
407
|
+
splitter_class = @dbclass::FLATFILE_SPLITTER
|
408
|
+
rescue NameError
|
409
|
+
splitter_class = Splitter::Default
|
410
|
+
end
|
411
|
+
@splitter = splitter_class.new(klass, @stream)
|
726
412
|
end
|
727
413
|
else
|
728
414
|
@dbclass = nil
|
@@ -775,528 +461,6 @@ module Bio
|
|
775
461
|
AutoDetect.default.autodetect(text)
|
776
462
|
end
|
777
463
|
|
778
|
-
|
779
|
-
# AutoDetect automatically determines database class of given data.
|
780
|
-
class AutoDetect
|
781
|
-
|
782
|
-
include TSort
|
783
|
-
|
784
|
-
# Array to store autodetection rules.
|
785
|
-
# This is defined only for inspect.
|
786
|
-
class RulesArray < Array
|
787
|
-
# visualize contents
|
788
|
-
def inspect
|
789
|
-
"[#{self.collect { |e| e.name.inspect }.join(' ')}]"
|
790
|
-
end
|
791
|
-
end #class RulesArray
|
792
|
-
|
793
|
-
# Template of a single rule of autodetection
|
794
|
-
class RuleTemplate
|
795
|
-
# Creates a new element.
|
796
|
-
def self.[](*arg)
|
797
|
-
self.new(*arg)
|
798
|
-
end
|
799
|
-
|
800
|
-
# Creates a new element.
|
801
|
-
def initialize
|
802
|
-
@higher_priority_elements = RulesArray.new
|
803
|
-
@lower_priority_elements = RulesArray.new
|
804
|
-
@name = nil
|
805
|
-
end
|
806
|
-
|
807
|
-
# self is prior to the _elem_.
|
808
|
-
def is_prior_to(elem)
|
809
|
-
return nil if self == elem
|
810
|
-
elem.higher_priority_elements << self
|
811
|
-
self.lower_priority_elements << elem
|
812
|
-
true
|
813
|
-
end
|
814
|
-
|
815
|
-
# higher priority elements
|
816
|
-
attr_reader :higher_priority_elements
|
817
|
-
# lower priority elements
|
818
|
-
attr_reader :lower_priority_elements
|
819
|
-
|
820
|
-
# database classes
|
821
|
-
attr_reader :dbclasses
|
822
|
-
|
823
|
-
# unique name of the element
|
824
|
-
attr_accessor :name
|
825
|
-
|
826
|
-
# If given text (and/or meta information) is known, returns
|
827
|
-
# the database class.
|
828
|
-
# Otherwise, returns nil or false.
|
829
|
-
#
|
830
|
-
# _text_ will be a String.
|
831
|
-
# _meta_ will be a Hash.
|
832
|
-
# _meta_ may contain following keys.
|
833
|
-
# :path => pathname, filename or uri.
|
834
|
-
def guess(text, meta)
|
835
|
-
nil
|
836
|
-
end
|
837
|
-
|
838
|
-
private
|
839
|
-
# Gets constant from constant name given as a string.
|
840
|
-
def str2const(str)
|
841
|
-
const = Object
|
842
|
-
str.split(/\:\:/).each do |x|
|
843
|
-
const = const.const_get(x)
|
844
|
-
end
|
845
|
-
const
|
846
|
-
end
|
847
|
-
|
848
|
-
# Gets database class from given object.
|
849
|
-
# Current implementation is:
|
850
|
-
# if _obj_ is kind of String, regarded as a constant.
|
851
|
-
# Otherwise, returns _obj_ as is.
|
852
|
-
def get_dbclass(obj)
|
853
|
-
obj.kind_of?(String) ? str2const(obj) : obj
|
854
|
-
end
|
855
|
-
end #class Rule_Template
|
856
|
-
|
857
|
-
# RuleDebug is a class for debugging autodetect classes/methods
|
858
|
-
class RuleDebug < RuleTemplate
  # Creates a new instance labelled with _name_.
  def initialize(name)
    super()
    @name = name
  end

  # Dumps the rule name and the inspected _text_ and _meta_ to $stderr,
  # one item per puts call, and never claims a match (returns nil).
  def guess(text, meta)
    out = $stderr
    out.puts @name
    out.puts text.inspect
    out.puts meta.inspect
    nil
  end
end #class RuleDebug
|
873
|
-
|
874
|
-
# Special element that is always top or bottom priority.
|
875
|
-
class RuleSpecial < RuleTemplate
  # Creates a special element called _name_.
  # RuleTemplate#initialize is deliberately not invoked, so no priority
  # lists are allocated; the readers below return empty arrays instead.
  def initialize(name)
    @name = name
  end

  # modification of @name is inhibited: always raises RuntimeError.
  def name=(_new_name)
    raise RuntimeError, 'cannot modify name'
  end

  # always returns a new empty array, so anything appended is discarded
  def higher_priority_elements
    Array.new
  end

  # always returns a new empty array, so anything appended is discarded
  def lower_priority_elements
    Array.new
  end
end #class RuleSpecial
|
894
|
-
|
895
|
-
# Special element that is always top priority.
# It is a RuleSpecial named 'top'. AutoDetect#tsort_each_child yields
# every registered rule as a child of TopRule, except TopRule itself and
# rules that list TopRule among their lower priority elements.
|
896
|
-
TopRule = RuleSpecial.new('top')
|
897
|
-
# Special element that is always bottom priority.
# It is a RuleSpecial named 'bottom'. AutoDetect#tsort_each_child yields
# BottomRule as a child of every ordinary rule, except rules that list
# BottomRule among their higher priority elements.
|
898
|
-
BottomRule = RuleSpecial.new('bottom')
|
899
|
-
|
900
|
-
# An autodetection rule that uses a single regular expression.
|
901
|
-
class RuleRegexp < RuleTemplate
  # Creates a new instance.
  # _dbclass_ is the database class, or its name as a String (resolved
  # on first use); its string form becomes the rule name.
  # _re_ is the regular expression tried against the text.
  def initialize(dbclass, re)
    super()
    @re = re
    @name = dbclass.to_s
    @dbclass = nil
    @dbclass_lazy = dbclass
  end

  # returns database classes (a one-element array)
  def dbclasses
    [ dbclass ]
  end

  # If given text matches the regexp, returns the database class.
  # Otherwise, returns nil.
  # _meta_ is ignored.
  def guess(text, meta)
    if @re =~ text
      dbclass
    else
      nil
    end
  end

  private

  # database class (lazy evaluation): resolved once, then memoized
  def dbclass
    @dbclass ||= get_dbclass(@dbclass_lazy)
  end
end #class RuleRegexp
|
932
|
-
|
933
|
-
# An autodetection rule that uses two or more regular expressions.
|
934
|
-
# If given string matches one of the regular expressions,
|
935
|
-
# returns the database class.
|
936
|
-
class RuleRegexp2 < RuleRegexp
  # Creates a new instance.
  # _regexps_ are the alternative patterns; the single-regexp slot of
  # the superclass is filled with nil and is not used by #guess here.
  def initialize(dbclass, *regexps)
    super(dbclass, nil)
    @regexps = regexps
  end

  # If given text matches any one of the regexps, returns the database
  # class. Otherwise, returns nil.
  # _meta_ is ignored.
  def guess(text, meta)
    @regexps.any? { |pattern| pattern =~ text } ? dbclass : nil
  end
end #class RuleRegexp2
|
953
|
-
|
954
|
-
# An autodetection rule that passes data to the proc object.
|
955
|
-
class RuleProc < RuleTemplate
  # Creates a new instance.
  # _dbclasses_ are the database classes (or their names as Strings)
  # the block may return; their names joined with '|' form the rule name.
  # The block receives the text, and also the meta Hash when it
  # declares a second parameter (see #guess).
  def initialize(*dbclasses, &proc)
    super()
    @proc = proc
    @dbclasses = nil
    @dbclasses_lazy = dbclasses
    @name = dbclasses.collect { |x| x.to_s }.join('|')
  end

  # database classes (lazy evaluation): resolved once, then memoized
  def dbclasses
    @dbclasses ||= @dbclasses_lazy.collect { |x| get_dbclass(x) }
  end

  # If given text (and/or meta information) is known, returns
  # the database class.
  # Otherwise, returns nil or false.
  #
  # Returns nil when the rule was created without a block.
  # _meta_ (refer RuleTemplate#guess) is handed to the block only when
  # Proc#arity reports that it takes a second argument; blocks declared
  # with a single parameter are called with the text alone.
  def guess(text, meta)
    return nil unless @proc
    arity = @proc.arity
    if arity >= 2 || arity <= -2
      @proc.call(text, meta)
    else
      @proc.call(text)
    end
  end
end #class RuleProc
|
982
|
-
|
983
|
-
# Creates a new Autodetect object
|
984
|
-
def initialize
  # autodetection rules, keyed by rule name
  @rules = {}
  # cached, priority-sorted list of rules (built on demand by #elements)
  @elements = nil
  # the two special rules are always registered, top first
  [TopRule, BottomRule].each { |special| add(special) }
end
|
992
|
-
|
993
|
-
# Adds a new element.
|
994
|
-
# Returns _elem_.
|
995
|
-
def add(elem)
  key = elem.name
  # names are unique: refuse a second rule under an existing name
  raise 'element name conflicts' if @rules[key]
  # the sorted cache is stale as soon as the rule set changes
  @elements = nil
  # the assignment evaluates to elem, which is the return value
  @rules[key] = elem
end
|
1001
|
-
|
1002
|
-
# (required by TSort.)
|
1003
|
-
# For all elements, yields each element.
|
1004
|
-
def tsort_each_node(&block)
  # the nodes of the graph are all registered rules
  @rules.each_value(&block)
end
|
1007
|
-
|
1008
|
-
# (required by TSort.)
|
1009
|
-
# For a given element, yields each child
|
1010
|
-
# (= lower priority elements) of the element.
|
1011
|
-
def tsort_each_child(elem)
  if elem == TopRule
    # top: every registered rule is a child, except top itself and
    # rules that were declared prior to top
    @rules.each_value do |rule|
      next if rule == TopRule
      next if rule.lower_priority_elements.include?(TopRule)
      yield rule
    end
  elsif elem == BottomRule
    # bottom: only rules explicitly declared to follow bottom
    @rules.each_value do |rule|
      yield rule if rule.higher_priority_elements.include?(BottomRule)
    end
  else
    # ordinary rule: its declared lower priority rules ...
    elem.lower_priority_elements.each do |rule|
      yield rule unless rule == BottomRule
    end
    # ... plus bottom, unless this rule itself follows bottom
    yield BottomRule unless elem.higher_priority_elements.include?(BottomRule)
  end
end
|
1030
|
-
|
1031
|
-
# Returns current elements as an array
|
1032
|
-
# whose order fulfills all elements' priorities.
|
1033
|
-
def elements
  # tsort lists children (lower priority) first, so the order is
  # reversed to put the highest priority rule at the front;
  # the result is cached until the cache is reset to nil
  @elements ||= tsort.reverse
end
|
1041
|
-
|
1042
|
-
# rebuilds the object and clears internal cache.
|
1043
|
-
def rehash
  # rebuild the index of the rule table (Hash#rehash)
  @rules.rehash
  # forget the sorted list so that #elements sorts again; returns nil
  @elements = nil
end
|
1047
|
-
|
1048
|
-
# visualizes the object (mainly for debug)
|
1049
|
-
def inspect
  # inspected rule names in priority order, separated by spaces
  names = elements.collect { |e| e.name.inspect }.join(' ')
  "<#{self.class} #{names}>"
end
|
1054
|
-
|
1055
|
-
# Iterates over each element.
|
1056
|
-
def each_rule(&block) #:yields: elem
  # rules are visited in priority order (see #elements)
  elements.each(&block)
end
|
1059
|
-
|
1060
|
-
# Autodetect from the text.
|
1061
|
-
# Returns a database class if succeeded.
|
1062
|
-
# Returns nil if failed.
|
1063
|
-
def autodetect(text, meta = {})
  # Try the rules in priority order; the first truthy answer wins.
  elements.each do |rule|
    dbclass = rule.guess(text, meta)
    return dbclass if dbclass
  end
  # No rule recognised the text. Always nil, as documented, even when
  # the last rule answered false.
  nil
end
|
1072
|
-
|
1073
|
-
# autodetect from the FlatFile object.
|
1074
|
-
# Returns a database class if succeeded.
|
1075
|
-
# Returns nil if failed.
|
1076
|
-
def autodetect_flatfile(ff, lines = 31)
  meta = {}
  # the stream is taken straight from the FlatFile's @stream variable
  stream = ff.instance_eval { @stream }
  # not every stream has a path; a NameError (which includes
  # NoMethodError) simply means "no path"
  path = begin
           stream.path
         rescue NameError
           nil
         end
  if path
    meta[:path] = path
    # call autodetect once with meta and without any read action
    found = autodetect(stream.prefetch_buffer, meta)
    return found if found
  end
  # reading stream: prefetch up to _lines_ lines, retrying detection on
  # the whole prefetched buffer after each non-blank line
  1.upto(lines) do
    line = stream.prefetch_gets
    break unless line
    next unless line.strip.size > 0
    found = autodetect(stream.prefetch_buffer, meta)
    return found if found
  end
  nil
end
|
1101
|
-
|
1102
|
-
# default autodetect object for class method
|
1103
|
-
@default = nil

# returns the default autodetect object,
# building it with make_default on first use
def self.default
  @default ||= make_default
end
|
1112
|
-
|
1113
|
-
# sets the default autodetect object.
|
1114
|
-
def self.default=(ad)
  # stored in the same variable that self.default reads;
  # assigning nil makes self.default build a fresh object next time
  @default = ad
end
|
1117
|
-
|
1118
|
-
# make a new autodetect object
|
1119
|
-
def self.[](*arg)
  # start from a fresh object and register the given rules in order
  arg.inject(new) do |detector, rule|
    detector.add(rule)
    detector
  end
end
|
1124
|
-
|
1125
|
-
# make a default of default autodetect object
|
1126
|
-
def self.make_default
  # --- rules, created in the order in which they are registered ---
  genbank  = RuleRegexp['Bio::GenBank', /^LOCUS .+ bp .*[a-z]*[DR]?NA/]
  genpept  = RuleRegexp['Bio::GenPept', /^LOCUS .+ aa .+/]
  medline  = RuleRegexp['Bio::MEDLINE', /^PMID\- [0-9]+$/]
  embl     = RuleRegexp['Bio::EMBL', /^ID .+\; .*(DNA|RNA|XXX)\;/]
  sptr     = RuleRegexp2['Bio::SPTR',
                         /^ID .+\; *PRT\;/,
                         /^ID [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./]
  prosite  = RuleRegexp['Bio::PROSITE',
                        /^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/]
  transfac = RuleRegexp['Bio::TRANSFAC', /^AC [-A-Za-z0-9_\.]+$/]

  # nil: not AAindex at all; false: AAindex header seen but the kind
  # could not be determined
  aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
    if /^H [-A-Z0-9_\.]+$/ !~ text
      nil
    elsif text =~ /^M [rc]/
      Bio::AAindex2
    elsif text =~ /^I A\/L/
      Bio::AAindex1
    else
      false #fail to determine
    end
  end

  litdb     = RuleRegexp['Bio::LITDB', /^CODE [0-9]+$/]
  brite     = RuleRegexp['Bio::KEGG::BRITE', /^Entry [A-Z0-9]+/]
  orthology = RuleRegexp['Bio::KEGG::ORTHOLOGY', /^ENTRY .+ KO\s*/]
  drug      = RuleRegexp['Bio::KEGG::DRUG', /^ENTRY .+ Drug\s*/]
  glycan    = RuleRegexp['Bio::KEGG::GLYCAN', /^ENTRY .+ Glycan\s*/]
  enzyme    = RuleRegexp2['Bio::KEGG::ENZYME',
                          /^ENTRY EC [0-9\.]+$/,
                          /^ENTRY .+ Enzyme\s*/]
  compound  = RuleRegexp2['Bio::KEGG::COMPOUND',
                          /^ENTRY C[A-Za-z0-9\._]+$/,
                          /^ENTRY .+ Compound\s*/]
  reaction  = RuleRegexp2['Bio::KEGG::REACTION',
                          /^ENTRY R[A-Za-z0-9\._]+$/,
                          /^ENTRY .+ Reaction\s*/]
  genes     = RuleRegexp['Bio::KEGG::GENES',
                         /^ENTRY .+ (CDS|gene|.*RNA|Contig) /]
  genome    = RuleRegexp['Bio::KEGG::GENOME', /^ENTRY [a-z]+$/]

  # the DOCTYPE decides between the two MaXML classes
  fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
                        'Bio::FANTOM::MaXML::Sequence') do |text|
    if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
      case $1
      when 'clusters'  then Bio::FANTOM::MaXML::Cluster
      when 'sequences' then Bio::FANTOM::MaXML::Sequence
      else nil #unknown
      end
    else
      nil
    end
  end

  pdb = RuleRegexp['Bio::PDB',
                   /^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/]
  het = RuleRegexp['Bio::PDB::ChemicalComponent',
                   /^RESIDUE +.+ +\d+\s*$/]

  clustal = RuleRegexp2['Bio::ClustalW::Report',
                        /^CLUSTAL .*\(.*\).*sequence +alignment/,
                        /^CLUSTAL FORMAT for T-COFFEE/]

  gcg_msf = RuleRegexp['Bio::GCG::Msf', /^!!(N|A)A_MULTIPLE_ALIGNMENT .+/]
  gcg_seq = RuleRegexp['Bio::GCG::Seq', /^!!(N|A)A_SEQUENCE .+/]

  blastxml = RuleRegexp['Bio::Blast::Report',
                        /\<\!DOCTYPE BlastOutput PUBLIC /]
  wublast  = RuleRegexp['Bio::Blast::WU::Report',
                        /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/]
  wutblast = RuleRegexp['Bio::Blast::WU::Report_TBlast',
                        /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/]
  blast    = RuleRegexp['Bio::Blast::Default::Report',
                        /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/]
  tblast   = RuleRegexp['Bio::Blast::Default::Report_TBlast',
                        /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/]

  blat   = RuleRegexp['Bio::Blat::Report', /^psLayout version \d+/]
  spidey = RuleRegexp['Bio::Spidey::Report', /^\-\-SPIDEY version .+\-\-$/]
  hmmer  = RuleRegexp['Bio::HMMER::Report', /^HMMER +\d+\./]
  sim4   = RuleRegexp['Bio::Sim4::Report',
                      /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/]

  # nil: no ">" header line; false: header found but the body matches
  # none of the three flavours
  fastaformat = RuleProc.new('Bio::FastaFormat',
                             'Bio::NBRF',
                             'Bio::FastaNumericFormat') do |text|
    if /^>.+$/ !~ text
      nil
    else
      case text
      when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
        Bio::NBRF
      when /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/
        Bio::FastaFormat
      when /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/
        Bio::FastaNumericFormat
      else
        false
      end
    end
  end

  detector = self[
    genbank, genpept, medline, embl, sptr, prosite, transfac,
    aaindex,
    litdb, brite, orthology, drug, glycan, enzyme, compound, reaction,
    genes, genome,
    fantom,
    pdb, het,
    clustal, gcg_msf, gcg_seq,
    blastxml, wublast, wutblast, blast, tblast,
    blat, spidey, hmmer, sim4,
    fastaformat
  ]

  # --- dependencies: within each chain, every rule is prior to the
  # rule that follows it ---
  priority_chains = [
    [genbank, genpept],                           # NCBI
    [embl, sptr, prosite, transfac],              # EMBL/UniProt
    [brite, orthology, drug, glycan, enzyme,
     compound, reaction, genes, genome],          # KEGG
    [pdb, het],                                   # PDB
    [wublast, wutblast, blast, tblast]            # BLAST
  ]
  priority_chains.each do |chain|
    chain.each_cons(2) { |earlier, later| earlier.is_prior_to(later) }
  end
  # FastaFormat is tried only after the bottom rule
  BottomRule.is_prior_to(fastaformat)

  # for debug
  #debug_first = RuleDebug.new('debug_first')
  #detector.add(debug_first)
  #debug_first.is_prior_to(TopRule)

  ## for debug
  #debug_last = RuleDebug.new('debug_last')
  #detector.add(debug_last)
  #BottomRule.is_prior_to(debug_last)
  #fastaformat.is_prior_to(debug_last)

  detector.rehash
  detector
end
|
1297
|
-
|
1298
|
-
end #class AutoDetect
|
1299
|
-
|
1300
464
|
end #class FlatFile
|
1301
465
|
|
1302
466
|
end #module Bio
|