bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/lib/bio/io/ddbjxml.rb
CHANGED
|
@@ -333,6 +333,63 @@ class XML < Bio::SOAPWSDL
|
|
|
333
333
|
SERVER_URI = BASE_URI + "PML.wsdl"
|
|
334
334
|
end
|
|
335
335
|
|
|
336
|
+
# === RequestManager
|
|
337
|
+
#
|
|
338
|
+
# Sequence Retrieving System
|
|
339
|
+
#
|
|
340
|
+
# * http://xml.nig.ac.jp/doc/RequestManager.txt
|
|
341
|
+
#
|
|
342
|
+
# === Examples
|
|
343
|
+
#
|
|
344
|
+
# serv = Bio::DDBJ::XML::RequestManager.new
|
|
345
|
+
# puts serv.getAsyncResult('20070420102828140')
|
|
346
|
+
#
|
|
347
|
+
# === WSDL Methods
|
|
348
|
+
#
|
|
349
|
+
# * getAsyncResult( requestId )
|
|
350
|
+
# * getAsyncResultMime( requestId )
|
|
351
|
+
#
|
|
352
|
+
# === Examples
|
|
353
|
+
#
|
|
354
|
+
# * http://xml.nig.ac.jp/doc/RequestManager.txt
|
|
355
|
+
#
|
|
356
|
+
class RequestManager < XML
|
|
357
|
+
SERVER_URI = BASE_URI + "RequestManager.wsdl"
|
|
358
|
+
|
|
359
|
+
# RequestManager using DDBJ REST interface
|
|
360
|
+
class REST
|
|
361
|
+
require 'bio/command'
|
|
362
|
+
|
|
363
|
+
Uri = 'http://xml.nig.ac.jp/rest/Invoke'
|
|
364
|
+
Service = 'RequestManager'
|
|
365
|
+
|
|
366
|
+
def getAsyncResult(requestId)
|
|
367
|
+
params = {
|
|
368
|
+
'service' => Service,
|
|
369
|
+
'method' => 'getAsyncResult',
|
|
370
|
+
'requestId' => requestId.to_s
|
|
371
|
+
}
|
|
372
|
+
r = Bio::Command.post_form(Uri, params)
|
|
373
|
+
r.body
|
|
374
|
+
end
|
|
375
|
+
end #class REST
|
|
376
|
+
|
|
377
|
+
unless defined? new_orig then
|
|
378
|
+
class << RequestManager
|
|
379
|
+
alias new_orig new
|
|
380
|
+
private :new_orig
|
|
381
|
+
end
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
# creates a new driver
|
|
385
|
+
def self.new(wsdl = nil)
|
|
386
|
+
begin
|
|
387
|
+
new_orig(wsdl)
|
|
388
|
+
rescue RuntimeError
|
|
389
|
+
REST.new
|
|
390
|
+
end
|
|
391
|
+
end
|
|
392
|
+
end #class RequestManager
|
|
336
393
|
|
|
337
394
|
# === SRS
|
|
338
395
|
#
|
data/lib/bio/io/ensembl.rb
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
# == Description
|
|
11
11
|
#
|
|
@@ -185,7 +185,7 @@ class Ensembl
|
|
|
185
185
|
|
|
186
186
|
params = defaults.update(options)
|
|
187
187
|
|
|
188
|
-
result
|
|
188
|
+
result = Bio::Command.post_form("#{@uri}/exportview", params)
|
|
189
189
|
|
|
190
190
|
return result.body
|
|
191
191
|
end
|
data/lib/bio/io/fetch.rb
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
# == DESCRIPTION
|
|
11
11
|
#
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
#
|
|
27
27
|
|
|
28
28
|
require 'uri'
|
|
29
|
+
require 'cgi'
|
|
29
30
|
require 'bio/command'
|
|
30
31
|
|
|
31
32
|
module Bio
|
|
@@ -102,11 +103,12 @@ module Bio
|
|
|
102
103
|
# * _style_: [raw|html] (default = 'raw')
|
|
103
104
|
# * _format_: name of output format (see Bio::Fetch#formats)
|
|
104
105
|
def fetch(db, id, style = 'raw', format = nil)
|
|
105
|
-
query = [
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
query = [ [ 'db', db ],
|
|
107
|
+
[ 'id', id ],
|
|
108
|
+
[ 'style', style ] ]
|
|
109
|
+
query.push([ 'format', format ]) if format
|
|
108
110
|
|
|
109
|
-
|
|
111
|
+
_get(query)
|
|
110
112
|
end
|
|
111
113
|
|
|
112
114
|
# Shortcut for using BioRuby's BioFetch server. You can fetch an entry
|
|
@@ -139,9 +141,7 @@ module Bio
|
|
|
139
141
|
# ---
|
|
140
142
|
# *Returns*:: array of database names
|
|
141
143
|
def databases
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
|
|
144
|
+
_get_single('info', 'dbs').strip.split(/\s+/)
|
|
145
145
|
end
|
|
146
146
|
|
|
147
147
|
# Lists the formats that are available for a given database. Like the
|
|
@@ -156,9 +156,9 @@ module Bio
|
|
|
156
156
|
# *Returns*:: array of formats
|
|
157
157
|
def formats(database = @database)
|
|
158
158
|
if database
|
|
159
|
-
query =
|
|
160
|
-
|
|
161
|
-
|
|
159
|
+
query = [ [ 'info', 'formats' ],
|
|
160
|
+
[ 'db', database ] ]
|
|
161
|
+
_get(query).strip.split(/\s+/)
|
|
162
162
|
end
|
|
163
163
|
end
|
|
164
164
|
|
|
@@ -170,11 +170,25 @@ module Bio
|
|
|
170
170
|
# *Arguments*: none
|
|
171
171
|
# *Returns*:: number
|
|
172
172
|
def maxids
|
|
173
|
-
|
|
173
|
+
_get_single('info', 'maxids').to_i
|
|
174
|
+
end
|
|
174
175
|
|
|
175
|
-
|
|
176
|
+
private
|
|
177
|
+
# (private) query to the server.
|
|
178
|
+
# ary must be nested array, e.g. [ [ key0, val0 ], [ key1, val1 ], ... ]
|
|
179
|
+
def _get(ary)
|
|
180
|
+
query = ary.collect do |a|
|
|
181
|
+
"#{CGI.escape(a[0])}=#{CGI.escape(a[1])}"
|
|
182
|
+
end.join('&')
|
|
183
|
+
Bio::Command.read_uri(@url + '?' + query)
|
|
176
184
|
end
|
|
177
|
-
|
|
185
|
+
|
|
186
|
+
# (private) query with single parameter
|
|
187
|
+
def _get_single(key, val)
|
|
188
|
+
query = "#{CGI.escape(key)}=#{CGI.escape(val)}"
|
|
189
|
+
Bio::Command.read_uri(@url + '?' + query)
|
|
190
|
+
end
|
|
191
|
+
|
|
178
192
|
end
|
|
179
193
|
|
|
180
194
|
end # module Bio
|
data/lib/bio/io/flatfile.rb
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# It can automatically detect data format, and users do not need to tell
|
|
14
14
|
# the class what the data is.
|
|
15
15
|
#
|
|
16
|
-
require 'tsort'
|
|
17
16
|
|
|
18
17
|
module Bio
|
|
19
18
|
|
|
@@ -23,333 +22,11 @@ module Bio
|
|
|
23
22
|
# the class what the data is.
|
|
24
23
|
class FlatFile
|
|
25
24
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
# It can input with a buffer.
|
|
30
|
-
class BufferedInputStream
|
|
31
|
-
# Creates a new input stream wrapper
|
|
32
|
-
def initialize(io, path)
|
|
33
|
-
@io = io
|
|
34
|
-
@path = path
|
|
35
|
-
# initialize prefetch buffer
|
|
36
|
-
@buffer = ''
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# Creates a new input stream wrapper from the given IO object.
|
|
40
|
-
def self.for_io(io)
|
|
41
|
-
begin
|
|
42
|
-
path = io.path
|
|
43
|
-
rescue NameError
|
|
44
|
-
path = nil
|
|
45
|
-
end
|
|
46
|
-
self.new(io, path)
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
# Creates a new input stream wrapper to open file _filename_
|
|
50
|
-
# by using File.open.
|
|
51
|
-
# *arg is passed to File.open.
|
|
52
|
-
#
|
|
53
|
-
# Like File.open, a block can be accepted.
|
|
54
|
-
def self.open_file(filename, *arg)
|
|
55
|
-
if block_given? then
|
|
56
|
-
File.open(filename, *arg) do |fobj|
|
|
57
|
-
yield self.new(fobj, filename)
|
|
58
|
-
end
|
|
59
|
-
else
|
|
60
|
-
fobj = File.open(filename, *arg)
|
|
61
|
-
self.new(fobj, filename)
|
|
62
|
-
end
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
# Creates a new input stream wrapper from URI specified as _uri_.
|
|
66
|
-
# by using OpenURI.open_uri or URI#open.
|
|
67
|
-
# _uri_ must be a String or URI object.
|
|
68
|
-
# *arg is passed to OpenURI.open_uri or URI#open.
|
|
69
|
-
#
|
|
70
|
-
# Like OpenURI.open_uri, it can accept a block.
|
|
71
|
-
def self.open_uri(uri, *arg)
|
|
72
|
-
if uri.kind_of?(URI)
|
|
73
|
-
if block_given?
|
|
74
|
-
uri.open(*arg) do |fobj|
|
|
75
|
-
yield self.new(fobj, uri.to_s)
|
|
76
|
-
end
|
|
77
|
-
else
|
|
78
|
-
fobj = uri.open(*arg)
|
|
79
|
-
self.new(fobj, uri.to_s)
|
|
80
|
-
end
|
|
81
|
-
else
|
|
82
|
-
if block_given?
|
|
83
|
-
OpenURI.open_uri(uri, *arg) do |fobj|
|
|
84
|
-
yield self.new(fobj, uri)
|
|
85
|
-
end
|
|
86
|
-
else
|
|
87
|
-
fobj = OpenURI.open_uri(uri, *arg)
|
|
88
|
-
self.new(fobj, uri)
|
|
89
|
-
end
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
# Pathname, filename or URI to open the object.
|
|
94
|
-
# Like File#path, returned value isn't normalized.
|
|
95
|
-
attr_reader :path
|
|
96
|
-
|
|
97
|
-
# Converts to IO object if possible
|
|
98
|
-
def to_io
|
|
99
|
-
@io.to_io
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
# Closes the IO object if possible
|
|
103
|
-
def close
|
|
104
|
-
@io.close
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
# Rewinds the IO object if possible
|
|
108
|
-
# Internal buffer in this wrapper is cleared.
|
|
109
|
-
def rewind
|
|
110
|
-
r = @io.rewind
|
|
111
|
-
@buffer = ''
|
|
112
|
-
r
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
# Returns current file position
|
|
116
|
-
def pos
|
|
117
|
-
@io.pos - @buffer.size
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
# Sets current file position if possible
|
|
121
|
-
# Internal buffer in this wrapper is cleared.
|
|
122
|
-
def pos=(p)
|
|
123
|
-
r = (@io.pos = p)
|
|
124
|
-
@buffer = ''
|
|
125
|
-
r
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
# Returns true if end-of-file. Otherwise, returns false.
|
|
129
|
-
#
|
|
130
|
-
# Note that it returns false if internal buffer is this wrapper
|
|
131
|
-
# is not empty,
|
|
132
|
-
def eof?
|
|
133
|
-
if @buffer.size > 0
|
|
134
|
-
false
|
|
135
|
-
else
|
|
136
|
-
@io.eof?
|
|
137
|
-
end
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
# Same as IO#gets.
|
|
141
|
-
def gets(io_rs = $/)
|
|
142
|
-
if @buffer.size > 0
|
|
143
|
-
if io_rs == nil then
|
|
144
|
-
r = @buffer + @io.gets(nil).to_s
|
|
145
|
-
@buffer = ''
|
|
146
|
-
else
|
|
147
|
-
if io_rs == '' then
|
|
148
|
-
sp_rs = /\n\n/n
|
|
149
|
-
sp_rs_orig = "\n\n"
|
|
150
|
-
else
|
|
151
|
-
sp_rs = Regexp.new(Regexp.escape(io_rs, 'n'), 0, 'n')
|
|
152
|
-
sp_rs_orig = io_rs
|
|
153
|
-
end
|
|
154
|
-
a = @buffer.split(sp_rs, 2)
|
|
155
|
-
if a.size > 1 then
|
|
156
|
-
r = a[0] + sp_rs_orig
|
|
157
|
-
@buffer = a[1]
|
|
158
|
-
else
|
|
159
|
-
@buffer << @io.gets(io_rs).to_s
|
|
160
|
-
a = @buffer.split(sp_rs, 2)
|
|
161
|
-
if a.size > 1 then
|
|
162
|
-
r = a[0] + sp_rs_orig
|
|
163
|
-
@buffer = a[1].to_s
|
|
164
|
-
else
|
|
165
|
-
r = @buffer
|
|
166
|
-
@buffer = ''
|
|
167
|
-
end
|
|
168
|
-
end
|
|
169
|
-
end
|
|
170
|
-
r
|
|
171
|
-
else
|
|
172
|
-
@io.gets(io_rs)
|
|
173
|
-
end
|
|
174
|
-
end
|
|
175
|
-
|
|
176
|
-
# Pushes back given str to the internal buffer.
|
|
177
|
-
# Returns nil.
|
|
178
|
-
# str must be read previously with the wrapper object.
|
|
179
|
-
#
|
|
180
|
-
# Note that in current implementation, the str can be everything,
|
|
181
|
-
# but please don't depend on it.
|
|
182
|
-
#
|
|
183
|
-
def ungets(str)
|
|
184
|
-
@buffer = str + @buffer
|
|
185
|
-
nil
|
|
186
|
-
end
|
|
187
|
-
|
|
188
|
-
# Same as IO#getc.
|
|
189
|
-
def getc
|
|
190
|
-
if @buffer.size > 0 then
|
|
191
|
-
r = @buffer[0]
|
|
192
|
-
@buffer = @buffer[1..-1]
|
|
193
|
-
else
|
|
194
|
-
r = @io.getc
|
|
195
|
-
end
|
|
196
|
-
r
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
# Pushes back one character into the internal buffer.
|
|
200
|
-
# Unlike IO#getc, it can be called more than one time.
|
|
201
|
-
def ungetc(c)
|
|
202
|
-
@buffer = sprintf("%c", c) + @buffer
|
|
203
|
-
nil
|
|
204
|
-
end
|
|
25
|
+
autoload :AutoDetect, 'bio/io/flatfile/autodetection'
|
|
26
|
+
autoload :Splitter, 'bio/io/flatfile/splitter'
|
|
27
|
+
autoload :BufferedInputStream, 'bio/io/flatfile/buffer'
|
|
205
28
|
|
|
206
|
-
|
|
207
|
-
def prefetch_buffer
|
|
208
|
-
@buffer
|
|
209
|
-
end
|
|
210
|
-
|
|
211
|
-
# It does @io.gets, and addes returned string
|
|
212
|
-
# to the internal buffer, and returns the string.
|
|
213
|
-
def prefetch_gets(*arg)
|
|
214
|
-
r = @io.gets(*arg)
|
|
215
|
-
@buffer << r if r
|
|
216
|
-
r
|
|
217
|
-
end
|
|
218
|
-
|
|
219
|
-
# It does @io.readpartial, and addes returned string
|
|
220
|
-
# to the internal buffer, and returns the string.
|
|
221
|
-
def prefetch_readpartial(*arg)
|
|
222
|
-
r = @io.readpartial(*arg)
|
|
223
|
-
@buffer << r if r
|
|
224
|
-
r
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
# Skips space characters in the stream.
|
|
228
|
-
# returns nil.
|
|
229
|
-
def skip_spaces
|
|
230
|
-
ws = { ?\s => true, ?\n => true, ?\r => true, ?\t => true }
|
|
231
|
-
while r = self.getc
|
|
232
|
-
unless ws[r] then
|
|
233
|
-
self.ungetc(r)
|
|
234
|
-
break
|
|
235
|
-
end
|
|
236
|
-
end
|
|
237
|
-
nil
|
|
238
|
-
end
|
|
239
|
-
end #class BufferedInputStream
|
|
240
|
-
|
|
241
|
-
# Splitter is a class to get entries from a buffered input stream.
|
|
242
|
-
module Splitter
|
|
243
|
-
# This is a template of splitter.
|
|
244
|
-
class Template
|
|
245
|
-
# Creates a new splitter.
|
|
246
|
-
def initialize(klass, bstream)
|
|
247
|
-
@stream = bstream
|
|
248
|
-
raise NotImplementedError
|
|
249
|
-
end
|
|
250
|
-
|
|
251
|
-
# skips leader of the entry.
|
|
252
|
-
def skip_leader
|
|
253
|
-
raise NotImplementedError
|
|
254
|
-
end
|
|
255
|
-
|
|
256
|
-
# Gets entry as a string
|
|
257
|
-
def get_entry
|
|
258
|
-
raise NotImplementedError
|
|
259
|
-
end
|
|
260
|
-
|
|
261
|
-
# the last entry read from the stream
|
|
262
|
-
attr_reader :entry
|
|
263
|
-
|
|
264
|
-
# a flag to write down entry start and end positions
|
|
265
|
-
attr_accessor :entry_pos_flag
|
|
266
|
-
|
|
267
|
-
# start position of the entry
|
|
268
|
-
attr_reader :entry_start_pos
|
|
269
|
-
|
|
270
|
-
# (end position of the entry) + 1
|
|
271
|
-
attr_reader :entry_ended_pos
|
|
272
|
-
end
|
|
273
|
-
|
|
274
|
-
# Default splitter.
|
|
275
|
-
# It sees following constants in the given class.
|
|
276
|
-
# DELIMITER:: (String) delimiter indicates the end of a entry.
|
|
277
|
-
# FLATFILE_HEADER:: (String) start of a entry, located on head of a line.
|
|
278
|
-
# DELIMITER_OVERRUN:: (Integer) excess read size included in DELIMITER.
|
|
279
|
-
#
|
|
280
|
-
class Default < Template
|
|
281
|
-
# Creates a new splitter.
|
|
282
|
-
# klass:: database class
|
|
283
|
-
# bstream:: input stream. It must be a BufferedInputStream object.
|
|
284
|
-
def initialize(klass, bstream)
|
|
285
|
-
@stream = bstream
|
|
286
|
-
@delimiter = klass::DELIMITER rescue nil
|
|
287
|
-
@header = klass::FLATFILE_HEADER rescue nil
|
|
288
|
-
# for specific classes' benefit
|
|
289
|
-
unless header
|
|
290
|
-
if klass == Bio::GenBank or klass == Bio::GenPept
|
|
291
|
-
@header = 'LOCUS '
|
|
292
|
-
end
|
|
293
|
-
end
|
|
294
|
-
@delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
|
|
295
|
-
@entry_pos_flag = nil
|
|
296
|
-
end
|
|
297
|
-
|
|
298
|
-
# (String) delimiter indicates the end of a entry.
|
|
299
|
-
attr_accessor :delimiter
|
|
300
|
-
|
|
301
|
-
# (String) start of a entry, located on head of a line.
|
|
302
|
-
attr_accessor :header
|
|
303
|
-
|
|
304
|
-
# (Integer) excess read data size included in delimiter.
|
|
305
|
-
attr_accessor :delimiter_overrun
|
|
306
|
-
|
|
307
|
-
# Skips leader of the entry.
|
|
308
|
-
#
|
|
309
|
-
# If @header is not nil, it reads till the contents of @header
|
|
310
|
-
# comes at the head of a line.
|
|
311
|
-
# If correct FLATFILE_HEADER is found, returns true.
|
|
312
|
-
# Otherwise, returns nil.
|
|
313
|
-
def skip_leader
|
|
314
|
-
if @header then
|
|
315
|
-
data = ''
|
|
316
|
-
while s = @stream.gets(@header)
|
|
317
|
-
data << s
|
|
318
|
-
if data.split(/[\r\n]+/)[-1] == @header then
|
|
319
|
-
@stream.ungets(@header)
|
|
320
|
-
return true
|
|
321
|
-
end
|
|
322
|
-
end
|
|
323
|
-
# @header was not found. For safety,
|
|
324
|
-
# pushes back data with removing white spaces in the head.
|
|
325
|
-
data.sub(/\A\s+/, '')
|
|
326
|
-
@stream.ungets(data)
|
|
327
|
-
return nil
|
|
328
|
-
else
|
|
329
|
-
@stream.skip_spaces
|
|
330
|
-
return nil
|
|
331
|
-
end
|
|
332
|
-
end
|
|
333
|
-
|
|
334
|
-
# gets a entry
|
|
335
|
-
def get_entry
|
|
336
|
-
p0 = @entry_pos_flag ? @stream.pos : nil
|
|
337
|
-
e = @stream.gets(@delimiter)
|
|
338
|
-
if e and @delimiter_overrun then
|
|
339
|
-
if e[-@delimiter.size, @delimiter.size ] == @delimiter then
|
|
340
|
-
overrun = e[-@delimiter_overrun, @delimiter_overrun]
|
|
341
|
-
e[-@delimiter_overrun, @delimiter_overrun] = ''
|
|
342
|
-
@stream.ungets(overrun)
|
|
343
|
-
end
|
|
344
|
-
end
|
|
345
|
-
p1 = @entry_pos_flag ? @stream.pos : nil
|
|
346
|
-
@entry_start_pos = p0
|
|
347
|
-
@entry = e
|
|
348
|
-
@entry_ended_pos = p1
|
|
349
|
-
@entry
|
|
350
|
-
end
|
|
351
|
-
end #class Defalult
|
|
352
|
-
end #module Splitter
|
|
29
|
+
include Enumerable
|
|
353
30
|
|
|
354
31
|
#
|
|
355
32
|
# Bio::FlatFile.open(file, *arg)
|
|
@@ -605,13 +282,17 @@ module Bio
|
|
|
605
282
|
@skip_leader_mode == :everytime)
|
|
606
283
|
@splitter.skip_leader
|
|
607
284
|
end
|
|
608
|
-
|
|
285
|
+
if raw then
|
|
286
|
+
r = @splitter.get_entry
|
|
287
|
+
else
|
|
288
|
+
r = @splitter.get_parsed_entry
|
|
289
|
+
end
|
|
609
290
|
@firsttime_flag = false
|
|
610
291
|
return nil unless r
|
|
611
292
|
if raw then
|
|
612
293
|
r
|
|
613
294
|
else
|
|
614
|
-
|
|
295
|
+
@entry = r
|
|
615
296
|
@entry
|
|
616
297
|
end
|
|
617
298
|
end
|
|
@@ -660,7 +341,7 @@ module Bio
|
|
|
660
341
|
# Resets file pointer to the start of the flatfile.
|
|
661
342
|
# (similar to IO#rewind)
|
|
662
343
|
def rewind
|
|
663
|
-
r = @stream.rewind
|
|
344
|
+
r = (@splitter || @stream).rewind
|
|
664
345
|
@firsttime_flag = true
|
|
665
346
|
r
|
|
666
347
|
end
|
|
@@ -722,7 +403,12 @@ module Bio
|
|
|
722
403
|
begin
|
|
723
404
|
@splitter = @dbclass.flatfile_splitter(@dbclass, @stream)
|
|
724
405
|
rescue NameError, NoMethodError
|
|
725
|
-
|
|
406
|
+
begin
|
|
407
|
+
splitter_class = @dbclass::FLATFILE_SPLITTER
|
|
408
|
+
rescue NameError
|
|
409
|
+
splitter_class = Splitter::Default
|
|
410
|
+
end
|
|
411
|
+
@splitter = splitter_class.new(klass, @stream)
|
|
726
412
|
end
|
|
727
413
|
else
|
|
728
414
|
@dbclass = nil
|
|
@@ -775,528 +461,6 @@ module Bio
|
|
|
775
461
|
AutoDetect.default.autodetect(text)
|
|
776
462
|
end
|
|
777
463
|
|
|
778
|
-
|
|
779
|
-
# AutoDetect automatically determines database class of given data.
|
|
780
|
-
class AutoDetect
|
|
781
|
-
|
|
782
|
-
include TSort
|
|
783
|
-
|
|
784
|
-
# Array to store autodetection rules.
|
|
785
|
-
# This is defined only for inspect.
|
|
786
|
-
class RulesArray < Array
|
|
787
|
-
# visualize contents
|
|
788
|
-
def inspect
|
|
789
|
-
"[#{self.collect { |e| e.name.inspect }.join(' ')}]"
|
|
790
|
-
end
|
|
791
|
-
end #class RulesArray
|
|
792
|
-
|
|
793
|
-
# Template of a single rule of autodetection
|
|
794
|
-
class RuleTemplate
|
|
795
|
-
# Creates a new element.
|
|
796
|
-
def self.[](*arg)
|
|
797
|
-
self.new(*arg)
|
|
798
|
-
end
|
|
799
|
-
|
|
800
|
-
# Creates a new element.
|
|
801
|
-
def initialize
|
|
802
|
-
@higher_priority_elements = RulesArray.new
|
|
803
|
-
@lower_priority_elements = RulesArray.new
|
|
804
|
-
@name = nil
|
|
805
|
-
end
|
|
806
|
-
|
|
807
|
-
# self is prior to the _elem_.
|
|
808
|
-
def is_prior_to(elem)
|
|
809
|
-
return nil if self == elem
|
|
810
|
-
elem.higher_priority_elements << self
|
|
811
|
-
self.lower_priority_elements << elem
|
|
812
|
-
true
|
|
813
|
-
end
|
|
814
|
-
|
|
815
|
-
# higher priority elements
|
|
816
|
-
attr_reader :higher_priority_elements
|
|
817
|
-
# lower priority elements
|
|
818
|
-
attr_reader :lower_priority_elements
|
|
819
|
-
|
|
820
|
-
# database classes
|
|
821
|
-
attr_reader :dbclasses
|
|
822
|
-
|
|
823
|
-
# unique name of the element
|
|
824
|
-
attr_accessor :name
|
|
825
|
-
|
|
826
|
-
# If given text (and/or meta information) is known, returns
|
|
827
|
-
# the database class.
|
|
828
|
-
# Otherwise, returns nil or false.
|
|
829
|
-
#
|
|
830
|
-
# _text_ will be a String.
|
|
831
|
-
# _meta_ will be a Hash.
|
|
832
|
-
# _meta_ may contain following keys.
|
|
833
|
-
# :path => pathname, filename or uri.
|
|
834
|
-
def guess(text, meta)
|
|
835
|
-
nil
|
|
836
|
-
end
|
|
837
|
-
|
|
838
|
-
private
|
|
839
|
-
# Gets constant from constant name given as a string.
|
|
840
|
-
def str2const(str)
|
|
841
|
-
const = Object
|
|
842
|
-
str.split(/\:\:/).each do |x|
|
|
843
|
-
const = const.const_get(x)
|
|
844
|
-
end
|
|
845
|
-
const
|
|
846
|
-
end
|
|
847
|
-
|
|
848
|
-
# Gets database class from given object.
|
|
849
|
-
# Current implementation is:
|
|
850
|
-
# if _obj_ is kind of String, regarded as a constant.
|
|
851
|
-
# Otherwise, returns _obj_ as is.
|
|
852
|
-
def get_dbclass(obj)
|
|
853
|
-
obj.kind_of?(String) ? str2const(obj) : obj
|
|
854
|
-
end
|
|
855
|
-
end #class Rule_Template
|
|
856
|
-
|
|
857
|
-
# RuleDebug is a class for debugging autodetect classes/methods
|
|
858
|
-
class RuleDebug < RuleTemplate
|
|
859
|
-
# Creates a new instance.
|
|
860
|
-
def initialize(name)
|
|
861
|
-
super()
|
|
862
|
-
@name = name
|
|
863
|
-
end
|
|
864
|
-
|
|
865
|
-
# prints information to the $stderr.
|
|
866
|
-
def guess(text, meta)
|
|
867
|
-
$stderr.puts @name
|
|
868
|
-
$stderr.puts text.inspect
|
|
869
|
-
$stderr.puts meta.inspect
|
|
870
|
-
nil
|
|
871
|
-
end
|
|
872
|
-
end #class RuleDebug
|
|
873
|
-
|
|
874
|
-
# Special element that is always top or bottom priority.
|
|
875
|
-
class RuleSpecial < RuleTemplate
|
|
876
|
-
def initialize(name)
|
|
877
|
-
#super()
|
|
878
|
-
@name = name
|
|
879
|
-
end
|
|
880
|
-
# modification of @name is inhibited.
|
|
881
|
-
def name=(x)
|
|
882
|
-
raise 'cannot modify name'
|
|
883
|
-
end
|
|
884
|
-
|
|
885
|
-
# always returns void array
|
|
886
|
-
def higher_priority_elements
|
|
887
|
-
[]
|
|
888
|
-
end
|
|
889
|
-
# always returns void array
|
|
890
|
-
def lower_priority_elements
|
|
891
|
-
[]
|
|
892
|
-
end
|
|
893
|
-
end #class RuleSpecial
|
|
894
|
-
|
|
895
|
-
# Special element that is always top priority.
|
|
896
|
-
TopRule = RuleSpecial.new('top')
|
|
897
|
-
# Special element that is always bottom priority.
|
|
898
|
-
BottomRule = RuleSpecial.new('bottom')
|
|
899
|
-
|
|
900
|
-
# A autodetection rule to use a regular expression
|
|
901
|
-
class RuleRegexp < RuleTemplate
|
|
902
|
-
# Creates a new instance.
|
|
903
|
-
def initialize(dbclass, re)
|
|
904
|
-
super()
|
|
905
|
-
@re = re
|
|
906
|
-
@name = dbclass.to_s
|
|
907
|
-
@dbclass = nil
|
|
908
|
-
@dbclass_lazy = dbclass
|
|
909
|
-
end
|
|
910
|
-
|
|
911
|
-
# database class (lazy evaluation)
|
|
912
|
-
def dbclass
|
|
913
|
-
unless @dbclass
|
|
914
|
-
@dbclass = get_dbclass(@dbclass_lazy)
|
|
915
|
-
end
|
|
916
|
-
@dbclass
|
|
917
|
-
end
|
|
918
|
-
private :dbclass
|
|
919
|
-
|
|
920
|
-
# returns database classes
|
|
921
|
-
def dbclasses
|
|
922
|
-
[ dbclass ]
|
|
923
|
-
end
|
|
924
|
-
|
|
925
|
-
# If given text matches the regexp, returns the database class.
|
|
926
|
-
# Otherwise, returns nil or false.
|
|
927
|
-
# _meta_ is ignored.
|
|
928
|
-
def guess(text, meta)
|
|
929
|
-
@re =~ text ? dbclass : nil
|
|
930
|
-
end
|
|
931
|
-
end #class RuleRegexp
|
|
932
|
-
|
|
933
|
-
# A autodetection rule to use more than two regular expressions.
|
|
934
|
-
# If given string matches one of the regular expressions,
|
|
935
|
-
# returns the database class.
|
|
936
|
-
class RuleRegexp2 < RuleRegexp
|
|
937
|
-
# Creates a new instance.
|
|
938
|
-
def initialize(dbclass, *regexps)
|
|
939
|
-
super(dbclass, nil)
|
|
940
|
-
@regexps = regexps
|
|
941
|
-
end
|
|
942
|
-
|
|
943
|
-
# If given text matches one of the regexp, returns the database class.
|
|
944
|
-
# Otherwise, returns nil or false.
|
|
945
|
-
# _meta_ is ignored.
|
|
946
|
-
def guess(text, meta)
|
|
947
|
-
@regexps.each do |re|
|
|
948
|
-
return dbclass if re =~ text
|
|
949
|
-
end
|
|
950
|
-
nil
|
|
951
|
-
end
|
|
952
|
-
end #class RuleRegexp
|
|
953
|
-
|
|
954
|
-
# A autodetection rule that passes data to the proc object.
|
|
955
|
-
class RuleProc < RuleTemplate
|
|
956
|
-
# Creates a new instance.
|
|
957
|
-
def initialize(*dbclasses, &proc)
|
|
958
|
-
super()
|
|
959
|
-
@proc = proc
|
|
960
|
-
@dbclasses = nil
|
|
961
|
-
@dbclasses_lazy = dbclasses
|
|
962
|
-
@name = dbclasses.collect { |x| x.to_s }.join('|')
|
|
963
|
-
end
|
|
964
|
-
|
|
965
|
-
# database classes (lazy evaluation)
|
|
966
|
-
def dbclasses
|
|
967
|
-
unless @dbclasses
|
|
968
|
-
@dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
|
|
969
|
-
end
|
|
970
|
-
@dbclasses
|
|
971
|
-
end
|
|
972
|
-
|
|
973
|
-
# If given text (and/or meta information) is known, returns
|
|
974
|
-
# the database class.
|
|
975
|
-
# Otherwise, returns nil or false.
|
|
976
|
-
#
|
|
977
|
-
# Refer RuleTemplate#guess for _meta_.
|
|
978
|
-
def guess(text, meta)
|
|
979
|
-
@proc.call(text)
|
|
980
|
-
end
|
|
981
|
-
end #class RuleProc
|
|
982
|
-
|
|
983
|
-
# Creates a new Autodetect object
|
|
984
|
-
def initialize
|
|
985
|
-
# stores autodetection rules.
|
|
986
|
-
@rules = Hash.new
|
|
987
|
-
# stores elements (cache)
|
|
988
|
-
@elements = nil
|
|
989
|
-
self.add(TopRule)
|
|
990
|
-
self.add(BottomRule)
|
|
991
|
-
end
|
|
992
|
-
|
|
993
|
-
# Adds a new element.
|
|
994
|
-
# Returns _elem_.
|
|
995
|
-
def add(elem)
|
|
996
|
-
raise 'element name conflicts' if @rules[elem.name]
|
|
997
|
-
@elements = nil
|
|
998
|
-
@rules[elem.name] = elem
|
|
999
|
-
elem
|
|
1000
|
-
end
|
|
1001
|
-
|
|
1002
|
-
# (required by TSort.)
|
|
1003
|
-
# For all elements, yields each element.
|
|
1004
|
-
def tsort_each_node(&x)
|
|
1005
|
-
@rules.each_value(&x)
|
|
1006
|
-
end
|
|
1007
|
-
|
|
1008
|
-
# (required by TSort.)
|
|
1009
|
-
# For a given element, yields each child
|
|
1010
|
-
# (= lower priority elements) of the element.
|
|
1011
|
-
def tsort_each_child(elem)
|
|
1012
|
-
if elem == TopRule then
|
|
1013
|
-
@rules.each_value do |e|
|
|
1014
|
-
yield e unless e == TopRule or
|
|
1015
|
-
e.lower_priority_elements.index(TopRule)
|
|
1016
|
-
end
|
|
1017
|
-
elsif elem == BottomRule then
|
|
1018
|
-
@rules.each_value do |e|
|
|
1019
|
-
yield e if e.higher_priority_elements.index(BottomRule)
|
|
1020
|
-
end
|
|
1021
|
-
else
|
|
1022
|
-
elem.lower_priority_elements.each do |e|
|
|
1023
|
-
yield e if e != BottomRule
|
|
1024
|
-
end
|
|
1025
|
-
unless elem.higher_priority_elements.index(BottomRule)
|
|
1026
|
-
yield BottomRule
|
|
1027
|
-
end
|
|
1028
|
-
end
|
|
1029
|
-
end
|
|
1030
|
-
|
|
1031
|
-
# Returns current elements as an array
|
|
1032
|
-
# whose order fulfills all elements' priorities.
|
|
1033
|
-
def elements
|
|
1034
|
-
unless @elements
|
|
1035
|
-
ary = tsort
|
|
1036
|
-
ary.reverse!
|
|
1037
|
-
@elements = ary
|
|
1038
|
-
end
|
|
1039
|
-
@elements
|
|
1040
|
-
end
|
|
1041
|
-
|
|
1042
|
-
# rebuilds the object and clears internal cache.
|
|
1043
|
-
def rehash
|
|
1044
|
-
@rules.rehash
|
|
1045
|
-
@elements = nil
|
|
1046
|
-
end
|
|
1047
|
-
|
|
1048
|
-
# visualizes the object (mainly for debug)
|
|
1049
|
-
def inspect
|
|
1050
|
-
"<#{self.class.to_s} " +
|
|
1051
|
-
self.elements.collect { |e| e.name.inspect }.join(' ') +
|
|
1052
|
-
">"
|
|
1053
|
-
end
|
|
1054
|
-
|
|
1055
|
-
# Iterates over each element.
|
|
1056
|
-
def each_rule(&x) #:yields: elem
|
|
1057
|
-
elements.each(&x)
|
|
1058
|
-
end
|
|
1059
|
-
|
|
1060
|
-
# Autodetect from the text.
|
|
1061
|
-
# Returns a database class if succeeded.
|
|
1062
|
-
# Returns nil if failed.
|
|
1063
|
-
def autodetect(text, meta = {})
|
|
1064
|
-
r = nil
|
|
1065
|
-
elements.each do |e|
|
|
1066
|
-
#$stderr.puts e.name
|
|
1067
|
-
r = e.guess(text, meta)
|
|
1068
|
-
break if r
|
|
1069
|
-
end
|
|
1070
|
-
r
|
|
1071
|
-
end
|
|
1072
|
-
|
|
1073
|
-
# autodetect from the FlatFile object.
|
|
1074
|
-
# Returns a database class if succeeded.
|
|
1075
|
-
# Returns nil if failed.
|
|
1076
|
-
def autodetect_flatfile(ff, lines = 31)
|
|
1077
|
-
meta = {}
|
|
1078
|
-
stream = ff.instance_eval { @stream }
|
|
1079
|
-
begin
|
|
1080
|
-
path = stream.path
|
|
1081
|
-
rescue NameError
|
|
1082
|
-
end
|
|
1083
|
-
if path then
|
|
1084
|
-
meta[:path] = path
|
|
1085
|
-
# call autodetect onece with meta and without any read action
|
|
1086
|
-
if r = self.autodetect(stream.prefetch_buffer, meta)
|
|
1087
|
-
return r
|
|
1088
|
-
end
|
|
1089
|
-
end
|
|
1090
|
-
# reading stream
|
|
1091
|
-
1.upto(lines) do |x|
|
|
1092
|
-
break unless line = stream.prefetch_gets
|
|
1093
|
-
if line.strip.size > 0 then
|
|
1094
|
-
if r = self.autodetect(stream.prefetch_buffer, meta)
|
|
1095
|
-
return r
|
|
1096
|
-
end
|
|
1097
|
-
end
|
|
1098
|
-
end
|
|
1099
|
-
return nil
|
|
1100
|
-
end
|
|
1101
|
-
|
|
1102
|
-
# default autodetect object for class method
|
|
1103
|
-
@default = nil
|
|
1104
|
-
|
|
1105
|
-
# returns the default autodetect object
|
|
1106
|
-
def self.default
|
|
1107
|
-
unless @default then
|
|
1108
|
-
@default = self.make_default
|
|
1109
|
-
end
|
|
1110
|
-
@default
|
|
1111
|
-
end
|
|
1112
|
-
|
|
1113
|
-
# sets the default autodetect object.
|
|
1114
|
-
def self.default=(ad)
|
|
1115
|
-
@default = ad
|
|
1116
|
-
end
|
|
1117
|
-
|
|
1118
|
-
# make a new autodetect object
|
|
1119
|
-
def self.[](*arg)
|
|
1120
|
-
a = self.new
|
|
1121
|
-
arg.each { |e| a.add(e) }
|
|
1122
|
-
a
|
|
1123
|
-
end
|
|
1124
|
-
|
|
1125
|
-
# make a default of default autodetect object
|
|
1126
|
-
def self.make_default
|
|
1127
|
-
a = self[
|
|
1128
|
-
genbank = RuleRegexp[ 'Bio::GenBank',
|
|
1129
|
-
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
|
|
1130
|
-
genpept = RuleRegexp[ 'Bio::GenPept',
|
|
1131
|
-
/^LOCUS .+ aa .+/ ],
|
|
1132
|
-
medline = RuleRegexp[ 'Bio::MEDLINE',
|
|
1133
|
-
/^PMID\- [0-9]+$/ ],
|
|
1134
|
-
embl = RuleRegexp[ 'Bio::EMBL',
|
|
1135
|
-
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
|
|
1136
|
-
sptr = RuleRegexp2[ 'Bio::SPTR',
|
|
1137
|
-
/^ID .+\; *PRT\;/,
|
|
1138
|
-
/^ID [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./ ],
|
|
1139
|
-
prosite = RuleRegexp[ 'Bio::PROSITE',
|
|
1140
|
-
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
|
|
1141
|
-
transfac = RuleRegexp[ 'Bio::TRANSFAC',
|
|
1142
|
-
/^AC [-A-Za-z0-9_\.]+$/ ],
|
|
1143
|
-
|
|
1144
|
-
aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
|
|
1145
|
-
if /^H [-A-Z0-9_\.]+$/ =~ text then
|
|
1146
|
-
if text =~ /^M [rc]/ then
|
|
1147
|
-
Bio::AAindex2
|
|
1148
|
-
elsif text =~ /^I A\/L/ then
|
|
1149
|
-
Bio::AAindex1
|
|
1150
|
-
else
|
|
1151
|
-
false #fail to determine
|
|
1152
|
-
end
|
|
1153
|
-
else
|
|
1154
|
-
nil
|
|
1155
|
-
end
|
|
1156
|
-
end,
|
|
1157
|
-
|
|
1158
|
-
litdb = RuleRegexp[ 'Bio::LITDB',
|
|
1159
|
-
/^CODE [0-9]+$/ ],
|
|
1160
|
-
brite = RuleRegexp[ 'Bio::KEGG::BRITE',
|
|
1161
|
-
/^Entry [A-Z0-9]+/ ],
|
|
1162
|
-
orthology = RuleRegexp[ 'Bio::KEGG::ORTHOLOGY',
|
|
1163
|
-
/^ENTRY .+ KO\s*/ ],
|
|
1164
|
-
drug = RuleRegexp[ 'Bio::KEGG::DRUG',
|
|
1165
|
-
/^ENTRY .+ Drug\s*/ ],
|
|
1166
|
-
glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
|
|
1167
|
-
/^ENTRY .+ Glycan\s*/ ],
|
|
1168
|
-
enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
|
|
1169
|
-
/^ENTRY EC [0-9\.]+$/,
|
|
1170
|
-
/^ENTRY .+ Enzyme\s*/
|
|
1171
|
-
],
|
|
1172
|
-
compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
|
|
1173
|
-
/^ENTRY C[A-Za-z0-9\._]+$/,
|
|
1174
|
-
/^ENTRY .+ Compound\s*/
|
|
1175
|
-
],
|
|
1176
|
-
reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
|
|
1177
|
-
/^ENTRY R[A-Za-z0-9\._]+$/,
|
|
1178
|
-
/^ENTRY .+ Reaction\s*/
|
|
1179
|
-
],
|
|
1180
|
-
genes = RuleRegexp[ 'Bio::KEGG::GENES',
|
|
1181
|
-
/^ENTRY .+ (CDS|gene|.*RNA|Contig) / ],
|
|
1182
|
-
genome = RuleRegexp[ 'Bio::KEGG::GENOME',
|
|
1183
|
-
/^ENTRY [a-z]+$/ ],
|
|
1184
|
-
|
|
1185
|
-
fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
|
|
1186
|
-
'Bio::FANTOM::MaXML::Sequence') do |text|
|
|
1187
|
-
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
|
|
1188
|
-
case $1
|
|
1189
|
-
when 'clusters'
|
|
1190
|
-
Bio::FANTOM::MaXML::Cluster
|
|
1191
|
-
when 'sequences'
|
|
1192
|
-
Bio::FANTOM::MaXML::Sequence
|
|
1193
|
-
else
|
|
1194
|
-
nil #unknown
|
|
1195
|
-
end
|
|
1196
|
-
else
|
|
1197
|
-
nil
|
|
1198
|
-
end
|
|
1199
|
-
end,
|
|
1200
|
-
|
|
1201
|
-
pdb = RuleRegexp[ 'Bio::PDB',
|
|
1202
|
-
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
|
|
1203
|
-
het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
|
|
1204
|
-
/^RESIDUE +.+ +\d+\s*$/ ],
|
|
1205
|
-
|
|
1206
|
-
clustal = RuleRegexp2[ 'Bio::ClustalW::Report',
|
|
1207
|
-
/^CLUSTAL .*\(.*\).*sequence +alignment/,
|
|
1208
|
-
/^CLUSTAL FORMAT for T-COFFEE/ ],
|
|
1209
|
-
|
|
1210
|
-
gcg_msf = RuleRegexp[ 'Bio::GCG::Msf',
|
|
1211
|
-
/^!!(N|A)A_MULTIPLE_ALIGNMENT .+/ ],
|
|
1212
|
-
|
|
1213
|
-
gcg_seq = RuleRegexp[ 'Bio::GCG::Seq',
|
|
1214
|
-
/^!!(N|A)A_SEQUENCE .+/ ],
|
|
1215
|
-
|
|
1216
|
-
blastxml = RuleRegexp[ 'Bio::Blast::Report',
|
|
1217
|
-
/\<\!DOCTYPE BlastOutput PUBLIC / ],
|
|
1218
|
-
wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
|
|
1219
|
-
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
|
|
1220
|
-
wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
|
|
1221
|
-
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
|
|
1222
|
-
blast = RuleRegexp[ 'Bio::Blast::Default::Report',
|
|
1223
|
-
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
|
|
1224
|
-
tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
|
|
1225
|
-
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
|
|
1226
|
-
|
|
1227
|
-
blat = RuleRegexp[ 'Bio::Blat::Report',
|
|
1228
|
-
/^psLayout version \d+/ ],
|
|
1229
|
-
spidey = RuleRegexp[ 'Bio::Spidey::Report',
|
|
1230
|
-
/^\-\-SPIDEY version .+\-\-$/ ],
|
|
1231
|
-
hmmer = RuleRegexp[ 'Bio::HMMER::Report',
|
|
1232
|
-
/^HMMER +\d+\./ ],
|
|
1233
|
-
sim4 = RuleRegexp[ 'Bio::Sim4::Report',
|
|
1234
|
-
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
|
|
1235
|
-
|
|
1236
|
-
fastaformat = RuleProc.new('Bio::FastaFormat',
|
|
1237
|
-
'Bio::NBRF',
|
|
1238
|
-
'Bio::FastaNumericFormat') do |text|
|
|
1239
|
-
if /^>.+$/ =~ text
|
|
1240
|
-
case text
|
|
1241
|
-
when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
|
|
1242
|
-
Bio::NBRF
|
|
1243
|
-
when /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/
|
|
1244
|
-
Bio::FastaFormat
|
|
1245
|
-
when /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/
|
|
1246
|
-
Bio::FastaNumericFormat
|
|
1247
|
-
else
|
|
1248
|
-
false
|
|
1249
|
-
end
|
|
1250
|
-
else
|
|
1251
|
-
nil
|
|
1252
|
-
end
|
|
1253
|
-
end
|
|
1254
|
-
]
|
|
1255
|
-
|
|
1256
|
-
# dependencies
|
|
1257
|
-
# NCBI
|
|
1258
|
-
genbank.is_prior_to genpept
|
|
1259
|
-
# EMBL/UniProt
|
|
1260
|
-
embl.is_prior_to sptr
|
|
1261
|
-
sptr.is_prior_to prosite
|
|
1262
|
-
prosite.is_prior_to transfac
|
|
1263
|
-
# KEGG
|
|
1264
|
-
#aaindex.is_prior_to litdb
|
|
1265
|
-
#litdb.is_prior_to brite
|
|
1266
|
-
brite.is_prior_to orthology
|
|
1267
|
-
orthology.is_prior_to drug
|
|
1268
|
-
drug.is_prior_to glycan
|
|
1269
|
-
glycan.is_prior_to enzyme
|
|
1270
|
-
enzyme.is_prior_to compound
|
|
1271
|
-
compound.is_prior_to reaction
|
|
1272
|
-
reaction.is_prior_to genes
|
|
1273
|
-
genes.is_prior_to genome
|
|
1274
|
-
# PDB
|
|
1275
|
-
pdb.is_prior_to het
|
|
1276
|
-
# BLAST
|
|
1277
|
-
wublast.is_prior_to wutblast
|
|
1278
|
-
wutblast.is_prior_to blast
|
|
1279
|
-
blast.is_prior_to tblast
|
|
1280
|
-
# FastaFormat
|
|
1281
|
-
BottomRule.is_prior_to(fastaformat)
|
|
1282
|
-
|
|
1283
|
-
# for debug
|
|
1284
|
-
#debug_first = RuleDebug.new('debug_first')
|
|
1285
|
-
#a.add(debug_first)
|
|
1286
|
-
#debug_first.is_prior_to(TopRule)
|
|
1287
|
-
|
|
1288
|
-
## for debug
|
|
1289
|
-
#debug_last = RuleDebug.new('debug_last')
|
|
1290
|
-
#a.add(debug_last)
|
|
1291
|
-
#BottomRule.is_prior_to(debug_last)
|
|
1292
|
-
#fastaformat.is_prior_to(debug_last)
|
|
1293
|
-
|
|
1294
|
-
a.rehash
|
|
1295
|
-
return a
|
|
1296
|
-
end
|
|
1297
|
-
|
|
1298
|
-
end #class AutoDetect
|
|
1299
|
-
|
|
1300
464
|
end #class FlatFile
|
|
1301
465
|
|
|
1302
466
|
end #module Bio
|