bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/lib/bio/io/fetch.rb
CHANGED
|
@@ -1,114 +1,181 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/io/biofetch.rb - BioFetch access module
|
|
3
3
|
#
|
|
4
|
-
# Copyright::
|
|
5
|
-
#
|
|
6
|
-
# License::
|
|
4
|
+
# Copyright:: Copyright (C) 2002, 2005 Toshiaki Katayama <k@bioruby.org>,
|
|
5
|
+
# Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
|
6
|
+
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
#
|
|
8
|
+
# $Id: fetch.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $
|
|
9
9
|
#
|
|
10
|
-
|
|
10
|
+
# == DESCRIPTION
|
|
11
11
|
#
|
|
12
|
-
#
|
|
13
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
14
|
-
# License as published by the Free Software Foundation; either
|
|
15
|
-
# version 2 of the License, or (at your option) any later version.
|
|
12
|
+
# Using BioRuby BioFetch server
|
|
16
13
|
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
#
|
|
14
|
+
# br_server = Bio::Fetch.new()
|
|
15
|
+
# puts br_server.databases
|
|
16
|
+
# puts br_server.formats('embl')
|
|
17
|
+
# puts br_server.maxids
|
|
21
18
|
#
|
|
22
|
-
#
|
|
23
|
-
# License along with this library; if not, write to the Free Software
|
|
24
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25
|
-
#
|
|
26
|
-
#++
|
|
19
|
+
# Using EBI BioFetch server
|
|
27
20
|
#
|
|
21
|
+
# ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
|
|
22
|
+
# puts ebi_server.fetch('embl', 'J00231', 'raw')
|
|
23
|
+
# puts ebi_server.fetch('embl', 'J00231', 'html')
|
|
24
|
+
# puts Bio::Fetch.query('genbank', 'J00231')
|
|
25
|
+
# puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
|
|
26
|
+
#
|
|
28
27
|
|
|
29
28
|
require 'uri'
|
|
30
|
-
require '
|
|
29
|
+
require 'bio/command'
|
|
31
30
|
|
|
32
31
|
module Bio
|
|
33
|
-
|
|
34
|
-
class
|
|
35
|
-
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
#
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# Fetch
|
|
52
|
-
# '
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
#
|
|
64
|
-
#
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if database
|
|
79
|
-
query = "info=formats;db=#{database}"
|
|
80
|
-
responce, result = Net::HTTP.new(@host, @port).post(@path, query)
|
|
81
|
-
return result
|
|
32
|
+
# = DESCRIPTION
|
|
33
|
+
# The Bio::Fetch class provides an interface to dbfetch servers. Given
|
|
34
|
+
# a database name and an accession number, these servers return the associated
|
|
35
|
+
# record. For example, for the embl database on the EBI, that would be a
|
|
36
|
+
# nucleic or amino acid sequence.
|
|
37
|
+
#
|
|
38
|
+
# Possible dbfetch servers include:
|
|
39
|
+
# * http://bioruby.org/cgi-bin/biofetch.rb (default)
|
|
40
|
+
# * http://www.ebi.ac.uk/cgi-bin/dbfetch
|
|
41
|
+
#
|
|
42
|
+
# If you're behind a proxy server, be sure to set your HTTP_PROXY
|
|
43
|
+
# environment variable accordingly.
|
|
44
|
+
#
|
|
45
|
+
# = USAGE
|
|
46
|
+
# require 'bio'
|
|
47
|
+
#
|
|
48
|
+
# # Retrieve the sequence of accession number M33388 from the EMBL
|
|
49
|
+
# # database.
|
|
50
|
+
# server = Bio::Fetch.new() #uses default server
|
|
51
|
+
# puts server.fetch('embl','M33388')
|
|
52
|
+
#
|
|
53
|
+
# # Do the same thing without creating a Bio::Fetch object. This method always
|
|
54
|
+
# # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
|
|
55
|
+
# puts Bio::Fetch.query('embl','M33388')
|
|
56
|
+
#
|
|
57
|
+
# # To know what databases are available on the bioruby dbfetch server:
|
|
58
|
+
# server = Bio::Fetch.new()
|
|
59
|
+
# puts server.databases
|
|
60
|
+
#
|
|
61
|
+
# # Some databases provide their data in different formats (e.g. 'fasta',
|
|
62
|
+
# # 'genbank' or 'embl'). To check which formats are supported by a given
|
|
63
|
+
# # database:
|
|
64
|
+
# puts server.formats('embl')
|
|
65
|
+
#
|
|
66
|
+
class Fetch
|
|
67
|
+
|
|
68
|
+
# Create a new Bio::Fetch server object that can subsequently be queried
|
|
69
|
+
# using the Bio::Fetch#fetch method
|
|
70
|
+
# ---
|
|
71
|
+
# *Arguments*:
|
|
72
|
+
# * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
|
|
73
|
+
# *Returns*:: Bio::Fetch object
|
|
74
|
+
def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
|
|
75
|
+
@url = url
|
|
76
|
+
schema, user, @host, @port, reg, @path, = URI.split(@url)
|
|
82
77
|
end
|
|
78
|
+
|
|
79
|
+
# The default database to query
|
|
80
|
+
#--
|
|
81
|
+
# This will be used by the get_by_id method
|
|
82
|
+
#++
|
|
83
|
+
attr_accessor :database
|
|
84
|
+
|
|
85
|
+
# Get raw database entry by id. This method lets the Bio::Registry class
|
|
86
|
+
# use Bio::Fetch objects.
|
|
87
|
+
def get_by_id(id)
|
|
88
|
+
fetch(@database, id)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Fetch a database entry as specified by database (db), entry id (id),
|
|
92
|
+
# 'raw' text or 'html' (style), and format. When using BioRuby's
|
|
93
|
+
# BioFetch server, value for the format should not be set.
|
|
94
|
+
# Examples:
|
|
95
|
+
# server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
|
|
96
|
+
# puts server.fetch('embl','M33388','raw','fasta')
|
|
97
|
+
# puts server.fetch('refseq','NM_12345','html','embl')
|
|
98
|
+
# ---
|
|
99
|
+
# *Arguments*:
|
|
100
|
+
# * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
|
|
101
|
+
# * _id_: single ID or ID list separated by commas or white space
|
|
102
|
+
# * _style_: [raw|html] (default = 'raw')
|
|
103
|
+
# * _format_: name of output format (see Bio::Fetch#formats)
|
|
104
|
+
def fetch(db, id, style = 'raw', format = nil)
|
|
105
|
+
query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
|
|
106
|
+
query.push("format=#{format}") if format
|
|
107
|
+
query = query.join('&')
|
|
108
|
+
|
|
109
|
+
Bio::Command.read_uri(@url + '?' + URI.escape(query))
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Shortcut for using BioRuby's BioFetch server. You can fetch an entry
|
|
113
|
+
# without creating an instance of BioFetch server. This method uses the
|
|
114
|
+
# default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
|
|
115
|
+
#
|
|
116
|
+
# Example:
|
|
117
|
+
# puts Bio::Fetch.query('refseq','NM_12345')
|
|
118
|
+
#
|
|
119
|
+
# ---
|
|
120
|
+
# *Arguments*:
|
|
121
|
+
# * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
|
|
122
|
+
# * _id_: single ID or ID list separated by commas or white space
|
|
123
|
+
# * _style_: [raw|html] (default = 'raw')
|
|
124
|
+
# * _format_: name of output format (see Bio::Fetch#formats)
|
|
125
|
+
def self.query(*args)
|
|
126
|
+
self.new.fetch(*args)
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Using this method, the user can ask a dbfetch server what databases
|
|
130
|
+
# it supports. This would normally be the first step you'd take when
|
|
131
|
+
# you use a dbfetch server for the first time.
|
|
132
|
+
# Example:
|
|
133
|
+
# server = Bio::Fetch.new()
|
|
134
|
+
# puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
|
|
135
|
+
#
|
|
136
|
+
# This method only works for the bioruby dbfetch server. For a list
|
|
137
|
+
# of databases available from the EBI, see the EBI website at
|
|
138
|
+
# http://www.ebi.ac.uk/cgi-bin/dbfetch/
|
|
139
|
+
# ---
|
|
140
|
+
# *Returns*:: array of database names
|
|
141
|
+
def databases
|
|
142
|
+
query = "info=dbs"
|
|
143
|
+
|
|
144
|
+
Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
# Lists the formats that are available for a given database. Like the
|
|
148
|
+
# Bio::Fetch#databases method, this method is only available on
|
|
149
|
+
# the bioruby dbfetch server.
|
|
150
|
+
# Example:
|
|
151
|
+
# server = Bio::Fetch.new()
|
|
152
|
+
# puts server.formats('embl') # returns "default fasta"
|
|
153
|
+
# ---
|
|
154
|
+
# *Arguments*:
|
|
155
|
+
# * _database_: name of database you want the supported formats for
|
|
156
|
+
# *Returns*:: array of formats
|
|
157
|
+
def formats(database = @database)
|
|
158
|
+
if database
|
|
159
|
+
query = "info=formats;db=#{database}"
|
|
160
|
+
|
|
161
|
+
Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
# A dbfetch server will only return entries up to a given maximum number.
|
|
166
|
+
# This method retrieves that number from the server. As for the databases
|
|
167
|
+
# and formats methods, the maxids method only works for the bioruby
|
|
168
|
+
# dbfetch server.
|
|
169
|
+
# ---
|
|
170
|
+
# *Arguments*: none
|
|
171
|
+
# *Returns*:: number
|
|
172
|
+
def maxids
|
|
173
|
+
query = "info=maxids"
|
|
174
|
+
|
|
175
|
+
Bio::Command.read_uri(@url + '?' + URI.escape(query)).to_i
|
|
176
|
+
end
|
|
177
|
+
|
|
83
178
|
end
|
|
84
179
|
|
|
85
|
-
# How many entries can be retrieved simultaneously?
|
|
86
|
-
def maxids
|
|
87
|
-
query = "info=maxids"
|
|
88
|
-
responce, result = Net::HTTP.new(@host, @port).post(@path, query)
|
|
89
|
-
return result
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
end
|
|
93
|
-
|
|
94
180
|
end # module Bio
|
|
95
181
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
if __FILE__ == $0
|
|
99
|
-
|
|
100
|
-
# bfserv = Bio::Fetch.new('http://www.ebi.ac.uk:80/cgi-bin/dbfetch')
|
|
101
|
-
bfserv = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
|
|
102
|
-
puts "# test 1"
|
|
103
|
-
puts bfserv.fetch('embl', 'J00231', 'raw')
|
|
104
|
-
puts "# test 2"
|
|
105
|
-
puts bfserv.fetch('embl', 'J00231', 'html')
|
|
106
|
-
|
|
107
|
-
puts "# test 3"
|
|
108
|
-
puts Bio::Fetch.query('genbank', 'J00231')
|
|
109
|
-
puts "# test 4"
|
|
110
|
-
puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
|
|
111
|
-
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
|
data/lib/bio/io/flatfile.rb
CHANGED
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
#
|
|
4
4
|
# Copyright (C) 2001-2006 Naohisa Goto <ng@bioruby.org>
|
|
5
5
|
#
|
|
6
|
-
# License:: Ruby
|
|
6
|
+
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id: flatfile.rb,v 1.
|
|
8
|
+
# $Id: flatfile.rb,v 1.60 2007/07/09 14:08:34 ngoto Exp $
|
|
9
9
|
#
|
|
10
10
|
#
|
|
11
11
|
# Bio::FlatFile is a helper and wrapper class to read a biological data file.
|
|
@@ -34,7 +34,6 @@ module Bio
|
|
|
34
34
|
@path = path
|
|
35
35
|
# initialize prefetch buffer
|
|
36
36
|
@buffer = ''
|
|
37
|
-
@path = path
|
|
38
37
|
end
|
|
39
38
|
|
|
40
39
|
# Creates a new input stream wrapper from the given IO object.
|
|
@@ -262,6 +261,9 @@ module Bio
|
|
|
262
261
|
# the last entry read from the stream
|
|
263
262
|
attr_reader :entry
|
|
264
263
|
|
|
264
|
+
# a flag to write down entry start and end positions
|
|
265
|
+
attr_accessor :entry_pos_flag
|
|
266
|
+
|
|
265
267
|
# start position of the entry
|
|
266
268
|
attr_reader :entry_start_pos
|
|
267
269
|
|
|
@@ -290,6 +292,7 @@ module Bio
|
|
|
290
292
|
end
|
|
291
293
|
end
|
|
292
294
|
@delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
|
|
295
|
+
@entry_pos_flag = nil
|
|
293
296
|
end
|
|
294
297
|
|
|
295
298
|
# (String) delimiter indicates the end of an entry.
|
|
@@ -330,7 +333,7 @@ module Bio
|
|
|
330
333
|
|
|
331
334
|
# gets an entry
|
|
332
335
|
def get_entry
|
|
333
|
-
p0 = @stream.pos
|
|
336
|
+
p0 = @entry_pos_flag ? @stream.pos : nil
|
|
334
337
|
e = @stream.gets(@delimiter)
|
|
335
338
|
if e and @delimiter_overrun then
|
|
336
339
|
if e[-@delimiter.size, @delimiter.size ] == @delimiter then
|
|
@@ -339,7 +342,7 @@ module Bio
|
|
|
339
342
|
@stream.ungets(overrun)
|
|
340
343
|
end
|
|
341
344
|
end
|
|
342
|
-
p1 = @stream.pos
|
|
345
|
+
p1 = @entry_pos_flag ? @stream.pos : nil
|
|
343
346
|
@entry_start_pos = p0
|
|
344
347
|
@entry = e
|
|
345
348
|
@entry_ended_pos = p1
|
|
@@ -422,7 +425,7 @@ module Bio
|
|
|
422
425
|
# check if file is filename or IO object
|
|
423
426
|
unless file.respond_to?(:gets)
|
|
424
427
|
# 'file' is a filename
|
|
425
|
-
|
|
428
|
+
_open_file(dbclass, file, *arg, &block)
|
|
426
429
|
else
|
|
427
430
|
# 'file' is an IO object
|
|
428
431
|
ff = self.new(dbclass, file)
|
|
@@ -462,15 +465,27 @@ module Bio
|
|
|
462
465
|
# Otherwise, it returns a new FlatFile object.
|
|
463
466
|
#
|
|
464
467
|
def self.open_file(filename, *arg)
|
|
468
|
+
_open_file(nil, filename, *arg)
|
|
469
|
+
end
|
|
470
|
+
|
|
471
|
+
# Same as FlatFile.open(dbclass, filename, *arg),
|
|
472
|
+
# except that it only accepts a filename and doesn't accept an IO object.
|
|
473
|
+
#
|
|
474
|
+
# It can accept a block.
|
|
475
|
+
# If a block is given, it returns the block's return value.
|
|
476
|
+
# Otherwise, it returns a new FlatFile object.
|
|
477
|
+
#
|
|
478
|
+
def self._open_file(dbclass, filename, *arg)
|
|
465
479
|
if block_given? then
|
|
466
480
|
BufferedInputStream.open_file(filename, *arg) do |stream|
|
|
467
|
-
yield self.new(
|
|
481
|
+
yield self.new(dbclass, stream)
|
|
468
482
|
end
|
|
469
483
|
else
|
|
470
484
|
stream = BufferedInputStream.open_file(filename, *arg)
|
|
471
|
-
self.new(
|
|
485
|
+
self.new(dbclass, stream)
|
|
472
486
|
end
|
|
473
487
|
end
|
|
488
|
+
private_class_method :_open_file
|
|
474
489
|
|
|
475
490
|
# Opens URI specified as _uri_.
|
|
476
491
|
# _uri_ must be a String or URI object.
|
|
@@ -493,6 +508,20 @@ module Bio
|
|
|
493
508
|
end
|
|
494
509
|
end
|
|
495
510
|
|
|
511
|
+
# Executes the block for every entry in the stream.
|
|
512
|
+
# Same as FlatFile.open(*arg) { |ff| ff.each { |entry| ... }}.
|
|
513
|
+
#
|
|
514
|
+
# * Example
|
|
515
|
+
# Bio::FlatFile.foreach('test.fst') { |e| puts e.definition }
|
|
516
|
+
#
|
|
517
|
+
def self.foreach(*arg)
|
|
518
|
+
self.open(*arg) do |flatfileobj|
|
|
519
|
+
flatfileobj.each do |entry|
|
|
520
|
+
yield entry
|
|
521
|
+
end
|
|
522
|
+
end
|
|
523
|
+
end
|
|
524
|
+
|
|
496
525
|
# Same as FlatFile.open, except that 'stream' should be an opened
|
|
497
526
|
# stream object (IO, File, ..., which has the 'gets' method).
|
|
498
527
|
#
|
|
@@ -518,13 +547,11 @@ module Bio
|
|
|
518
547
|
#
|
|
519
548
|
def initialize(dbclass, stream)
|
|
520
549
|
# 2nd arg: IO object
|
|
521
|
-
if
|
|
550
|
+
if stream.kind_of?(BufferedInputStream)
|
|
522
551
|
@stream = stream
|
|
523
552
|
else
|
|
524
553
|
@stream = BufferedInputStream.for_io(stream)
|
|
525
554
|
end
|
|
526
|
-
# default is raw mode
|
|
527
|
-
self.raw = false
|
|
528
555
|
# 1st arg: database class (or file format autodetection)
|
|
529
556
|
if dbclass then
|
|
530
557
|
self.dbclass = dbclass
|
|
@@ -534,6 +561,8 @@ module Bio
|
|
|
534
561
|
#
|
|
535
562
|
@skip_leader_mode = :firsttime
|
|
536
563
|
@firsttime_flag = true
|
|
564
|
+
# default raw mode is false
|
|
565
|
+
self.raw = false
|
|
537
566
|
end
|
|
538
567
|
|
|
539
568
|
# The mode how to skip leader of the data.
|
|
@@ -563,8 +592,14 @@ module Bio
|
|
|
563
592
|
@stream.path
|
|
564
593
|
end
|
|
565
594
|
|
|
595
|
+
# Exception class to be raised when data format hasn't been specified.
|
|
596
|
+
class UnknownDataFormatError < IOError
|
|
597
|
+
end
|
|
598
|
+
|
|
566
599
|
# Get next entry.
|
|
567
600
|
def next_entry
|
|
601
|
+
raise UnknownDataFormatError,
|
|
602
|
+
'file format auto-detection failed?' unless @dbclass
|
|
568
603
|
if @skip_leader_mode and
|
|
569
604
|
((@firsttime_flag and @skip_leader_mode == :firsttime) or
|
|
570
605
|
@skip_leader_mode == :everytime)
|
|
@@ -587,6 +622,16 @@ module Bio
|
|
|
587
622
|
@splitter.entry
|
|
588
623
|
end
|
|
589
624
|
|
|
625
|
+
# a flag to write down entry start and end positions
|
|
626
|
+
def entry_pos_flag
|
|
627
|
+
@splitter.entry_pos_flag
|
|
628
|
+
end
|
|
629
|
+
|
|
630
|
+
# Sets flag to write down entry start and end positions
|
|
631
|
+
def entry_pos_flag=(x)
|
|
632
|
+
@splitter.entry_pos_flag = x
|
|
633
|
+
end
|
|
634
|
+
|
|
590
635
|
# start position of the last entry
|
|
591
636
|
def entry_start_pos
|
|
592
637
|
@splitter.entry_start_pos
|
|
@@ -736,21 +781,26 @@ module Bio
|
|
|
736
781
|
|
|
737
782
|
include TSort
|
|
738
783
|
|
|
784
|
+
# Array to store autodetection rules.
|
|
785
|
+
# This is defined only for inspect.
|
|
786
|
+
class RulesArray < Array
|
|
787
|
+
# visualize contents
|
|
788
|
+
def inspect
|
|
789
|
+
"[#{self.collect { |e| e.name.inspect }.join(' ')}]"
|
|
790
|
+
end
|
|
791
|
+
end #class RulesArray
|
|
792
|
+
|
|
739
793
|
# Template of a single rule of autodetection
|
|
740
794
|
class RuleTemplate
|
|
741
795
|
# Creates a new element.
|
|
742
796
|
def self.[](*arg)
|
|
743
797
|
self.new(*arg)
|
|
744
798
|
end
|
|
745
|
-
|
|
799
|
+
|
|
746
800
|
# Creates a new element.
|
|
747
801
|
def initialize
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
"[#{self.collect { |e| e.name.inspect }.join(' ')}]"
|
|
751
|
-
end
|
|
752
|
-
@higher_priority_elements = a.clone
|
|
753
|
-
@lower_priority_elements = a.clone
|
|
802
|
+
@higher_priority_elements = RulesArray.new
|
|
803
|
+
@lower_priority_elements = RulesArray.new
|
|
754
804
|
@name = nil
|
|
755
805
|
end
|
|
756
806
|
|
|
@@ -784,6 +834,24 @@ module Bio
|
|
|
784
834
|
def guess(text, meta)
|
|
785
835
|
nil
|
|
786
836
|
end
|
|
837
|
+
|
|
838
|
+
private
|
|
839
|
+
# Gets constant from constant name given as a string.
|
|
840
|
+
def str2const(str)
|
|
841
|
+
const = Object
|
|
842
|
+
str.split(/\:\:/).each do |x|
|
|
843
|
+
const = const.const_get(x)
|
|
844
|
+
end
|
|
845
|
+
const
|
|
846
|
+
end
|
|
847
|
+
|
|
848
|
+
# Gets database class from given object.
|
|
849
|
+
# Current implementation is:
|
|
850
|
+
# if _obj_ is kind of String, regarded as a constant.
|
|
851
|
+
# Otherwise, returns _obj_ as is.
|
|
852
|
+
def get_dbclass(obj)
|
|
853
|
+
obj.kind_of?(String) ? str2const(obj) : obj
|
|
854
|
+
end
|
|
787
855
|
end #class Rule_Template
|
|
788
856
|
|
|
789
857
|
# RuleDebug is a class for debugging autodetect classes/methods
|
|
@@ -835,40 +903,49 @@ module Bio
|
|
|
835
903
|
def initialize(dbclass, re)
|
|
836
904
|
super()
|
|
837
905
|
@re = re
|
|
838
|
-
@dbclass = dbclass
|
|
839
|
-
@dbclasses = [ dbclass ]
|
|
840
906
|
@name = dbclass.to_s
|
|
907
|
+
@dbclass = nil
|
|
908
|
+
@dbclass_lazy = dbclass
|
|
909
|
+
end
|
|
910
|
+
|
|
911
|
+
# database class (lazy evaluation)
|
|
912
|
+
def dbclass
|
|
913
|
+
unless @dbclass
|
|
914
|
+
@dbclass = get_dbclass(@dbclass_lazy)
|
|
915
|
+
end
|
|
916
|
+
@dbclass
|
|
917
|
+
end
|
|
918
|
+
private :dbclass
|
|
919
|
+
|
|
920
|
+
# returns database classes
|
|
921
|
+
def dbclasses
|
|
922
|
+
[ dbclass ]
|
|
841
923
|
end
|
|
842
924
|
|
|
843
925
|
# If given text matches the regexp, returns the database class.
|
|
844
926
|
# Otherwise, returns nil or false.
|
|
845
927
|
# _meta_ is ignored.
|
|
846
928
|
def guess(text, meta)
|
|
847
|
-
@re =~ text ?
|
|
929
|
+
@re =~ text ? dbclass : nil
|
|
848
930
|
end
|
|
849
931
|
end #class RuleRegexp
|
|
850
932
|
|
|
851
933
|
# A autodetection rule to use more than two regular expressions.
|
|
852
|
-
|
|
934
|
+
# If given string matches one of the regular expressions,
|
|
935
|
+
# returns the database class.
|
|
936
|
+
class RuleRegexp2 < RuleRegexp
|
|
853
937
|
# Creates a new instance.
|
|
854
938
|
def initialize(dbclass, *regexps)
|
|
855
|
-
super()
|
|
939
|
+
super(dbclass, nil)
|
|
856
940
|
@regexps = regexps
|
|
857
|
-
@dbclass = dbclass
|
|
858
|
-
@dbclasses = [ dbclass ]
|
|
859
|
-
if name
|
|
860
|
-
@name = name
|
|
861
|
-
else
|
|
862
|
-
@name = @dbclass.to_s
|
|
863
|
-
end
|
|
864
941
|
end
|
|
865
942
|
|
|
866
|
-
# If given text matches the regexp, returns the database class.
|
|
943
|
+
# If given text matches one of the regexp, returns the database class.
|
|
867
944
|
# Otherwise, returns nil or false.
|
|
868
945
|
# _meta_ is ignored.
|
|
869
946
|
def guess(text, meta)
|
|
870
947
|
@regexps.each do |re|
|
|
871
|
-
return
|
|
948
|
+
return dbclass if re =~ text
|
|
872
949
|
end
|
|
873
950
|
nil
|
|
874
951
|
end
|
|
@@ -880,10 +957,19 @@ module Bio
|
|
|
880
957
|
def initialize(*dbclasses, &proc)
|
|
881
958
|
super()
|
|
882
959
|
@proc = proc
|
|
883
|
-
@dbclasses =
|
|
960
|
+
@dbclasses = nil
|
|
961
|
+
@dbclasses_lazy = dbclasses
|
|
884
962
|
@name = dbclasses.collect { |x| x.to_s }.join('|')
|
|
885
963
|
end
|
|
886
964
|
|
|
965
|
+
# database classes (lazy evaluation)
|
|
966
|
+
def dbclasses
|
|
967
|
+
unless @dbclasses
|
|
968
|
+
@dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
|
|
969
|
+
end
|
|
970
|
+
@dbclasses
|
|
971
|
+
end
|
|
972
|
+
|
|
887
973
|
# If given text (and/or meta information) is known, returns
|
|
888
974
|
# the database class.
|
|
889
975
|
# Otherwise, returns nil or false.
|
|
@@ -1039,22 +1125,23 @@ module Bio
|
|
|
1039
1125
|
# make a default of default autodetect object
|
|
1040
1126
|
def self.make_default
|
|
1041
1127
|
a = self[
|
|
1042
|
-
genbank = RuleRegexp[ Bio::GenBank,
|
|
1128
|
+
genbank = RuleRegexp[ 'Bio::GenBank',
|
|
1043
1129
|
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
|
|
1044
|
-
genpept = RuleRegexp[ Bio::GenPept,
|
|
1130
|
+
genpept = RuleRegexp[ 'Bio::GenPept',
|
|
1045
1131
|
/^LOCUS .+ aa .+/ ],
|
|
1046
|
-
medline = RuleRegexp[ Bio::MEDLINE,
|
|
1132
|
+
medline = RuleRegexp[ 'Bio::MEDLINE',
|
|
1047
1133
|
/^UI \- [0-9]+$/ ],
|
|
1048
|
-
embl = RuleRegexp[ Bio::EMBL,
|
|
1134
|
+
embl = RuleRegexp[ 'Bio::EMBL',
|
|
1049
1135
|
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
|
|
1050
|
-
sptr =
|
|
1051
|
-
/^ID .+\; *PRT
|
|
1052
|
-
|
|
1136
|
+
sptr = RuleRegexp2[ 'Bio::SPTR',
|
|
1137
|
+
/^ID .+\; *PRT\;/,
|
|
1138
|
+
/^ID [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./ ],
|
|
1139
|
+
prosite = RuleRegexp[ 'Bio::PROSITE',
|
|
1053
1140
|
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
|
|
1054
|
-
transfac = RuleRegexp[ Bio::TRANSFAC,
|
|
1141
|
+
transfac = RuleRegexp[ 'Bio::TRANSFAC',
|
|
1055
1142
|
/^AC [-A-Za-z0-9_\.]+$/ ],
|
|
1056
1143
|
|
|
1057
|
-
aaindex = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
|
|
1144
|
+
aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
|
|
1058
1145
|
if /^H [-A-Z0-9_\.]+$/ =~ text then
|
|
1059
1146
|
if text =~ /^M [rc]/ then
|
|
1060
1147
|
Bio::AAindex2
|
|
@@ -1068,33 +1155,35 @@ module Bio
|
|
|
1068
1155
|
end
|
|
1069
1156
|
end,
|
|
1070
1157
|
|
|
1071
|
-
litdb = RuleRegexp[ Bio::LITDB,
|
|
1158
|
+
litdb = RuleRegexp[ 'Bio::LITDB',
|
|
1072
1159
|
/^CODE [0-9]+$/ ],
|
|
1073
|
-
brite = RuleRegexp[ Bio::KEGG::BRITE,
|
|
1160
|
+
brite = RuleRegexp[ 'Bio::KEGG::BRITE',
|
|
1074
1161
|
/^Entry [A-Z0-9]+/ ],
|
|
1075
|
-
|
|
1162
|
+
orthology = RuleRegexp[ 'Bio::KEGG::ORTHOLOGY',
|
|
1076
1163
|
/^ENTRY .+ KO\s*/ ],
|
|
1077
|
-
|
|
1164
|
+
drug = RuleRegexp[ 'Bio::KEGG::DRUG',
|
|
1165
|
+
/^ENTRY .+ Drug\s*/ ],
|
|
1166
|
+
glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
|
|
1078
1167
|
/^ENTRY .+ Glycan\s*/ ],
|
|
1079
|
-
enzyme = RuleRegexp2[ Bio::KEGG::ENZYME,
|
|
1168
|
+
enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
|
|
1080
1169
|
/^ENTRY EC [0-9\.]+$/,
|
|
1081
1170
|
/^ENTRY .+ Enzyme\s*/
|
|
1082
1171
|
],
|
|
1083
|
-
compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
|
|
1172
|
+
compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
|
|
1084
1173
|
/^ENTRY C[A-Za-z0-9\._]+$/,
|
|
1085
1174
|
/^ENTRY .+ Compound\s*/
|
|
1086
1175
|
],
|
|
1087
|
-
reaction = RuleRegexp2[ Bio::KEGG::REACTION,
|
|
1176
|
+
reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
|
|
1088
1177
|
/^ENTRY R[A-Za-z0-9\._]+$/,
|
|
1089
1178
|
/^ENTRY .+ Reaction\s*/
|
|
1090
1179
|
],
|
|
1091
|
-
genes = RuleRegexp[ Bio::KEGG::GENES,
|
|
1092
|
-
/^ENTRY .+ (CDS|gene|.*RNA) / ],
|
|
1093
|
-
genome = RuleRegexp[ Bio::KEGG::GENOME,
|
|
1180
|
+
genes = RuleRegexp[ 'Bio::KEGG::GENES',
|
|
1181
|
+
/^ENTRY .+ (CDS|gene|.*RNA|Contig) / ],
|
|
1182
|
+
genome = RuleRegexp[ 'Bio::KEGG::GENOME',
|
|
1094
1183
|
/^ENTRY [a-z]+$/ ],
|
|
1095
1184
|
|
|
1096
|
-
fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
|
|
1097
|
-
Bio::FANTOM::MaXML::Sequence) do |text|
|
|
1185
|
+
fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
|
|
1186
|
+
'Bio::FANTOM::MaXML::Sequence') do |text|
|
|
1098
1187
|
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
|
|
1099
1188
|
case $1
|
|
1100
1189
|
when 'clusters'
|
|
@@ -1109,37 +1198,44 @@ module Bio
|
|
|
1109
1198
|
end
|
|
1110
1199
|
end,
|
|
1111
1200
|
|
|
1112
|
-
pdb = RuleRegexp[ Bio::PDB,
|
|
1201
|
+
pdb = RuleRegexp[ 'Bio::PDB',
|
|
1113
1202
|
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
|
|
1114
|
-
het = RuleRegexp[ Bio::PDB::ChemicalComponent,
|
|
1203
|
+
het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
|
|
1115
1204
|
/^RESIDUE +.+ +\d+\s*$/ ],
|
|
1116
1205
|
|
|
1117
|
-
clustal =
|
|
1118
|
-
/^CLUSTAL .*\(.*\).*sequence +alignment
|
|
1206
|
+
clustal = RuleRegexp2[ 'Bio::ClustalW::Report',
|
|
1207
|
+
/^CLUSTAL .*\(.*\).*sequence +alignment/,
|
|
1208
|
+
/^CLUSTAL FORMAT for T-COFFEE/ ],
|
|
1209
|
+
|
|
1210
|
+
gcg_msf = RuleRegexp[ 'Bio::GCG::Msf',
|
|
1211
|
+
/^!!(N|A)A_MULTIPLE_ALIGNMENT .+/ ],
|
|
1212
|
+
|
|
1213
|
+
gcg_seq = RuleRegexp[ 'Bio::GCG::Seq',
|
|
1214
|
+
/^!!(N|A)A_SEQUENCE .+/ ],
|
|
1119
1215
|
|
|
1120
|
-
blastxml = RuleRegexp[ Bio::Blast::Report,
|
|
1216
|
+
blastxml = RuleRegexp[ 'Bio::Blast::Report',
|
|
1121
1217
|
/\<\!DOCTYPE BlastOutput PUBLIC / ],
|
|
1122
|
-
wublast = RuleRegexp[ Bio::Blast::WU::Report,
|
|
1218
|
+
wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
|
|
1123
1219
|
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
|
|
1124
|
-
wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
|
|
1220
|
+
wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
|
|
1125
1221
|
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
|
|
1126
|
-
blast = RuleRegexp[ Bio::Blast::Default::Report,
|
|
1222
|
+
blast = RuleRegexp[ 'Bio::Blast::Default::Report',
|
|
1127
1223
|
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
|
|
1128
|
-
tblast = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
|
|
1224
|
+
tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
|
|
1129
1225
|
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
|
|
1130
1226
|
|
|
1131
|
-
blat = RuleRegexp[ Bio::Blat::Report,
|
|
1132
|
-
/^psLayout version \d
|
|
1133
|
-
spidey = RuleRegexp[ Bio::Spidey::Report,
|
|
1227
|
+
blat = RuleRegexp[ 'Bio::Blat::Report',
|
|
1228
|
+
/^psLayout version \d+/ ],
|
|
1229
|
+
spidey = RuleRegexp[ 'Bio::Spidey::Report',
|
|
1134
1230
|
/^\-\-SPIDEY version .+\-\-$/ ],
|
|
1135
|
-
hmmer = RuleRegexp[ Bio::HMMER::Report,
|
|
1231
|
+
hmmer = RuleRegexp[ 'Bio::HMMER::Report',
|
|
1136
1232
|
/^HMMER +\d+\./ ],
|
|
1137
|
-
sim4 = RuleRegexp[ Bio::Sim4::Report,
|
|
1233
|
+
sim4 = RuleRegexp[ 'Bio::Sim4::Report',
|
|
1138
1234
|
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
|
|
1139
1235
|
|
|
1140
|
-
fastaformat = RuleProc.new(Bio::FastaFormat,
|
|
1141
|
-
Bio::NBRF,
|
|
1142
|
-
Bio::FastaNumericFormat) do |text|
|
|
1236
|
+
fastaformat = RuleProc.new('Bio::FastaFormat',
|
|
1237
|
+
'Bio::NBRF',
|
|
1238
|
+
'Bio::FastaNumericFormat') do |text|
|
|
1143
1239
|
if /^>.+$/ =~ text
|
|
1144
1240
|
case text
|
|
1145
1241
|
when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
|
|
@@ -1167,8 +1263,9 @@ module Bio
|
|
|
1167
1263
|
# KEGG
|
|
1168
1264
|
#aaindex.is_prior_to litdb
|
|
1169
1265
|
#litdb.is_prior_to brite
|
|
1170
|
-
brite.is_prior_to
|
|
1171
|
-
|
|
1266
|
+
brite.is_prior_to orthology
|
|
1267
|
+
orthology.is_prior_to drug
|
|
1268
|
+
drug.is_prior_to glycan
|
|
1172
1269
|
glycan.is_prior_to enzyme
|
|
1173
1270
|
enzyme.is_prior_to compound
|
|
1174
1271
|
compound.is_prior_to reaction
|