bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/lib/bio/db/kegg/glycan.rb
CHANGED
|
@@ -1,194 +1,151 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/db/kegg/glycan.rb - KEGG GLYCAN database class
|
|
2
|
+
# = bio/db/kegg/glycan.rb - KEGG GLYCAN database class
|
|
3
3
|
#
|
|
4
|
-
#
|
|
4
|
+
# Copyright:: Copyright (C) 2004 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
+
# License:: The Ruby License
|
|
5
6
|
#
|
|
6
|
-
#
|
|
7
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
-
# License as published by the Free Software Foundation; either
|
|
9
|
-
# version 2 of the License, or (at your option) any later version.
|
|
10
|
-
#
|
|
11
|
-
# This library is distributed in the hope that it will be useful,
|
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
-
# Lesser General Public License for more details.
|
|
15
|
-
#
|
|
16
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
-
# License along with this library; if not, write to the Free Software
|
|
18
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
-
#
|
|
20
|
-
# $Id: glycan.rb,v 1.2 2005/09/08 01:22:11 k Exp $
|
|
7
|
+
# $Id: glycan.rb,v 1.6 2007/06/28 11:27:24 k Exp $
|
|
21
8
|
#
|
|
22
9
|
|
|
23
10
|
require 'bio/db'
|
|
24
11
|
|
|
25
12
|
module Bio
|
|
13
|
+
class KEGG
|
|
26
14
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class GLYCAN < KEGGDB
|
|
15
|
+
class GLYCAN < KEGGDB
|
|
30
16
|
|
|
31
|
-
|
|
32
|
-
|
|
17
|
+
DELIMITER = RS = "\n///\n"
|
|
18
|
+
TAGSIZE = 12
|
|
33
19
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
20
|
+
def initialize(entry)
|
|
21
|
+
super(entry, TAGSIZE)
|
|
22
|
+
end
|
|
37
23
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
end
|
|
43
|
-
@data['ENTRY']
|
|
44
|
-
end
|
|
24
|
+
# ENTRY
|
|
25
|
+
def entry_id
|
|
26
|
+
field_fetch('ENTRY')[/\S+/]
|
|
27
|
+
end
|
|
45
28
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
29
|
+
# NAME
|
|
30
|
+
def name
|
|
31
|
+
field_fetch('NAME')
|
|
32
|
+
end
|
|
50
33
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
end
|
|
58
|
-
@data['COMPOSITION'] = hash
|
|
59
|
-
end
|
|
60
|
-
@data['COMPOSITION']
|
|
34
|
+
# COMPOSITION
|
|
35
|
+
def composition
|
|
36
|
+
unless @data['COMPOSITION']
|
|
37
|
+
hash = Hash.new(0)
|
|
38
|
+
fetch('COMPOSITION').scan(/\((\S+)\)(\d+)/).each do |key, val|
|
|
39
|
+
hash[key] = val.to_i
|
|
61
40
|
end
|
|
41
|
+
@data['COMPOSITION'] = hash
|
|
42
|
+
end
|
|
43
|
+
@data['COMPOSITION']
|
|
44
|
+
end
|
|
62
45
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
end
|
|
70
|
-
@data['MASS'] = hash
|
|
71
|
-
end
|
|
72
|
-
@data['MASS']
|
|
46
|
+
# MASS
|
|
47
|
+
def mass
|
|
48
|
+
unless @data['MASS']
|
|
49
|
+
hash = Hash.new
|
|
50
|
+
fetch('MASS').scan(/(\S+)\s+\((\S+)\)/).each do |val, key|
|
|
51
|
+
hash[key] = val.to_f
|
|
73
52
|
end
|
|
53
|
+
@data['MASS'] = hash
|
|
54
|
+
end
|
|
55
|
+
@data['MASS']
|
|
56
|
+
end
|
|
74
57
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
58
|
+
# CLASS
|
|
59
|
+
def keggclass
|
|
60
|
+
field_fetch('CLASS')
|
|
61
|
+
end
|
|
79
62
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
ary << line
|
|
88
|
-
else
|
|
89
|
-
ary.last << " #{line.strip}"
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
@data['BINDING'] = ary
|
|
93
|
-
end
|
|
94
|
-
@data['BINDING']
|
|
95
|
-
end
|
|
63
|
+
# COMPOUND
|
|
64
|
+
def compounds
|
|
65
|
+
unless @data['COMPOUND']
|
|
66
|
+
@data['COMPOUND'] = fetch('COMPOUND').split(/\s+/)
|
|
67
|
+
end
|
|
68
|
+
@data['COMPOUND']
|
|
69
|
+
end
|
|
96
70
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
71
|
+
# REACTION
|
|
72
|
+
def reactions
|
|
73
|
+
unless @data['REACTION']
|
|
74
|
+
@data['REACTION'] = fetch('REACTION').split(/\s+/)
|
|
75
|
+
end
|
|
76
|
+
@data['REACTION']
|
|
77
|
+
end
|
|
104
78
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
end
|
|
110
|
-
@data['REACTION']
|
|
111
|
-
end
|
|
79
|
+
# PATHWAY
|
|
80
|
+
def pathways
|
|
81
|
+
lines_fetch('PATHWAY')
|
|
82
|
+
end
|
|
112
83
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
84
|
+
# ENZYME
|
|
85
|
+
def enzymes
|
|
86
|
+
unless @data['ENZYME']
|
|
87
|
+
field = fetch('ENZYME')
|
|
88
|
+
if /\(/.match(field) # old version
|
|
89
|
+
@data['ENZYME'] = field.scan(/\S+ \(\S+\)/)
|
|
90
|
+
else
|
|
91
|
+
@data['ENZYME'] = field.scan(/\S+/)
|
|
116
92
|
end
|
|
93
|
+
end
|
|
94
|
+
@data['ENZYME']
|
|
95
|
+
end
|
|
117
96
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
@data['ENZYME'] = field.scan(/\S+/)
|
|
126
|
-
end
|
|
127
|
-
end
|
|
128
|
-
@data['ENZYME']
|
|
129
|
-
end
|
|
97
|
+
# ORTHOLOG
|
|
98
|
+
def orthologs
|
|
99
|
+
unless @data['ORTHOLOG']
|
|
100
|
+
@data['ORTHOLOG'] = lines_fetch('ORTHOLOG')
|
|
101
|
+
end
|
|
102
|
+
@data['ORTHOLOG']
|
|
103
|
+
end
|
|
130
104
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
lines = lines_fetch('ORTHOLOG')
|
|
136
|
-
lines.each do |line|
|
|
137
|
-
if /^\S/.match(line)
|
|
138
|
-
ary << line
|
|
139
|
-
else
|
|
140
|
-
ary.last << " #{line.strip}"
|
|
141
|
-
end
|
|
142
|
-
end
|
|
143
|
-
@data['ORTHOLOG'] = ary
|
|
144
|
-
end
|
|
145
|
-
@data['ORTHOLOG']
|
|
146
|
-
end
|
|
105
|
+
# COMMENT
|
|
106
|
+
def comment
|
|
107
|
+
field_fetch('COMMENT')
|
|
108
|
+
end
|
|
147
109
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
lines = lines_fetch('REFERENCE')
|
|
153
|
-
lines.each do |line|
|
|
154
|
-
if /^\d+\s+\[PMID/.match(line)
|
|
155
|
-
ary << line
|
|
156
|
-
else
|
|
157
|
-
ary.last << " #{line.strip}"
|
|
158
|
-
end
|
|
159
|
-
end
|
|
160
|
-
@data['REFERENCE'] = ary
|
|
161
|
-
end
|
|
162
|
-
@data['REFERENCE']
|
|
163
|
-
end
|
|
110
|
+
# REMARK
|
|
111
|
+
def remark
|
|
112
|
+
field_fetch('REMARK')
|
|
113
|
+
end
|
|
164
114
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
end
|
|
176
|
-
end
|
|
177
|
-
@data['DBLINKS'] = ary
|
|
115
|
+
# REFERENCE
|
|
116
|
+
def references
|
|
117
|
+
unless @data['REFERENCE']
|
|
118
|
+
ary = Array.new
|
|
119
|
+
lines = lines_fetch('REFERENCE')
|
|
120
|
+
lines.each do |line|
|
|
121
|
+
if /^\d+\s+\[PMID/.match(line)
|
|
122
|
+
ary << line
|
|
123
|
+
else
|
|
124
|
+
ary.last << " #{line.strip}"
|
|
178
125
|
end
|
|
179
|
-
@data['DBLINKS']
|
|
180
|
-
end
|
|
181
|
-
|
|
182
|
-
# NODE, EDGE
|
|
183
|
-
def kcf
|
|
184
|
-
return "#{get('NODE')}#{get('EDGE')}"
|
|
185
126
|
end
|
|
127
|
+
@data['REFERENCE'] = ary
|
|
128
|
+
end
|
|
129
|
+
@data['REFERENCE']
|
|
130
|
+
end
|
|
186
131
|
|
|
132
|
+
# DBLINKS
|
|
133
|
+
def dblinks
|
|
134
|
+
unless @data['DBLINKS']
|
|
135
|
+
@data['DBLINKS'] = lines_fetch('DBLINKS')
|
|
187
136
|
end
|
|
137
|
+
@data['DBLINKS']
|
|
138
|
+
end
|
|
188
139
|
|
|
140
|
+
# NODE, EDGE
|
|
141
|
+
def kcf
|
|
142
|
+
return "#{get('NODE')}#{get('EDGE')}"
|
|
189
143
|
end
|
|
190
144
|
|
|
191
|
-
end
|
|
145
|
+
end # GLYCAN
|
|
146
|
+
|
|
147
|
+
end # KEGG
|
|
148
|
+
end # Bio
|
|
192
149
|
|
|
193
150
|
|
|
194
151
|
if __FILE__ == $0
|
data/lib/bio/db/kegg/keggtab.rb
CHANGED
|
@@ -1,215 +1,283 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/db/kegg/keggtab.rb - KEGG keggtab class
|
|
2
|
+
# = bio/db/kegg/keggtab.rb - KEGG keggtab class
|
|
3
3
|
#
|
|
4
|
-
#
|
|
5
|
-
#
|
|
4
|
+
# Copyright:: Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
|
+
# Copyright (C) 2003, 2006 Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: The Ruby License
|
|
6
7
|
#
|
|
7
|
-
#
|
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
9
|
-
# License as published by the Free Software Foundation; either
|
|
10
|
-
# version 2 of the License, or (at your option) any later version.
|
|
11
|
-
#
|
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
-
# Lesser General Public License for more details.
|
|
16
|
-
#
|
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
18
|
-
# License along with this library; if not, write to the Free Software
|
|
19
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
-
#
|
|
21
|
-
# $Id: keggtab.rb,v 1.7 2005/09/26 13:00:07 k Exp $
|
|
8
|
+
# $Id: keggtab.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $
|
|
22
9
|
#
|
|
23
10
|
|
|
24
11
|
module Bio
|
|
25
|
-
|
|
12
|
+
class KEGG
|
|
26
13
|
|
|
27
|
-
|
|
14
|
+
# == Description
|
|
15
|
+
#
|
|
16
|
+
# Parse 'keggtab' KEGG database definition file which also includes
|
|
17
|
+
# Taxonomic category of the KEGG organisms.
|
|
18
|
+
#
|
|
19
|
+
# == References
|
|
20
|
+
#
|
|
21
|
+
# The 'keggtab' file is included in
|
|
22
|
+
#
|
|
23
|
+
# * ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.tar.gz
|
|
24
|
+
# * ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.weekly.last.tar.Z
|
|
25
|
+
#
|
|
26
|
+
# == Format
|
|
27
|
+
#
|
|
28
|
+
# File format is something like
|
|
29
|
+
#
|
|
30
|
+
# # KEGGTAB
|
|
31
|
+
# #
|
|
32
|
+
# # name type directory abbreviation
|
|
33
|
+
# #
|
|
34
|
+
# enzyme enzyme $BIOROOT/db/ideas/ligand ec
|
|
35
|
+
# ec alias enzyme
|
|
36
|
+
# (snip)
|
|
37
|
+
# # Human
|
|
38
|
+
# h.sapiens genes $BIOROOT/db/kegg/genes hsa
|
|
39
|
+
# H.sapiens alias h.sapiens
|
|
40
|
+
# hsa alias h.sapiens
|
|
41
|
+
# (snip)
|
|
42
|
+
# #
|
|
43
|
+
# # Taxonomy
|
|
44
|
+
# #
|
|
45
|
+
# (snip)
|
|
46
|
+
# animals alias hsa+mmu+rno+dre+dme+cel
|
|
47
|
+
# eukaryotes alias animals+plants+protists+fungi
|
|
48
|
+
# genes alias eubacteria+archaea+eukaryotes
|
|
49
|
+
#
|
|
50
|
+
class Keggtab
|
|
51
|
+
|
|
52
|
+
# Takes the path to the keggtab file and, optionally, the bioroot top directory.
|
|
53
|
+
# The environment variable BIOROOT, when set, overrides the bioroot argument.
|
|
54
|
+
def initialize(file_path, bioroot = nil)
|
|
55
|
+
@bioroot = ENV['BIOROOT'] || bioroot
|
|
56
|
+
@db_names = Hash.new
|
|
57
|
+
@database = Hash.new
|
|
58
|
+
@taxonomy = Hash.new
|
|
59
|
+
File.open(file_path) do |f|
|
|
60
|
+
parse_keggtab(f.read)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
28
63
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
@database = Hash.new
|
|
33
|
-
@taxonomy = Hash.new
|
|
34
|
-
parse_keggtab(File.open(file_path).read)
|
|
35
|
-
end
|
|
36
|
-
attr_reader :bioroot, :db_names
|
|
64
|
+
# Returns a string of the BIOROOT path prefix.
|
|
65
|
+
attr_reader :bioroot
|
|
66
|
+
attr_reader :db_names
|
|
37
67
|
|
|
38
68
|
|
|
39
|
-
|
|
69
|
+
# Bio::KEGG::Keggtab::DB
|
|
40
70
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
71
|
+
class DB
|
|
72
|
+
# Create a container object for database definitions.
|
|
73
|
+
def initialize(db_name, db_type, db_path, db_abbrev)
|
|
74
|
+
@name = db_name
|
|
75
|
+
@type = db_type
|
|
76
|
+
@path = db_path
|
|
77
|
+
@abbrev = db_abbrev
|
|
78
|
+
@aliases = Array.new
|
|
79
|
+
end
|
|
80
|
+
# Database name. (e.g. 'enzyme', 'h.sapiens', 'e.coli', ...)
|
|
81
|
+
attr_reader :name
|
|
82
|
+
# Definition type. (e.g. 'enzyme', 'alias', 'genes', ...)
|
|
83
|
+
attr_reader :type
|
|
84
|
+
# Database flat file path. (e.g. '$BIOROOT/db/kegg/genes', ...)
|
|
85
|
+
attr_reader :path
|
|
86
|
+
# Short name for the database. (e.g. 'ec', 'hsa', 'eco', ...)
|
|
87
|
+
# korg and keggorg are aliases for the abbrev method.
|
|
88
|
+
attr_reader :abbrev
|
|
89
|
+
# Array containing all alias names for the database.
|
|
90
|
+
# (e.g. ["H.sapiens", "hsa"], ["E.coli", "eco"], ...)
|
|
91
|
+
attr_reader :aliases
|
|
92
|
+
|
|
93
|
+
alias korg abbrev
|
|
94
|
+
alias keggorg abbrev
|
|
95
|
+
end
|
|
53
96
|
|
|
54
97
|
|
|
55
|
-
|
|
98
|
+
# DB section
|
|
56
99
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
100
|
+
# Returns a hash containing DB definition section of the keggtab file.
|
|
101
|
+
# If database name is given as an argument, returns a Keggtab::DB object.
|
|
102
|
+
def database(db_abbrev = nil)
|
|
103
|
+
if db_abbrev
|
|
104
|
+
@database[db_abbrev]
|
|
105
|
+
else
|
|
106
|
+
@database
|
|
107
|
+
end
|
|
108
|
+
end
|
|
64
109
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
110
|
+
# Returns an Array containing all alias names for the database.
|
|
111
|
+
# (e.g. 'hsa' -> ["H.sapiens", "hsa"], 'hpj' -> ["H.pylori_J99", "hpj"])
|
|
112
|
+
def aliases(db_abbrev)
|
|
113
|
+
if @database[db_abbrev]
|
|
114
|
+
@database[db_abbrev].aliases
|
|
115
|
+
end
|
|
116
|
+
end
|
|
70
117
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
118
|
+
# Returns a canonical database name for the abbreviation.
|
|
119
|
+
# (e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapiens', ...)
|
|
120
|
+
def name(db_abbrev)
|
|
121
|
+
if @database[db_abbrev]
|
|
122
|
+
@database[db_abbrev].name
|
|
123
|
+
end
|
|
124
|
+
end
|
|
76
125
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
126
|
+
# Returns an absolute path for the flat file database.
|
|
127
|
+
# (e.g. '/bio/db/kegg/genes', ...)
|
|
128
|
+
def path(db_abbrev)
|
|
129
|
+
if @database[db_abbrev]
|
|
130
|
+
file = @database[db_abbrev].name
|
|
131
|
+
if @bioroot
|
|
132
|
+
"#{@database[db_abbrev].path.sub(/\$BIOROOT/,@bioroot)}/#{file}"
|
|
133
|
+
else
|
|
134
|
+
"#{@database[db_abbrev].path}/#{file}"
|
|
86
135
|
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
87
138
|
|
|
88
139
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def db_path(db_name)
|
|
96
|
-
if @bioroot
|
|
97
|
-
"#{@db_names[db_name].path.sub(/\$BIOROOT/,@bioroot)}/#{db_name}"
|
|
98
|
-
else
|
|
99
|
-
"#{@db_names[db_name].path}/#{db_name}"
|
|
100
|
-
end
|
|
101
|
-
end
|
|
140
|
+
# deprecated
|
|
141
|
+
def alias_list(db_name)
|
|
142
|
+
if @db_names[db_name]
|
|
143
|
+
@db_names[db_name].aliases
|
|
144
|
+
end
|
|
145
|
+
end
|
|
102
146
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
147
|
+
# deprecated
|
|
148
|
+
def db_path(db_name)
|
|
149
|
+
if @bioroot
|
|
150
|
+
"#{@db_names[db_name].path.sub(/\$BIOROOT/,@bioroot)}/#{db_name}"
|
|
151
|
+
else
|
|
152
|
+
"#{@db_names[db_name].path}/#{db_name}"
|
|
153
|
+
end
|
|
154
|
+
end
|
|
109
155
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
156
|
+
# deprecated
|
|
157
|
+
def db_by_abbrev(db_abbrev)
|
|
158
|
+
@db_names.each do |k, db|
|
|
159
|
+
return db if db.abbrev == db_abbrev
|
|
160
|
+
end
|
|
161
|
+
return nil
|
|
162
|
+
end
|
|
113
163
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
164
|
+
# deprecated
|
|
165
|
+
def name_by_abbrev(db_abbrev)
|
|
166
|
+
db_by_abbrev(db_abbrev).name
|
|
167
|
+
end
|
|
118
168
|
|
|
169
|
+
# deprecated
|
|
170
|
+
def db_path_by_abbrev(db_abbrev)
|
|
171
|
+
db_name = name_by_abbrev(db_abbrev)
|
|
172
|
+
db_path(db_name)
|
|
173
|
+
end
|
|
119
174
|
|
|
120
|
-
# Taxonomy section
|
|
121
175
|
|
|
122
|
-
|
|
123
|
-
if node
|
|
124
|
-
@taxonomy[node]
|
|
125
|
-
else
|
|
126
|
-
@taxonomy
|
|
127
|
-
end
|
|
128
|
-
end
|
|
176
|
+
# Taxonomy section
|
|
129
177
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
178
|
+
# Returns a hash containing Taxonomy section of the keggtab file.
|
|
179
|
+
# If argument is given, returns a list of all child nodes belonging
|
|
180
|
+
# to the label node.
|
|
181
|
+
# (e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"], ...)
|
|
182
|
+
def taxonomy(node = nil)
|
|
183
|
+
if node
|
|
184
|
+
@taxonomy[node]
|
|
185
|
+
else
|
|
186
|
+
@taxonomy
|
|
187
|
+
end
|
|
188
|
+
end
|
|
133
189
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
190
|
+
# List of all node labels from Taxonomy section.
|
|
191
|
+
# (e.g. ["actinobacteria", "animals", "archaea", "bacillales", ...])
|
|
192
|
+
def taxa_list
|
|
193
|
+
@taxonomy.keys.sort
|
|
194
|
+
end
|
|
137
195
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
else
|
|
142
|
-
if @taxonomy[node]
|
|
143
|
-
tmp = Array.new
|
|
144
|
-
@taxonomy[node].each do |x|
|
|
145
|
-
tmp.push(taxo2korgs(x))
|
|
146
|
-
end
|
|
147
|
-
return tmp
|
|
148
|
-
else
|
|
149
|
-
return nil
|
|
150
|
-
end
|
|
151
|
-
end
|
|
152
|
-
end
|
|
153
|
-
alias taxo2keggorgs taxo2korgs
|
|
154
|
-
alias taxon2korgs taxo2korgs
|
|
155
|
-
alias taxon2keggorgs taxo2korgs
|
|
196
|
+
def child_nodes(node = 'genes')
|
|
197
|
+
return @taxonomy[node]
|
|
198
|
+
end
|
|
156
199
|
|
|
157
|
-
|
|
200
|
+
# Returns an array of organism names included in the specified taxon
|
|
201
|
+
# label. (e.g. 'proteobeta' -> ["nme", "nma", "rso"])
|
|
202
|
+
# This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.
|
|
203
|
+
def taxo2korgs(node = 'genes')
|
|
204
|
+
if node.length == 3
|
|
205
|
+
return node
|
|
206
|
+
else
|
|
207
|
+
if @taxonomy[node]
|
|
158
208
|
tmp = Array.new
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
tmp.push(k)
|
|
163
|
-
traverse.call(k)
|
|
164
|
-
break
|
|
165
|
-
end
|
|
166
|
-
end
|
|
167
|
-
}
|
|
168
|
-
traverse.call(keggorg)
|
|
209
|
+
@taxonomy[node].each do |x|
|
|
210
|
+
tmp.push(taxo2korgs(x))
|
|
211
|
+
end
|
|
169
212
|
return tmp
|
|
213
|
+
else
|
|
214
|
+
return nil
|
|
170
215
|
end
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
db_type = $2
|
|
189
|
-
db_path = $3
|
|
190
|
-
db_abbrev = $4
|
|
191
|
-
@db_names[db_name] =
|
|
192
|
-
Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
|
|
193
|
-
when /(^\w\S+)\s+alias\s+(\w.+\w)/ # alias
|
|
194
|
-
db_alias = $1
|
|
195
|
-
db_name = $2#.downcase
|
|
196
|
-
if in_taxonomy
|
|
197
|
-
@taxonomy.update(db_alias => db_name.split('+'))
|
|
198
|
-
elsif @db_names[db_name]
|
|
199
|
-
@db_names[db_name].aliases.push(db_alias)
|
|
200
|
-
end
|
|
201
|
-
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
alias taxo2keggorgs taxo2korgs
|
|
219
|
+
alias taxon2korgs taxo2korgs
|
|
220
|
+
alias taxon2keggorgs taxo2korgs
|
|
221
|
+
|
|
222
|
+
# Returns an array of taxonomy names the organism belongs to.
|
|
223
|
+
# (e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes'])
|
|
224
|
+
# This method is aliased as keggorg2taxo, korg2taxonomy, and keggorg2taxonomy.
|
|
225
|
+
def korg2taxo(keggorg)
|
|
226
|
+
tmp = Array.new
|
|
227
|
+
traverse = Proc.new {|keggorg|
|
|
228
|
+
@taxonomy.each do |k,v|
|
|
229
|
+
if v.include?(keggorg)
|
|
230
|
+
tmp.push(k)
|
|
231
|
+
traverse.call(k)
|
|
232
|
+
break
|
|
202
233
|
end
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
234
|
+
end
|
|
235
|
+
}
|
|
236
|
+
traverse.call(keggorg)
|
|
237
|
+
return tmp
|
|
238
|
+
end
|
|
239
|
+
alias keggorg2taxo korg2taxo
|
|
240
|
+
alias korg2taxonomy korg2taxo
|
|
241
|
+
alias keggorg2taxonomy korg2taxo
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
private
|
|
245
|
+
|
|
246
|
+
def parse_keggtab(keggtab)
|
|
247
|
+
in_taxonomy = nil
|
|
248
|
+
keggtab.each do |line|
|
|
249
|
+
case line
|
|
250
|
+
when /^# Taxonomy/ # beginning of the taxonomy section
|
|
251
|
+
in_taxonomy = true
|
|
252
|
+
when /^#|^$/
|
|
253
|
+
next
|
|
254
|
+
when /(^\w\S+)\s+(\w+)\s+(\$\S+)\s+(\w+)/ # db
|
|
255
|
+
db_name = $1
|
|
256
|
+
db_type = $2
|
|
257
|
+
db_path = $3
|
|
258
|
+
db_abbrev = $4
|
|
259
|
+
@db_names[db_name] =
|
|
260
|
+
Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
|
|
261
|
+
when /(^\w\S+)\s+alias\s+(\w.+\w)/ # alias
|
|
262
|
+
db_alias = $1
|
|
263
|
+
db_name = $2#.downcase
|
|
264
|
+
if in_taxonomy
|
|
265
|
+
@taxonomy.update(db_alias => db_name.split('+'))
|
|
266
|
+
elsif @db_names[db_name]
|
|
267
|
+
@db_names[db_name].aliases.push(db_alias)
|
|
206
268
|
end
|
|
207
269
|
end
|
|
208
|
-
|
|
209
270
|
end
|
|
210
|
-
|
|
271
|
+
# convert keys-by-names hash @db_names to keys-by-abbrev hash @database
|
|
272
|
+
@db_names.each do |k,v|
|
|
273
|
+
@database[v.abbrev] = v
|
|
274
|
+
end
|
|
211
275
|
end
|
|
212
|
-
|
|
276
|
+
|
|
277
|
+
end # Keggtab
|
|
278
|
+
|
|
279
|
+
end # KEGG
|
|
280
|
+
end # Bio
|
|
213
281
|
|
|
214
282
|
|
|
215
283
|
|
|
@@ -287,132 +355,3 @@ if __FILE__ == $0
|
|
|
287
355
|
end
|
|
288
356
|
|
|
289
357
|
|
|
290
|
-
|
|
291
|
-
=begin
|
|
292
|
-
|
|
293
|
-
The keggtab file is included in
|
|
294
|
-
|
|
295
|
-
* ((URL:ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.weekly.last.tar.Z>))
|
|
296
|
-
|
|
297
|
-
File format is something like
|
|
298
|
-
|
|
299
|
-
# KEGGTAB
|
|
300
|
-
#
|
|
301
|
-
# name type directory abbreviation
|
|
302
|
-
#
|
|
303
|
-
enzyme enzyme $BIOROOT/db/ideas/ligand ec
|
|
304
|
-
ec alias enzyme
|
|
305
|
-
(snip)
|
|
306
|
-
# Human
|
|
307
|
-
h.sapiens genes $BIOROOT/db/kegg/genes hsa
|
|
308
|
-
H.sapiens alias h.sapiens
|
|
309
|
-
hsa alias h.sapiens
|
|
310
|
-
(snip)
|
|
311
|
-
#
|
|
312
|
-
# Taxonomy
|
|
313
|
-
#
|
|
314
|
-
(snip)
|
|
315
|
-
animals alias hsa+mmu+rno+dre+dme+cel
|
|
316
|
-
eukaryotes alias animals+plants+protists+fungi
|
|
317
|
-
genes alias eubacteria+archaea+eukaryotes
|
|
318
|
-
|
|
319
|
-
= Bio::KEGG::Keggtab
|
|
320
|
-
|
|
321
|
-
--- Bio::KEGG::Keggtab.new(file_path, bioroot = nil)
|
|
322
|
-
|
|
323
|
-
Path for keggtab file and optionally set bioroot top directory.
|
|
324
|
-
Environmental variable BIOROOT overrides bioroot.
|
|
325
|
-
|
|
326
|
-
--- Bio::KEGG::Keggtab#database -> Hash
|
|
327
|
-
|
|
328
|
-
Returns a hash containing DB definition section of the keggtab file.
|
|
329
|
-
|
|
330
|
-
--- Bio::KEGG::Keggtab#database(db_abbrev) -> Keggtab::DB
|
|
331
|
-
|
|
332
|
-
Returns a Keggtab::DB object.
|
|
333
|
-
|
|
334
|
-
--- Bio::KEGG::Keggtab#taxonomy -> Hash
|
|
335
|
-
|
|
336
|
-
Returns a hash containing Taxonomy section of the keggtab file.
|
|
337
|
-
|
|
338
|
-
--- Bio::KEGG::Keggtab#taxonomy(node) -> Array
|
|
339
|
-
|
|
340
|
-
Returns a List of all child nodes belongs to the label node.
|
|
341
|
-
(e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"], ...)
|
|
342
|
-
|
|
343
|
-
--- Bio::KEGG::Keggtab#bioroot -> String
|
|
344
|
-
|
|
345
|
-
Returns a string of the BIOROOT path prefix.
|
|
346
|
-
|
|
347
|
-
--- Bio::KEGG::Keggtab#name(db_abbrev) -> String
|
|
348
|
-
|
|
349
|
-
Returns a canonical database name for the abbreviation.
|
|
350
|
-
(e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapies', ...)
|
|
351
|
-
|
|
352
|
-
--- Bio::KEGG::Keggtab#aliases(db_abbrev) -> Array
|
|
353
|
-
|
|
354
|
-
Returns an Array containing all alias names for the database.
|
|
355
|
-
(e.g. 'hsa' -> ["H.sapiens", "hsa"], 'hpj' -> ["H.pylori_J99", "hpj"])
|
|
356
|
-
|
|
357
|
-
--- Bio::KEGG::Keggtab#path(db_abbrev) -> String
|
|
358
|
-
|
|
359
|
-
Returns an absolute path for the flat file database.
|
|
360
|
-
(e.g. '/bio/db/kegg/genes', ...)
|
|
361
|
-
|
|
362
|
-
--- Bio::KEGG::Keggtab#taxa_list -> Array
|
|
363
|
-
|
|
364
|
-
List of all node labels from Taxonomy section.
|
|
365
|
-
(e.g. ["actinobacteria", "animals", "archaea", "bacillales", ...)
|
|
366
|
-
|
|
367
|
-
--- Bio::KEGG::Keggtab#taxo2korgs(taxon) -> Array
|
|
368
|
-
|
|
369
|
-
Returns an array of organism names included in the specified taxon
|
|
370
|
-
label. (e.g. 'proteobeta' -> ["nme", "nma", "rso"])
|
|
371
|
-
This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.
|
|
372
|
-
|
|
373
|
-
--- Bio::KEGG::Keggtab#korg2taxo(keggorg) -> Array
|
|
374
|
-
|
|
375
|
-
Returns an array of taxonomy names the organism belongs.
|
|
376
|
-
(e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes'])
|
|
377
|
-
This method has aliases as keggorg2taxo, korg2taxonomy, keggorg2taxonomy.
|
|
378
|
-
|
|
379
|
-
* following methods are deprecated
|
|
380
|
-
|
|
381
|
-
--- Bio::KEGG::Keggtab#db_names[db_name] -> Keggtab::DB
|
|
382
|
-
--- Bio::KEGG::Keggtab#db_by_abbrev(db_abbrev) -> Keggtab::DB
|
|
383
|
-
--- Bio::KEGG::Keggtab#alias_list(db_name) -> Array
|
|
384
|
-
--- Bio::KEGG::Keggtab#name_by_abbrev(db_abbrev) -> String
|
|
385
|
-
--- Bio::KEGG::Keggtab#db_path(db_name) -> String
|
|
386
|
-
--- Bio::KEGG::Keggtab#db_path_by_abbrev(keggorg) -> String
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
== Bio::KEGG::Keggtab::DB
|
|
390
|
-
|
|
391
|
-
--- Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
|
|
392
|
-
|
|
393
|
-
Create a container object for database definitions.
|
|
394
|
-
|
|
395
|
-
--- Bio::KEGG::Keggtab::DB#name -> String
|
|
396
|
-
|
|
397
|
-
Database name. (e.g. 'enzyme', 'h.sapies', 'e.coli', ...)
|
|
398
|
-
|
|
399
|
-
--- Bio::KEGG::Keggtab::DB#type -> String
|
|
400
|
-
|
|
401
|
-
Definition type. (e.g. 'enzyme', 'alias', 'genes', ...)
|
|
402
|
-
|
|
403
|
-
--- Bio::KEGG::Keggtab::DB#path -> String
|
|
404
|
-
|
|
405
|
-
Database flat file path. (e.g. '$BIOROOT/db/kegg/genes', ...)
|
|
406
|
-
|
|
407
|
-
--- Bio::KEGG::Keggtab::DB#abbrev -> String
|
|
408
|
-
|
|
409
|
-
Short name for the database. (e.g. 'ec', 'hsa', 'eco', ...)
|
|
410
|
-
korg and keggorg are alias for abbrev method.
|
|
411
|
-
|
|
412
|
-
--- Bio::KEGG::Keggtab::DB#aliases -> Array
|
|
413
|
-
|
|
414
|
-
Array containing all alias names for the database.
|
|
415
|
-
(e.g. ["H.sapiens", "hsa"], ["E.coli", "eco"], ...)
|
|
416
|
-
|
|
417
|
-
=end
|
|
418
|
-
|