bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/lib/bio/db/kegg/reaction.rb
CHANGED
|
@@ -1,86 +1,71 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/db/kegg/reaction.rb - KEGG REACTION database class
|
|
2
|
+
# = bio/db/kegg/reaction.rb - KEGG REACTION database class
|
|
3
3
|
#
|
|
4
|
-
#
|
|
4
|
+
# Copyright:: Copyright (C) 2004 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
+
# License:: The Ruby License
|
|
5
6
|
#
|
|
6
|
-
#
|
|
7
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
-
# License as published by the Free Software Foundation; either
|
|
9
|
-
# version 2 of the License, or (at your option) any later version.
|
|
10
|
-
#
|
|
11
|
-
# This library is distributed in the hope that it will be useful,
|
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
-
# Lesser General Public License for more details.
|
|
15
|
-
#
|
|
16
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
-
# License along with this library; if not, write to the Free Software
|
|
18
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
-
#
|
|
20
|
-
# $Id: reaction.rb,v 1.3 2005/09/08 01:22:11 k Exp $
|
|
7
|
+
# $Id: reaction.rb,v 1.6 2007/06/28 11:27:24 k Exp $
|
|
21
8
|
#
|
|
22
9
|
|
|
23
10
|
require 'bio/db'
|
|
24
11
|
|
|
25
12
|
module Bio
|
|
13
|
+
class KEGG
|
|
14
|
+
|
|
15
|
+
class REACTION < KEGGDB
|
|
16
|
+
|
|
17
|
+
DELIMITER = RS = "\n///\n"
|
|
18
|
+
TAGSIZE = 12
|
|
19
|
+
|
|
20
|
+
def initialize(entry)
|
|
21
|
+
super(entry, TAGSIZE)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# ENTRY
|
|
25
|
+
def entry_id
|
|
26
|
+
field_fetch('ENTRY')[/\S+/]
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# NAME
|
|
30
|
+
def name
|
|
31
|
+
field_fetch('NAME')
|
|
32
|
+
end
|
|
26
33
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
DELIMITER = RS = "\n///\n"
|
|
32
|
-
TAGSIZE = 12
|
|
33
|
-
|
|
34
|
-
def initialize(entry)
|
|
35
|
-
super(entry, TAGSIZE)
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
# ENTRY
|
|
39
|
-
def entry_id
|
|
40
|
-
field_fetch('ENTRY')
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
# NAME
|
|
44
|
-
def name
|
|
45
|
-
field_fetch('NAME')
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
# DEFINITION
|
|
49
|
-
def definition
|
|
50
|
-
field_fetch('DEFINITION')
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# EQUATION
|
|
54
|
-
def equation
|
|
55
|
-
field_fetch('EQUATION')
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
# RPAIR
|
|
59
|
-
def rpairs
|
|
60
|
-
unless @data['RPAIR']
|
|
61
|
-
@data['RPAIR'] = fetch('RPAIR').split(/\s+/)
|
|
62
|
-
end
|
|
63
|
-
@data['RPAIR']
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# PATHWAY
|
|
67
|
-
def pathways
|
|
68
|
-
lines_fetch('PATHWAY')
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
# ENZYME
|
|
72
|
-
def enzymes
|
|
73
|
-
unless @data['ENZYME']
|
|
74
|
-
@data['ENZYME'] = fetch('ENZYME').scan(/\S+/)
|
|
75
|
-
end
|
|
76
|
-
@data['ENZYME']
|
|
77
|
-
end
|
|
34
|
+
# DEFINITION
|
|
35
|
+
def definition
|
|
36
|
+
field_fetch('DEFINITION')
|
|
37
|
+
end
|
|
78
38
|
|
|
39
|
+
# EQUATION
|
|
40
|
+
def equation
|
|
41
|
+
field_fetch('EQUATION')
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# RPAIR
|
|
45
|
+
def rpairs
|
|
46
|
+
unless @data['RPAIR']
|
|
47
|
+
@data['RPAIR'] = fetch('RPAIR').split(/\s+/)
|
|
79
48
|
end
|
|
49
|
+
@data['RPAIR']
|
|
50
|
+
end
|
|
80
51
|
|
|
52
|
+
# PATHWAY
|
|
53
|
+
def pathways
|
|
54
|
+
lines_fetch('PATHWAY')
|
|
81
55
|
end
|
|
82
56
|
|
|
83
|
-
|
|
57
|
+
# ENZYME
|
|
58
|
+
def enzymes
|
|
59
|
+
unless @data['ENZYME']
|
|
60
|
+
@data['ENZYME'] = fetch('ENZYME').scan(/\S+/)
|
|
61
|
+
end
|
|
62
|
+
@data['ENZYME']
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
end # REACTION
|
|
66
|
+
|
|
67
|
+
end # KEGG
|
|
68
|
+
end # Bio
|
|
84
69
|
|
|
85
70
|
|
|
86
71
|
if __FILE__ == $0
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/kegg/taxonomy.rb - KEGG taxonomy parser class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2007 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
# $Id: taxonomy.rb,v 1.2 2007/07/09 10:29:16 k Exp $
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
module Bio
  class KEGG

    # == Description
    #
    # Parse the KEGG 'taxonomy' file which describes taxonomic classification
    # of organisms.
    #
    # The parsed data is exposed in three forms:
    #
    # * #tree   - Hash of node name => Hash of child names (values are nil)
    # * #path   - Array of full paths (Array of names) from the top-level
    #   group down to the organism code
    # * #leaves - Hash of node name => Array of organism codes found below it
    #
    # == References
    #
    # The KEGG 'taxonomy' file is available at
    #
    # * ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
    #
    class Taxonomy

      attr_reader :tree
      attr_reader :path
      attr_reader :leaves
      attr_accessor :root

      # Reads the taxonomy file and builds the tree, path and leaves tables.
      #
      # ---
      # *Arguments*:
      # * (required) _filename_: path to the KEGG 'taxonomy' file
      # * (optional) _orgs_: Array of organism codes to keep; when nil or
      #   empty every organism in the file is used
      def initialize(filename, orgs = [])
        # Stores the taxonomic tree as a linked list (implemented in Hash), so
        # every node needs to have a unique name (key) to work correctly.
        @tree = {}

        # Also stores the taxonomic tree as a list of arrays (full path).
        @path = []

        # Also stores all leaf nodes (organism codes) of every intermediate node.
        @leaves = {}

        # Tentative name for the root node (use the accessor to change it).
        @root = 'Genes'

        hier = []
        level = 0

        # File.foreach closes the file when the block finishes (or raises).
        File.foreach(filename) do |line|
          next if line.strip.empty?

          if line.start_with?('#')
            # Line for taxonomic hierarchy (depth is the number of # marks).
            level = line[/^#+/].length
            label = line[/[A-Za-z].*/]
            hier[level] = sanitize(label)
          else
            # Line for organism name (unify different strains of a species).
            _tax, org, name, = line.chomp.split("\t")
            next unless orgs.nil? || orgs.empty? || orgs.include?(org)

            species, = name.split('_')

            # 'hier' is an array of the taxonomic tree + species and strain name.
            #  ex. [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] +
            #      [S_cerevisiae, sce]
            hier[level + 1] = resolve_species(species, name, hier, level)
            hier[level + 2] = org
            ary = hier[1, level + 2]
            warn ary.inspect if $DEBUG
            add_to_tree(ary)
            add_to_leaves(ary)
            add_to_path(ary)
          end
        end
      end

      # Returns the Array of organism codes stored under the given node name,
      # or nil when the name is unknown.
      def organisms(group)
        @leaves[group]
      end

      # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
      # and every intermediate node stores its child nodes as a Hash.
      def add_to_tree(ary)
        parent = @root
        ary.each do |node|
          @tree[parent] ||= {}
          @tree[parent][node] = nil
          parent = node
        end
      end

      # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
      # and store the leaf node in every node of the path as an Array.
      def add_to_leaves(ary)
        leaf = ary.last
        ary.each do |node|
          @leaves[node] ||= []
          @leaves[node] << leaf
        end
      end

      # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
      # and store the path itself in an Array.
      def add_to_path(ary)
        @path << ary
      end

      # Compaction of intermediate nodes of the resulting taxonomic tree.
      #  - If a child node has only one child node (grandchild), make the
      #    children of the grandchild the children of the child.
      #  ex.
      #    Plants / Monocotyledons / grass family / osa
      #    --> Plants / Monocotyledons / osa
      #
      def compact(node = root)
        subnodes = @tree[node]
        return unless subnodes

        subnodes.keys.each do |subnode|
          collapse_only_children(subnode)
          # repeat recursively
          compact(subnode)
        end
      end

      # Reduction of the leaf nodes of the resulting taxonomic tree.
      #  - If the parent node has only one leaf node, replace the parent node
      #    with the leaf node.
      #  ex.
      #    Plants / Monocotyledons / osa
      #    --> Plants / osa
      #
      def reduce(node = root)
        subnodes = @tree[node]
        return unless subnodes

        # Iterate over a snapshot of the keys because the Hash is modified below.
        subnodes.keys.each do |subnode|
          subsubnodes = @tree[subnode]
          if subsubnodes && subsubnodes.size == 1
            subsubnode = subsubnodes.keys.first
            # only when the single grandchild is a leaf node
            unless @tree[subsubnode]
              # make the grandchild node a child node
              @tree[node].update(subsubnodes)
              # delete child node
              @tree[node].delete(subnode)
              warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
            end
          end
          # repeat recursively
          reduce(subnode)
        end
      end

      # Traverse the taxonomic tree by the depth first search method
      # under the given (root or intermediate) node, yielding each node that
      # has children together with its Hash of children.
      # Returns an Enumerator when no block is given.
      def dfs(parent, &block)
        return enum_for(:dfs, parent) unless block_given?

        children = @tree[parent]
        return unless children

        yield parent, children
        children.keys.each { |child| dfs(child, &block) }
      end

      # Similar to the dfs method but also passes the current level of the nest
      # (0 for the node the traversal started from) to the iterator.
      # Returns an Enumerator when no block is given.
      def dfs_with_level(parent, &block)
        return enum_for(:dfs_with_level, parent) unless block_given?

        # The depth is carried as an argument rather than in an instance
        # variable so that a block which raises or breaks cannot leave a
        # stale level behind for the next traversal.
        walk_with_level(parent, 0, &block)
      end

      # Convert the taxonomic tree structure to a simple ascii art.
      # An empty tree is rendered as the root name only.
      def to_s
        result = "#{@root}\n"
        (@tree[@root] || {}).each_key do |node|
          result << ascii_tree(node, " ")
        end
        result
      end

      private

      # Decide the node name used for a species (see cases below).
      #
      # (0) Grouping of the strains of the same species.
      #  If the name of species is the same as the previous line,
      #  add the species to the same species group.
      #   ex. Gamma/enterobacteria has a large number of organisms,
      #       so sub grouping of strains is needed for E.coli strains etc.
      #
      # However, if the species name is already used, need to avoid
      # collision of species name as the current implementation stores
      # the tree as a Hash, which may cause an infinite loop.
      #
      # (1) If species name == the intermediate node of other lineage
      #  Add '_sp' to the species name to avoid the conflict (1-1), and if
      #  'species_sp' is already taken, use 'species_strain' instead (1-2).
      #   ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
      #       Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn
      #    -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
      #       Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn
      #
      # (2) If species name == the intermediate node of the same lineage
      #  Add '_sp' to the species name to avoid the conflict.
      #   ex. Bacteria/Cyanobacteria/Cyanobacteria_CYA/cya
      #       Bacteria/Cyanobacteria/Cyanobacteria_CYB/cya
      #       Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_MC1/mgm
      #    -> Bacteria/Cyanobacteria/Cyanobacteria_sp/cya
      #       Bacteria/Cyanobacteria/Cyanobacteria_sp/cya
      #       Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_sp/mgm
      def resolve_species(species, name, hier, level)
        sp_group = "#{species}_sp"
        if @tree[species]
          # case (0)
          return species if hier[level + 1] == species
          # case (1-2)
          return name if @tree[sp_group] && hier[level + 1] != sp_group
          # case (1-1)
          sp_group
        elsif hier[level] == species
          # case (2)
          sp_group
        else
          species
        end
      end

      # Helper method for the compact method: while the given node has exactly
      # one child and that child has children of its own, adopt the
      # grandchildren directly. This loop stands in for the iterator-level
      # 'retry' used by the earlier implementation.
      def collapse_only_children(subnode)
        loop do
          children = @tree[subnode]
          break unless children && children.size == 1

          only_child = children.keys.first
          grandchildren = @tree[only_child]
          break unless grandchildren

          # NOTE: the Hash object is shared with @tree[only_child], which is
          # left in @tree but is no longer reachable from subnode.
          @tree[subnode] = grandchildren
          @tree[subnode].delete(only_child)
          warn "--- compact: #{only_child} is replaced by #{grandchildren}" if $DEBUG
        end
      end

      # Helper method for the dfs_with_level method.
      def walk_with_level(parent, level, &block)
        children = @tree[parent]
        return unless children

        yield parent, children, level
        children.keys.each { |child| walk_with_level(child, level + 1, &block) }
      end

      # Helper method for the to_s method. Renders the node and, when it has
      # children, all of its descendants one indentation step deeper.
      def ascii_tree(node, indent)
        result = "#{indent}+- #{node}\n"
        children = @tree[node]
        return result unless children

        child_indent = indent + " "
        children.each_key { |child| result << ascii_tree(child, child_indent) }
        result
      end

      # Replace every character other than ASCII letters and digits with '_'.
      def sanitize(str)
        str.gsub(/[^A-Za-z0-9]/, '_')
      end

    end # Taxonomy

  end # KEGG
end # Bio
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
if __FILE__ == $0

  # Usage:
  # % wget ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
  # % ruby taxonomy.rb taxonomy | less -S
  #
  # An optional second argument names a file listing the organism codes to
  # keep, one per line.

  taxonomy = ARGV.shift
  org_list = ARGV.shift

  # Fail with a readable message instead of a TypeError from File when the
  # taxonomy file argument is missing.
  abort "Usage: ruby #{$0} taxonomy [org_list]" unless taxonomy

  orgs = org_list ? File.readlines(org_list).map { |x| x.strip } : nil

  tree = Bio::KEGG::Taxonomy.new(taxonomy, orgs)

  puts ">>> tree - original"
  puts tree

  puts ">>> tree - after compact"
  tree.compact
  puts tree

  puts ">>> tree - after reduce"
  tree.reduce
  puts tree

  puts ">>> path - sorted"
  tree.path.sort.each do |path|
    puts path.join("/")
  end

  puts ">>> group : orgs"
  tree.dfs(tree.root) do |parent, children|
    # A separate local is used so the organism filter list read above is not
    # overwritten inside the block.
    if members = tree.organisms(parent)
      puts "#{parent.ljust(30)} (#{members.size})\t#{members.join(', ')}"
    end
  end

  puts ">>> group : subgroups"
  tree.dfs_with_level(tree.root) do |parent, children, level|
    subgroups = children.keys.sort
    indent = " " * level
    label = "#{indent} #{level} #{parent}"
    puts "#{label.ljust(35)}\t#{subgroups.join(', ')}"
  end

end
|