bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/lib/bio/db/nexus.rb
ADDED
|
@@ -0,0 +1,1854 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/nexus.rb - Nexus Standard phylogenetic tree parser / formatter
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2006 Christian M Zmasek <cmzmasek@yahoo.com>
|
|
5
|
+
#
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# $Id: nexus.rb,v 1.3 2007/04/05 23:35:40 trevor Exp $
|
|
9
|
+
#
|
|
10
|
+
# == Description
|
|
11
|
+
#
|
|
12
|
+
# This file contains classes that implement a parser for NEXUS formatted
|
|
13
|
+
# data as well as objects to store, access, and write the parsed data.
|
|
14
|
+
#
|
|
15
|
+
# The following five blocks:
|
|
16
|
+
# taxa, characters, distances, trees, data
|
|
17
|
+
# are recognizable and parsable.
|
|
18
|
+
#
|
|
19
|
+
# The parser can deal with (nested) comments (indicated by square brackets),
|
|
20
|
+
# unless the comments are inside a command or data item (e.g.
|
|
21
|
+
# "Dim[comment]ensions" or inside a matrix).
|
|
22
|
+
#
|
|
23
|
+
# Single or double quoted TaxLabels are processed as follows (by way
|
|
24
|
+
# of example): "mus musculus" -> mus_musculus
|
|
25
|
+
#
|
|
26
|
+
#
|
|
27
|
+
# == USAGE
|
|
28
|
+
#
|
|
29
|
+
# require 'bio/db/nexus'
|
|
30
|
+
#
|
|
31
|
+
# # Create a new parser:
|
|
32
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
|
33
|
+
#
|
|
34
|
+
# # Get first taxa block:
|
|
35
|
+
# taxa_block = nexus.get_taxa_blocks[ 0 ]
|
|
36
|
+
# # Get number of taxa:
|
|
37
|
+
# number_of_taxa = taxa_block.get_number_of_taxa.to_i
|
|
38
|
+
# # Get name of first taxon:
|
|
39
|
+
# first_taxon = taxa_block.get_taxa[ 0 ]
|
|
40
|
+
#
|
|
41
|
+
# # Get first data block:
|
|
42
|
+
# data_block = nexus.get_data_blocks[ 0 ]
|
|
43
|
+
# # Get name of first characters row:
|
|
44
|
+
# seq_name = data_block.get_row_name( 0 )
|
|
45
|
+
# # Get first characters row named "taxon_2" as Bio::Sequence sequence:
|
|
46
|
+
# seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
|
|
47
|
+
# # Get third characters row as Bio::Sequence sequence:
|
|
48
|
+
# seq_2 = data_block.get_sequence( 2 )
|
|
49
|
+
# # Get first characters row named "taxon_3" as String:
|
|
50
|
+
# string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
|
|
51
|
+
# # Get name of first taxon:
|
|
52
|
+
# taxon_0 = data_block.get_taxa[ 0 ]
|
|
53
|
+
# # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
|
|
54
|
+
# characters_matrix = data_block.get_matrix
|
|
55
|
+
#
|
|
56
|
+
# # Get first characters block (same methods as Nexus::DataBlock except
|
|
57
|
+
# # it lacks get_taxa method):
|
|
58
|
+
# characters_block = nexus.get_characters_blocks[ 0 ]
|
|
59
|
+
#
|
|
60
|
+
# # Get trees block(s):
|
|
61
|
+
# trees_block = nexus.get_trees_blocks[ 0 ]
|
|
62
|
+
# # Get first tree named "best" as String:
|
|
63
|
+
# string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
|
|
64
|
+
# # Get first tree named "best" as Bio::Db::Newick object:
|
|
65
|
+
# tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
|
|
66
|
+
# # Get first tree as Bio::Db::Newick object:
|
|
67
|
+
# tree_first = trees_block.get_tree( 0 )
|
|
68
|
+
#
|
|
69
|
+
# # Get distances block(s):
|
|
70
|
+
# distances_blocks = nexus.get_distances_blocks
|
|
71
|
+
# # Get matrix as Bio::Nexus::NexusMatrix object:
|
|
72
|
+
# matrix = distances_blocks[ 0 ].get_matrix
|
|
73
|
+
# # Get value (column 0 are names):
|
|
74
|
+
# val = matrix.get_value( 1, 5 )
|
|
75
|
+
#
|
|
76
|
+
# # Get blocks for which no class exists (private blocks):
|
|
77
|
+
# private_blocks = nexus.get_blocks_by_name( "my_block" )
|
|
78
|
+
# # Get first block named "my_block":
|
|
79
|
+
# my_block_0 = private_blocks[ 0 ]
|
|
80
|
+
# # Get first token in first block named "my_block":
|
|
81
|
+
# first_token = my_block_0.get_tokens[ 0 ]
|
|
82
|
+
#
|
|
83
|
+
#
|
|
84
|
+
# == References
|
|
85
|
+
#
|
|
86
|
+
# * Maddison DR, Swofford DL, Maddison WP (1997). NEXUS: an extensible file
|
|
87
|
+
# format for systematic information.
|
|
88
|
+
# Syst Biol. 1997 46(4):590-621.
|
|
89
|
+
#
|
|
90
|
+
|
|
91
|
+
require 'bio/sequence'
|
|
92
|
+
require 'bio/tree'
|
|
93
|
+
require 'bio/db/newick'
|
|
94
|
+
|
|
95
|
+
module Bio
|
|
96
|
+
|
|
97
|
+
# == DESCRIPTION
|
|
98
|
+
# Bio::Nexus is a parser for nexus formatted data.
|
|
99
|
+
# It contains classes and constants enabling the representation and
|
|
100
|
+
# processing of nexus data.
|
|
101
|
+
#
|
|
102
|
+
# == USAGE
|
|
103
|
+
#
|
|
104
|
+
# # Parsing a nexus formatted string str:
|
|
105
|
+
# nexus = Bio::Nexus.new( nexus_str )
|
|
106
|
+
#
|
|
107
|
+
# # Obtaining the nexus blocks as an array of GenericBlock or
|
|
108
|
+
# # any of its subclasses (such as DistancesBlock):
|
|
109
|
+
# blocks = nexus.get_blocks
|
|
110
|
+
#
|
|
111
|
+
# # Getting a block by name:
|
|
112
|
+
# my_blocks = nexus.get_blocks_by_name( "my_block" )
|
|
113
|
+
#
|
|
114
|
+
# # Getting distance blocks:
|
|
115
|
+
# distances_blocks = nexus.get_distances_blocks
|
|
116
|
+
#
|
|
117
|
+
# # Getting trees blocks:
|
|
118
|
+
# trees_blocks = nexus.get_trees_blocks
|
|
119
|
+
#
|
|
120
|
+
# # Getting data blocks:
|
|
121
|
+
# data_blocks = nexus.get_data_blocks
|
|
122
|
+
#
|
|
123
|
+
# # Getting characters blocks:
|
|
124
|
+
# character_blocks = nexus.get_characters_blocks
|
|
125
|
+
#
|
|
126
|
+
# # Getting taxa blocks:
|
|
127
|
+
# taxa_blocks = nexus.get_taxa_blocks
|
|
128
|
+
#
|
|
129
|
+
class Nexus
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
END_OF_LINE = "\n"
|
|
133
|
+
INDENTENTION = " "
|
|
134
|
+
DOUBLE_QUOTE = "\""
|
|
135
|
+
SINGLE_QUOTE = "'"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
BEGIN_NEXUS = "#NEXUS"
|
|
139
|
+
DELIMITER = ";"
|
|
140
|
+
BEGIN_BLOCK = "Begin"
|
|
141
|
+
END_BLOCK = "End" + DELIMITER
|
|
142
|
+
BEGIN_COMMENT = "["
|
|
143
|
+
END_COMMENT = "]"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
TAXA = "Taxa"
|
|
147
|
+
CHARACTERS = "Characters"
|
|
148
|
+
DATA = "Data"
|
|
149
|
+
DISTANCES = "Distances"
|
|
150
|
+
TREES = "Trees"
|
|
151
|
+
TAXA_BLOCK = TAXA + DELIMITER
|
|
152
|
+
CHARACTERS_BLOCK = CHARACTERS + DELIMITER
|
|
153
|
+
DATA_BLOCK = DATA + DELIMITER
|
|
154
|
+
DISTANCES_BLOCK = DISTANCES + DELIMITER
|
|
155
|
+
TREES_BLOCK = TREES + DELIMITER
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
DIMENSIONS = "Dimensions"
|
|
159
|
+
FORMAT = "Format"
|
|
160
|
+
NTAX = "NTax"
|
|
161
|
+
NCHAR = "NChar"
|
|
162
|
+
DATATYPE = "DataType"
|
|
163
|
+
TAXLABELS = "TaxLabels"
|
|
164
|
+
MATRIX = "Matrix"
|
|
165
|
+
# End of constants.
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Nexus parse error class,
|
|
169
|
+
# indicates error during parsing of nexus formatted data.
|
|
170
|
+
class NexusParseError < RuntimeError; end
|
|
171
|
+
|
|
172
|
+
# Creates a new nexus parser for 'nexus_str'.
|
|
173
|
+
#
|
|
174
|
+
# ---
|
|
175
|
+
# *Arguments*:
|
|
176
|
+
# * (required) _nexus_str_: String - nexus formatted data
|
|
177
|
+
def initialize( nexus_str )
|
|
178
|
+
@blocks = Array.new
|
|
179
|
+
@current_cmd = nil
|
|
180
|
+
@current_subcmd = nil
|
|
181
|
+
@current_block_name = nil
|
|
182
|
+
@current_block = nil
|
|
183
|
+
parse( nexus_str )
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# Returns an Array of all blocks found in the String 'nexus_str'
|
|
188
|
+
# set via Bio::Nexus.new( nexus_str ).
|
|
189
|
+
#
|
|
190
|
+
# ---
|
|
191
|
+
# *Returns*:: Array of GenericBlocks or any of its subclasses
|
|
192
|
+
def get_blocks
|
|
193
|
+
@blocks
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
# A convenience method which returns an array of
|
|
197
|
+
# all nexus blocks for which the name equals 'name' found
|
|
198
|
+
# in the String 'nexus_str' set via Bio::Nexus.new( nexus_str ).
|
|
199
|
+
#
|
|
200
|
+
# ---
|
|
201
|
+
# *Arguments*:
|
|
202
|
+
# * (required) _name_: String
|
|
203
|
+
# *Returns*:: Array of GenericBlocks or any of its subclasses
|
|
204
|
+
def get_blocks_by_name( name )
|
|
205
|
+
found_blocks = Array.new
|
|
206
|
+
@blocks.each do | block |
|
|
207
|
+
if ( name == block.get_name )
|
|
208
|
+
found_blocks.push( block )
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
found_blocks
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
# A convenience method which returns an array of
|
|
215
|
+
# all data blocks.
|
|
216
|
+
#
|
|
217
|
+
# ---
|
|
218
|
+
# *Returns*:: Array of DataBlocks
|
|
219
|
+
def get_data_blocks
|
|
220
|
+
get_blocks_by_name( DATA_BLOCK.chomp( ";").downcase )
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
# A convenience method which returns an array of
|
|
224
|
+
# all characters blocks.
|
|
225
|
+
#
|
|
226
|
+
# ---
|
|
227
|
+
# *Returns*:: Array of CharactersBlocks
|
|
228
|
+
def get_characters_blocks
|
|
229
|
+
get_blocks_by_name( CHARACTERS_BLOCK.chomp( ";").downcase )
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
# A convenience method which returns an array of
|
|
233
|
+
# all trees blocks.
|
|
234
|
+
#
|
|
235
|
+
# ---
|
|
236
|
+
# *Returns*:: Array of TreesBlocks
|
|
237
|
+
def get_trees_blocks
|
|
238
|
+
get_blocks_by_name( TREES_BLOCK.chomp( ";").downcase )
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
# A convenience method which returns an array of
|
|
242
|
+
# all distances blocks.
|
|
243
|
+
#
|
|
244
|
+
# ---
|
|
245
|
+
# *Returns*:: Array of DistancesBlock
|
|
246
|
+
def get_distances_blocks
|
|
247
|
+
get_blocks_by_name( DISTANCES_BLOCK.chomp( ";").downcase )
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# A convenience method which returns an array of
|
|
251
|
+
# all taxa blocks.
|
|
252
|
+
#
|
|
253
|
+
# ---
|
|
254
|
+
# *Returns*:: Array of TaxaBlocks
|
|
255
|
+
def get_taxa_blocks
|
|
256
|
+
get_blocks_by_name( TAXA_BLOCK.chomp( ";").downcase )
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
# Returns a String listing how many blocks of each type were parsed.
|
|
260
|
+
#
|
|
261
|
+
# ---
|
|
262
|
+
# *Returns*:: String
|
|
263
|
+
def to_s
|
|
264
|
+
str = String.new
|
|
265
|
+
if get_blocks.length < 1
|
|
266
|
+
str << "empty"
|
|
267
|
+
else
|
|
268
|
+
str << "number of blocks: " << get_blocks.length.to_s
|
|
269
|
+
if get_characters_blocks.length > 0
|
|
270
|
+
str << " [characters blocks: " << get_characters_blocks.length.to_s << "] "
|
|
271
|
+
end
|
|
272
|
+
if get_data_blocks.length > 0
|
|
273
|
+
str << " [data blocks: " << get_data_blocks.length.to_s << "] "
|
|
274
|
+
end
|
|
275
|
+
if get_distances_blocks.length > 0
|
|
276
|
+
str << " [distances blocks: " << get_distances_blocks.length.to_s << "] "
|
|
277
|
+
end
|
|
278
|
+
if get_taxa_blocks.length > 0
|
|
279
|
+
str << " [taxa blocks: " << get_taxa_blocks.length.to_s << "] "
|
|
280
|
+
end
|
|
281
|
+
if get_trees_blocks.length > 0
|
|
282
|
+
str << " [trees blocks: " << get_trees_blocks.length.to_s << "] "
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
str
|
|
286
|
+
end
|
|
287
|
+
alias to_str to_s
|
|
288
|
+
|
|
289
|
+
private
|
|
290
|
+
|
|
291
|
+
# The master method for parsing.
|
|
292
|
+
# Stores the resulting blocks in the array @blocks.
|
|
293
|
+
#
|
|
294
|
+
# ---
|
|
295
|
+
# *Arguments*:
|
|
296
|
+
# * (required) _str_: String - the String to be parsed
|
|
297
|
+
def parse( str )
|
|
298
|
+
str = str.chop if str[-1..-1] == ';'
|
|
299
|
+
ary = str.split(/[\s+=]/)
|
|
300
|
+
ary.collect! { |x| x.strip!; x.empty? ? nil : x }
|
|
301
|
+
ary.compact!
|
|
302
|
+
in_comment = false
|
|
303
|
+
comment_level = 0
|
|
304
|
+
|
|
305
|
+
# Main loop
|
|
306
|
+
while token = ary.shift
|
|
307
|
+
# Quotes:
|
|
308
|
+
if ( token.index( SINGLE_QUOTE ) == 0 ||
|
|
309
|
+
token.index( DOUBLE_QUOTE ) == 0 )
|
|
310
|
+
token << "_" << ary.shift
|
|
311
|
+
token = token.chop if token[-1..-1] == ';'
|
|
312
|
+
token = token.slice( 1, token.length - 2 )
|
|
313
|
+
end
|
|
314
|
+
# Comments:
|
|
315
|
+
open = token.count( BEGIN_COMMENT )
|
|
316
|
+
close = token.count( END_COMMENT )
|
|
317
|
+
comment = comment_level > 0
|
|
318
|
+
comment_level = comment_level + open - close
|
|
319
|
+
if ( open > 0 && open == close )
|
|
320
|
+
next
|
|
321
|
+
elsif comment_level > 0 || comment
|
|
322
|
+
next
|
|
323
|
+
elsif equal?( token, END_BLOCK )
|
|
324
|
+
end_block()
|
|
325
|
+
elsif equal?( token, BEGIN_BLOCK )
|
|
326
|
+
begin_block()
|
|
327
|
+
@current_block_name = token = ary.shift
|
|
328
|
+
@current_block_name.downcase!
|
|
329
|
+
@current_block = create_block()
|
|
330
|
+
@blocks.push( @current_block )
|
|
331
|
+
elsif ( @current_block_name != nil )
|
|
332
|
+
process_token( token.chomp( DELIMITER ), ary )
|
|
333
|
+
end
|
|
334
|
+
end # main loop
|
|
335
|
+
@blocks.compact!
|
|
336
|
+
end # parse
|
|
337
|
+
|
|
338
|
+
# Operations required when the beginning of a block is encountered.
|
|
339
|
+
#
|
|
340
|
+
# ---
|
|
341
|
+
def begin_block()
|
|
342
|
+
if @current_block_name != nil
|
|
343
|
+
raise NexusParseError, "Cannot have nested nexus blocks (\"end;\" might be missing)"
|
|
344
|
+
end
|
|
345
|
+
reset_command_state()
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
# Operations required when the end of a block is encountered.
|
|
349
|
+
#
|
|
350
|
+
# ---
|
|
351
|
+
def end_block()
|
|
352
|
+
if @current_block_name == nil
|
|
353
|
+
raise NexusParseError, "Cannot have two or more \"end;\" tokens in sequence"
|
|
354
|
+
end
|
|
355
|
+
@current_block_name = nil
|
|
356
|
+
end
|
|
357
|
+
|
|
358
|
+
# This calls various process_token_for_<name>_block methods
|
|
359
|
+
# depending on the state of @current_block_name.
|
|
360
|
+
#
|
|
361
|
+
# ---
|
|
362
|
+
# *Arguments*:
|
|
363
|
+
# * (required) _token_: String
|
|
364
|
+
# * (required) _ary_: Array
|
|
365
|
+
def process_token( token, ary )
|
|
366
|
+
case @current_block_name
|
|
367
|
+
when TAXA_BLOCK.downcase
|
|
368
|
+
process_token_for_taxa_block( token )
|
|
369
|
+
when CHARACTERS_BLOCK.downcase
|
|
370
|
+
process_token_for_character_block( token, ary )
|
|
371
|
+
when DATA_BLOCK.downcase
|
|
372
|
+
process_token_for_data_block( token, ary )
|
|
373
|
+
when DISTANCES_BLOCK.downcase
|
|
374
|
+
process_token_for_distances_block( token, ary )
|
|
375
|
+
when TREES_BLOCK.downcase
|
|
376
|
+
process_token_for_trees_block( token, ary )
|
|
377
|
+
else
|
|
378
|
+
process_token_for_generic_block( token )
|
|
379
|
+
end
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
# Resets @current_cmd and @current_subcmd to nil.
|
|
383
|
+
#
|
|
384
|
+
# ---
|
|
385
|
+
def reset_command_state()
  # Forget both the active command and its sub-command in one go.
  @current_cmd = @current_subcmd = nil
end
|
|
389
|
+
|
|
390
|
+
# Creates GenericBlock (or any of its subclasses) the type of
|
|
391
|
+
# which is determined by the state of @current_block_name.
|
|
392
|
+
#
|
|
393
|
+
# ---
|
|
394
|
+
# *Returns*:: GenericBlock (or any of its subclasses) object
|
|
395
|
+
def create_block()
  name = @current_block_name
  # Pick the block class matching the (lower-case) block name; names without
  # a dedicated class are represented by GenericBlock.
  block_class =
    if TAXA_BLOCK.downcase == name
      Bio::Nexus::TaxaBlock
    elsif CHARACTERS_BLOCK.downcase == name
      Bio::Nexus::CharactersBlock
    elsif DATA_BLOCK.downcase == name
      Bio::Nexus::DataBlock
    elsif DISTANCES_BLOCK.downcase == name
      Bio::Nexus::DistancesBlock
    elsif TREES_BLOCK.downcase == name
      Bio::Nexus::TreesBlock
    else
      Bio::Nexus::GenericBlock
    end
  return block_class.new( name )
end
|
|
411
|
+
|
|
412
|
+
# This processes the tokens (between Begin Taxa; and End;) for a taxa block
|
|
413
|
+
# Example of a currently parseable taxa block:
|
|
414
|
+
# Begin Taxa;
|
|
415
|
+
# Dimensions NTax=4;
|
|
416
|
+
# TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
|
|
417
|
+
# End;
|
|
418
|
+
#
|
|
419
|
+
# ---
|
|
420
|
+
# *Arguments*:
|
|
421
|
+
# * (required) _token_: String
|
|
422
|
+
def process_token_for_taxa_block( token )
  # A command keyword opens a new command and clears any sub-command.
  command = [ DIMENSIONS, TAXLABELS ].find { |keyword| equal?( token, keyword ) }
  if command
    @current_cmd = command
    return @current_subcmd = nil
  end

  if @current_cmd == DIMENSIONS && equal?( token, NTAX )
    # Remember the sub-command so that the following value token is
    # stored as the number of taxa.
    @current_subcmd = NTAX
  else
    # Any other token is a value belonging to the active (sub-)command;
    # tokens outside of a known command are ignored.
    case [ @current_cmd, @current_subcmd ]
    when [ DIMENSIONS, NTAX ]
      @current_block.set_number_of_taxa( token )
    when [ TAXLABELS, nil ]
      @current_block.add_taxon( token )
    end
  end
end
|
|
437
|
+
|
|
438
|
+
# This processes the tokens (between Begin Characters; and End;) for a character
|
|
439
|
+
# block
|
|
440
|
+
# Example of a currently parseable character block:
|
|
441
|
+
# Begin Characters;
|
|
442
|
+
# Dimensions NChar=20
|
|
443
|
+
# NTax=4;
|
|
444
|
+
# Format DataType=DNA
|
|
445
|
+
# Missing=x
|
|
446
|
+
# Gap=- MatchChar=.;
|
|
447
|
+
# Matrix
|
|
448
|
+
# fish ACATA GAGGG TACCT CTAAG
|
|
449
|
+
# frog ACTTA GAGGC TACCT CTAGC
|
|
450
|
+
# snake ACTCA CTGGG TACCT TTGCG
|
|
451
|
+
# mouse ACTCA GACGG TACCT TTGCG;
|
|
452
|
+
# End;
|
|
453
|
+
#
|
|
454
|
+
# ---
|
|
455
|
+
# *Arguments*:
|
|
456
|
+
# * (required) _token_: String
|
|
457
|
+
# * (required) _ary_: Array
|
|
458
|
+
def process_token_for_character_block( token, ary )
  # Step 1: a command keyword opens a new command and clears the sub-command.
  command = [ DIMENSIONS, FORMAT, MATRIX ].find { |keyword| equal?( token, keyword ) }
  if command
    @current_cmd = command
    return @current_subcmd = nil
  end

  # Step 2: a sub-command keyword that is valid for the active command.
  candidates =
    case @current_cmd
    when DIMENSIONS
      [ NTAX, NCHAR ]
    when FORMAT
      [ DATATYPE, CharactersBlock::MISSING, CharactersBlock::GAP, CharactersBlock::MATCHCHAR ]
    else
      []
    end
  subcommand = candidates.find { |keyword| equal?( token, keyword ) }
  return @current_subcmd = subcommand if subcommand

  # Step 3: everything else is a value for the active (sub-)command;
  # tokens outside of a known command are ignored.
  case [ @current_cmd, @current_subcmd ]
  when [ DIMENSIONS, NTAX ]
    @current_block.set_number_of_taxa( token )
  when [ DIMENSIONS, NCHAR ]
    @current_block.set_number_of_characters( token )
  when [ FORMAT, DATATYPE ]
    @current_block.set_datatype( token )
  when [ FORMAT, CharactersBlock::MISSING ]
    @current_block.set_missing( token )
  when [ FORMAT, CharactersBlock::GAP ]
    @current_block.set_gap_character( token )
  when [ FORMAT, CharactersBlock::MATCHCHAR ]
    @current_block.set_match_character( token )
  when [ MATRIX, nil ]
    # make_matrix consumes further tokens from ary; the matrix tokens are
    # scanned into single characters (last argument true).
    row_width = @current_block.get_number_of_characters
    @current_block.set_matrix( make_matrix( token, ary, row_width, true ) )
  end
end
|
|
497
|
+
|
|
498
|
+
# This processes the tokens (between Begin Trees; and End;) for a trees block
|
|
499
|
+
# Example of a currently parseable trees block:
|
|
500
|
+
# Begin Trees;
|
|
501
|
+
# Tree best=(fish,(frog,(snake, mouse)));
|
|
502
|
+
# Tree other=(snake,(frog,( fish, mouse)));
|
|
503
|
+
# End;
|
|
504
|
+
#
|
|
505
|
+
# ---
|
|
506
|
+
# *Arguments*:
|
|
507
|
+
# * (required) _token_: String
|
|
508
|
+
# * (required) _ary_: Array
|
|
509
|
+
def process_token_for_trees_block( token, ary )
  if ( equal?( token, TreesBlock::TREE ) )
    # "Tree" keyword: the next token is the name of a tree.
    @current_cmd = TreesBlock::TREE
    @current_subcmd = nil
  elsif ( cmds_equal_to?( TreesBlock::TREE, nil ) )
    @current_block.add_tree_name( token )
    # The tree description may be spread over several tokens: glue them
    # together up to and including the token holding the terminating ";".
    # These tokens are removed from ary.
    pieces = []
    loop do
      if ary.empty?
        # Previously this case surfaced as a TypeError/NoMethodError on nil.
        raise NexusParseError, "Tree \"#{token}\" is not terminated by \";\""
      end
      pieces << ary.shift
      break if pieces.last.index( ";" )
    end
    @current_block.add_tree( pieces.join )
    # A further tree has to be introduced by its own "Tree" keyword.
    @current_cmd = nil
  end
end
|
|
523
|
+
|
|
524
|
+
# This processes the tokens (between Begin Distances; and End;) for a distances
|
|
525
|
+
# block.
|
|
526
|
+
# Example of a currently parseable distances block:
|
|
527
|
+
# Begin Distances;
|
|
528
|
+
# Dimensions nchar=20 ntax=5;
|
|
529
|
+
# Format Triangle=Upper;
|
|
530
|
+
# Matrix
|
|
531
|
+
# taxon_1 0.0 1.0 2.0 4.0 7.0
|
|
532
|
+
# taxon_2 1.0 0.0 3.0 5.0 8.0
|
|
533
|
+
# taxon_3 3.0 4.0 0.0 6.0 9.0
|
|
534
|
+
# taxon_4 7.0 3.0 1.0 0.0 9.5
|
|
535
|
+
# taxon_5 1.2 1.3 1.4 1.5 0.0;
|
|
536
|
+
# End;
|
|
537
|
+
#
|
|
538
|
+
# ---
|
|
539
|
+
# *Arguments*:
|
|
540
|
+
# * (required) _token_: String
|
|
541
|
+
# * (required) _ary_: Array
|
|
542
|
+
def process_token_for_distances_block( token, ary )
  # Step 1: a command keyword opens a new command and clears the sub-command.
  command = [ DIMENSIONS, FORMAT, MATRIX ].find { |keyword| equal?( token, keyword ) }
  if command
    @current_cmd = command
    return @current_subcmd = nil
  end

  # Step 2: a sub-command keyword that is valid for the active command.
  candidates =
    case @current_cmd
    when DIMENSIONS
      [ NTAX, NCHAR ]
    when FORMAT
      [ DATATYPE, DistancesBlock::TRIANGLE ]
    else
      []
    end
  subcommand = candidates.find { |keyword| equal?( token, keyword ) }
  return @current_subcmd = subcommand if subcommand

  # Step 3: everything else is a value for the active (sub-)command.
  # Note that a value following Format/DataType is recognized above but
  # not stored anywhere by this handler.
  case [ @current_cmd, @current_subcmd ]
  when [ DIMENSIONS, NTAX ]
    @current_block.set_number_of_taxa( token )
  when [ DIMENSIONS, NCHAR ]
    @current_block.set_number_of_characters( token )
  when [ FORMAT, DistancesBlock::TRIANGLE ]
    @current_block.set_triangle( token )
  when [ MATRIX, nil ]
    # make_matrix consumes further tokens from ary; every token is kept
    # as one matrix cell (last argument false).
    row_width = @current_block.get_number_of_taxa
    @current_block.set_matrix( make_matrix( token, ary, row_width, false ) )
  end
end
|
|
571
|
+
|
|
572
|
+
# This processes the tokens (between Begin Data; and End;) for a data
|
|
573
|
+
# block.
|
|
574
|
+
# Example of a currently parseable data block:
|
|
575
|
+
# Begin Data;
|
|
576
|
+
# Dimensions ntax=5 nchar=14;
|
|
577
|
+
# Format Datatype=RNA gap=# MISSING=x MatchChar=^;
|
|
578
|
+
# TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
|
|
579
|
+
# Matrix
|
|
580
|
+
# taxon_1 A- CCGTCGA-GTTA
|
|
581
|
+
# taxon_2 T- CCG-CGA-GATA
|
|
582
|
+
# taxon_3 A- C-GTCGA-GATA
|
|
583
|
+
# taxon_4 A- CCTCGA--GTTA
|
|
584
|
+
# taxon_5 T- CGGTCGT-CTTA;
|
|
585
|
+
# End;
|
|
586
|
+
#
|
|
587
|
+
# ---
|
|
588
|
+
# *Arguments*:
|
|
589
|
+
# * (required) _token_: String
|
|
590
|
+
# * (required) _ary_: Array
|
|
591
|
+
def process_token_for_data_block( token, ary )
  # Step 1: a command keyword opens a new command and clears the sub-command.
  command = [ DIMENSIONS, FORMAT, TAXLABELS, MATRIX ].find { |keyword| equal?( token, keyword ) }
  if command
    @current_cmd = command
    return @current_subcmd = nil
  end

  # Step 2: a sub-command keyword that is valid for the active command.
  candidates =
    case @current_cmd
    when DIMENSIONS
      [ NTAX, NCHAR ]
    when FORMAT
      [ DATATYPE, CharactersBlock::MISSING, CharactersBlock::GAP, CharactersBlock::MATCHCHAR ]
    else
      []
    end
  subcommand = candidates.find { |keyword| equal?( token, keyword ) }
  return @current_subcmd = subcommand if subcommand

  # Step 3: everything else is a value for the active (sub-)command;
  # tokens outside of a known command are ignored.
  case [ @current_cmd, @current_subcmd ]
  when [ DIMENSIONS, NTAX ]
    @current_block.set_number_of_taxa( token )
  when [ DIMENSIONS, NCHAR ]
    @current_block.set_number_of_characters( token )
  when [ FORMAT, DATATYPE ]
    @current_block.set_datatype( token )
  when [ FORMAT, CharactersBlock::MISSING ]
    @current_block.set_missing( token )
  when [ FORMAT, CharactersBlock::GAP ]
    @current_block.set_gap_character( token )
  when [ FORMAT, CharactersBlock::MATCHCHAR ]
    @current_block.set_match_character( token )
  when [ TAXLABELS, nil ]
    @current_block.add_taxon( token )
  when [ MATRIX, nil ]
    # make_matrix consumes further tokens from ary; the matrix tokens are
    # scanned into single characters (last argument true).
    row_width = @current_block.get_number_of_characters
    @current_block.set_matrix( make_matrix( token, ary, row_width, true ) )
  end
end
|
|
635
|
+
|
|
636
|
+
# Makes a NexusMatrix out of token from token Array ary
|
|
637
|
+
# Used by process_token_for_X_block methods which contain
|
|
638
|
+
# data in a matrix form. Column 0 contains names.
|
|
639
|
+
# This will shift tokens from ary.
|
|
640
|
+
# ---
|
|
641
|
+
# *Arguments*:
|
|
642
|
+
# * (required) _token_: String
|
|
643
|
+
# * (required) _ary_: Array
|
|
644
|
+
# * (required) _size_: Integer
|
|
645
|
+
# * (optional) _scan_token_: true or false
|
|
646
|
+
# *Returns*:: NexusMatrix
|
|
647
|
+
def make_matrix( token, ary, size, scan_token = false )
  matrix = NexusMatrix.new
  width = size.to_i
  col = -1 # -1 means: the next token is the name of a new row
  row = 0
  loop do
    if col == -1
      # Row names always live in column 0.
      col = 0
      matrix.set_value( row, col, token )
    else
      col = add_token_to_matrix( token, scan_token, matrix, row, col )
      if col == width
        # Row is complete; the following token names the next row.
        col = -1
        row += 1
      end
    end

    token = ary.shift
    if token.nil?
      # Ran out of tokens before the terminating delimiter was seen
      # (previously a NoMethodError on nil).
      raise NexusParseError, "Matrix is not terminated by \"#{DELIMITER}\""
    end
    next unless token.index( DELIMITER )

    # The token carrying the delimiter ends the matrix. Whatever precedes
    # the delimiter is the last value; a bare delimiter token contributes
    # nothing (it used to add an empty cell or an empty-named extra row).
    last_value = token.chomp( DELIMITER )
    unless last_value.empty?
      col = add_token_to_matrix( last_value, scan_token, matrix, row, col )
    end
    break
  end
  matrix
end
|
|
673
|
+
|
|
674
|
+
# Helper method for make_matrix.
|
|
675
|
+
#
|
|
676
|
+
# ---
|
|
677
|
+
# *Arguments*:
|
|
678
|
+
# * (required) _token_: String
|
|
679
|
+
# * (required) _scan_token_: true or false - add whole token
|
|
680
|
+
# or
|
|
681
|
+
# scan into chars
|
|
682
|
+
# * (required) _matrix_: NexusMatrix - the matrix to which to add token
|
|
683
|
+
# * (required) _row_: Integer - the row for matrix
|
|
684
|
+
# * (required) _col_: Integer - the starting column
|
|
685
|
+
# *Returns*:: Integer - ending column
|
|
686
|
+
def add_token_to_matrix( token, scan_token, matrix, row, col )
  # With scan_token every character matched by /./ becomes a cell of its
  # own, otherwise the token as a whole fills exactly one cell.
  cells = scan_token ? token.scan( /./ ) : [ token ]
  cells.each do |cell|
    # Cells are written to the columns following col.
    col += 1
    matrix.set_value( row, col, cell )
  end
  # Column of the last cell written (unchanged if nothing was written).
  col
end
|
|
698
|
+
|
|
699
|
+
# This processes the tokens (between Begin Taxa; and End;) for a block
|
|
700
|
+
# for which a specific parser is not available.
|
|
701
|
+
# Example of a currently parseable generic block:
|
|
702
|
+
# Begin Taxa;
|
|
703
|
+
# token1 token2 token3 ...
|
|
704
|
+
# End;
|
|
705
|
+
#
|
|
706
|
+
# ---
|
|
707
|
+
# *Arguments*:
|
|
708
|
+
# * (required) _token_: String
|
|
709
|
+
def process_token_for_generic_block( token )
  # Blocks without a dedicated parser simply collect their tokens as they come.
  generic_block = @current_block
  generic_block.add_token( token )
end
|
|
712
|
+
|
|
713
|
+
# Returns true if Strings str1 and str2 are
|
|
714
|
+
# equal - ignoring case.
|
|
715
|
+
#
|
|
716
|
+
# ---
|
|
717
|
+
# *Arguments*:
|
|
718
|
+
# * (required) _str1_: String
|
|
719
|
+
# * (required) _str2_: String
|
|
720
|
+
# *Returns*:: true or false
|
|
721
|
+
def equal?( str1, str2 )
  # nil never equals anything, not even another nil.
  return false if str1.nil? || str2.nil?
  # Case-insensitive comparison via the lower-cased forms.
  str1.downcase == str2.downcase
end
|
|
728
|
+
|
|
729
|
+
# Returns true if @current_cmd == command
|
|
730
|
+
# and @current_subcmd == subcommand, false otherwise
|
|
731
|
+
# ---
|
|
732
|
+
# *Arguments*:
|
|
733
|
+
# * (required) _command_: String
|
|
734
|
+
# * (required) _subcommand_: String
|
|
735
|
+
# *Returns*:: true or false
|
|
736
|
+
def cmds_equal_to?( command, subcommand )
  # Compare the active command/sub-command pair as a whole.
  [ @current_cmd, @current_subcmd ] == [ command, subcommand ]
end
|
|
739
|
+
|
|
740
|
+
# Classes to represent nexus data follow.
|
|
741
|
+
|
|
742
|
+
# == DESCRIPTION
|
|
743
|
+
# Bio::Nexus::GenericBlock represents a generic nexus block.
|
|
744
|
+
# It is mainly intended to be extended into more specific classes,
|
|
745
|
+
# although it is used for blocks not represented by more specific
|
|
746
|
+
# block classes.
|
|
747
|
+
# It has a name and an array for the tokenized content of a
|
|
748
|
+
# nexus block.
|
|
749
|
+
#
|
|
750
|
+
# == USAGE
|
|
751
|
+
#
|
|
752
|
+
# require 'bio/db/nexus'
|
|
753
|
+
#
|
|
754
|
+
# # Create a new parser:
|
|
755
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
|
756
|
+
#
|
|
757
|
+
# # Get blocks for which no class exists (private blocks)
|
|
758
|
+
# as Nexus::GenericBlock:
|
|
759
|
+
# private_blocks = nexus.get_blocks_by_name( "my_block" )
|
|
760
|
+
# # Get first block names "my_block":
|
|
761
|
+
# my_block_0 = private_blocks[ 0 ]
|
|
762
|
+
# # Get first token in first block names "my_block":
|
|
763
|
+
# first_token = my_block_0.get_tokens[ 0 ]
|
|
764
|
+
# # Get name of block (would return "my_block" in this case):
|
|
765
|
+
# name = my_block_0.get_name
|
|
766
|
+
# # Return data of block as nexus formatted String:
|
|
767
|
+
# name = my_block_0.to_nexus
|
|
768
|
+
#
|
|
769
|
+
class GenericBlock

  # Creates a new GenericBlock object named 'name'.
  # A trailing ";" is removed from the name.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    @name = name.chomp( ";" )
    @tokens = Array.new
  end

  # Gets the name of this block.
  #
  # ---
  # *Returns*:: String
  def get_name
    @name
  end

  # Returns contents as Array of Strings.
  # This is the internal Array itself, not a copy.
  #
  # ---
  # *Returns*:: Array
  def get_tokens
    @tokens
  end

  # Same as to_nexus.
  #
  # ---
  # *Returns*:: String
  def to_s
    to_nexus
  end
  alias to_str to_s

  # Should return a String describing this block as nexus formatted data.
  # A generic block only returns a placeholder text containing its name,
  # the tokens are not written.
  # ---
  # *Returns*:: String
  def to_nexus
    # No throw-away local any more: the String is the return value.
    "generic block \"" + get_name + "\" [do not know how to write in nexus format]"
  end

  # Adds a token to this.
  #
  # ---
  # *Arguments*:
  # * (required) _token_: String
  def add_token( token )
    @tokens.push( token )
  end

end # class GenericBlock
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
# == DESCRIPTION
|
|
825
|
+
# Bio::Nexus::TaxaBlock represents a taxa nexus block.
|
|
826
|
+
#
|
|
827
|
+
# = Example of Taxa block:
|
|
828
|
+
# Begin Taxa;
|
|
829
|
+
# Dimensions NTax=4;
|
|
830
|
+
# TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
|
|
831
|
+
# End;
|
|
832
|
+
#
|
|
833
|
+
# == USAGE
|
|
834
|
+
#
|
|
835
|
+
# require 'bio/db/nexus'
|
|
836
|
+
#
|
|
837
|
+
# # Create a new parser:
|
|
838
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
|
839
|
+
#
|
|
840
|
+
# # Get first taxa block:
|
|
841
|
+
# taxa_block = nexus.get_taxa_blocks[ 0 ]
|
|
842
|
+
# # Get number of taxa:
|
|
843
|
+
# number_of_taxa = taxa_block.get_number_of_taxa.to_i
|
|
844
|
+
# # Get name of first taxon:
|
|
845
|
+
# first_taxon = taxa_block.get_taxa[ 0 ]
|
|
846
|
+
#
|
|
847
|
+
class TaxaBlock < GenericBlock

  # Creates a new TaxaBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # The NTax entry is only written if Nexus::Util::larger_than_zero
  # accepts the stored number of taxa.
  # ---
  # *Returns*:: String
  def to_nexus
    dimensions = DIMENSIONS.dup
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      # to_s is required here: String#<< given an Integer appends the
      # character with that code point instead of the digits.
      dimensions << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    dimensions << DELIMITER

    labels = TAXLABELS.dup
    labels << " " << Nexus::Util::array_to_string( get_taxa ) << DELIMITER

    Nexus::Util::to_nexus_helper( TAXA_BLOCK, [ dimensions, labels ] )
  end

  # Gets the "number of taxa" property.
  # This is 0 until set, afterwards whatever was passed to
  # set_number_of_taxa (Integer or numeric String).
  #
  # ---
  # *Returns*:: Integer or String
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the taxa of this block.
  # This is the internal Array itself, not a copy.
  #
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Sets the "number of taxa" property. The value is stored as given.
  #
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer (or numeric String)
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Adds a taxon name to this block.
  #
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class TaxaBlock
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
# == DESCRIPTION
|
|
912
|
+
# Bio::Nexus::CharactersBlock represents a characters nexus block.
|
|
913
|
+
#
|
|
914
|
+
# = Example of Characters block:
|
|
915
|
+
# Begin Characters;
|
|
916
|
+
# Dimensions NChar=20
|
|
917
|
+
# NTax=4;
|
|
918
|
+
# Format DataType=DNA
|
|
919
|
+
# Missing=x
|
|
920
|
+
# Gap=- MatchChar=.;
|
|
921
|
+
# Matrix
|
|
922
|
+
# fish ACATA GAGGG TACCT CTAAG
|
|
923
|
+
# frog ACTTA GAGGC TACCT CTAGC
|
|
924
|
+
# snake ACTCA CTGGG TACCT TTGCG
|
|
925
|
+
# mouse ACTCA GACGG TACCT TTGCG;
|
|
926
|
+
# End;
|
|
927
|
+
#
|
|
928
|
+
#
|
|
929
|
+
# == USAGE
|
|
930
|
+
#
|
|
931
|
+
# require 'bio/db/nexus'
|
|
932
|
+
#
|
|
933
|
+
# # Create a new parser:
|
|
934
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
|
935
|
+
#
|
|
936
|
+
#
|
|
937
|
+
# # Get first characters block (same methods as Nexus::DataBlock except
|
|
938
|
+
# # it lacks get_taxa method):
|
|
939
|
+
# characters_block = nexus.get_characters_blocks[ 0 ]
|
|
940
|
+
#
|
|
941
|
+
class CharactersBlock < GenericBlock

  MISSING = "Missing"
  GAP = "Gap"
  MATCHCHAR = "MatchChar"

  # Creates a new CharactersBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @number_of_characters = 0
    @data_type = String.new
    @gap_character = String.new
    @missing = String.new
    @match_character = String.new
    @matrix = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  # Dimensions and Format entries are only written if
  # Nexus::Util::larger_than_zero (numbers) or
  # Nexus::Util::longer_than_zero (Strings) accepts the stored value.
  #
  # ---
  # *Returns*:: String
  def to_nexus
    dimensions = DIMENSIONS.dup
    # to_s is required for the numbers: String#<< given an Integer appends
    # the character with that code point instead of the digits.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      dimensions << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      dimensions << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    dimensions << DELIMITER

    format = FORMAT.dup
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      format << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      format << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      format << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      format << " " << MATCHCHAR << "=" << get_match_character
    end
    format << DELIMITER

    matrix_header = MATRIX.dup
    Nexus::Util::to_nexus_helper( CHARACTERS_BLOCK,
      [ dimensions, format, matrix_header ] + get_matrix.to_nexus_row_array )
  end

  # Gets the "number of taxa" property.
  # This is 0 until set, afterwards whatever was passed to
  # set_number_of_taxa (Integer or numeric String).
  #
  # ---
  # *Returns*:: Integer or String
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  # This is 0 until set, afterwards whatever was passed to
  # set_number_of_characters (Integer or numeric String).
  #
  # ---
  # *Returns*:: Integer or String
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "datatype" property (empty String until set).
  # ---
  # *Returns*:: String
  def get_datatype
    @data_type
  end

  # Gets the "gap character" property (empty String until set).
  # ---
  # *Returns*:: String
  def get_gap_character
    @gap_character
  end

  # Gets the "missing" property (empty String until set).
  # ---
  # *Returns*:: String
  def get_missing
    @missing
  end

  # Gets the "match character" property (empty String until set).
  # ---
  # *Returns*:: String
  def get_match_character
    @match_character
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Returns character data as Bio::Sequence object Array
  # for matrix rows named 'name'. Every sequence gets 'name' as
  # its definition.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Bio::Sequence
  def get_sequences_by_name( name )
    seqs = Array.new
    get_characters_strings_by_name( name ).each do | seq_str |
      seqs.push( create_sequence( seq_str, name ) )
    end
    seqs
  end

  # Returns the characters in the matrix at row 'row' as
  # Bio::Sequence object. Column 0 of the matrix is set as
  # the definition of the Bio::Sequence object.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Bio::Sequence
  def get_sequence( row )
    create_sequence( get_characters_string( row ), get_row_name( row ) )
  end

  # Returns the String in the matrix at row 'row' and column 0,
  # which usually is interpreted as a sequence name (if the matrix
  # contains molecular sequence characters).
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_row_name( row )
    get_matrix.get_name( row )
  end

  # Returns character data as String Array
  # for matrix rows named 'name'.
  #
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Strings
  def get_characters_strings_by_name( name )
    get_matrix.get_row_strings_by_name( name, "" )
  end

  # Returns character data as String
  # for matrix row 'row'.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_characters_string( row )
    get_matrix.get_row_string( row, "" )
  end

  # Sets the "number of taxa" property. The value is stored as given.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer (or numeric String)
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property. The value is stored as given.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer (or numeric String)
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "data type" property.
  # ---
  # *Arguments*:
  # * (required) _data_type_: String
  def set_datatype( data_type )
    @data_type = data_type
  end

  # Sets the "gap character" property.
  # ---
  # *Arguments*:
  # * (required) _gap_character_: String
  def set_gap_character( gap_character )
    @gap_character = gap_character
  end

  # Sets the "missing" property.
  # ---
  # *Arguments*:
  # * (required) _missing_: String
  def set_missing( missing )
    @missing = missing
  end

  # Sets the "match character" property.
  # ---
  # *Arguments*:
  # * (required) _match_character_: String
  def set_match_character( match_character )
    @match_character = match_character
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

  private

  # Creates a Bio::Sequence object with sequence 'seq_str'
  # and definition 'definition'. The sequence is built with
  # Bio::Sequence.auto.
  # ---
  # *Arguments*:
  # * (required) _seq_str_: String
  # * (optional) _definition_: String
  # *Returns*:: Bio::Sequence
  def create_sequence( seq_str, definition = "" )
    seq = Bio::Sequence.auto( seq_str )
    seq.definition = definition
    seq
  end

end # class CharactersBlock
|
|
1182
|
+
|
|
1183
|
+
|
|
1184
|
+
# == DESCRIPTION
|
|
1185
|
+
# Bio::Nexus::DataBlock represents a data nexus block.
|
|
1186
|
+
# A data block is a Bio::Nexus::CharactersBlock with the added
|
|
1187
|
+
# capability to store taxa names.
|
|
1188
|
+
#
|
|
1189
|
+
# = Example of Data block:
|
|
1190
|
+
# Begin Data;
|
|
1191
|
+
# Dimensions ntax=5 nchar=14;
|
|
1192
|
+
# Format Datatype=RNA gap=# MISSING=x MatchChar=^;
|
|
1193
|
+
# TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
|
|
1194
|
+
# Matrix
|
|
1195
|
+
# taxon_1 A- CCGTCGA-GTTA
|
|
1196
|
+
# taxon_2 T- CCG-CGA-GATA
|
|
1197
|
+
# taxon_3 A- C-GTCGA-GATA
|
|
1198
|
+
# taxon_4 A- CCTCGA--GTTA
|
|
1199
|
+
# taxon_5 T- CGGTCGT-CTTA;
|
|
1200
|
+
# End;
|
|
1201
|
+
#
|
|
1202
|
+
#
|
|
1203
|
+
# == USAGE
|
|
1204
|
+
#
|
|
1205
|
+
# require 'bio/db/nexus'
|
|
1206
|
+
#
|
|
1207
|
+
# # Create a new parser:
|
|
1208
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
|
1209
|
+
#
|
|
1210
|
+
#
|
|
1211
|
+
# # Get first data block:
|
|
1212
|
+
# data_block = nexus.get_data_blocks[ 0 ]
|
|
1213
|
+
# # Get first characters name:
|
|
1214
|
+
# seq_name = data_block.get_row_name( 0 )
|
|
1215
|
+
# # Get first characters row named "taxon_2" as Bio::Sequence sequence:
|
|
1216
|
+
# seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
|
|
1217
|
+
# # Get third characters row as Bio::Sequence sequence:
|
|
1218
|
+
# seq_2 = data_block.get_sequence( 2 )
|
|
1219
|
+
# # Get first characters row named "taxon_3" as String:
|
|
1220
|
+
# string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
|
|
1221
|
+
# # Get name of first taxon:
|
|
1222
|
+
# taxon_0 = data_block.get_taxa[ 0 ]
|
|
1223
|
+
# # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
|
|
1224
|
+
# characters_matrix = data_block.get_matrix
|
|
1225
|
+
#
|
|
1226
|
+
class DataBlock < CharactersBlock

  # Creates a new DataBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # Dimensions and Format entries are only written if
  # Nexus::Util::larger_than_zero (numbers) or
  # Nexus::Util::longer_than_zero (Strings) accepts the stored value;
  # the TaxLabels line is always written.
  # ---
  # *Returns*:: String
  def to_nexus
    dimensions = DIMENSIONS.dup
    # to_s is required for the numbers: String#<< given an Integer appends
    # the character with that code point instead of the digits.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      dimensions << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      dimensions << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    dimensions << DELIMITER

    format = FORMAT.dup
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      format << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      format << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      format << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      format << " " << MATCHCHAR << "=" << get_match_character
    end
    format << DELIMITER

    labels = TAXLABELS.dup
    labels << " " << Nexus::Util::array_to_string( get_taxa )
    labels << DELIMITER

    matrix_header = MATRIX.dup
    Nexus::Util::to_nexus_helper( DATA_BLOCK,
      [ dimensions, format, labels, matrix_header ] + get_matrix.to_nexus_row_array )
  end

  # Gets the taxa of this block.
  # This is the internal Array itself, not a copy.
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Adds a taxon name to this block.
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class DataBlock
|
|
1293
|
+
|
|
1294
|
+
|
|
1295
|
+
# == DESCRIPTION
|
|
1296
|
+
# Bio::Nexus::DistancesBlock represents a distances nexus block.
|
|
1297
|
+
#
|
|
1298
|
+
# = Example of Distances block:
|
|
1299
|
+
# Begin Distances;
|
|
1300
|
+
# Dimensions nchar=20 ntax=5;
|
|
1301
|
+
# Format Triangle=Upper;
|
|
1302
|
+
# Matrix
|
|
1303
|
+
# taxon_1 0.0 1.0 2.0 4.0 7.0
|
|
1304
|
+
# taxon_2 1.0 0.0 3.0 5.0 8.0
|
|
1305
|
+
# taxon_3 3.0 4.0 0.0 6.0 9.0
|
|
1306
|
+
# taxon_4 7.0 3.0 1.0 0.0 9.5
|
|
1307
|
+
# taxon_5 1.2 1.3 1.4 1.5 0.0;
|
|
1308
|
+
# End;
|
|
1309
|
+
#
|
|
1310
|
+
#
|
|
1311
|
+
# == USAGE
|
|
1312
|
+
#
|
|
1313
|
+
# require 'bio/db/nexus'
|
|
1314
|
+
#
|
|
1315
|
+
# # Create a new parser:
|
|
1316
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
|
1317
|
+
#
|
|
1318
|
+
# # Get distances block(s):
|
|
1319
|
+
# distances_blocks = nexus.get_distances_blocks
|
|
1320
|
+
# # Get matrix as Bio::Nexus::NexusMatrix object:
|
|
1321
|
+
# matrix = distances_blocks[ 0 ].get_matrix
|
|
1322
|
+
# # Get value (column 0 are names):
|
|
1323
|
+
# val = matrix.get_value( 1, 5 )
|
|
1324
|
+
#
|
|
1325
|
+
class DistancesBlock < GenericBlock
  TRIANGLE = "Triangle"

  # Creates a new DistancesBlock object named 'name'.
  # The counts start at 0, the triangle property as an empty String
  # and the matrix as an empty NexusMatrix.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @number_of_characters = 0
    @triangle = String.new
    @matrix = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  # The output consists of a dimensions line, a format line and the
  # matrix header, followed by the matrix rows (values separated by
  # a single space). Unset properties (zero counts, empty triangle)
  # are omitted from the dimensions and format lines.
  # ---
  # *Returns*:: String
  def to_nexus
    dimensions_line = String.new
    dimensions_line << DIMENSIONS
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      # to_s is required: the counts are Integers by default and
      # String#<< would treat an Integer as a codepoint.
      dimensions_line << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      dimensions_line << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    dimensions_line << DELIMITER

    format_line = String.new
    format_line << FORMAT
    if ( Nexus::Util::longer_than_zero( get_triangle ) )
      format_line << " " << TRIANGLE << "=" << get_triangle
    end
    format_line << DELIMITER

    matrix_line = String.new
    matrix_line << MATRIX

    header_lines = [ dimensions_line, format_line, matrix_line ]
    Nexus::Util::to_nexus_helper( DISTANCES_BLOCK,
                                  header_lines + get_matrix.to_nexus_row_array( " " ) )
  end

  # Gets the "number of taxa" property.
  # ---
  # *Returns*:: Integer
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  # ---
  # *Returns*:: Integer
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "triangle" property.
  # ---
  # *Returns*:: String
  def get_triangle
    @triangle
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Sets the "number of taxa" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "triangle" property.
  # ---
  # *Arguments*:
  # * (required) _triangle_: String
  def set_triangle( triangle )
    @triangle = triangle
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

end # class DistancesBlock
|
|
1428
|
+
|
|
1429
|
+
|
|
1430
|
+
# == DESCRIPTION
|
|
1431
|
+
# Bio::Nexus::TreesBlock represents a trees nexus block.
|
|
1432
|
+
#
|
|
1433
|
+
# = Example of Trees block:
|
|
1434
|
+
# Begin Trees;
|
|
1435
|
+
# Tree best=(fish,(frog,(snake, mouse)));
|
|
1436
|
+
# Tree other=(snake,(frog,( fish, mouse)));
|
|
1437
|
+
# End;
|
|
1438
|
+
#
|
|
1439
|
+
#
|
|
1440
|
+
# == USAGE
|
|
1441
|
+
#
|
|
1442
|
+
# require 'bio/db/nexus'
|
|
1443
|
+
#
|
|
1444
|
+
# # Create a new parser:
|
|
1445
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
|
1446
|
+
#
|
|
1447
|
+
# # Get trees block(s):
|
|
1448
|
+
# trees_block = nexus.get_trees_blocks[ 0 ]
|
|
1449
|
+
# # Get first tree named "best" as String:
|
|
1450
|
+
# string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
|
|
1451
|
+
# # Get first tree named "best" as Bio::Db::Newick object:
|
|
1452
|
+
# tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
|
|
1453
|
+
# # Get first tree as Bio::Db::Newick object:
|
|
1454
|
+
# tree_first = trees_block.get_tree( 0 )
|
|
1455
|
+
#
|
|
1456
|
+
class TreesBlock < GenericBlock
  TREE = "Tree"

  # Creates a new TreesBlock object named 'name' holding no trees.
  # Tree strings and tree names are kept in two parallel Arrays;
  # entry i of one belongs to entry i of the other.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @trees = Array.new
    @tree_names = Array.new
  end

  # Returns a String describing this block as nexus formatted data,
  # one "Tree <name>=<tree string>" line per stored tree.
  # ---
  # *Returns*:: String
  def to_nexus
    tree_lines = Array.new
    @trees.each_with_index do | tree_str, idx |
      tree_lines.push( TREE + " " + @tree_names[ idx ] + "=" + tree_str )
    end
    Nexus::Util::to_nexus_helper( TREES_BLOCK, tree_lines )
  end

  # Returns the stored tree strings, in the order they were added.
  # ---
  # *Returns*:: Array
  def get_tree_strings
    @trees
  end

  # Returns the stored tree names, in the order they were added.
  # ---
  # *Returns*:: Array
  def get_tree_names
    @tree_names
  end

  # Returns the tree strings of all trees whose name equals 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array
  def get_tree_strings_by_name( name )
    matches = Array.new
    @tree_names.each_with_index do | tree_name, idx |
      matches.push( @trees[ idx ] ) if tree_name == name
    end
    matches
  end

  # Parses tree string i (same order as in nexus data) with
  # Bio::Newick and returns the result of its 'tree' method.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: parsed tree object as returned by Bio::Newick#tree
  def get_tree( i )
    Bio::Newick.new( @trees[ i ] ).tree
  end

  # Returns the parsed trees (see get_tree) of all trees whose
  # name equals 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of parsed tree objects
  def get_trees_by_name( name )
    matches = Array.new
    @tree_names.each_with_index do | tree_name, idx |
      matches.push( get_tree( idx ) ) if tree_name == name
    end
    matches
  end

  # Adds a tree name to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_name_: String
  def add_tree_name( tree_name )
    @tree_names << tree_name
  end

  # Adds a tree to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_as_string_: String
  def add_tree( tree_as_string )
    @trees << tree_as_string
  end

end # class TreesBlock
|
|
1554
|
+
|
|
1555
|
+
|
|
1556
|
+
# == DESCRIPTION
|
|
1557
|
+
# Bio::Nexus::NexusMatrix represents a characters or distance matrix,
|
|
1558
|
+
# where the names are stored in column zero.
|
|
1559
|
+
#
|
|
1560
|
+
#
|
|
1561
|
+
# == USAGE
|
|
1562
|
+
#
|
|
1563
|
+
# require 'bio/db/nexus'
|
|
1564
|
+
#
|
|
1565
|
+
# # Create a new parser:
|
|
1566
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
|
1567
|
+
# # Get distances block(s):
|
|
1568
|
+
# distances_block = nexus.get_distances_blocks[ 0 ]
|
|
1569
|
+
# # Get matrix as Bio::Nexus::NexusMatrix object:
|
|
1570
|
+
# matrix = distances_block.get_matrix
|
|
1571
|
+
# # Get value (column 0 are names):
|
|
1572
|
+
# val = matrix.get_value( 1, 5 )
|
|
1573
|
+
# # Return first row as String (all columns except column 0),
|
|
1574
|
+
# # values are separated by "_":
|
|
1575
|
+
# row_str_0 = matrix.get_row_string( 0, "_" )
|
|
1576
|
+
# # Return all rows named "ciona" as String (all columns except column 0),
|
|
1577
|
+
# # values are separated by "+":
|
|
1578
|
+
# ciona_rows = matrix.get_row_strings_by_name( "ciona", "+" )
|
|
1579
|
+
class NexusMatrix

  # Nexus matrix error class.
  class NexusMatrixError < RuntimeError; end

  # Creates new, empty NexusMatrix.
  def initialize()
    # Sparse storage: row index => ( column index => value ).
    @rows = Hash.new
    # Largest row/column index set so far; -1 means nothing was set.
    @max_row = -1
    @max_col = -1
  end

  # Sets the value at row 'row' and column 'col' to 'value'.
  # Raises NexusMatrixError if 'row' or 'col' is negative.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # * (required) _value_: Object
  def set_value( row, col, value )
    if ( ( row < 0 ) || ( col < 0 ) )
      raise( NexusMatrixError, "attempt to use negative values for row or column" )
    end
    set_max_row( row ) if row > get_max_row
    set_max_col( col ) if col > get_max_col
    @rows[ row ] = Hash.new unless @rows.has_key?( row )
    @rows[ row ][ col ] = value
  end

  # Returns the value at row 'row' and column 'col', or nil if no
  # value was set at that position.
  # Raises NexusMatrixError if 'row' or 'col' is negative or larger
  # than the maximal row or column number.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # *Returns*:: Object
  def get_value( row, col )
    if ( ( row > get_max_row ) || ( row < 0 ) )
      raise( NexusMatrixError, "value for row (" + row.to_s +
             ") is out of range [max row: " + get_max_row.to_s + "]" )
    elsif ( ( col > get_max_col ) || ( col < 0 ) )
      raise( NexusMatrixError, "value for column (" + col.to_s +
             ") is out of range [max column: " + get_max_col.to_s + "]" )
    end
    row_map = @rows[ row ]
    return nil if row_map.nil? || row_map.length < 1
    row_map[ col ]
  end

  # Returns the maximal columns number.
  # ---
  # *Returns*:: Integer
  def get_max_col
    @max_col
  end

  # Returns the maximal row number.
  # ---
  # *Returns*:: Integer
  def get_max_row
    @max_row
  end

  # Returns true if matrix is empty.
  #
  # ---
  # *Returns*:: true or false
  def is_empty?
    get_max_col < 0 || get_max_row < 0
  end

  # Convenience method which returns the value of
  # column 0 and row 'row' which is usually the name.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_name( row )
    get_value( row, 0 ).to_s
  end

  # Returns the values of columns 1 to maximal column number
  # in row 'row' concatenated as string. Each value is converted with
  # to_s (unset positions contribute an empty string) and is followed
  # by 'spacer', so the result ends with a trailing 'spacer'.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (optional) _spacer_: String
  # *Returns*:: String
  def get_row_string( row, spacer = "" )
    row_str = String.new
    return row_str if is_empty?
    ( 1 .. get_max_col ).each do | col |
      row_str << get_value( row, col ).to_s << spacer.to_s
    end
    row_str
  end

  # Returns all rows as Array of Strings separated by 'spacer'
  # for which column 0 is 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # * (optional) _spacer_: String
  # *Returns*:: Array
  def get_row_strings_by_name( name, spacer = "" )
    row_strs = Array.new
    return row_strs if is_empty?
    ( 0 .. get_max_row ).each do | row |
      row_strs.push( get_row_string( row, spacer ) ) if get_value( row, 0 ) == name
    end
    row_strs
  end

  # Returns matrix as String, returns "empty" if empty.
  # Values are separated by a single space, one row per line.
  # ---
  # *Returns*:: String
  def to_s
    return "empty" if is_empty?
    str = String.new
    to_nexus_row_array( " ", false ).each do | row |
      str << row << END_OF_LINE
    end
    str
  end
  alias to_str to_s

  # Helper method to produce nexus formatted data.
  # Each row becomes one String: the name (column 0) left-justified
  # to a common width, followed by the remaining values separated by
  # 'spacer'. If 'append_delimiter' is true, the delimiter is appended
  # to the last row.
  # ---
  # *Arguments*:
  # * (optional) _spacer_: String
  # * (optional) _append_delimiter_: true or false
  # *Returns*:: Array
  def to_nexus_row_array( spacer = "", append_delimiter = true )
    ary = Array.new
    return ary if is_empty?
    # Name column is at least 10 wide, or as wide as the longest name.
    max_length = 10
    ( 0 .. get_max_row ).each do | row |
      name_length = get_name( row ).length
      max_length = name_length if name_length > max_length
    end
    ( 0 .. get_max_row ).each do | row |
      row_str = String.new
      row_str << get_name( row ).ljust( max_length + 1 ) << " "
      row_str << get_row_string( row, spacer )
      # get_row_string leaves a trailing spacer; strip it.
      row_str.chomp!( spacer ) if spacer != nil && spacer.length > 0
      row_str << DELIMITER if append_delimiter && row == get_max_row
      ary.push( row_str )
    end
    ary
  end


  private

  # Returns row data as Hash (column number => value),
  # or nil if nothing was set in this row.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Hash
  def get_row( row )
    @rows[ row ]
  end

  # Sets maximal column number.
  # ---
  # *Arguments*:
  # * (required) _max_col_: Integer
  def set_max_col( max_col )
    @max_col = max_col
  end

  # Sets maximal row number.
  # ---
  # *Arguments*:
  # * (required) _max_row_: Integer
  def set_max_row( max_row )
    @max_row = max_row
  end

end # NexusMatrix
|
|
1790
|
+
|
|
1791
|
+
# End of classes to represent nexus data.
|
|
1792
|
+
|
|
1793
|
+
# = DESCRIPTION
|
|
1794
|
+
# Bio::Nexus::Util is a class containing static helper methods
|
|
1795
|
+
#
|
|
1796
|
+
class Util

  # Helper method to produce nexus formatted data.
  # Emits a begin line for 'block', then every non-nil entry of
  # 'lines' indented on its own line, then the end-of-block line.
  # ---
  # *Arguments*:
  # * (required) _block_: String (callers in this file pass block name
  #   constants such as DATA_BLOCK)
  # * (required) _lines_: Array of Strings (nil entries are skipped)
  # *Returns*:: String
  def self.to_nexus_helper( block, lines )
    str = String.new
    str << BEGIN_BLOCK << " " << block << END_OF_LINE
    lines.each do | line |
      next if line.nil?
      str << INDENTENTION << line << END_OF_LINE
    end
    str << END_BLOCK << END_OF_LINE
    str
  end

  # Returns the elements of array 'ary' as one String,
  # separated by " ". Elements are converted with to_s.
  # ---
  # *Arguments*:
  # * (required) _ary_: Array
  # *Returns*:: String
  def self.array_to_string( ary )
    ary.join( " " )
  end

  # Returns true if Integer i is not nil and larger than 0.
  # The argument is converted with to_i before the comparison.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: true or false
  def self.larger_than_zero( i )
    !i.nil? && i.to_i > 0
  end

  # Returns true if String str is not nil and longer than 0.
  # ---
  # *Arguments*:
  # * (required) _str_: String
  # *Returns*:: true or false
  def self.longer_than_zero( str )
    !str.nil? && str.length > 0
  end

end # class Util
|
|
1849
|
+
|
|
1850
|
+
end # class Nexus
|
|
1851
|
+
|
|
1852
|
+
end #module Bio
|
|
1853
|
+
|
|
1854
|
+
|