bio 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/lib/bio/db/nexus.rb
ADDED
@@ -0,0 +1,1854 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/nexus.rb - Nexus Standard phylogenetic tree parser / formatter
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2006 Christian M Zmasek <cmzmasek@yahoo.com>
|
5
|
+
#
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id: nexus.rb,v 1.3 2007/04/05 23:35:40 trevor Exp $
|
9
|
+
#
|
10
|
+
# == Description
|
11
|
+
#
|
12
|
+
# This file contains classes that implement a parser for NEXUS formatted
|
13
|
+
# data as well as objects to store, access, and write the parsed data.
|
14
|
+
#
|
15
|
+
# The following five blocks:
|
16
|
+
# taxa, characters, distances, trees, data
|
17
|
+
# are recognizable and parsable.
|
18
|
+
#
|
19
|
+
# The parser can deal with (nested) comments (indicated by square brackets),
|
20
|
+
# unless the comments are inside a command or data item (e.g.
|
21
|
+
# "Dim[comment]ensions" or inside a matrix).
|
22
|
+
#
|
23
|
+
# Single or double quoted TaxLabels are processed as follows (by way
|
24
|
+
# of example): "mus musculus" -> mus_musculus
|
25
|
+
#
|
26
|
+
#
|
27
|
+
# == USAGE
|
28
|
+
#
|
29
|
+
# require 'bio/db/nexus'
|
30
|
+
#
|
31
|
+
# # Create a new parser:
|
32
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
33
|
+
#
|
34
|
+
# # Get first taxa block:
|
35
|
+
# taxa_block = nexus.get_taxa_blocks[ 0 ]
|
36
|
+
# # Get number of taxa:
|
37
|
+
# number_of_taxa = taxa_block.get_number_of_taxa.to_i
|
38
|
+
# # Get name of first taxon:
|
39
|
+
# first_taxon = taxa_block.get_taxa[ 0 ]
|
40
|
+
#
|
41
|
+
# # Get first data block:
|
42
|
+
# data_block = nexus.get_data_blocks[ 0 ]
|
43
|
+
# # Get first characters name:
|
44
|
+
# seq_name = data_block.get_row_name( 0 )
|
45
|
+
# # Get first characters row named "taxon_2" as Bio::Sequence sequence:
|
46
|
+
# seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
|
47
|
+
# # Get third characters row as Bio::Sequence sequence:
|
48
|
+
# seq_2 = data_block.get_sequence( 2 )
|
49
|
+
# # Get first characters row named "taxon_3" as String:
|
50
|
+
# string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
|
51
|
+
# # Get name of first taxon:
|
52
|
+
# taxon_0 = data_block.get_taxa[ 0 ]
|
53
|
+
# # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
|
54
|
+
# characters_matrix = data_block.get_matrix
|
55
|
+
#
|
56
|
+
# # Get first characters block (same methods as Nexus::DataBlock except
|
57
|
+
# # it lacks get_taxa method):
|
58
|
+
# characters_block = nexus.get_characters_blocks[ 0 ]
|
59
|
+
#
|
60
|
+
# # Get trees block(s):
|
61
|
+
# trees_block = nexus.get_trees_blocks[ 0 ]
|
62
|
+
# # Get first tree named "best" as String:
|
63
|
+
# string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
|
64
|
+
# # Get first tree named "best" as Bio::Db::Newick object:
|
65
|
+
# tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
|
66
|
+
# # Get first tree as Bio::Db::Newick object:
|
67
|
+
# tree_first = trees_block.get_tree( 0 )
|
68
|
+
#
|
69
|
+
# # Get distances block(s):
|
70
|
+
# distances_blocks = nexus.get_distances_blocks
|
71
|
+
# # Get matrix as Bio::Nexus::NexusMatrix object:
|
72
|
+
# matrix = distances_blocks[ 0 ].get_matrix
|
73
|
+
# # Get value (column 0 are names):
|
74
|
+
# val = matrix.get_value( 1, 5 )
|
75
|
+
#
|
76
|
+
# # Get blocks for which no class exists (private blocks):
|
77
|
+
# private_blocks = nexus.get_blocks_by_name( "my_block" )
|
78
|
+
# # Get first block named "my_block":
|
79
|
+
# my_block_0 = private_blocks[ 0 ]
|
80
|
+
# # Get first token in first block named "my_block":
|
81
|
+
# first_token = my_block_0.get_tokens[ 0 ]
|
82
|
+
#
|
83
|
+
#
|
84
|
+
# == References
|
85
|
+
#
|
86
|
+
# * Maddison DR, Swofford DL, Maddison WP (1997). NEXUS: an extensible file
|
87
|
+
# format for systematic information.
|
88
|
+
# Syst Biol. 1997 46(4):590-621.
|
89
|
+
#
|
90
|
+
|
91
|
+
require 'bio/sequence'
|
92
|
+
require 'bio/tree'
|
93
|
+
require 'bio/db/newick'
|
94
|
+
|
95
|
+
module Bio
|
96
|
+
|
97
|
+
# == DESCRIPTION
|
98
|
+
# Bio::Nexus is a parser for nexus formatted data.
|
99
|
+
# It contains classes and constants enabling the representation and
|
100
|
+
# processing of nexus data.
|
101
|
+
#
|
102
|
+
# == USAGE
|
103
|
+
#
|
104
|
+
# # Parsing a nexus formatted string str:
|
105
|
+
# nexus = Bio::Nexus.new( nexus_str )
|
106
|
+
#
|
107
|
+
# # Obtaining of the nexus blocks as array of GenericBlock or
|
108
|
+
# # any of its subclasses (such as DistancesBlock):
|
109
|
+
# blocks = nexus.get_blocks
|
110
|
+
#
|
111
|
+
# # Getting a block by name:
|
112
|
+
# my_blocks = nexus.get_blocks_by_name( "my_block" )
|
113
|
+
#
|
114
|
+
# # Getting distance blocks:
|
115
|
+
# distances_blocks = nexus.get_distances_blocks
|
116
|
+
#
|
117
|
+
# # Getting trees blocks:
|
118
|
+
# trees_blocks = nexus.get_trees_blocks
|
119
|
+
#
|
120
|
+
# # Getting data blocks:
|
121
|
+
# data_blocks = nexus.get_data_blocks
|
122
|
+
#
|
123
|
+
# # Getting characters blocks:
|
124
|
+
# character_blocks = nexus.get_characters_blocks
|
125
|
+
#
|
126
|
+
# # Getting taxa blocks:
|
127
|
+
# taxa_blocks = nexus.get_taxa_blocks
|
128
|
+
#
|
129
|
+
class Nexus
|
130
|
+
|
131
|
+
|
132
|
+
END_OF_LINE = "\n"
|
133
|
+
INDENTENTION = " "
|
134
|
+
DOUBLE_QUOTE = "\""
|
135
|
+
SINGLE_QUOTE = "'"
|
136
|
+
|
137
|
+
|
138
|
+
BEGIN_NEXUS = "#NEXUS"
|
139
|
+
DELIMITER = ";"
|
140
|
+
BEGIN_BLOCK = "Begin"
|
141
|
+
END_BLOCK = "End" + DELIMITER
|
142
|
+
BEGIN_COMMENT = "["
|
143
|
+
END_COMMENT = "]"
|
144
|
+
|
145
|
+
|
146
|
+
TAXA = "Taxa"
|
147
|
+
CHARACTERS = "Characters"
|
148
|
+
DATA = "Data"
|
149
|
+
DISTANCES = "Distances"
|
150
|
+
TREES = "Trees"
|
151
|
+
TAXA_BLOCK = TAXA + DELIMITER
|
152
|
+
CHARACTERS_BLOCK = CHARACTERS + DELIMITER
|
153
|
+
DATA_BLOCK = DATA + DELIMITER
|
154
|
+
DISTANCES_BLOCK = DISTANCES + DELIMITER
|
155
|
+
TREES_BLOCK = TREES + DELIMITER
|
156
|
+
|
157
|
+
|
158
|
+
DIMENSIONS = "Dimensions"
|
159
|
+
FORMAT = "Format"
|
160
|
+
NTAX = "NTax"
|
161
|
+
NCHAR = "NChar"
|
162
|
+
DATATYPE = "DataType"
|
163
|
+
TAXLABELS = "TaxLabels"
|
164
|
+
MATRIX = "Matrix"
|
165
|
+
# End of constants.
|
166
|
+
|
167
|
+
|
168
|
+
# Nexus parse error class,
|
169
|
+
# indicates error during parsing of nexus formatted data.
|
170
|
+
class NexusParseError < RuntimeError
  # Raised when nexus formatted data cannot be parsed; adds no behaviour
  # of its own beyond RuntimeError.
end
|
171
|
+
|
172
|
+
# Creates a new nexus parser for 'nexus_str'.
|
173
|
+
#
|
174
|
+
# ---
|
175
|
+
# *Arguments*:
|
176
|
+
# * (required) _nexus_str_: String - nexus formatted data
|
177
|
+
def initialize(nexus_str)
  # Blocks collected by the parser, in order of appearance.
  @blocks = []
  # Parser state: the command / sub-command currently being read and the
  # block the parser is inside (all nil while outside any block).
  @current_cmd = @current_subcmd = nil
  @current_block_name = @current_block = nil
  # Parsing happens eagerly; the result is available via get_blocks.
  parse(nexus_str)
end
|
185
|
+
|
186
|
+
|
187
|
+
# Returns an Array of all blocks found in the String 'nexus_str'
|
188
|
+
# set via Bio::Nexus.new( nexus_str ).
|
189
|
+
#
|
190
|
+
# ---
|
191
|
+
# *Returns*:: Array of GenericBlocks or any of its subclasses
|
192
|
+
def get_blocks
  # Returns the internal Array itself (not a copy).
  @blocks
end
|
195
|
+
|
196
|
+
# A convenience method which returns an array of
|
197
|
+
# all nexus blocks for which the name equals 'name' found
|
198
|
+
# in the String 'nexus_str' set via Bio::Nexus.new( nexus_str ).
|
199
|
+
#
|
200
|
+
# ---
|
201
|
+
# *Arguments*:
|
202
|
+
# * (required) _name_: String
|
203
|
+
# *Returns*:: Array of GenericBlocks or any of its subclasses
|
204
|
+
def get_blocks_by_name(name)
  # Always returns a new Array (empty when nothing matches); the blocks
  # keep their original order. The comparison is an exact match against
  # each block's get_name.
  @blocks.select { |block| name == block.get_name }
end
|
213
|
+
|
214
|
+
# A convenience method which returns an array of
|
215
|
+
# all data blocks.
|
216
|
+
#
|
217
|
+
# ---
|
218
|
+
# *Returns*:: Array of DataBlocks
|
219
|
+
def get_data_blocks
  # Blocks are looked up under their lower-cased name without the
  # trailing delimiter, i.e. "data".
  get_blocks_by_name(DATA.downcase)
end
|
222
|
+
|
223
|
+
# A convenience method which returns an array of
|
224
|
+
# all characters blocks.
|
225
|
+
#
|
226
|
+
# ---
|
227
|
+
# *Returns*:: Array of CharactersBlocks
|
228
|
+
def get_characters_blocks
  # Blocks are looked up under their lower-cased name without the
  # trailing delimiter, i.e. "characters".
  get_blocks_by_name(CHARACTERS.downcase)
end
|
231
|
+
|
232
|
+
# A convenience method which returns an array of
|
233
|
+
# all trees blocks.
|
234
|
+
#
|
235
|
+
# ---
|
236
|
+
# *Returns*:: Array of TreesBlocks
|
237
|
+
def get_trees_blocks
  # Blocks are looked up under their lower-cased name without the
  # trailing delimiter, i.e. "trees".
  get_blocks_by_name(TREES.downcase)
end
|
240
|
+
|
241
|
+
# A convenience method which returns an array of
|
242
|
+
# all distances blocks.
|
243
|
+
#
|
244
|
+
# ---
|
245
|
+
# *Returns*:: Array of DistancesBlock
|
246
|
+
def get_distances_blocks
  # Blocks are looked up under their lower-cased name without the
  # trailing delimiter, i.e. "distances".
  get_blocks_by_name(DISTANCES.downcase)
end
|
249
|
+
|
250
|
+
# A convenience method which returns an array of
|
251
|
+
# all taxa blocks.
|
252
|
+
#
|
253
|
+
# ---
|
254
|
+
# *Returns*:: Array of TaxaBlocks
|
255
|
+
def get_taxa_blocks
  # Blocks are looked up under their lower-cased name without the
  # trailing delimiter, i.e. "taxa".
  get_blocks_by_name(TAXA.downcase)
end
|
258
|
+
|
259
|
+
# Returns a String listing how many of each blocks it parsed.
|
260
|
+
#
|
261
|
+
# ---
|
262
|
+
# *Returns*:: String
|
263
|
+
def to_s
  # Builds "empty" when nothing was parsed, otherwise
  # "number of blocks: N" followed by one " [<kind> blocks: n] " entry for
  # every known block kind that occurs at least once (fixed order:
  # characters, data, distances, taxa, trees).
  summary = String.new
  total = get_blocks.length
  if total < 1
    summary << "empty"
    return summary
  end

  summary << "number of blocks: " << total.to_s
  # Each lookup is performed once and reused for both the test and the count.
  [
    [ "characters", get_characters_blocks ],
    [ "data",       get_data_blocks ],
    [ "distances",  get_distances_blocks ],
    [ "taxa",       get_taxa_blocks ],
    [ "trees",      get_trees_blocks ]
  ].each do |label, found|
    next unless found.length > 0
    summary << " [" << label << " blocks: " << found.length.to_s << "] "
  end
  summary
end
|
287
|
+
alias to_str to_s
|
288
|
+
|
289
|
+
private
|
290
|
+
|
291
|
+
# The master method for parsing.
|
292
|
+
# Stores the resulting block in array @blocks.
|
293
|
+
#
|
294
|
+
# ---
|
295
|
+
# *Arguments*:
|
296
|
+
# * (required) _str_: String - the String to be parsed
|
297
|
+
def parse( str )
  # Tokenize on whitespace, "+" and "=", dropping empty pieces. The input
  # is deliberately NOT stripped of a final ";" first: doing so turned a
  # closing "End;" at the very end of the input into "End", which was then
  # not recognised as the end of the block.
  ary = str.split( /[\s+=]/ ).map { |piece| piece.strip }
  ary = ary.reject { |piece| piece.empty? }
  comment_level = 0

  # Main loop
  while token = ary.shift
    # Quotes: re-join the words of a quoted label with "_". This is only
    # done outside comments so that an apostrophe inside a comment cannot
    # swallow the tokens that follow it.
    if comment_level == 0 &&
       ( token.index( SINGLE_QUOTE ) == 0 || token.index( DOUBLE_QUOTE ) == 0 )
      token = unquote_label( token, ary )
    end

    # Comments: square brackets may nest and may span several tokens.
    opened = token.count( BEGIN_COMMENT )
    closed = token.count( END_COMMENT )
    was_in_comment = comment_level > 0
    comment_level = comment_level + opened - closed

    if ( opened > 0 && opened == closed ) || comment_level > 0 || was_in_comment
      # Token is (part of) a comment.
      next
    elsif equal?( token, END_BLOCK )
      end_block()
    elsif equal?( token, BEGIN_BLOCK )
      begin_block()
      block_name = ary.shift
      if block_name.nil?
        raise NexusParseError, "Block name missing after \"#{BEGIN_BLOCK}\""
      end
      # The name keeps its trailing delimiter here (e.g. "taxa;").
      @current_block_name = block_name.downcase
      @current_block = create_block()
      @blocks.push( @current_block )
    elsif @current_block_name != nil
      process_token( token.chomp( DELIMITER ), ary )
    end
  end # main loop
  @blocks.compact!
end # parse

# Completes a quoted label whose first word is 'token' by pulling further
# words from 'ary' until the closing quote is seen (or 'ary' runs out),
# joining them with "_". A trailing delimiter and the surrounding quotes
# are removed, e.g. 'african, frog'; -> african_frog.
#
# ---
# *Arguments*:
# * (required) _token_: String - first word, starting with a quote
# * (required) _ary_: Array - remaining tokens (consumed as needed)
# *Returns*:: String - the label without quotes
def unquote_label( token, ary )
  quote = token[ 0, 1 ]
  until quoted_label_closed?( token, quote ) || ary.empty?
    token << "_" << ary.shift
  end
  token = token.chomp( DELIMITER )
  inner_length = token.length - 2
  inner_length > 0 ? token.slice( 1, inner_length ) : ""
end

# True if 'token' (ignoring one trailing delimiter) ends with 'quote' and
# is longer than the opening quote alone.
def quoted_label_closed?( token, quote )
  body = token.chomp( DELIMITER )
  body.length > 1 && body[ -1, 1 ] == quote
end
|
337
|
+
|
338
|
+
# Operations required when the beginning of a block is encountered.
|
339
|
+
#
|
340
|
+
# ---
|
341
|
+
def begin_block
  # A block may only start while no other block is open.
  unless @current_block_name.nil?
    raise NexusParseError, "Cannot have nested nexus blocks (\"end;\" might be missing)"
  end
  # Commands never carry over from one block to the next.
  reset_command_state
end
|
347
|
+
|
348
|
+
# Operations required when ending of block encountered.
|
349
|
+
#
|
350
|
+
# ---
|
351
|
+
def end_block
  # An "end;" is only valid while a block is open.
  if @current_block_name.nil?
    raise NexusParseError, "Cannot have two or more \"end;\" tokens in sequence"
  end
  # Marks the parser as being outside any block.
  @current_block_name = nil
end
|
357
|
+
|
358
|
+
# This calls various process_token_for_<name>_block methods
|
359
|
+
# depending on state of @current_block_name.
|
360
|
+
#
|
361
|
+
# ---
|
362
|
+
# *Arguments*:
|
363
|
+
# * (required) _token_: String
|
364
|
+
# * (required) _ary_: Array
|
365
|
+
def process_token( token, ary )
  # @current_block_name is lower case and still carries the delimiter
  # (e.g. "taxa;"), hence the comparison against the *_BLOCK constants.
  # Taxa and generic blocks only need the token itself; the other handlers
  # also receive the remaining tokens in 'ary'.
  case @current_block_name
  when TAXA_BLOCK.downcase       then process_token_for_taxa_block( token )
  when CHARACTERS_BLOCK.downcase then process_token_for_character_block( token, ary )
  when DATA_BLOCK.downcase       then process_token_for_data_block( token, ary )
  when DISTANCES_BLOCK.downcase  then process_token_for_distances_block( token, ary )
  when TREES_BLOCK.downcase      then process_token_for_trees_block( token, ary )
  else                                process_token_for_generic_block( token )
  end
end
|
381
|
+
|
382
|
+
# Resets @current_cmd and @current_subcmd to nil.
|
383
|
+
#
|
384
|
+
# ---
|
385
|
+
def reset_command_state
  # Forget the command and sub-command that were being read.
  @current_cmd = @current_subcmd = nil
end
|
389
|
+
|
390
|
+
# Creates GenericBlock (or any of its subclasses) the type of
|
391
|
+
# which is determined by the state of @current_block_name.
|
392
|
+
#
|
393
|
+
# ---
|
394
|
+
# *Returns*:: GenericBlock (or any of its subclasses) object
|
395
|
+
def create_block
  # Pick the block class matching the (lower-cased) current block name;
  # unknown names fall back to GenericBlock.
  block_class =
    case @current_block_name
    when TAXA_BLOCK.downcase       then Bio::Nexus::TaxaBlock
    when CHARACTERS_BLOCK.downcase then Bio::Nexus::CharactersBlock
    when DATA_BLOCK.downcase       then Bio::Nexus::DataBlock
    when DISTANCES_BLOCK.downcase  then Bio::Nexus::DistancesBlock
    when TREES_BLOCK.downcase      then Bio::Nexus::TreesBlock
    else                                Bio::Nexus::GenericBlock
    end
  # Every block class is constructed from the block name alone.
  block_class.new( @current_block_name )
end
|
411
|
+
|
412
|
+
# This processes the tokens (between Begin Taxa; and End;) for a taxa block
|
413
|
+
# Example of a currently parseable taxa block:
|
414
|
+
# Begin Taxa;
|
415
|
+
# Dimensions NTax=4;
|
416
|
+
# TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
|
417
|
+
# End;
|
418
|
+
#
|
419
|
+
# ---
|
420
|
+
# *Arguments*:
|
421
|
+
# * (required) _token_: String
|
422
|
+
def process_token_for_taxa_block( token )
  # A command keyword (compared ignoring case) starts a new command
  # and clears any subcommand.
  command = [ DIMENSIONS, TAXLABELS ].find { |keyword| equal?( token, keyword ) }

  if command
    @current_cmd    = command
    @current_subcmd = nil
  elsif @current_cmd == DIMENSIONS && equal?( token, NTAX )
    # "NTax" is only a subcommand while inside "Dimensions".
    @current_subcmd = NTAX
  elsif cmds_equal_to?( DIMENSIONS, NTAX )
    # The token following "Dimensions NTax" is the taxa count.
    @current_block.set_number_of_taxa( token )
  elsif cmds_equal_to?( TAXLABELS, nil )
    # Every token following "TaxLabels" is a taxon name.
    @current_block.add_taxon( token )
  end
end
|
437
|
+
|
438
|
+
# This processes the tokens (between Begin Characters; and End;) for a character
|
439
|
+
# block
|
440
|
+
# Example of a currently parseable character block:
|
441
|
+
# Begin Characters;
|
442
|
+
# Dimensions NChar=20
|
443
|
+
# NTax=4;
|
444
|
+
# Format DataType=DNA
|
445
|
+
# Missing=x
|
446
|
+
# Gap=- MatchChar=.;
|
447
|
+
# Matrix
|
448
|
+
# fish ACATA GAGGG TACCT CTAAG
|
449
|
+
# frog ACTTA GAGGC TACCT CTAGC
|
450
|
+
# snake ACTCA CTGGG TACCT TTGCG
|
451
|
+
# mouse ACTCA GACGG TACCT TTGCG;
|
452
|
+
# End;
|
453
|
+
#
|
454
|
+
# ---
|
455
|
+
# *Arguments*:
|
456
|
+
# * (required) _token_: String
|
457
|
+
# * (required) _ary_: Array
|
458
|
+
def process_token_for_character_block( token, ary )
  # Step 1: a command keyword (compared ignoring case) starts a new command.
  command = [ DIMENSIONS, FORMAT, MATRIX ].find { |keyword| equal?( token, keyword ) }

  # Step 2: otherwise the token may name a subcommand of the command
  # currently in progress.
  subcommand = nil
  if command.nil?
    candidates =
      if @current_cmd == DIMENSIONS
        [ NTAX, NCHAR ]
      elsif @current_cmd == FORMAT
        [ DATATYPE, CharactersBlock::MISSING, CharactersBlock::GAP, CharactersBlock::MATCHCHAR ]
      else
        []
      end
    subcommand = candidates.find { |keyword| equal?( token, keyword ) }
  end

  if command
    @current_cmd    = command
    @current_subcmd = nil
  elsif subcommand
    @current_subcmd = subcommand
  # Step 3: any other token is the value for the current command/subcommand.
  elsif cmds_equal_to?( DIMENSIONS, NTAX )
    @current_block.set_number_of_taxa( token )
  elsif cmds_equal_to?( DIMENSIONS, NCHAR )
    @current_block.set_number_of_characters( token )
  elsif cmds_equal_to?( FORMAT, DATATYPE )
    @current_block.set_datatype( token )
  elsif cmds_equal_to?( FORMAT, CharactersBlock::MISSING )
    @current_block.set_missing( token )
  elsif cmds_equal_to?( FORMAT, CharactersBlock::GAP )
    @current_block.set_gap_character( token )
  elsif cmds_equal_to?( FORMAT, CharactersBlock::MATCHCHAR )
    @current_block.set_match_character( token )
  elsif cmds_equal_to?( MATRIX, nil )
    # make_matrix consumes the remaining matrix tokens from ary; each
    # token is split into single characters (scan_token = true).
    characters_per_row = @current_block.get_number_of_characters
    @current_block.set_matrix( make_matrix( token, ary, characters_per_row, true ) )
  end
end
|
497
|
+
|
498
|
+
# This processes the tokens (between Begin Trees; and End;) for a trees block
|
499
|
+
# Example of a currently parseable trees block:
|
500
|
+
# Begin Trees;
|
501
|
+
# Tree best=(fish,(frog,(snake, mouse)));
|
502
|
+
# Tree other=(snake,(frog,( fish, mouse)));
|
503
|
+
# End;
|
504
|
+
#
|
505
|
+
# ---
|
506
|
+
# *Arguments*:
|
507
|
+
# * (required) _token_: String
|
508
|
+
# * (required) _ary_: Array
|
509
|
+
def process_token_for_trees_block( token, ary )
  if ( equal?( token, TreesBlock::TREE ) )
    # "Tree" keyword: the next token is the name of the tree.
    @current_cmd = TreesBlock::TREE
    @current_subcmd = nil
  elsif ( cmds_equal_to?( TreesBlock::TREE, nil ) )
    # 'token' is the tree name. The tree description follows in 'ary',
    # possibly split over several tokens, and ends with the first token
    # containing ";". Tokens are shifted (consumed) from ary.
    # The pieces are collected in a fresh String so that the token
    # objects themselves are not modified.
    tree_string = String.new
    while ( tree_string.index( ";" ) == nil )
      piece = ary.shift
      if ( piece == nil )
        # Ran out of tokens before the terminating ";" was seen.
        raise NexusParseError, "Tree \"#{token}\" is not terminated by \";\""
      end
      tree_string << piece
    end
    @current_block.add_tree_name( token )
    @current_block.add_tree( tree_string )
    # The tree statement is complete; wait for the next "Tree" keyword.
    @current_cmd = nil
  end
end
|
523
|
+
|
524
|
+
# This processes the tokens (between Begin Distances; and End;) for a distances
|
525
|
+
# block.
|
526
|
+
# Example of a currently parseable distances block:
|
527
|
+
# Begin Distances;
|
528
|
+
# Dimensions nchar=20 ntax=5;
|
529
|
+
# Format Triangle=Upper;
|
530
|
+
# Matrix
|
531
|
+
# taxon_1 0.0 1.0 2.0 4.0 7.0
|
532
|
+
# taxon_2 1.0 0.0 3.0 5.0 8.0
|
533
|
+
# taxon_3 3.0 4.0 0.0 6.0 9.0
|
534
|
+
# taxon_4 7.0 3.0 1.0 0.0 9.5
|
535
|
+
# taxon_5 1.2 1.3 1.4 1.5 0.0;
|
536
|
+
# End;
|
537
|
+
#
|
538
|
+
# ---
|
539
|
+
# *Arguments*:
|
540
|
+
# * (required) _token_: String
|
541
|
+
# * (required) _ary_: Array
|
542
|
+
def process_token_for_distances_block( token, ary )
  # Step 1: a command keyword (compared ignoring case) starts a new command.
  command = [ DIMENSIONS, FORMAT, MATRIX ].find { |keyword| equal?( token, keyword ) }

  # Step 2: otherwise the token may name a subcommand of the command
  # currently in progress.
  subcommand = nil
  if command.nil?
    candidates =
      if @current_cmd == DIMENSIONS
        [ NTAX, NCHAR ]
      elsif @current_cmd == FORMAT
        [ DATATYPE, DistancesBlock::TRIANGLE ]
      else
        []
      end
    subcommand = candidates.find { |keyword| equal?( token, keyword ) }
  end

  if command
    @current_cmd    = command
    @current_subcmd = nil
  elsif subcommand
    @current_subcmd = subcommand
  # Step 3: any other token is the value for the current command/subcommand.
  # Note that a value following "Format DataType" is not stored here.
  elsif cmds_equal_to?( DIMENSIONS, NTAX )
    @current_block.set_number_of_taxa( token )
  elsif cmds_equal_to?( DIMENSIONS, NCHAR )
    @current_block.set_number_of_characters( token )
  elsif cmds_equal_to?( FORMAT, DistancesBlock::TRIANGLE )
    @current_block.set_triangle( token )
  elsif cmds_equal_to?( MATRIX, nil )
    # make_matrix consumes the remaining matrix tokens from ary; each
    # token is stored whole (scan_token = false), one value per taxon.
    values_per_row = @current_block.get_number_of_taxa
    @current_block.set_matrix( make_matrix( token, ary, values_per_row, false ) )
  end
end
|
571
|
+
|
572
|
+
# This processes the tokens (between Begin Data; and End;) for a data
|
573
|
+
# block.
|
574
|
+
# Example of a currently parseable data block:
|
575
|
+
# Begin Data;
|
576
|
+
# Dimensions ntax=5 nchar=14;
|
577
|
+
# Format Datatype=RNA gap=# MISSING=x MatchChar=^;
|
578
|
+
# TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
|
579
|
+
# Matrix
|
580
|
+
# taxon_1 A- CCGTCGA-GTTA
|
581
|
+
# taxon_2 T- CCG-CGA-GATA
|
582
|
+
# taxon_3 A- C-GTCGA-GATA
|
583
|
+
# taxon_4 A- CCTCGA--GTTA
|
584
|
+
# taxon_5 T- CGGTCGT-CTTA;
|
585
|
+
# End;
|
586
|
+
#
|
587
|
+
# ---
|
588
|
+
# *Arguments*:
|
589
|
+
# * (required) _token_: String
|
590
|
+
# * (required) _ary_: Array
|
591
|
+
def process_token_for_data_block( token, ary )
  # Step 1: a command keyword (compared ignoring case) starts a new command.
  command = [ DIMENSIONS, FORMAT, TAXLABELS, MATRIX ].find { |keyword| equal?( token, keyword ) }

  # Step 2: otherwise the token may name a subcommand of the command
  # currently in progress.
  subcommand = nil
  if command.nil?
    candidates =
      if @current_cmd == DIMENSIONS
        [ NTAX, NCHAR ]
      elsif @current_cmd == FORMAT
        [ DATATYPE, CharactersBlock::MISSING, CharactersBlock::GAP, CharactersBlock::MATCHCHAR ]
      else
        []
      end
    subcommand = candidates.find { |keyword| equal?( token, keyword ) }
  end

  if command
    @current_cmd    = command
    @current_subcmd = nil
  elsif subcommand
    @current_subcmd = subcommand
  # Step 3: any other token is the value for the current command/subcommand.
  elsif cmds_equal_to?( DIMENSIONS, NTAX )
    @current_block.set_number_of_taxa( token )
  elsif cmds_equal_to?( DIMENSIONS, NCHAR )
    @current_block.set_number_of_characters( token )
  elsif cmds_equal_to?( FORMAT, DATATYPE )
    @current_block.set_datatype( token )
  elsif cmds_equal_to?( FORMAT, CharactersBlock::MISSING )
    @current_block.set_missing( token )
  elsif cmds_equal_to?( FORMAT, CharactersBlock::GAP )
    @current_block.set_gap_character( token )
  elsif cmds_equal_to?( FORMAT, CharactersBlock::MATCHCHAR )
    @current_block.set_match_character( token )
  elsif cmds_equal_to?( TAXLABELS, nil )
    # Every token following "TaxLabels" is a taxon name.
    @current_block.add_taxon( token )
  elsif cmds_equal_to?( MATRIX, nil )
    # make_matrix consumes the remaining matrix tokens from ary; each
    # token is split into single characters (scan_token = true).
    characters_per_row = @current_block.get_number_of_characters
    @current_block.set_matrix( make_matrix( token, ary, characters_per_row, true ) )
  end
end
|
635
|
+
|
636
|
+
# Makes a NexusMatrix out of token from token Array ary
|
637
|
+
# Used by process_token_for_X_block methods which contain
|
638
|
+
# data in a matrix form. Column 0 contains names.
|
639
|
+
# This will shift tokens from ary.
|
640
|
+
# ---
|
641
|
+
# *Arguments*:
|
642
|
+
# * (required) _token_: String
|
643
|
+
# * (required) _ary_: Array
|
644
|
+
# * (required) _size_: Integer
|
645
|
+
# * (optional) _scan_token_: true or false
|
646
|
+
# *Returns*:: NexusMatrix
|
647
|
+
def make_matrix( token, ary, size, scan_token = false )
  # Builds a NexusMatrix starting with 'token' (the name of the first row)
  # and continuing with tokens shifted from 'ary' until a token containing
  # DELIMITER is found. Column 0 of each row holds the row name; a row is
  # complete once 'size' (converted with to_i) value columns are filled.
  #
  # Raises NexusParseError if 'ary' runs out of tokens before a token
  # containing DELIMITER is seen.
  matrix = NexusMatrix.new
  col = -1   # -1 means "the next token is a row name"
  row = 0
  done = false
  while ( !done )
    if ( col == -1 )
      # name
      col = 0
      matrix.set_value( row, col, token ) # name is in col 0
    else
      # values
      col = add_token_to_matrix( token, scan_token, matrix, row, col )
      if ( col == size.to_i )
        # row is full: the next token starts a new row
        col = -1
        row += 1
      end
    end
    token = ary.shift
    if ( token == nil )
      # Array#shift returns nil on an empty Array: the matrix never ended.
      raise NexusParseError, "Matrix is not terminated by \";\""
    end
    if ( token.index( DELIMITER ) != nil )
      # Last token of the matrix. Whatever precedes the trailing ";" is
      # still a value; a stand-alone ";" carries no value and must not
      # create an empty cell (or an empty extra row).
      last_value = token.chomp( ";" )
      unless ( last_value.empty? )
        col = add_token_to_matrix( last_value, scan_token, matrix, row, col )
      end
      done = true
    end
  end # while
  matrix
end
|
673
|
+
|
674
|
+
# Helper method for make_matrix.
|
675
|
+
#
|
676
|
+
# ---
|
677
|
+
# *Arguments*:
|
678
|
+
# * (required) _token_: String
|
679
|
+
# * (required) _scan_token_: true or false - add whole token
|
680
|
+
# or
|
681
|
+
# scan into chars
|
682
|
+
# * (required) _matrix_: NexusMatrix - the matrix to which to add token
|
683
|
+
# * (required) _row_: Integer - the row for matrix
|
684
|
+
# * (required) _col_: Integer - the starting column
|
685
|
+
# *Returns*:: Integer - ending column
|
686
|
+
def add_token_to_matrix( token, scan_token, matrix, row, col )
  # With scan_token the token contributes one cell per character matched
  # by /./, otherwise the whole token is a single cell.
  cells = scan_token ? token.scan( /./ ) : [ token ]
  # Each cell goes into the column after the last one written.
  cells.each do |cell|
    col += 1
    matrix.set_value( row, col, cell )
  end
  # Index of the last column written (unchanged if nothing was added).
  col
end
|
698
|
+
|
699
|
+
# This processes the tokens (between Begin <name>; and End;) for a block
|
700
|
+
# for which a specific parser is not available.
|
701
|
+
# Example of a currently parseable generic block:
|
702
|
+
# Begin Taxa;
|
703
|
+
# token1 token2 token3 ...
|
704
|
+
# End;
|
705
|
+
#
|
706
|
+
# ---
|
707
|
+
# *Arguments*:
|
708
|
+
# * (required) _token_: String
|
709
|
+
def process_token_for_generic_block( token )
  # Generic blocks are not interpreted: the token is stored as is.
  generic_block = @current_block
  generic_block.add_token( token )
end
|
712
|
+
|
713
|
+
# Returns true if Strings str1 and str2 are
|
714
|
+
# equal - ignoring case.
|
715
|
+
#
|
716
|
+
# ---
|
717
|
+
# *Arguments*:
|
718
|
+
# * (required) _str1_: String
|
719
|
+
# * (required) _str2_: String
|
720
|
+
# *Returns*:: true or false
|
721
|
+
def equal?( str1, str2 )
  # NOTE: this two-argument method shadows the one-argument identity test
  # Object#equal? on whatever object it is defined for.
  # A nil on either side never matches.
  return false if str1.nil? || str2.nil?
  # Case-insensitive comparison.
  str1.downcase == str2.downcase
end
|
728
|
+
|
729
|
+
# Returns true if @current_cmd == command
|
730
|
+
# and @current_subcmd == subcommand, false otherwise
|
731
|
+
# ---
|
732
|
+
# *Arguments*:
|
733
|
+
# * (required) _command_: String
|
734
|
+
# * (required) _subcommand_: String
|
735
|
+
# *Returns*:: true or false
|
736
|
+
def cmds_equal_to?( command, subcommand )
  # Both the command and the subcommand in progress have to match;
  # pass nil as subcommand to require that no subcommand is active.
  return false unless @current_cmd == command
  @current_subcmd == subcommand
end
|
739
|
+
|
740
|
+
# Classes to represent nexus data follow.
|
741
|
+
|
742
|
+
# == DESCRIPTION
|
743
|
+
# Bio::Nexus::GenericBlock represents a generic nexus block.
|
744
|
+
# It is mainly intended to be extended into more specific classes,
|
745
|
+
# although it is used for blocks not represented by more specific
|
746
|
+
# block classes.
|
747
|
+
# It has a name and a array for the tokenized content of a
|
748
|
+
# nexus block.
|
749
|
+
#
|
750
|
+
# == USAGE
|
751
|
+
#
|
752
|
+
# require 'bio/db/nexus'
|
753
|
+
#
|
754
|
+
# # Create a new parser:
|
755
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
756
|
+
#
|
757
|
+
# # Get blocks for which no class exists (private blocks)
|
758
|
+
# as Nexus::GenericBlock:
|
759
|
+
# private_blocks = nexus.get_blocks_by_name( "my_block" )
|
760
|
+
# # Get first block names "my_block":
|
761
|
+
# my_block_0 = private_blocks[ 0 ]
|
762
|
+
# # Get first token in first block names "my_block":
|
763
|
+
# first_token = my_block_0.get_tokens[ 0 ]
|
764
|
+
# # Get name of block (would return "my_block" in this case):
|
765
|
+
# name = my_block_0.get_name
|
766
|
+
# # Return data of block as nexus formatted String:
|
767
|
+
# name = my_block_0.to_nexus
|
768
|
+
#
|
769
|
+
class GenericBlock

  # Creates a new GenericBlock object named 'name'.
  # A trailing ";" is removed from the name.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    @name   = name.chomp( ";" )
    @tokens = []
  end

  # Gets the name of this block.
  #
  # ---
  # *Returns*:: String
  def get_name
    @name
  end

  # Returns the tokens added so far, in the order they were added.
  #
  # ---
  # *Returns*:: Array
  def get_tokens
    @tokens
  end

  # Same as to_nexus.
  #
  # ---
  # *Returns*:: String
  def to_s
    to_nexus
  end
  alias to_str to_s

  # Returns a placeholder String naming this block; generic blocks
  # cannot be written as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    "generic block \"" + get_name + "\" [do not know how to write in nexus format]"
  end

  # Appends a token to the content of this block.
  #
  # ---
  # *Arguments*:
  # * (required) _token_: String
  def add_token( token )
    @tokens << token
  end

end # class GenericBlock
|
822
|
+
|
823
|
+
|
824
|
+
# == DESCRIPTION
|
825
|
+
# Bio::Nexus::TaxaBlock represents a taxa nexus block.
|
826
|
+
#
|
827
|
+
# = Example of Taxa block:
|
828
|
+
# Begin Taxa;
|
829
|
+
# Dimensions NTax=4;
|
830
|
+
# TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
|
831
|
+
# End;
|
832
|
+
#
|
833
|
+
# == USAGE
|
834
|
+
#
|
835
|
+
# require 'bio/db/nexus'
|
836
|
+
#
|
837
|
+
# # Create a new parser:
|
838
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
839
|
+
#
|
840
|
+
# # Get first taxa block:
|
841
|
+
# taxa_block = nexus.get_taxa_blocks[ 0 ]
|
842
|
+
# # Get number of taxa:
|
843
|
+
# number_of_taxa = taxa_block.get_number_of_taxa.to_i
|
844
|
+
# # Get name of first taxon:
|
845
|
+
# first_taxon = taxa_block.get_taxa[ 0 ]
|
846
|
+
#
|
847
|
+
class TaxaBlock < GenericBlock

  # Creates a new TaxaBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      # to_s is required: String#<< given an Integer appends the character
      # with that codepoint, not the digits.
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    line_1 << DELIMITER
    line_2 = String.new
    line_2 << TAXLABELS << " " << Nexus::Util::array_to_string( get_taxa ) << DELIMITER
    Nexus::Util::to_nexus_helper( TAXA_BLOCK, [ line_1, line_2 ] )
  end

  # Gets the "number of taxa" property.
  # The value is returned exactly as it was set: 0 (Integer) initially,
  # otherwise whatever was passed to set_number_of_taxa.
  #
  # ---
  # *Returns*:: Integer (or String, see set_number_of_taxa)
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the taxa of this block.
  #
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Sets the "number of taxa" property.
  # The value is stored without conversion, so it may also be a String.
  #
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Adds a taxon name to this block.
  #
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class TaxaBlock
|
909
|
+
|
910
|
+
|
911
|
+
# == DESCRIPTION
|
912
|
+
# Bio::Nexus::CharactersBlock represents a characters nexus block.
|
913
|
+
#
|
914
|
+
# = Example of Characters block:
|
915
|
+
# Begin Characters;
|
916
|
+
# Dimensions NChar=20
|
917
|
+
# NTax=4;
|
918
|
+
# Format DataType=DNA
|
919
|
+
# Missing=x
|
920
|
+
# Gap=- MatchChar=.;
|
921
|
+
# Matrix
|
922
|
+
# fish ACATA GAGGG TACCT CTAAG
|
923
|
+
# frog ACTTA GAGGC TACCT CTAGC
|
924
|
+
# snake ACTCA CTGGG TACCT TTGCG
|
925
|
+
# mouse ACTCA GACGG TACCT TTGCG;
|
926
|
+
# End;
|
927
|
+
#
|
928
|
+
#
|
929
|
+
# == USAGE
|
930
|
+
#
|
931
|
+
# require 'bio/db/nexus'
|
932
|
+
#
|
933
|
+
# # Create a new parser:
|
934
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
935
|
+
#
|
936
|
+
#
|
937
|
+
# # Get first characters block (same methods as Nexus::DataBlock except
|
938
|
+
# # it lacks get_taxa method):
|
939
|
+
# characters_block = nexus.get_characters_blocks[ 0 ]
|
940
|
+
#
|
941
|
+
class CharactersBlock < GenericBlock

  MISSING = "Missing"
  GAP = "Gap"
  MATCHCHAR = "MatchChar"

  # Creates a new CharactersBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @number_of_characters = 0
    @data_type = String.new
    @gap_character = String.new
    @missing = String.new
    @match_character = String.new
    @matrix = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  #
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s is required for the two counts below: String#<< given an
    # Integer appends the character with that codepoint, not the digits.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      line_2 << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      line_2 << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      line_2 << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      line_2 << " " << MATCHCHAR << "=" << get_match_character
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << MATRIX
    Nexus::Util::to_nexus_helper( CHARACTERS_BLOCK, [ line_1, line_2, line_3 ] +
                                  get_matrix.to_nexus_row_array )
  end

  # Gets the "number of taxa" property.
  # The value is returned exactly as it was set (0 initially).
  #
  # ---
  # *Returns*:: Integer (or String, see set_number_of_taxa)
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  # The value is returned exactly as it was set (0 initially).
  #
  # ---
  # *Returns*:: Integer (or String, see set_number_of_characters)
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "datatype" property.
  # ---
  # *Returns*:: String
  def get_datatype
    @data_type
  end

  # Gets the "gap character" property.
  # ---
  # *Returns*:: String
  def get_gap_character
    @gap_character
  end

  # Gets the "missing" property.
  # ---
  # *Returns*:: String
  def get_missing
    @missing
  end

  # Gets the "match character" property.
  # ---
  # *Returns*:: String
  def get_match_character
    @match_character
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Returns character data as an Array of Bio::Sequence objects,
  # one for each matrix row named 'name'. Each sequence gets 'name'
  # as its definition.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Bio::Sequence
  def get_sequences_by_name( name )
    get_characters_strings_by_name( name ).map do | seq_str |
      create_sequence( seq_str, name )
    end
  end

  # Returns the characters in the matrix at row 'row' as
  # Bio::Sequence object. Column 0 of the matrix is set as
  # the definition of the Bio::Sequence object.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Bio::Sequence
  def get_sequence( row )
    create_sequence( get_characters_string( row ), get_row_name( row ) )
  end

  # Returns the String in the matrix at row 'row' and column 0,
  # which usually is interpreted as a sequence name (if the matrix
  # contains molecular sequence characters).
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_row_name( row )
    get_matrix.get_name( row )
  end

  # Returns character data as String Array
  # for matrix rows named 'name'.
  #
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Strings
  def get_characters_strings_by_name( name )
    get_matrix.get_row_strings_by_name( name, "" )
  end

  # Returns character data as String
  # for matrix row 'row'.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_characters_string( row )
    get_matrix.get_row_string( row, "" )
  end

  # Sets the "number of taxa" property.
  # The value is stored without conversion, so it may also be a String.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property.
  # The value is stored without conversion, so it may also be a String.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "data type" property.
  # ---
  # *Arguments*:
  # * (required) _data_type_: String
  def set_datatype( data_type )
    @data_type = data_type
  end

  # Sets the "gap character" property.
  # ---
  # *Arguments*:
  # * (required) _gap_character_: String
  def set_gap_character( gap_character )
    @gap_character = gap_character
  end

  # Sets the "missing" property.
  # ---
  # *Arguments*:
  # * (required) _missing_: String
  def set_missing( missing )
    @missing = missing
  end

  # Sets the "match character" property.
  # ---
  # *Arguments*:
  # * (required) _match_character_: String
  def set_match_character( match_character )
    @match_character = match_character
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

  private

  # Creates a Bio::Sequence object with sequence 'seq_str'
  # (via Bio::Sequence.auto) and definition 'definition'.
  # ---
  # *Arguments*:
  # * (required) _seq_str_: String
  # * (optional) _definition_: String
  # *Returns*:: Bio::Sequence
  def create_sequence( seq_str, definition = "" )
    seq = Bio::Sequence.auto( seq_str )
    seq.definition = definition
    seq
  end

end # class CharactersBlock
|
1182
|
+
|
1183
|
+
|
1184
|
+
# == DESCRIPTION
|
1185
|
+
# Bio::Nexus::DataBlock represents a data nexus block.
|
1186
|
+
# A data block is a Bio::Nexus::CharactersBlock with the added
|
1187
|
+
# capability to store taxa names.
|
1188
|
+
#
|
1189
|
+
# = Example of Data block:
|
1190
|
+
# Begin Data;
|
1191
|
+
# Dimensions ntax=5 nchar=14;
|
1192
|
+
# Format Datatype=RNA gap=# MISSING=x MatchChar=^;
|
1193
|
+
# TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
|
1194
|
+
# Matrix
|
1195
|
+
# taxon_1 A- CCGTCGA-GTTA
|
1196
|
+
# taxon_2 T- CCG-CGA-GATA
|
1197
|
+
# taxon_3 A- C-GTCGA-GATA
|
1198
|
+
# taxon_4 A- CCTCGA--GTTA
|
1199
|
+
# taxon_5 T- CGGTCGT-CTTA;
|
1200
|
+
# End;
|
1201
|
+
#
|
1202
|
+
#
|
1203
|
+
# == USAGE
|
1204
|
+
#
|
1205
|
+
# require 'bio/db/nexus'
|
1206
|
+
#
|
1207
|
+
# # Create a new parser:
|
1208
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
1209
|
+
#
|
1210
|
+
#
|
1211
|
+
# # Get first data block:
|
1212
|
+
# data_block = nexus.get_data_blocks[ 0 ]
|
1213
|
+
# # Get first characters name:
|
1214
|
+
# seq_name = data_block.get_row_name( 0 )
|
1215
|
+
# # Get first characters row named "taxon_2" as Bio::Sequence sequence:
|
1216
|
+
# seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
|
1217
|
+
# # Get third characters row as Bio::Sequence sequence:
|
1218
|
+
# seq_2 = data_block.get_sequence( 2 )
|
1219
|
+
# # Get first characters row named "taxon_3" as String:
|
1220
|
+
# string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
|
1221
|
+
# # Get name of first taxon:
|
1222
|
+
# taxon_0 = data_block.get_taxa[ 0 ]
|
1223
|
+
# # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
|
1224
|
+
# characters_matrix = data_block.get_matrix
|
1225
|
+
#
|
1226
|
+
class DataBlock < CharactersBlock

  # Creates a new DataBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s is required for the two counts below: String#<< given an
    # Integer appends the character with that codepoint, not the digits.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      line_2 << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      line_2 << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      line_2 << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      line_2 << " " << MATCHCHAR << "=" << get_match_character
    end
    line_2 << DELIMITER

    # Unlike a characters block, a data block also lists its taxa.
    line_3 = String.new
    line_3 << TAXLABELS << " " << Nexus::Util::array_to_string( get_taxa )
    line_3 << DELIMITER

    line_4 = String.new
    line_4 << MATRIX
    Nexus::Util::to_nexus_helper( DATA_BLOCK, [ line_1, line_2, line_3, line_4 ] +
                                  get_matrix.to_nexus_row_array )
  end

  # Gets the taxa of this block.
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Adds a taxon name to this block.
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class DataBlock
|
1293
|
+
|
1294
|
+
|
1295
|
+
# == DESCRIPTION
|
1296
|
+
# Bio::Nexus::DistancesBlock represents a distances nexus block.
|
1297
|
+
#
|
1298
|
+
# = Example of Distances block:
|
1299
|
+
# Begin Distances;
|
1300
|
+
# Dimensions nchar=20 ntax=5;
|
1301
|
+
# Format Triangle=Upper;
|
1302
|
+
# Matrix
|
1303
|
+
# taxon_1 0.0 1.0 2.0 4.0 7.0
|
1304
|
+
# taxon_2 1.0 0.0 3.0 5.0 8.0
|
1305
|
+
# taxon_3 3.0 4.0 0.0 6.0 9.0
|
1306
|
+
# taxon_4 7.0 3.0 1.0 0.0 9.5
|
1307
|
+
# taxon_5 1.2 1.3 1.4 1.5 0.0;
|
1308
|
+
# End;
|
1309
|
+
#
|
1310
|
+
#
|
1311
|
+
# == USAGE
|
1312
|
+
#
|
1313
|
+
# require 'bio/db/nexus'
|
1314
|
+
#
|
1315
|
+
# # Create a new parser:
|
1316
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
1317
|
+
#
|
1318
|
+
# # Get distances block(s):
|
1319
|
+
# distances_blocks = nexus.get_distances_blocks
|
1320
|
+
# # Get matrix as Bio::Nexus::NexusMatrix object:
|
1321
|
+
# matrix = distances_blocks[ 0 ].get_matrix
|
1322
|
+
# # Get value (column 0 are names):
|
1323
|
+
# val = matrix.get_value( 1, 5 )
|
1324
|
+
#
|
1325
|
+
class DistancesBlock < GenericBlock
  TRIANGLE = "Triangle"

  # Creates a new DistancesBlock object named 'name'.
  # The counts start at 0, the triangle property as an empty String and
  # the matrix as an empty NexusMatrix.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @number_of_characters = 0
    @triangle = String.new
    @matrix = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  # Counts which are not larger than zero and an empty triangle property
  # are left out of the output.
  # ---
  # *Returns*:: String
  def to_nexus
    # The counts are Integers (see initialize and the setters). String#<<
    # would append an Integer as a single code-point character, so they
    # are converted with to_s before being appended.
    line_1 = String.new
    line_1 << DIMENSIONS
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_triangle ) )
      line_2 << " " << TRIANGLE << "=" << get_triangle
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << MATRIX

    # Distance values are separated by a single space within each row.
    Nexus::Util::to_nexus_helper( DISTANCES_BLOCK, [ line_1, line_2, line_3 ] +
                                  get_matrix.to_nexus_row_array( " " ) )
  end

  # Gets the "number of taxa" property.
  # ---
  # *Returns*:: Integer
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  # ---
  # *Returns*:: Integer
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "triangle" property.
  # ---
  # *Returns*:: String
  def get_triangle
    @triangle
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Sets the "number of taxa" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "triangle" property.
  # ---
  # *Arguments*:
  # * (required) _triangle_: String
  def set_triangle( triangle )
    @triangle = triangle
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

end # class DistancesBlock
|
1428
|
+
|
1429
|
+
|
1430
|
+
# == DESCRIPTION
|
1431
|
+
# Bio::Nexus::TreesBlock represents a trees nexus block.
|
1432
|
+
#
|
1433
|
+
# = Example of Trees block:
|
1434
|
+
# Begin Trees;
|
1435
|
+
# Tree best=(fish,(frog,(snake, mouse)));
|
1436
|
+
# Tree other=(snake,(frog,( fish, mouse)));
|
1437
|
+
# End;
|
1438
|
+
#
|
1439
|
+
#
|
1440
|
+
# == USAGE
|
1441
|
+
#
|
1442
|
+
# require 'bio/db/nexus'
|
1443
|
+
#
|
1444
|
+
# # Create a new parser:
|
1445
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
1446
|
+
#
|
1447
|
+
# # Get trees block(s):
|
1448
|
+
# trees_block = nexus.get_trees_blocks[ 0 ]
|
1449
|
+
# # Get first tree named "best" as String:
|
1450
|
+
# string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
|
1451
|
+
# # Get first tree named "best" as Bio::Db::Newick object:
|
1452
|
+
# tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
|
1453
|
+
# # Get first tree as Bio::Db::Newick object:
|
1454
|
+
# tree_first = trees_block.get_tree( 0 )
|
1455
|
+
#
|
1456
|
+
class TreesBlock < GenericBlock
  TREE = "Tree"

  # Creates a new TreesBlock object named 'name' holding no trees
  # and no tree names.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @trees = Array.new
    @tree_names = Array.new
  end

  # Returns a String describing this block as nexus formatted data,
  # with one "Tree name=tree" line per stored tree.
  # ---
  # *Returns*:: String
  def to_nexus
    # Pair every tree string with the name stored at the same position.
    tree_lines = @trees.zip( @tree_names ).map do | tree_str, tree_name |
      TREE + " " + tree_name + "=" + tree_str
    end
    Nexus::Util::to_nexus_helper( TREES_BLOCK, tree_lines )
  end

  # Returns an array of strings describing trees
  # ---
  # *Returns*:: Array
  def get_tree_strings
    return @trees
  end

  # Returns an array of tree names.
  # ---
  # *Returns*:: Array
  def get_tree_names
    return @tree_names
  end

  # Returns an array of strings describing trees
  # for which name matches the tree name.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array
  def get_tree_strings_by_name( name )
    indices_for_name( name ).map { | idx | @trees[ idx ] }
  end

  # Returns tree i (same order as in nexus data) as
  # newick parsed tree object.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: Bio::Newick
  def get_tree( i )
    Bio::Newick.new( @trees[ i ] ).tree
  end

  # Returns an array of newick parsed tree objects
  # for which name matches the tree name.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Bio::Newick
  def get_trees_by_name( name )
    indices_for_name( name ).map { | idx | get_tree( idx ) }
  end

  # Adds a tree name to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_name_: String
  def add_tree_name( tree_name )
    @tree_names << tree_name
  end

  # Adds a tree to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_as_string_: String
  def add_tree( tree_as_string )
    @trees << tree_as_string
  end

  private

  # Returns the positions (in insertion order) of all stored tree names
  # which are equal to 'name'.
  def indices_for_name( name )
    ( 0 ... @tree_names.length ).select { | idx | @tree_names[ idx ] == name }
  end

end # class TreesBlock
|
1554
|
+
|
1555
|
+
|
1556
|
+
# == DESCRIPTION
|
1557
|
+
# Bio::Nexus::NexusMatrix represents a characters or distance matrix,
|
1558
|
+
# where the names are stored in column zero.
|
1559
|
+
#
|
1560
|
+
#
|
1561
|
+
# == USAGE
|
1562
|
+
#
|
1563
|
+
# require 'bio/db/nexus'
|
1564
|
+
#
|
1565
|
+
# # Create a new parser:
|
1566
|
+
# nexus = Bio::Nexus.new( nexus_data_as_string )
|
1567
|
+
# # Get distances block(s):
|
1568
|
+
# distances_block = nexus.get_distances_blocks[ 0 ]
|
1569
|
+
# # Get matrix as Bio::Nexus::NexusMatrix object:
|
1570
|
+
# matrix = distances_block.get_matrix
|
1571
|
+
# # Get value (column 0 are names):
|
1572
|
+
# val = matrix.get_value( 1, 5 )
|
1573
|
+
# # Return first row as String (all columns except column 0),
|
1574
|
+
# # values are separated by "_":
|
1575
|
+
# row_str_0 = matrix.get_row_string( 0, "_" )
|
1576
|
+
# # Return all rows named "ciona" as String (all columns except column 0),
|
1577
|
+
# # values are separated by "+":
|
1578
|
+
# ciona_rows = matrix.get_row_strings_by_name( "ciona", "+" )
|
1579
|
+
class NexusMatrix

  # Nexus matrix error class.
  class NexusMatrixError < RuntimeError; end

  # Creates new, empty NexusMatrix.
  # Rows are stored sparsely: a Hash of row number => Hash of
  # column number => value.
  def initialize()
    @rows = Hash.new
    @max_row = -1
    @max_col = -1
  end

  # Sets the value at row 'row' and column 'col' to 'value'.
  # The maximal row and column numbers grow as needed.
  # Raises NexusMatrixError if 'row' or 'col' is negative.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # * (required) _value_: Object
  def set_value( row, col, value )
    if ( ( row < 0 ) || ( col < 0 ) )
      raise( NexusMatrixError, "attempt to use negative values for row or column" )
    end
    set_max_row( row ) if row > get_max_row
    set_max_col( col ) if col > get_max_col
    # Create the per-row Hash the first time a row is written to.
    row_map = ( @rows[ row ] ||= Hash.new )
    row_map[ col ] = value
  end

  # Returns the value at row 'row' and column 'col', or nil if nothing
  # has been stored at this position.
  # Raises NexusMatrixError if 'row' or 'col' is negative or larger than
  # the maximal row or column number.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # *Returns*:: Object
  def get_value( row, col )
    if ( ( row > get_max_row ) || ( row < 0 ) )
      raise( NexusMatrixError, "value for row (" + row.to_s +
             ") is out of range [max row: " + get_max_row.to_s + "]" )
    elsif ( ( col > get_max_col ) || ( col < 0 ) )
      raise( NexusMatrixError, "value for column (" + col.to_s +
             ") is out of range [max column: " + get_max_col.to_s + "]" )
    end
    r = @rows[ row ]
    return nil if ( r.nil? || r.empty? )
    r[ col ]
  end

  # Returns the maximal columns number.
  # ---
  # *Returns*:: Integer
  def get_max_col
    return @max_col
  end

  # Returns the maximal row number.
  # ---
  # *Returns*:: Integer
  def get_max_row
    return @max_row
  end

  # Returns true if matrix is empty.
  #
  # ---
  # *Returns*:: true or false
  def is_empty?
    return get_max_col < 0 || get_max_row < 0
  end

  # Convenience method which return the value of
  # column 0 and row 'row' which is usually the name.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_name( row )
    get_value( row, 0 ).to_s
  end

  # Returns the values of columns 1 to maximal column length
  # in row 'row' concatenated as string. Every value (including the
  # last one) is followed by 'spacer'. Values are converted with to_s,
  # so positions without a value contribute an empty String.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (optional) _spacer_: String
  # *Returns*:: String
  def get_row_string( row, spacer = "" )
    row_str = String.new
    return row_str if is_empty?
    ( 1 .. get_max_col ).each do | col |
      # to_s: String#<< would append an Integer as a code-point character
      # and raise a TypeError for nil.
      row_str << get_value( row, col ).to_s << spacer.to_s
    end
    row_str
  end

  # Returns all rows as Array of Strings separated by 'spacer'
  # for which column 0 is 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # * (optional) _spacer_: String
  # *Returns*:: Array
  def get_row_strings_by_name( name, spacer = "" )
    row_strs = Array.new
    return row_strs if is_empty?
    ( 0 .. get_max_row ).each do | row |
      if ( get_value( row, 0 ) == name )
        row_strs.push( get_row_string( row, spacer ) )
      end
    end
    row_strs
  end

  # Returns matrix as String, returns "empty" if empty.
  # ---
  # *Returns*:: String
  def to_s
    return "empty" if is_empty?
    str = String.new
    to_nexus_row_array( " ", false ).each do | row |
      str << row << END_OF_LINE
    end
    str
  end
  alias to_str to_s

  # Helper method to produce nexus formatted data.
  # Returns one String per row: the name (column 0) padded to a common
  # width, followed by the row values separated by 'spacer'. If
  # 'append_delimiter' is true the delimiter is appended to the last row.
  # ---
  # *Arguments*:
  # * (optional) _spacer_: String
  # * (optional) _append_delimiter_: true or false
  # *Returns*:: Array
  def to_nexus_row_array( spacer = "", append_delimiter = true )
    ary = Array.new
    return ary if is_empty?
    # The name column is at least 10 characters wide, wider if any
    # name is longer. get_name is used so a missing name counts as "".
    max_length = 10
    ( 0 .. get_max_row ).each do | row |
      l = get_name( row ).length
      max_length = l if l > max_length
    end
    ( 0 .. get_max_row ).each do | row |
      row_str = String.new
      row_str << get_name( row ).ljust( max_length + 1 ) << " " << get_row_string( row, spacer )
      # get_row_string leaves a trailing spacer behind; remove it.
      if ( spacer != nil && spacer.length > 0 )
        row_str.chomp!( spacer )
      end
      if ( append_delimiter && row == get_max_row )
        row_str << DELIMITER
      end
      ary.push( row_str )
    end
    ary
  end


  private

  # Returns row data as Hash (column number => value),
  # or nil if nothing has been stored in row 'row'.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Hash
  def get_row( row )
    return @rows[ row ]
  end

  # Sets maximal column number.
  # ---
  # *Arguments*:
  # * (required) _max_col_: Integer
  def set_max_col( max_col )
    @max_col = max_col
  end

  # Sets maximal row number.
  # ---
  # *Arguments*:
  # * (required) _max_row_: Integer
  def set_max_row( max_row )
    @max_row = max_row
  end

end # NexusMatrix
|
1790
|
+
|
1791
|
+
# End of classes to represent nexus data.
|
1792
|
+
|
1793
|
+
# = DESCRIPTION
|
1794
|
+
# Bio::Nexus::Util is a class containing static helper methods
|
1795
|
+
#
|
1796
|
+
class Util

  # Helper method to produce nexus formatted data:
  # a begin line naming the block, one indented line per non-nil
  # element of 'lines', and an end line.
  # ---
  # *Arguments*:
  # * (required) _block_: String, the block name written after the begin keyword
  # * (required) _lines_: Array of Strings (nil elements are skipped)
  # *Returns*:: String
  def self.to_nexus_helper( block, lines )
    str = String.new
    str << BEGIN_BLOCK << " " << block << END_OF_LINE
    lines.each do | line |
      next if line.nil?
      str << INDENTENTION << line << END_OF_LINE
    end
    str << END_BLOCK << END_OF_LINE
    str
  end

  # Returns array as string with its elements separated by " ".
  # Elements are converted with to_s, so non-String elements
  # (e.g. Integers) are written as text.
  # ---
  # *Arguments*:
  # * (required) _ary_: Array
  # *Returns*:: String
  def self.array_to_string( ary )
    ary.join( " " )
  end

  # Returns true if Integer i is not nil and larger than 0.
  # The comparison is done on i.to_i, so a numeric String is accepted too.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: true or false
  def self.larger_than_zero( i )
    return ( !i.nil? && i.to_i > 0 )
  end

  # Returns true if String str is not nil and longer than 0.
  # ---
  # *Arguments*:
  # * (required) _str_: String
  # *Returns*:: true or false
  def self.longer_than_zero( str )
    return ( !str.nil? && str.length > 0 )
  end

end # class Util
|
1849
|
+
|
1850
|
+
end # class Nexus
|
1851
|
+
|
1852
|
+
end #module Bio
|
1853
|
+
|
1854
|
+
|