biotite 1.5.0__cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-314-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-314-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-314-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-314-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-314-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-314-darwin.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-314-darwin.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-314-darwin.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-314-darwin.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-314-darwin.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,1169 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence.phylo"
|
|
6
|
+
__author__ = "Patrick Kunzmann, Tom David Müller"
|
|
7
|
+
__all__ = ["Tree", "TreeNode", "as_binary", "TreeError"]
|
|
8
|
+
|
|
9
|
+
cimport cython
|
|
10
|
+
cimport numpy as np
|
|
11
|
+
|
|
12
|
+
import copy
|
|
13
|
+
import numpy as np
|
|
14
|
+
import networkx as nx
|
|
15
|
+
from ...file import InvalidFileError
|
|
16
|
+
from ...copyable import Copyable
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Tree(Copyable):
    """
    __init__(root)

    A :class:`Tree` represents a rooted tree
    (e.g. alignment guide tree or phylogenetic tree).

    The tree itself wraps a *root* :class:`TreeNode` object,
    accessible via the :attr:`root` property.

    A :class:`Tree` is not a container itself:
    Objects, e.g. species names or sequences, that are represented by
    the nodes, cannot be stored directly in a :class:`Tree` or its
    nodes.
    Instead, each leaf :class:`TreeNode` has a reference index:
    These indices refer to a separate list or array, containing the
    actual reference objects.

    The property :attr:`leaves` contains a list of the leaf nodes,
    where the index of the leaf node in this list is equal to the
    reference index of the leaf node (``leaf.index``).

    The amount of leaves in a tree can be determined via the
    :func:`len()` function.

    Objects of this class are immutable.

    Parameters
    ----------
    root: TreeNode
        The root of the tree.
        The constructor calls the node's :func:`as_root()` method,
        in order to make it immutable.

    Attributes
    ----------
    root : TreeNode
        The root node of the tree.
    leaves : list of TreeNode
        The leaf nodes of the tree.
        The index of the leaf node in this list is equal to the
        reference index of the leaf node.
        This attribute is a shallow copy of the respective internal
        object.

    Raises
    ------
    TreeError
        If the reference indices of the leaf nodes are out of range
        or if multiple leaf nodes share the same reference index.

    Examples
    --------

    >>> objects = ["An object", "Another object", "Yet another one"]
    >>> leaf1 = TreeNode(index=0)
    >>> leaf2 = TreeNode(index=1)
    >>> leaf3 = TreeNode(index=2)
    >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
    >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
    >>> tree = Tree(root)
    >>> print(tree)
    ((0:5.0,1:7.0):3.0,2:10.0):0.0;
    >>> print([objects[node.index] for node in tree.leaves])
    ['An object', 'Another object', 'Yet another one']
    """

    def __init__(self, TreeNode root not None):
        # Finalize the root node, in order to make it immutable
        root.as_root()
        self._root = root

        cdef list leaves_unsorted = self._root.get_leaves()
        cdef int leaf_count = len(leaves_unsorted)
        cdef int index
        # Place each leaf node at the position given by its reference
        # index, so that 'self._leaves[i].index == i' holds
        self._leaves = [None] * leaf_count
        for leaf in leaves_unsorted:
            index = leaf.index
            if index >= leaf_count or index < 0:
                raise TreeError("The tree's indices are out of range")
            if self._leaves[index] is not None:
                # Without this check a duplicate index would silently
                # leave a 'None' entry at another position of the list
                raise TreeError("The tree's indices are not unique")
            self._leaves[index] = leaf

    def __copy_create__(self):
        return Tree(self._root.copy())

    @property
    def root(self):
        return self._root

    @property
    def leaves(self):
        # Return a shallow copy to keep the internal list unmodified
        return copy.copy(self._leaves)

    def as_graph(self):
        """
        as_graph()

        Obtain a graph representation of the :class:`Tree`.

        Returns
        -------
        graph : DiGraph
            A *NetworkX* directed graph.
            For a leaf node the graph node is its reference index.
            For an intermediate and root node the graph node is a tuple
            containing the graph nodes of its child nodes.
            Each edge has a ``"distance"`` attribute depicting the
            distance between the nodes.
            Each edge starts from the parent and ends at its child.

        Examples
        --------

        >>> leaves = [TreeNode(index=i) for i in range(3)]
        >>> intermediate = TreeNode([leaves[0], leaves[1]], [2.0, 3.0])
        >>> root = TreeNode([intermediate, leaves[2]], [1.0, 5.0])
        >>> tree = Tree(root)
        >>> graph = tree.as_graph()
        >>> for node_i, node_j in graph.edges:
        ...     print(f"{str(node_i):12} -> {str(node_j):12}")
        (0, 1)       -> 0
        (0, 1)       -> 1
        ((0, 1), 2)  -> (0, 1)
        ((0, 1), 2)  -> 2
        """
        cdef tuple children
        cdef bint children_handled
        cdef TreeNode node, child, parent

        # Function-scope import:
        # A deque removes its first element in constant time,
        # whereas 'list.pop(0)' needs to shift all remaining elements
        from collections import deque

        graph = nx.DiGraph()

        # This dict maps a TreeNode to its corresponding int or tuple
        cdef dict node_repr = {}

        # A First-In-First-Out queue for iterative handling of each node
        # Starting with all leaf nodes
        queue = deque(self._leaves)
        # A set representation of the same queue for efficient
        # '__contains__()' operation
        cdef set queue_set = set(self._leaves)
        while queue:
            node = queue.popleft()

            if node.is_leaf():
                node_repr[node] = node.index
            else:
                children = node.children
                children_handled = True
                for child in children:
                    if child not in node_repr:
                        children_handled = False
                        break
                # If the node representation of any child of this node
                # is not calculated yet, put this node to the end of the
                # queue and handle it later
                # The node stays in 'queue_set', as it is still queued
                if not children_handled:
                    queue.append(node)
                    continue
                node_key = tuple(node_repr[child] for child in children)
                node_repr[node] = node_key
                # Add edges to children in graph
                for child in children:
                    graph.add_edge(
                        node_key, node_repr[child], distance=child.distance
                    )

            # This leads finally to termination of the loop:
            # When the root node is handled the last element in the
            # queue is handled and no new node is added to the queue
            if not node.is_root():
                parent = node.parent
                # The parent node might be already in the queue from
                # handling another child node
                if parent not in queue_set:
                    queue.append(parent)
                    queue_set.add(parent)

            # Node is handled
            # -> not in 'queue' anymore
            # -> remove also from 'queue_set'
            queue_set.remove(node)

        return graph

    def get_distance(self, index1, index2, bint topological=False):
        """
        get_distance(index1, index2, topological=False)

        Get the distance between two leaf nodes.

        The distance is the sum of all distances from each of the
        two nodes to their lowest common ancestor.

        Parameters
        ----------
        index1, index2 : int
            The reference indices of the two leaf nodes, to calculate
            the distance for.
        topological : bool, optional
            If True the topological distance is measured, i.e. each
            child-parent distance is 1.
            Otherwise, the distances from the `distance` attribute are
            used.

        Returns
        -------
        distance : float
            The distance between the nodes.

        Examples
        --------

        >>> leaf1 = TreeNode(index=0)
        >>> leaf2 = TreeNode(index=1)
        >>> leaf3 = TreeNode(index=2)
        >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
        >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
        >>> tree = Tree(root)
        >>> print(tree.get_distance(0,1))
        12.0
        >>> print(tree.get_distance(0,2))
        18.0
        >>> print(tree.get_distance(1,2))
        20.0
        """
        return self._leaves[index1].distance_to(
            self._leaves[index2], topological
        )

    def to_newick(self, labels=None, bint include_distance=True,
                  round_distance=None):
        """
        to_newick(labels=None, include_distance=True, round_distance=None)

        Obtain the Newick notation of the tree.

        Parameters
        ----------
        labels : iterable object of str
            The labels the indices in the leaf nodes refer to.
        include_distance : bool
            If true, the distances are displayed in the newick notation,
            otherwise they are omitted.
        round_distance : int, optional
            If set, the distances are rounded to the given number of
            digits.

        Returns
        -------
        newick : str
            The Newick notation of the tree.

        Examples
        --------

        >>> leaf1 = TreeNode(index=0)
        >>> leaf2 = TreeNode(index=1)
        >>> leaf3 = TreeNode(index=2)
        >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
        >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
        >>> tree = Tree(root)
        >>> print(tree.to_newick())
        ((0:5.0,1:7.0):3.0,2:10.0):0.0;
        >>> print(tree.to_newick(include_distance=False))
        ((0,1),2);
        >>> labels = ["foo", "bar", "foobar"]
        >>> print(tree.to_newick(labels=labels, include_distance=False))
        ((foo,bar),foobar);
        """
        # The node-level notation lacks the terminal semicolon
        return self._root.to_newick(
            labels, include_distance, round_distance
        ) + ";"

    @staticmethod
    def from_newick(str newick, list labels=None):
        """
        from_newick(newick, labels=None)

        Create a tree from a Newick notation.

        Parameters
        ----------
        newick : str
            The Newick notation to create the tree from.
        labels : list of str, optional
            If the Newick notation contains labels, that are not
            parseable into reference indices,
            i.e. they are not integers, this parameter can be provided
            to convert these labels into reference indices.
            The corresponding index is the position of the label in the
            provided list.

        Returns
        -------
        tree : Tree
            A tree created from the Newick notation.

        Raises
        ------
        InvalidFileError
            If the Newick string is empty.

        Notes
        -----
        This function does accept but does not require the Newick string
        to have the terminal semicolon.

        Keep in mind that the :class:`Tree` class does not support any
        labels on intermediate nodes.
        If the string contains such labels, they are discarded.
        """
        newick = newick.strip()
        if not newick:
            raise InvalidFileError("Newick string is empty")
        # Remove terminal semicolon as required by
        # 'TreeNode.from_newick()'
        if newick.endswith(";"):
            newick = newick[:-1]
        # The distance returned for the root node is not used
        root, _ = TreeNode.from_newick(newick, labels)
        return Tree(root)

    def __str__(self):
        return self.to_newick()

    def __len__(self):
        # The length of a tree is its number of leaf nodes
        return len(self._leaves)

    def __eq__(self, item):
        if not isinstance(item, Tree):
            return False
        return self._root == item._root

    def __hash__(self):
        return hash(self._root)
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
cdef class TreeNode:
    """
    __init__(children=None, distances=None, index=None)

    :class:`TreeNode` objects are part of a rooted tree
    (e.g. alignment guide tree).
    There are two :class:`TreeNode` subtypes:

        - Leaf node - Cannot have child nodes but has an index referring
          to an array-like reference object.
        - Intermediate node - Has child nodes but no reference index

    This subtype is determined based on whether child nodes were given
    to the constructor.

    Every :class:`TreeNode` has a reference to its parent node.
    A root node is a node without a parent node, that is finalized
    using `as_root()`.
    The call of this function prevents that the node can be used as
    child.

    :class:`TreeNode` objects are semi-immutable:
    The child nodes or the reference index are fixed at the time of
    creation.
    Only the parent can be set once, when the parent node is created.
    :class:`TreeNode` objects that are finalized using `as_root()` are
    completely immutable.

    All object properties are read-only.

    Parameters
    ----------
    children: array-like object of TreeNode, length=n, optional
        The children of this node.
        As this causes the creation of an intermediate node,
        this parameter cannot be used in combination with `index`.
    distances: array-like object of float, length=n, optional
        The distances of the child nodes to this node.
        Must be set if `children` is set.
    index: int, optional
        Index to a reference array-like object
        (e.g. list of sequences or labels).
        Must not be negative.
        As this causes the creation of a leaf node, this parameter
        cannot be used in combination with the other parameters.

    Attributes
    ----------
    parent : TreeNode
        The parent node.
        `None` if node has no parent.
    children : tuple of TreeNode
        The child nodes.
        `None` if node is a leaf node.
    index : int
        The index to a reference array-like object.
        `None` if node is not a leaf node.
    distance : float
        Distance to parent node.
        `None` if `parent` is `None`.

    Examples
    --------
    Creating leaf nodes:

    >>> leaf1 = TreeNode(index=0)
    >>> leaf2 = TreeNode(index=1)
    >>> leaf3 = TreeNode(index=2)

    Creating intermediate nodes as parent of those leaf nodes:

    >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
    >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
    >>> print(root)
    ((0:5.0,1:7.0):3.0,2:10.0):0.0
    """

    # Reference index of a leaf node, -1 marks an intermediate node
    cdef int _index
    # Distance to the parent node, 0 as long as no parent is set
    cdef float _distance
    # Set by 'as_root()'
    cdef bint _is_root
    cdef TreeNode _parent
    # 'None' for leaf nodes
    cdef tuple _children

    def __cinit__(self, children=None, distances=None, index=None):
        cdef TreeNode child
        cdef float distance

        self._is_root = False
        self._distance = 0
        self._parent = None
        if index is None:
            # Node is intermediate -> has children
            if children is None or distances is None:
                raise TypeError(
                    "Either reference index (for terminal node) or "
                    "child nodes including the distance "
                    "(for intermediate node) must be set"
                )
            for item in children:
                if not isinstance(item, TreeNode):
                    raise TypeError(
                        f"Expected 'TreeNode', but got '{type(item).__name__}'"
                    )
            for item in distances:
                if not isinstance(item, float) and not isinstance(item, int):
                    raise TypeError(
                        f"Expected 'float' or 'int', "
                        f"but got '{type(item).__name__}'"
                    )
            if len(children) == 0:
                raise TreeError(
                    "Intermediate nodes must at least contain one child node"
                )
            if len(children) != len(distances):
                raise ValueError(
                    "The number of children must equal the number of distances"
                )
            # Identity is symmetric -> checking each pair once is enough
            for i in range(len(children)):
                for j in range(i + 1, len(children)):
                    if children[i] is children[j]:
                        raise TreeError(
                            "Two child nodes cannot be the same object"
                        )
            # Check all children before any of them is modified:
            # otherwise a failure at a later child would leave the
            # earlier children attached to a half-constructed node
            for child in children:
                if child._parent is not None or child._is_root:
                    raise TreeError("Node already has a parent")
            self._index = -1
            self._children = tuple(children)
            for child, distance in zip(children, distances):
                child._set_parent(self, distance)
        elif index < 0:
            raise ValueError("Index cannot be negative")
        else:
            # Node is terminal -> has no children
            if children is not None or distances is not None:
                raise TypeError(
                    "Reference index and child nodes are mutually exclusive"
                )
            self._index = index
            self._children = None

    def _set_parent(self, TreeNode parent not None, float distance):
        # A node can be attached only once and never after 'as_root()'
        if self._parent is not None or self._is_root:
            raise TreeError("Node already has a parent")
        self._parent = parent
        self._distance = distance

    def copy(self):
        """
        copy()

        Create a deep copy of this :class:`TreeNode`.

        The copy includes this node, its reference index and deep copies
        of its child nodes.
        The parent node and the distance to it is not included.

        Returns
        -------
        copy : TreeNode
            The copied node.
        """
        if self.is_leaf():
            return TreeNode(index=self._index)
        else:
            distances = [child.distance for child in self._children]
            children_clones = [child.copy() for child in self._children]
            return TreeNode(children_clones, distances)

    @property
    def index(self):
        return None if self._index == -1 else self._index

    @property
    def children(self):
        return self._children

    @property
    def parent(self):
        return self._parent

    @property
    def distance(self):
        return None if self._parent is None else self._distance

    def is_leaf(self):
        """
        is_leaf()

        Check if the node is a leaf node.

        Returns
        -------
        is_leaf : bool
            True if the node is a leaf node, false otherwise.
        """
        return self._index != -1

    def is_root(self):
        """
        is_root()

        Check if the node is a root node.

        Returns
        -------
        is_root : bool
            True if the node is a root node, false otherwise.
        """
        return bool(self._is_root)

    def as_root(self):
        """
        as_root()

        Convert the node into a root node.

        When a root node is used as `child` parameter in the
        construction of a potential parent node, a :class:`TreeError` is
        raised.

        Raises
        ------
        TreeError
            If the node already has a parent.
        """
        if self._parent is not None:
            raise TreeError("Node has parent, cannot be a root node")
        self._is_root = True

    def distance_to(self, TreeNode node, bint topological=False):
        """
        distance_to(node, topological=False)

        Get the distance of this node to another node.

        The distance is the sum of all distances from this and the other
        node to the lowest common ancestor.

        Parameters
        ----------
        node : TreeNode
            The second node for distance calculation.
        topological : bool, optional
            If true, each step towards the lowest common ancestor
            counts as 1 instead of the distance stored for that step.

        Returns
        -------
        distance : float
            The distance of this node to `node`.

        Raises
        ------
        TreeError
            If the nodes have no common ancestor.

        Examples
        --------

        >>> leaf1 = TreeNode(index=0)
        >>> leaf2 = TreeNode(index=1)
        >>> leaf3 = TreeNode(index=2)
        >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
        >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
        >>> print(leaf1.distance_to(leaf2))
        12.0
        >>> print(leaf1.distance_to(leaf3))
        18.0
        """
        cdef float distance = 0
        cdef TreeNode current_node = None
        cdef TreeNode lca = self.lowest_common_ancestor(node)
        if lca is None:
            raise TreeError("The nodes do not have a common ancestor")
        # Sum distances from both nodes until LCA has been reached
        for start_node in (self, node):
            current_node = start_node
            while current_node is not lca:
                if topological:
                    distance += 1
                else:
                    distance += current_node._distance
                current_node = current_node._parent
        return distance

    def lowest_common_ancestor(self, TreeNode node):
        """
        lowest_common_ancestor(node)

        Get the lowest common ancestor of this node and another node.

        Parameters
        ----------
        node : TreeNode
            The node to get the lowest common ancestor with.

        Returns
        -------
        ancestor : TreeNode or None
            The lowest common ancestor. `None` if the nodes have no
            common ancestor, i.e. they are not in the same tree
        """
        cdef TreeNode lca = None
        # Create two paths from the nodes to root
        cdef list self_path = _create_path_to_root(self)
        cdef list other_path = _create_path_to_root(node)
        # Reverse iteration through both paths (beginning from root)
        # until the paths diverge
        for self_ancestor, other_ancestor in zip(
            reversed(self_path), reversed(other_path)
        ):
            if self_ancestor is not other_ancestor:
                # Different node -> Not common ancestor
                # -> return last common ancestor found
                break
            # Same node -> common ancestor
            lca = self_ancestor
        return lca

    def get_indices(self):
        """
        get_indices()

        Get an array of reference indices that leaf nodes of this node
        contain.

        This method identifies all leaf nodes, which have this node as
        ancestor and puts the contained indices into an array.
        If this node is a leaf node itself, the array contains the
        reference index of this node as single element.

        Returns
        -------
        indices : ndarray, dtype=int32
            The reference indices of direct and indirect child leaf
            nodes of this node.

        Examples
        --------

        >>> leaf0 = TreeNode(index=0)
        >>> leaf1 = TreeNode(index=1)
        >>> leaf2 = TreeNode(index=2)
        >>> leaf3 = TreeNode(index=3)
        >>> intr0 = TreeNode([leaf0, leaf2], [0, 0])
        >>> intr1 = TreeNode([leaf1, leaf3], [0, 0])
        >>> root = TreeNode([intr0, intr1], [0, 0])
        >>> print(leaf0.get_indices())
        [0]
        >>> print(intr0.get_indices())
        [0 2]
        >>> print(intr1.get_indices())
        [1 3]
        >>> print(root.get_indices())
        [0 2 1 3]
        """
        cdef TreeNode leaf
        return np.array(
            [leaf._index for leaf in self.get_leaves()], dtype=np.int32
        )

    def get_leaves(self):
        """
        get_leaves()

        Get a list of leaf nodes that are direct or indirect child nodes
        of this node.

        This method identifies all leaf nodes, which have this node as
        ancestor.
        If this node is a leaf node itself, the list contains this node
        as single element.

        Returns
        -------
        leaf_nodes : list
            The leaf nodes, that are direct or indirect child nodes
            of this node.
        """
        cdef list leaf_list = []
        # delegate to 'cdef' method
        # to reduce overhead of recursive function calling
        _get_leaves(self, leaf_list)
        return leaf_list

    def get_leaf_count(self):
        """
        get_leaf_count()

        Get the number of direct or indirect leaves of this node.

        This method identifies all leaf nodes, which have this node as
        ancestor.
        If this node is a leaf node itself, 1 is returned.

        Returns
        -------
        count : int
            The number of leaf nodes.
        """
        return _get_leaf_count(self)

    def to_newick(self, labels=None, bint include_distance=True,
                  round_distance=None):
        """
        to_newick(labels=None, include_distance=True, round_distance=None)

        Obtain the node represented in Newick notation.

        The terminal semicolon is not included.

        Parameters
        ----------
        labels : iterable object of str
            The labels the indices in the leaf nodes refer to
        include_distance : bool
            If true, the distances are displayed in the newick notation,
            otherwise they are omitted.
        round_distance : int, optional
            If set, the distances are rounded to the given number of
            digits.

        Returns
        -------
        newick : str
            The Newick notation of the node.

        Raises
        ------
        ValueError
            If the label of a leaf node contains a character that is
            part of the Newick syntax.

        Examples
        --------

        >>> leaf1 = TreeNode(index=0)
        >>> leaf2 = TreeNode(index=1)
        >>> leaf3 = TreeNode(index=2)
        >>> inter = TreeNode([leaf1, leaf2], [5.0, 7.0])
        >>> root = TreeNode([inter, leaf3], [3.0, 10.0])
        >>> print(root.to_newick())
        ((0:5.0,1:7.0):3.0,2:10.0):0.0
        >>> print(root.to_newick(include_distance=False))
        ((0,1),2)
        >>> labels = ["foo", "bar", "foobar"]
        >>> print(root.to_newick(labels=labels, include_distance=False))
        ((foo,bar),foobar)
        """
        if self.is_leaf():
            if labels is not None:
                # Only the label of this leaf is relevant
                label = labels[self._index]
                # Characters that are part of the Newick syntax
                # are illegal
                for char in (",", ":", ";", "(", ")"):
                    if char in label:
                        raise ValueError(
                            f"Label '{label}' contains "
                            f"illegal character '{char}'"
                        )
            else:
                label = str(self._index)
            node_string = f"{label}"
        else:
            # Build string in a recursive way
            child_strings = [child.to_newick(
                labels, include_distance, round_distance
            ) for child in self._children]
            node_string = "(" + ",".join(child_strings) + ")"

        if not include_distance:
            return node_string
        if round_distance is None:
            return f"{node_string}:{self._distance}"
        return f"{node_string}:{self._distance:.{round_distance}f}"

    @staticmethod
    def from_newick(str newick, list labels=None):
        """
        from_newick(newick, labels=None)

        Create a node and all its child nodes from a Newick notation.

        Parameters
        ----------
        newick : str
            The Newick notation to create the node from.
        labels : list of str, optional
            If the Newick notation contains labels, that are not
            parseable into reference indices,
            i.e. they are not integers, this parameter can be provided
            to convert these labels into reference indices.
            The corresponding index is the position of the label in the
            provided list.

        Returns
        -------
        node : TreeNode
            The tree node parsed from the Newick notation.
        distance : float
            Distance of the node to its parent. If the newick notation
            does not provide a distance, it is set to 0 by default.

        Raises
        ------
        InvalidFileError
            If the brackets in the notation are unbalanced or an
            intermediate node has no child.

        Notes
        -----
        The provided Newick notation must not have a terminal semicolon.
        If you have a Newick notation that covers an entire tree, you
        may use the same method in the :class:`Tree` class instead.
        Keep in mind that the :class:`TreeNode` class does not support
        any labels on intermediate nodes.
        If the string contains such labels, they are discarded.
        """
        cdef int i
        cdef int subnewick_start_i = -1
        cdef int subnewick_stop_i = -1
        cdef int first_close_i
        cdef int last_close_i
        cdef int last_open_i
        cdef int level = 0
        cdef list comma_pos
        cdef list children
        cdef list distances

        # Ignore any whitespace
        newick = "".join(newick.split())

        # Find brackets belonging to sub-newick
        # e.g. (A:0.1,B:0.2):0.5
        #      ^           ^
        subnewick_start_i = newick.find("(")
        first_close_i = newick.find(")")
        if first_close_i != -1 and (
            subnewick_start_i == -1 or first_close_i < subnewick_start_i
        ):
            raise InvalidFileError("Bracket closed before it was opened")
        last_close_i = newick.rfind(")")
        last_open_i = newick.rfind("(")
        if last_open_i > last_close_i:
            raise InvalidFileError("Bracket was opened but not closed")
        if last_close_i != -1:
            subnewick_stop_i = last_close_i + 1

        if subnewick_start_i == -1 and subnewick_stop_i == -1:
            # No brackets -> no sub-newick -> Leaf node
            label_and_distance = newick
            try:
                label, distance = label_and_distance.split(":")
                distance = float(distance)
            except ValueError:
                # No colon -> No distance is provided
                # (a malformed distance ends up here as well and the
                # complete string is taken as label)
                distance = 0
                label = label_and_distance
            index = int(label) if labels is None else labels.index(label)
            return TreeNode(index=index), distance

        # Intermediate node
        if subnewick_stop_i == len(newick):
            # Node with neither distance nor label
            distance = 0
        else:
            label_and_distance = newick[subnewick_stop_i:]
            try:
                # Label of intermediate nodes is discarded
                _, distance = label_and_distance.split(":")
                distance = float(distance)
            except ValueError:
                # No colon -> No distance is provided
                # (a malformed distance is treated the same way)
                distance = 0
        distance = float(distance)

        subnewick = newick[subnewick_start_i+1 : subnewick_stop_i-1]
        if len(subnewick) == 0:
            raise InvalidFileError(
                "Intermediate node must at least have one child"
            )
        # Parse children
        # Split subnewick at ',' if ',' is at current level
        # (not in a subsubnewick)
        comma_pos = []
        for i, char in enumerate(subnewick):
            if char == "(":
                level += 1
            elif char == ")":
                level -= 1
            elif char == ",":
                if level == 0:
                    comma_pos.append(i)
            if level < 0:
                raise InvalidFileError(
                    "Bracket closed before it was opened"
                )

        children = []
        distances = []
        # Recursive tree construction:
        # Each child notation starts at the beginning or after a comma
        # and stops before the next comma or at the end
        # (A,B),(C,D),(E,F)
        # ----- ----- -----
        # Without any comma the entire subnewick is a single child
        starts = [0] + [p + 1 for p in comma_pos]
        stops = comma_pos + [len(subnewick)]
        for start, stop in zip(starts, stops):
            child, dist = TreeNode.from_newick(
                subnewick[start:stop], labels=labels
            )
            children.append(child)
            distances.append(dist)
        return TreeNode(children, distances), distance

    def __str__(self):
        return self.to_newick()

    def __eq__(self, item):
        if not isinstance(item, TreeNode):
            return False
        cdef TreeNode node = item
        if self._distance != node._distance:
            return False
        # This also covers the comparison of a leaf node with an
        # intermediate node, as the index of the latter one is -1
        if self._index != node._index:
            return False
        if self._index == -1:
            # Both nodes are intermediate nodes
            # Order of children is not important -> set
            return frozenset(self._children) == frozenset(node._children)
        return True

    def __hash__(self):
        # Order of children is not important -> set
        children_set = frozenset(self._children) \
                       if self._children is not None else None
        return hash((self._index, children_set, self._distance))
|
|
991
|
+
|
|
992
|
+
|
|
993
|
+
cdef _get_leaves(TreeNode node, list leaf_list):
    """
    Append all leaf nodes below `node` to `leaf_list`.
    If `node` is a leaf node, the node itself is appended.
    """
    cdef TreeNode child
    if node._index != -1:
        # An index is set -> node is a leaf node -> recursion stops here
        leaf_list.append(node)
        return
    # No index is set -> intermediate node -> descend into each child
    for child in node._children:
        _get_leaves(child, leaf_list)
|
|
1002
|
+
|
|
1003
|
+
|
|
1004
|
+
cdef int _get_leaf_count(TreeNode node):
    """
    Count the leaf nodes below `node`.
    A leaf node counts itself, i.e. 1 is returned for it.
    """
    cdef TreeNode child
    cdef int total = 0
    if node._index != -1:
        # An index is set -> node is a leaf node
        return 1
    # No index is set -> intermediate node
    # -> add up the leaf counts of all children
    for child in node._children:
        total += _get_leaf_count(child)
    return total
|
|
1015
|
+
|
|
1016
|
+
|
|
1017
|
+
cdef list _create_path_to_root(TreeNode node):
    """
    Create a list of nodes representing the path from the given node
    up to the topmost node, i.e. the one without a parent.
    The given node is the first element of the list.
    For `None` an empty list is returned.
    """
    cdef list ancestors = []
    cdef TreeNode cursor = node
    # Follow the parent references until no parent is left
    while cursor is not None:
        ancestors.append(cursor)
        cursor = cursor._parent
    return ancestors
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
|
|
1031
|
+
def as_binary(tree_or_node):
    """
    as_binary(tree_or_node)

    Convert a tree into a binary tree.

    In general a :class:`TreeNode` can have more or less than two
    children.
    However guide trees usually expect each intermediate node to have
    exactly two child nodes.
    This function creates a binary :class:`Tree` (or :class:`TreeNode`)
    for the given :class:`Tree` (or :class:`TreeNode`):
    Intermediate nodes that have only a single child are deleted and its
    parent node is directly connected to its child node.
    Intermediate nodes that have more than two children are divided into
    multiple nodes (distances are preserved).

    Parameters
    ----------
    tree_or_node : Tree or TreeNode
        The tree or node to be converted into a binary tree or node.

    Returns
    -------
    binary_tree_or_node : Tree or TreeNode
        The converted tree or node.

    Raises
    ------
    TypeError
        If `tree_or_node` is neither a :class:`Tree` nor a
        :class:`TreeNode`.
    """
    if isinstance(tree_or_node, Tree):
        node, _ = _as_binary(tree_or_node.root)
        return Tree(node)
    elif isinstance(tree_or_node, TreeNode):
        # '_as_binary()' gives the converted node and its distance
        # -> Only the node itself is returned
        node, _ = _as_binary(tree_or_node)
        return node
    else:
        raise TypeError(
            f"Expected 'Tree' or 'TreeNode', not {type(tree_or_node).__name__}"
        )
|
|
1068
|
+
|
|
1069
|
+
cdef _as_binary(TreeNode node):
    """
    The actual logic wrapped by :func:`as_binary()`.

    Parameters
    ----------
    node : TreeNode
        The node to be converted.

    Returns
    -------
    binary_node: TreeNode
        The converted node.
    distance : float
        The distance of the converted node to its parent.
        `None`, if the converted node has no parent.
    """
    cdef TreeNode child
    cdef TreeNode current_div_node
    cdef tuple children
    cdef list rem_children
    cdef list distances
    cdef float distance

    children = node.children
    if children is None:
        # Leaf node
        return TreeNode(index=node.index), node.distance
    elif len(children) == 1:
        # Intermediate node with one child
        # -> Omit node and directly connect its child to its parent
        # The distances are added
        #
        #       |--              |--
        #       |                |
        # --|--|--    ->    ----|--
        #       |                |
        #       |--              |--
        #
        child, distance = _as_binary(children[0])
        if node.parent is None:
            # Child becomes the topmost node -> No distance to parent
            # The parent is checked instead of 'is_root()', as a node
            # without parent has no distance that could be added,
            # even if it was never finalized via 'as_root()'
            return child, None
        else:
            return child, node.distance + distance
    elif len(children) > 2:
        # Intermediate node with more than two children
        # -> Create a new node having two children:
        #    - One of the children of the original node
        #    - The original node with one child less (distance = 0)
        # Repeat until all children are put into binary nodes
        #
        #   |--              |--
        #   |          --|   |--
        # --|--   ->     |--|
        #   |                |--
        #   |--
        #
        # The remaining children
        rem_children, distances = [list(tup) for tup in zip(
            *[_as_binary(child) for child in children]
        )]
        # The bottom-most node is created
        # -> Gets the first two children
        current_div_node = TreeNode(rem_children[:2], distances[:2])
        # Each further child gets a new node, that also contains
        # the intermediate node from the last step
        for i in range(2, len(rem_children)):
            current_div_node = TreeNode(
                (current_div_node, rem_children[i]),
                (0, distances[i])
            )
        return current_div_node, node.distance
    else:
        # Intermediate node with exactly two children
        # -> Keep node unchanged
        binary_children, distances = [list(tup) for tup in zip(
            *[_as_binary(child) for child in children]
        )]
        return TreeNode(binary_children, distances), node.distance
|
|
1162
|
+
|
|
1163
|
+
|
|
1164
|
+
|
|
1165
|
+
class TreeError(Exception):
    """
    Indicates a problem that is related to the topology of a tree.
    """
|