biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence.phylo"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["neighbor_joining"]
|
|
8
|
+
|
|
9
|
+
cimport cython
|
|
10
|
+
cimport numpy as np
|
|
11
|
+
|
|
12
|
+
from .tree import Tree, TreeNode
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
ctypedef np.float32_t float32
|
|
16
|
+
ctypedef np.uint8_t uint8
|
|
17
|
+
ctypedef np.uint32_t uint32
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Largest finite float32 value.
# neighbor_joining() uses it as the starting value of its minimum search
# and rejects any input distance that is greater than or equal to it
# (reported as "infinity").
cdef float32 MAX_FLOAT = np.finfo(np.float32).max
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def neighbor_joining(np.ndarray distances):
    """
    neighbor_joining(distances)

    Perform hierarchical clustering using the
    *neighbor joining* algorithm. :footcite:`Saitou1987, Studier1988`

    In contrast to UPGMA this algorithm does not assume a constant
    evolution rate. The resulting tree is considered to be unrooted.

    Parameters
    ----------
    distances : ndarray, shape=(n,n)
        Pairwise distance matrix.
        The matrix is not modified; the algorithm works on a ``float32``
        copy.

    Returns
    -------
    tree : Tree
        The clustered tree.
        Although the :class:`Tree` object is held by a root
        :class:`TreeNode` (with three children), the tree should be
        interpreted as unrooted.
        The `index` attribute in the leaf :class:`TreeNode` objects
        refer to the indices of `distances`.

    Raises
    ------
    ValueError
        If the distance matrix is not two-dimensional,
        contains NaN values,
        is not square and symmetric,
        contains values greater than or equal to the maximum ``float32``
        value (treated as infinity),
        has less than 4 rows
        or if any matrix entry is below 0.

    Notes
    -----
    The created tree is binary except for the root node, that has three
    child nodes.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> distances = np.array([
    ...     [0, 1, 7, 7, 9],
    ...     [1, 0, 7, 6, 8],
    ...     [7, 7, 0, 2, 4],
    ...     [7, 6, 2, 0, 3],
    ...     [9, 8, 4, 3, 0],
    ... ])
    >>> tree = neighbor_joining(distances)
    >>> print(tree.to_newick(include_distance=False))
    (3,(2,(1,0)),4);
    """
    cdef int i=0, j=0, k=0
    cdef int i_min=0, j_min=0
    cdef float32 dist=0, dist_min, dist_sum=0
    cdef float32 node_dist_i=0, node_dist_j=0, node_dist_k=0


    # 'distances.shape' is indexed at C level for a typed ndarray,
    # so the dimensionality must be known before 'shape[1]' is read
    if distances.ndim != 2:
        raise ValueError("Distance matrix must be two-dimensional")
    # NaN must be checked before symmetry:
    # 'np.allclose()' treats NaN as unequal to itself, so a matrix
    # containing NaN would otherwise be reported as asymmetric
    if np.isnan(distances).any():
        raise ValueError("Distance matrix contains NaN values")
    if distances.shape[0] != distances.shape[1] \
        or not np.allclose(distances.T, distances):
            raise ValueError("Distance matrix must be symmetric")
    if (distances >= MAX_FLOAT).any():
        raise ValueError("Distance matrix contains infinity")
    if distances.shape[0] < 4:
        raise ValueError("At least 4 nodes are required")
    if (distances < 0).any():
        raise ValueError("Distances must be positive")


    # Keep track on clustered indices:
    # Initially each position holds the leaf node of the respective
    # matrix index, later on the node clustered at this position
    cdef np.ndarray nodes = np.array(
        [TreeNode(index=i) for i in range(distances.shape[0])]
    )
    # Indicates whether an index in the distance matrix has already been
    # clustered and the respective rows and columns can be ignored
    cdef uint8[:] is_clustered_v = np.full(
        distances.shape[0], False, dtype=np.uint8
    )
    # Number of positions that are not marked as clustered, yet;
    # in the beginning these are all positions
    cdef int n_rem_nodes = distances.shape[0]
    # The divergence of a 'taxon'
    # describes the relative evolution rate
    cdef float32[:] divergence_v = np.zeros(
        distances.shape[0], dtype=np.float32
    )
    # Triangular matrix for storing the divergence corrected distances
    # (only the entries with i > j are written and read)
    cdef float32[:,:] corr_distances_v = np.zeros(
        (distances.shape[0],) * 2, dtype=np.float32
    )
    # Working copy, as the distances are overwritten during clustering
    cdef float32[:,:] distances_v = distances.astype(np.float32, copy=True)

    # Cluster indices

    # Exit loop via 'return'
    while True:

        # Calculate divergence:
        # the sum of distances to all remaining nodes
        for i in range(distances_v.shape[0]):
            if is_clustered_v[i]:
                continue
            dist_sum = 0
            for k in range(distances_v.shape[0]):
                if is_clustered_v[k]:
                    continue
                dist_sum += distances_v[i,k]
            divergence_v[i] = dist_sum

        # Calculate corrected distance matrix
        for i in range(distances_v.shape[0]):
            if is_clustered_v[i]:
                continue
            for j in range(i):
                if is_clustered_v[j]:
                    continue
                corr_distances_v[i,j] = \
                    (n_rem_nodes - 2) * distances_v[i,j] \
                    - divergence_v[i] - divergence_v[j]

        # Find minimum corrected distance
        dist_min = MAX_FLOAT
        i_min = -1
        j_min = -1
        for i in range(corr_distances_v.shape[0]):
            if is_clustered_v[i]:
                continue
            for j in range(i):
                if is_clustered_v[j]:
                    continue
                dist = corr_distances_v[i,j]
                if dist < dist_min:
                    dist_min = dist
                    i_min = i
                    j_min = j

        # Check if all nodes have been clustered
        if i_min == -1 or j_min == -1:
            # No distance found -> all leaf nodes are clustered
            # -> exit loop
            # NOTE(review): No statement follows the loop, so the
            # function implicitly returns None in this case.
            # This looks reachable only if no corrected distance is
            # below MAX_FLOAT (e.g. float32 overflow) - confirm whether
            # an exception would be more appropriate
            break

        # Cluster the nodes with minimum distance
        # replacing the node at position i_min
        # leaving the node at position j_min empty
        # (is_clustered_v -> True)
        # '1.0 / ...' enforces floating point division independent of
        # the language level the module is compiled with;
        # 'n_rem_nodes' is at least 3 here, so the divisor is never 0
        node_dist_i = 0.5 * (
            distances_v[i_min,j_min]
            + 1.0 / (n_rem_nodes-2)
            * (divergence_v[i_min] - divergence_v[j_min])
        )
        node_dist_j = 0.5 * (
            distances_v[i_min,j_min]
            + 1.0 / (n_rem_nodes-2)
            * (divergence_v[j_min] - divergence_v[i_min])
        )
        if n_rem_nodes > 3:
            # Clustering is not finished
            # -> Create a node with two children
            nodes[i_min] = TreeNode(
                (nodes[i_min], nodes[j_min]),
                (node_dist_i, node_dist_j)
            )
            # Mark position j_min as clustered
            nodes[j_min] = None
            is_clustered_v[j_min] = True
        else:
            # Clustering is finished
            # Combine last three nodes into root node
            # Find the index of the remaining one of the three nodes
            # (other than i_min and j_min)
            is_clustered_v[i_min] = True
            is_clustered_v[j_min] = True
            # The index of the remaining one
            k = np.where(~np.asarray(is_clustered_v, dtype=bool))[0][0]
            node_dist_k = 0.5 * (
                distances_v[i_min,k] + distances_v[j_min,k]
                - distances_v[i_min,j_min]
            )
            root = TreeNode(
                (nodes[i_min], nodes[j_min], nodes[k]),
                (node_dist_i, node_dist_j, node_dist_k)
            )
            # Clustering is finished -> put into tree and return
            return Tree(root)

        # Update distance matrix
        # Calculate distances of new node to all other nodes
        for k in range(distances_v.shape[0]):
            if not is_clustered_v[k] and k != i_min:
                dist = 0.5 * (
                    distances_v[i_min,k] + distances_v[j_min,k]
                    - distances_v[i_min,j_min]
                )
                distances_v[i_min,k] = dist
                distances_v[k,i_min] = dist

        # Update the amount of remaining nodes:
        # Exactly one position (j_min) was marked as clustered
        # in this iteration
        n_rem_nodes -= 1
|
|
Binary file
|