phykit 2.1.34__tar.gz → 2.1.35__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phykit-2.1.34 → phykit-2.1.35}/PKG-INFO +1 -1
- {phykit-2.1.34 → phykit-2.1.35}/phykit/phykit.py +34 -15
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/ancestral_reconstruction.py +701 -1
- phykit-2.1.35/phykit/version.py +1 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/PKG-INFO +1 -1
- phykit-2.1.34/phykit/version.py +0 -1
- {phykit-2.1.34 → phykit-2.1.35}/LICENSE.md +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/README.md +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/__init__.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/__main__.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/cli_registry.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/errors.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/__init__.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/boolean_argument_parsing.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/caching.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/files.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/json_output.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/parallel.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/stats_summary.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/streaming.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/service_factories.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/__init__.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/__init__.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_entropy.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_length.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_recoding.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/base.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/column_score.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/composition_per_taxon.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/dna_threader.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/faidx.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/gc_content.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/mask_alignment.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/pairwise_identity.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/plot_alignment_qc.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/rcv.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/rcvt.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/rename_fasta_entries.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/variable_sites.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/base.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/__init__.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/base.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/bipartition_support_stats.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/branch_length_multiplier.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/collapse_branches.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/concordance_asr.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/consensus_network.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/consensus_tree.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/cont_map.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/cophylo.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/density_map.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/discordance_asymmetry.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/dvmc.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/evo_tempo_map.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/evolutionary_rate.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/fit_continuous.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/hidden_paralogy_check.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/internal_branch_stats.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/internode_labeler.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/lb_score.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/ltt.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/monophyly_check.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/network_signal.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/ou_shift_detection.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/ouwie.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/patristic_distances.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phenogram.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylogenetic_glm.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylogenetic_ordination.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylogenetic_regression.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylogenetic_signal.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylomorphospace.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/polytomy_test.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/print_tree.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/prune_tree.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/quartet_network.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/rate_heterogeneity.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/relative_rate_test.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/rename_tree_tips.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/rf_distance.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/root_tree.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/saturation.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/spurious_sequence.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/stochastic_character_map.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/terminal_branch_stats.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/threshold_model.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/tip_labels.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/tip_to_tip_distance.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/total_tree_length.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/treeness.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/treeness_over_rcv.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/vcv_utils.py +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/SOURCES.txt +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/dependency_links.txt +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/entry_points.txt +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/requires.txt +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/top_level.txt +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/setup.cfg +0 -0
- {phykit-2.1.34 → phykit-2.1.35}/setup.py +0 -0
|
@@ -1474,19 +1474,24 @@ class Phykit:
|
|
|
1474
1474
|
f"""\
|
|
1475
1475
|
{help_header}
|
|
1476
1476
|
|
|
1477
|
-
Estimate ancestral states
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1477
|
+
Estimate ancestral states using maximum likelihood.
|
|
1478
|
+
|
|
1479
|
+
Supports two trait types:
|
|
1480
|
+
- continuous (default): Brownian Motion model, analogous
|
|
1481
|
+
to R's phytools::fastAnc() and ape::ace(type="ML").
|
|
1482
|
+
Optionally produce a contMap plot.
|
|
1483
|
+
- discrete: Mk model with marginal posterior probabilities
|
|
1484
|
+
at each internal node, analogous to ape::ace(type="discrete").
|
|
1485
|
+
Optionally produce a pie-chart phylogeny plot.
|
|
1486
|
+
|
|
1487
|
+
Continuous methods (--type continuous):
|
|
1488
|
+
- fast (default): Felsenstein's pruning/contrasts, O(n)
|
|
1486
1489
|
- ml: full VCV-based ML with exact conditional CIs, O(n^3)
|
|
1487
1490
|
|
|
1488
|
-
|
|
1489
|
-
|
|
1491
|
+
Discrete models (--type discrete):
|
|
1492
|
+
- ER (default): equal rates
|
|
1493
|
+
- SYM: symmetric rates
|
|
1494
|
+
- ARD: all rates different
|
|
1490
1495
|
|
|
1491
1496
|
Input trait data can be either:
|
|
1492
1497
|
(1) A two-column file (taxon<tab>value) when -c is omitted
|
|
@@ -1499,7 +1504,7 @@ class Phykit:
|
|
|
1499
1504
|
pk_ancestral_state_reconstruction, pk_asr, pk_anc_recon
|
|
1500
1505
|
|
|
1501
1506
|
Usage:
|
|
1502
|
-
phykit ancestral_state_reconstruction -t <tree> -d <trait_data> [-c <trait>] [-m <method>] [--ci] [--plot <output>] [--json]
|
|
1507
|
+
phykit ancestral_state_reconstruction -t <tree> -d <trait_data> [-c <trait>] [--type <type>] [-m <method>] [--model <model>] [--ci] [--plot <output>] [--json]
|
|
1503
1508
|
|
|
1504
1509
|
Options
|
|
1505
1510
|
=====================================================
|
|
@@ -1511,13 +1516,19 @@ class Phykit:
|
|
|
1511
1516
|
-c/--trait trait column name (required
|
|
1512
1517
|
for multi-trait files)
|
|
1513
1518
|
|
|
1519
|
+
--type trait type: continuous or
|
|
1520
|
+
discrete (default: continuous)
|
|
1521
|
+
|
|
1514
1522
|
-m/--method method to use: fast or ml
|
|
1515
|
-
(default: fast)
|
|
1523
|
+
(continuous only; default: fast)
|
|
1524
|
+
|
|
1525
|
+
--model Mk model: ER, SYM, or ARD
|
|
1526
|
+
(discrete only; default: ER)
|
|
1516
1527
|
|
|
1517
1528
|
--ci include 95% confidence
|
|
1518
|
-
intervals
|
|
1529
|
+
intervals (continuous only)
|
|
1519
1530
|
|
|
1520
|
-
--plot output path for
|
|
1531
|
+
--plot output path for plot
|
|
1521
1532
|
|
|
1522
1533
|
--json output results as JSON
|
|
1523
1534
|
"""
|
|
@@ -1533,10 +1544,18 @@ class Phykit:
|
|
|
1533
1544
|
"-c", "--trait", type=str, required=False, default=None,
|
|
1534
1545
|
help=SUPPRESS, metavar=""
|
|
1535
1546
|
)
|
|
1547
|
+
parser.add_argument(
|
|
1548
|
+
"--type", type=str, required=False, default="continuous",
|
|
1549
|
+
choices=["continuous", "discrete"], help=SUPPRESS, metavar=""
|
|
1550
|
+
)
|
|
1536
1551
|
parser.add_argument(
|
|
1537
1552
|
"-m", "--method", type=str, required=False, default="fast",
|
|
1538
1553
|
choices=["fast", "ml"], help=SUPPRESS, metavar=""
|
|
1539
1554
|
)
|
|
1555
|
+
parser.add_argument(
|
|
1556
|
+
"--model", type=str, required=False, default="ER",
|
|
1557
|
+
choices=["ER", "SYM", "ARD"], help=SUPPRESS, metavar=""
|
|
1558
|
+
)
|
|
1540
1559
|
parser.add_argument(
|
|
1541
1560
|
"--ci", action="store_true", required=False, help=SUPPRESS
|
|
1542
1561
|
)
|
|
@@ -4,6 +4,8 @@ import sys
|
|
|
4
4
|
from typing import Dict, List, Tuple
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
|
+
from scipy.linalg import expm
|
|
8
|
+
from scipy.optimize import minimize
|
|
7
9
|
|
|
8
10
|
from .base import Tree
|
|
9
11
|
from ...helpers.json_output import print_json
|
|
@@ -20,13 +22,19 @@ class AncestralReconstruction(Tree):
|
|
|
20
22
|
self.ci = parsed["ci"]
|
|
21
23
|
self.plot_output = parsed["plot_output"]
|
|
22
24
|
self.json_output = parsed["json_output"]
|
|
25
|
+
self.trait_type = parsed["trait_type"]
|
|
26
|
+
self.model = parsed["model"]
|
|
23
27
|
|
|
24
28
|
def run(self) -> None:
    """Read and validate the tree, then dispatch on the trait type.

    A ``trait_type`` of ``"discrete"`` is handled by ``_run_discrete``;
    any other value falls through to ``_run_continuous``. Both handlers
    receive the parsed tree and its tip names.
    """
    phylogeny = self.read_tree_file()
    self._validate_tree(phylogeny)
    tip_names = self.get_tip_names_from_tree(phylogeny)

    # Select the handler first so the call itself is written only once.
    if self.trait_type == "discrete":
        handler = self._run_discrete
    else:
        handler = self._run_continuous
    handler(phylogeny, tip_names)
|
|
29
36
|
|
|
37
|
+
def _run_continuous(self, tree, tree_tips) -> None:
|
|
30
38
|
if self.trait_column is not None:
|
|
31
39
|
trait_values = self._parse_multi_trait_data(
|
|
32
40
|
self.trait_data_path, tree_tips, self.trait_column
|
|
@@ -106,6 +114,8 @@ class AncestralReconstruction(Tree):
|
|
|
106
114
|
ci=getattr(args, "ci", False),
|
|
107
115
|
plot_output=getattr(args, "plot", None),
|
|
108
116
|
json_output=getattr(args, "json", False),
|
|
117
|
+
trait_type=getattr(args, "type", "continuous"),
|
|
118
|
+
model=getattr(args, "model", "ER"),
|
|
109
119
|
)
|
|
110
120
|
|
|
111
121
|
def _validate_tree(self, tree) -> None:
|
|
@@ -904,3 +914,693 @@ class AncestralReconstruction(Tree):
|
|
|
904
914
|
fig.savefig(output_path, dpi=300, bbox_inches="tight")
|
|
905
915
|
plt.close(fig)
|
|
906
916
|
print(f"Saved contMap plot: {output_path}")
|
|
917
|
+
|
|
918
|
+
# ------------------------------------------------------------------
|
|
919
|
+
# Discrete trait parsers
|
|
920
|
+
# ------------------------------------------------------------------
|
|
921
|
+
|
|
922
|
+
def _parse_discrete_trait_data_single(
|
|
923
|
+
self, path: str, tree_tips: List[str]
|
|
924
|
+
) -> Dict[str, str]:
|
|
925
|
+
try:
|
|
926
|
+
with open(path) as f:
|
|
927
|
+
lines = f.readlines()
|
|
928
|
+
except FileNotFoundError:
|
|
929
|
+
raise PhykitUserError(
|
|
930
|
+
[
|
|
931
|
+
f"{path} corresponds to no such file or directory.",
|
|
932
|
+
"Please check filename and pathing",
|
|
933
|
+
],
|
|
934
|
+
code=2,
|
|
935
|
+
)
|
|
936
|
+
|
|
937
|
+
traits: Dict[str, str] = {}
|
|
938
|
+
for line_num, line in enumerate(lines, 1):
|
|
939
|
+
line = line.strip()
|
|
940
|
+
if not line or line.startswith("#"):
|
|
941
|
+
continue
|
|
942
|
+
parts = line.split("\t")
|
|
943
|
+
if len(parts) != 2:
|
|
944
|
+
raise PhykitUserError(
|
|
945
|
+
[
|
|
946
|
+
f"Line {line_num} in trait file has {len(parts)} columns; expected 2.",
|
|
947
|
+
"Each line should be: taxon_name<tab>trait_value",
|
|
948
|
+
],
|
|
949
|
+
code=2,
|
|
950
|
+
)
|
|
951
|
+
taxon, value = parts
|
|
952
|
+
traits[taxon] = value
|
|
953
|
+
|
|
954
|
+
tree_tip_set = set(tree_tips)
|
|
955
|
+
trait_taxa_set = set(traits.keys())
|
|
956
|
+
shared = tree_tip_set & trait_taxa_set
|
|
957
|
+
|
|
958
|
+
tree_only = tree_tip_set - trait_taxa_set
|
|
959
|
+
trait_only = trait_taxa_set - tree_tip_set
|
|
960
|
+
|
|
961
|
+
if tree_only:
|
|
962
|
+
print(
|
|
963
|
+
f"Warning: {len(tree_only)} taxa in tree but not in trait file: "
|
|
964
|
+
f"{', '.join(sorted(tree_only))}",
|
|
965
|
+
file=sys.stderr,
|
|
966
|
+
)
|
|
967
|
+
if trait_only:
|
|
968
|
+
print(
|
|
969
|
+
f"Warning: {len(trait_only)} taxa in trait file but not in tree: "
|
|
970
|
+
f"{', '.join(sorted(trait_only))}",
|
|
971
|
+
file=sys.stderr,
|
|
972
|
+
)
|
|
973
|
+
|
|
974
|
+
if len(shared) < 3:
|
|
975
|
+
raise PhykitUserError(
|
|
976
|
+
[
|
|
977
|
+
f"Only {len(shared)} shared taxa between tree and trait file.",
|
|
978
|
+
"At least 3 shared taxa are required.",
|
|
979
|
+
],
|
|
980
|
+
code=2,
|
|
981
|
+
)
|
|
982
|
+
|
|
983
|
+
return {taxon: traits[taxon] for taxon in shared}
|
|
984
|
+
|
|
985
|
+
def _parse_discrete_trait_data_multi(
|
|
986
|
+
self, path: str, tree_tips: List[str], trait_column: str
|
|
987
|
+
) -> Dict[str, str]:
|
|
988
|
+
try:
|
|
989
|
+
with open(path) as f:
|
|
990
|
+
lines = f.readlines()
|
|
991
|
+
except FileNotFoundError:
|
|
992
|
+
raise PhykitUserError(
|
|
993
|
+
[
|
|
994
|
+
f"{path} corresponds to no such file or directory.",
|
|
995
|
+
"Please check filename and pathing",
|
|
996
|
+
],
|
|
997
|
+
code=2,
|
|
998
|
+
)
|
|
999
|
+
|
|
1000
|
+
data_lines = []
|
|
1001
|
+
for line in lines:
|
|
1002
|
+
stripped = line.strip()
|
|
1003
|
+
if not stripped or stripped.startswith("#"):
|
|
1004
|
+
continue
|
|
1005
|
+
data_lines.append(stripped)
|
|
1006
|
+
|
|
1007
|
+
if len(data_lines) < 2:
|
|
1008
|
+
raise PhykitUserError(
|
|
1009
|
+
[
|
|
1010
|
+
"Multi-trait file must have a header row and at least one data row.",
|
|
1011
|
+
],
|
|
1012
|
+
code=2,
|
|
1013
|
+
)
|
|
1014
|
+
|
|
1015
|
+
header_parts = data_lines[0].split("\t")
|
|
1016
|
+
if len(header_parts) < 2:
|
|
1017
|
+
raise PhykitUserError(
|
|
1018
|
+
[
|
|
1019
|
+
"Header must have at least 2 columns (taxon + at least 1 trait).",
|
|
1020
|
+
],
|
|
1021
|
+
code=2,
|
|
1022
|
+
)
|
|
1023
|
+
trait_names = header_parts[1:]
|
|
1024
|
+
|
|
1025
|
+
if trait_column not in trait_names:
|
|
1026
|
+
raise PhykitUserError(
|
|
1027
|
+
[
|
|
1028
|
+
f"Column '{trait_column}' not found in trait file.",
|
|
1029
|
+
f"Available columns: {', '.join(trait_names)}",
|
|
1030
|
+
],
|
|
1031
|
+
code=2,
|
|
1032
|
+
)
|
|
1033
|
+
|
|
1034
|
+
col_idx = trait_names.index(trait_column)
|
|
1035
|
+
|
|
1036
|
+
traits: Dict[str, str] = {}
|
|
1037
|
+
for line_idx, line in enumerate(data_lines[1:], 2):
|
|
1038
|
+
parts = line.split("\t")
|
|
1039
|
+
if len(parts) != len(header_parts):
|
|
1040
|
+
raise PhykitUserError(
|
|
1041
|
+
[
|
|
1042
|
+
f"Line {line_idx} has {len(parts)} columns; expected {len(header_parts)}.",
|
|
1043
|
+
],
|
|
1044
|
+
code=2,
|
|
1045
|
+
)
|
|
1046
|
+
taxon = parts[0]
|
|
1047
|
+
traits[taxon] = parts[1 + col_idx]
|
|
1048
|
+
|
|
1049
|
+
tree_tip_set = set(tree_tips)
|
|
1050
|
+
trait_taxa_set = set(traits.keys())
|
|
1051
|
+
shared = tree_tip_set & trait_taxa_set
|
|
1052
|
+
|
|
1053
|
+
tree_only = tree_tip_set - trait_taxa_set
|
|
1054
|
+
trait_only = trait_taxa_set - tree_tip_set
|
|
1055
|
+
|
|
1056
|
+
if tree_only:
|
|
1057
|
+
print(
|
|
1058
|
+
f"Warning: {len(tree_only)} taxa in tree but not in trait file: "
|
|
1059
|
+
f"{', '.join(sorted(tree_only))}",
|
|
1060
|
+
file=sys.stderr,
|
|
1061
|
+
)
|
|
1062
|
+
if trait_only:
|
|
1063
|
+
print(
|
|
1064
|
+
f"Warning: {len(trait_only)} taxa in trait file but not in tree: "
|
|
1065
|
+
f"{', '.join(sorted(trait_only))}",
|
|
1066
|
+
file=sys.stderr,
|
|
1067
|
+
)
|
|
1068
|
+
|
|
1069
|
+
if len(shared) < 3:
|
|
1070
|
+
raise PhykitUserError(
|
|
1071
|
+
[
|
|
1072
|
+
f"Only {len(shared)} shared taxa between tree and trait file.",
|
|
1073
|
+
"At least 3 shared taxa are required.",
|
|
1074
|
+
],
|
|
1075
|
+
code=2,
|
|
1076
|
+
)
|
|
1077
|
+
|
|
1078
|
+
return {taxon: traits[taxon] for taxon in shared}
|
|
1079
|
+
|
|
1080
|
+
# ------------------------------------------------------------------
|
|
1081
|
+
# Mk model primitives (shared with StochasticCharacterMap)
|
|
1082
|
+
# ------------------------------------------------------------------
|
|
1083
|
+
|
|
1084
|
+
def _build_q_matrix(
|
|
1085
|
+
self, params: np.ndarray, k: int, model: str
|
|
1086
|
+
) -> np.ndarray:
|
|
1087
|
+
Q = np.zeros((k, k))
|
|
1088
|
+
if model == "ER":
|
|
1089
|
+
rate = params[0]
|
|
1090
|
+
Q[:] = rate
|
|
1091
|
+
np.fill_diagonal(Q, 0.0)
|
|
1092
|
+
elif model == "SYM":
|
|
1093
|
+
idx = 0
|
|
1094
|
+
for i in range(k):
|
|
1095
|
+
for j in range(i + 1, k):
|
|
1096
|
+
Q[i, j] = params[idx]
|
|
1097
|
+
Q[j, i] = params[idx]
|
|
1098
|
+
idx += 1
|
|
1099
|
+
elif model == "ARD":
|
|
1100
|
+
idx = 0
|
|
1101
|
+
for i in range(k):
|
|
1102
|
+
for j in range(k):
|
|
1103
|
+
if i != j:
|
|
1104
|
+
Q[i, j] = params[idx]
|
|
1105
|
+
idx += 1
|
|
1106
|
+
# Set diagonal
|
|
1107
|
+
for i in range(k):
|
|
1108
|
+
Q[i, i] = -np.sum(Q[i, :])
|
|
1109
|
+
return Q
|
|
1110
|
+
|
|
1111
|
+
def _matrix_exp(self, Q: np.ndarray, t: float) -> np.ndarray:
|
|
1112
|
+
return expm(Q * t)
|
|
1113
|
+
|
|
1114
|
+
def _felsenstein_pruning(
|
|
1115
|
+
self, tree, tip_states: Dict[str, str], Q: np.ndarray,
|
|
1116
|
+
pi: np.ndarray, states: List[str]
|
|
1117
|
+
) -> Tuple[Dict, float]:
|
|
1118
|
+
k = len(states)
|
|
1119
|
+
state_idx = {s: i for i, s in enumerate(states)}
|
|
1120
|
+
cond_liks: Dict[int, np.ndarray] = {}
|
|
1121
|
+
|
|
1122
|
+
for clade in tree.find_clades(order="postorder"):
|
|
1123
|
+
if clade.is_terminal():
|
|
1124
|
+
lik = np.zeros(k)
|
|
1125
|
+
if clade.name in tip_states:
|
|
1126
|
+
lik[state_idx[tip_states[clade.name]]] = 1.0
|
|
1127
|
+
cond_liks[id(clade)] = lik
|
|
1128
|
+
else:
|
|
1129
|
+
lik = np.ones(k)
|
|
1130
|
+
for child in clade.clades:
|
|
1131
|
+
t = child.branch_length if child.branch_length else 1e-8
|
|
1132
|
+
P = self._matrix_exp(Q, t)
|
|
1133
|
+
child_lik = cond_liks[id(child)]
|
|
1134
|
+
lik *= P @ child_lik
|
|
1135
|
+
cond_liks[id(clade)] = lik
|
|
1136
|
+
|
|
1137
|
+
root_lik = cond_liks[id(tree.root)]
|
|
1138
|
+
total_lik = np.sum(pi * root_lik)
|
|
1139
|
+
if total_lik <= 0:
|
|
1140
|
+
loglik = -1e20
|
|
1141
|
+
else:
|
|
1142
|
+
loglik = np.log(total_lik)
|
|
1143
|
+
|
|
1144
|
+
return cond_liks, loglik
|
|
1145
|
+
|
|
1146
|
+
def _fit_q_matrix(
|
|
1147
|
+
self, tree, tip_states: Dict[str, str],
|
|
1148
|
+
states: List[str], model: str
|
|
1149
|
+
) -> Tuple[np.ndarray, float]:
|
|
1150
|
+
k = len(states)
|
|
1151
|
+
|
|
1152
|
+
if model == "ER":
|
|
1153
|
+
n_params = 1
|
|
1154
|
+
elif model == "SYM":
|
|
1155
|
+
n_params = k * (k - 1) // 2
|
|
1156
|
+
elif model == "ARD":
|
|
1157
|
+
n_params = k * (k - 1)
|
|
1158
|
+
else:
|
|
1159
|
+
raise PhykitUserError(
|
|
1160
|
+
[f"Unknown model '{model}'. Use ER, SYM, or ARD."],
|
|
1161
|
+
code=2,
|
|
1162
|
+
)
|
|
1163
|
+
|
|
1164
|
+
pi = np.ones(k) / k
|
|
1165
|
+
|
|
1166
|
+
def neg_loglik(params):
|
|
1167
|
+
Q = self._build_q_matrix(np.abs(params), k, model)
|
|
1168
|
+
_, ll = self._felsenstein_pruning(tree, tip_states, Q, pi, states)
|
|
1169
|
+
return -ll
|
|
1170
|
+
|
|
1171
|
+
bounds = [(1e-8, 100.0)] * n_params
|
|
1172
|
+
|
|
1173
|
+
# Multi-start optimization for robustness
|
|
1174
|
+
starting_values = [0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
|
|
1175
|
+
best_negll = np.inf
|
|
1176
|
+
best_params = np.ones(n_params) * 0.1
|
|
1177
|
+
|
|
1178
|
+
for sv in starting_values:
|
|
1179
|
+
x0 = np.ones(n_params) * sv
|
|
1180
|
+
for opt_method in ["L-BFGS-B", "Nelder-Mead"]:
|
|
1181
|
+
try:
|
|
1182
|
+
kwargs = {"method": opt_method}
|
|
1183
|
+
if opt_method == "L-BFGS-B":
|
|
1184
|
+
kwargs["bounds"] = bounds
|
|
1185
|
+
result = minimize(neg_loglik, x0, **kwargs)
|
|
1186
|
+
if result.fun < best_negll:
|
|
1187
|
+
best_negll = result.fun
|
|
1188
|
+
best_params = np.abs(result.x)
|
|
1189
|
+
except (ValueError, np.linalg.LinAlgError):
|
|
1190
|
+
continue
|
|
1191
|
+
|
|
1192
|
+
# Refine best result with Nelder-Mead
|
|
1193
|
+
try:
|
|
1194
|
+
result = minimize(
|
|
1195
|
+
neg_loglik, best_params, method="Nelder-Mead",
|
|
1196
|
+
options={"maxiter": 10000, "xatol": 1e-10, "fatol": 1e-10},
|
|
1197
|
+
)
|
|
1198
|
+
if result.fun < best_negll:
|
|
1199
|
+
best_params = np.abs(result.x)
|
|
1200
|
+
except (ValueError, np.linalg.LinAlgError):
|
|
1201
|
+
pass
|
|
1202
|
+
|
|
1203
|
+
Q = self._build_q_matrix(best_params, k, model)
|
|
1204
|
+
_, loglik = self._felsenstein_pruning(tree, tip_states, Q, pi, states)
|
|
1205
|
+
|
|
1206
|
+
return Q, loglik
|
|
1207
|
+
|
|
1208
|
+
# ------------------------------------------------------------------
|
|
1209
|
+
# Discrete marginal posteriors (upward-downward belief propagation)
|
|
1210
|
+
# ------------------------------------------------------------------
|
|
1211
|
+
|
|
1212
|
+
def _discrete_marginal_posteriors(
|
|
1213
|
+
self, tree, tip_states: Dict[str, str], Q: np.ndarray,
|
|
1214
|
+
states: List[str],
|
|
1215
|
+
) -> Dict[int, np.ndarray]:
|
|
1216
|
+
"""Compute marginal posterior probabilities for all internal nodes.
|
|
1217
|
+
|
|
1218
|
+
Uses upward-downward (inside-outside) belief propagation:
|
|
1219
|
+
1. Downward pass (postorder): Felsenstein pruning for conditional likelihoods
|
|
1220
|
+
2. Upward pass (preorder): propagate information from rest of tree
|
|
1221
|
+
3. Combine: posterior_v[s] proportional to L_v[s] * U_v[s]
|
|
1222
|
+
"""
|
|
1223
|
+
k = len(states)
|
|
1224
|
+
pi = np.ones(k) / k
|
|
1225
|
+
|
|
1226
|
+
# Downward pass
|
|
1227
|
+
cond_liks, _ = self._felsenstein_pruning(tree, tip_states, Q, pi, states)
|
|
1228
|
+
|
|
1229
|
+
# Build parent map
|
|
1230
|
+
parent_map = self._build_parent_map(tree)
|
|
1231
|
+
|
|
1232
|
+
# Upward pass (preorder)
|
|
1233
|
+
upward = {}
|
|
1234
|
+
root = tree.root
|
|
1235
|
+
upward[id(root)] = pi.copy()
|
|
1236
|
+
|
|
1237
|
+
for clade in tree.find_clades(order="preorder"):
|
|
1238
|
+
if clade == root:
|
|
1239
|
+
continue
|
|
1240
|
+
if id(clade) not in parent_map:
|
|
1241
|
+
continue
|
|
1242
|
+
|
|
1243
|
+
parent = parent_map[id(clade)]
|
|
1244
|
+
U_parent = upward[id(parent)]
|
|
1245
|
+
|
|
1246
|
+
# Product of sibling messages
|
|
1247
|
+
sibling_product = np.ones(k)
|
|
1248
|
+
for sibling in parent.clades:
|
|
1249
|
+
if id(sibling) == id(clade):
|
|
1250
|
+
continue
|
|
1251
|
+
t_sib = sibling.branch_length if sibling.branch_length else 1e-8
|
|
1252
|
+
P_sib = self._matrix_exp(Q, t_sib)
|
|
1253
|
+
sibling_product *= P_sib @ cond_liks[id(sibling)]
|
|
1254
|
+
|
|
1255
|
+
# Upward message for this node
|
|
1256
|
+
t_v = clade.branch_length if clade.branch_length else 1e-8
|
|
1257
|
+
P_v = self._matrix_exp(Q, t_v)
|
|
1258
|
+
upward[id(clade)] = P_v.T @ (U_parent * sibling_product)
|
|
1259
|
+
|
|
1260
|
+
# Normalize to prevent underflow
|
|
1261
|
+
s = np.sum(upward[id(clade)])
|
|
1262
|
+
if s > 0:
|
|
1263
|
+
upward[id(clade)] /= s
|
|
1264
|
+
|
|
1265
|
+
# Combine downward and upward for posteriors
|
|
1266
|
+
posteriors: Dict[int, np.ndarray] = {}
|
|
1267
|
+
for clade in tree.find_clades(order="preorder"):
|
|
1268
|
+
if clade.is_terminal():
|
|
1269
|
+
continue
|
|
1270
|
+
L = cond_liks[id(clade)]
|
|
1271
|
+
U = upward.get(id(clade), pi)
|
|
1272
|
+
raw = L * U
|
|
1273
|
+
total = np.sum(raw)
|
|
1274
|
+
if total > 0:
|
|
1275
|
+
posteriors[id(clade)] = raw / total
|
|
1276
|
+
else:
|
|
1277
|
+
posteriors[id(clade)] = np.ones(k) / k
|
|
1278
|
+
|
|
1279
|
+
return posteriors
|
|
1280
|
+
|
|
1281
|
+
# ------------------------------------------------------------------
|
|
1282
|
+
# Discrete ASR orchestration
|
|
1283
|
+
# ------------------------------------------------------------------
|
|
1284
|
+
|
|
1285
|
+
def _run_discrete(self, tree, tree_tips) -> None:
|
|
1286
|
+
# Parse discrete traits
|
|
1287
|
+
if self.trait_column is not None:
|
|
1288
|
+
tip_states = self._parse_discrete_trait_data_multi(
|
|
1289
|
+
self.trait_data_path, tree_tips, self.trait_column
|
|
1290
|
+
)
|
|
1291
|
+
trait_name = self.trait_column
|
|
1292
|
+
else:
|
|
1293
|
+
tip_states = self._parse_discrete_trait_data_single(
|
|
1294
|
+
self.trait_data_path, tree_tips
|
|
1295
|
+
)
|
|
1296
|
+
trait_name = "trait"
|
|
1297
|
+
|
|
1298
|
+
# Prune tree to shared taxa
|
|
1299
|
+
tree_copy = copy.deepcopy(tree)
|
|
1300
|
+
tip_names_in_tree = [t.name for t in tree_copy.get_terminals()]
|
|
1301
|
+
tips_to_prune = [t for t in tip_names_in_tree if t not in tip_states]
|
|
1302
|
+
if tips_to_prune:
|
|
1303
|
+
tree_copy = self.prune_tree_using_taxa_list(tree_copy, tips_to_prune)
|
|
1304
|
+
|
|
1305
|
+
# Label internal nodes
|
|
1306
|
+
node_labels = self._label_internal_nodes(tree_copy)
|
|
1307
|
+
|
|
1308
|
+
# Get sorted unique states
|
|
1309
|
+
states = sorted(set(tip_states.values()))
|
|
1310
|
+
|
|
1311
|
+
# Fit Q matrix
|
|
1312
|
+
Q, log_likelihood = self._fit_q_matrix(
|
|
1313
|
+
tree_copy, tip_states, states, self.model
|
|
1314
|
+
)
|
|
1315
|
+
|
|
1316
|
+
# Compute marginal posteriors
|
|
1317
|
+
node_posteriors = self._discrete_marginal_posteriors(
|
|
1318
|
+
tree_copy, tip_states, Q, states
|
|
1319
|
+
)
|
|
1320
|
+
|
|
1321
|
+
n_tips = len(tip_states)
|
|
1322
|
+
|
|
1323
|
+
# Build result
|
|
1324
|
+
result = self._format_discrete_result(
|
|
1325
|
+
model=self.model,
|
|
1326
|
+
trait_name=trait_name,
|
|
1327
|
+
n_tips=n_tips,
|
|
1328
|
+
log_likelihood=log_likelihood,
|
|
1329
|
+
states=states,
|
|
1330
|
+
Q=Q,
|
|
1331
|
+
node_posteriors=node_posteriors,
|
|
1332
|
+
node_labels=node_labels,
|
|
1333
|
+
tree=tree_copy,
|
|
1334
|
+
tip_states=tip_states,
|
|
1335
|
+
)
|
|
1336
|
+
|
|
1337
|
+
if self.plot_output:
|
|
1338
|
+
self._plot_discrete_asr(
|
|
1339
|
+
tree_copy, node_posteriors, node_labels,
|
|
1340
|
+
states, tip_states, self.plot_output
|
|
1341
|
+
)
|
|
1342
|
+
result["plot_output"] = self.plot_output
|
|
1343
|
+
|
|
1344
|
+
if self.json_output:
|
|
1345
|
+
print_json(result)
|
|
1346
|
+
else:
|
|
1347
|
+
self._print_discrete_text_output(
|
|
1348
|
+
model=self.model,
|
|
1349
|
+
trait_name=trait_name,
|
|
1350
|
+
n_tips=n_tips,
|
|
1351
|
+
log_likelihood=log_likelihood,
|
|
1352
|
+
states=states,
|
|
1353
|
+
Q=Q,
|
|
1354
|
+
node_posteriors=node_posteriors,
|
|
1355
|
+
node_labels=node_labels,
|
|
1356
|
+
tree=tree_copy,
|
|
1357
|
+
)
|
|
1358
|
+
|
|
1359
|
+
# ------------------------------------------------------------------
|
|
1360
|
+
# Discrete output formatting
|
|
1361
|
+
# ------------------------------------------------------------------
|
|
1362
|
+
|
|
1363
|
+
def _format_discrete_result(
|
|
1364
|
+
self, *, model, trait_name, n_tips, log_likelihood,
|
|
1365
|
+
states, Q, node_posteriors, node_labels, tree, tip_states,
|
|
1366
|
+
) -> Dict:
|
|
1367
|
+
k = len(states)
|
|
1368
|
+
q_matrix = {}
|
|
1369
|
+
for i in range(k):
|
|
1370
|
+
row = {}
|
|
1371
|
+
for j in range(k):
|
|
1372
|
+
row[states[j]] = float(Q[i, j])
|
|
1373
|
+
q_matrix[states[i]] = row
|
|
1374
|
+
|
|
1375
|
+
ancestral_states = {}
|
|
1376
|
+
root_id = id(tree.root)
|
|
1377
|
+
|
|
1378
|
+
for clade in tree.find_clades(order="preorder"):
|
|
1379
|
+
if clade.is_terminal():
|
|
1380
|
+
continue
|
|
1381
|
+
if id(clade) not in node_labels:
|
|
1382
|
+
continue
|
|
1383
|
+
label = node_labels[id(clade)]
|
|
1384
|
+
if id(clade) not in node_posteriors:
|
|
1385
|
+
continue
|
|
1386
|
+
|
|
1387
|
+
posterior = node_posteriors[id(clade)]
|
|
1388
|
+
map_idx = int(np.argmax(posterior))
|
|
1389
|
+
state_probs = {
|
|
1390
|
+
states[i]: float(posterior[i]) for i in range(k)
|
|
1391
|
+
}
|
|
1392
|
+
|
|
1393
|
+
ancestral_states[label] = {
|
|
1394
|
+
"map_state": states[map_idx],
|
|
1395
|
+
"posteriors": state_probs,
|
|
1396
|
+
"descendants": self._get_descendant_tips(tree, clade),
|
|
1397
|
+
"is_root": id(clade) == root_id,
|
|
1398
|
+
}
|
|
1399
|
+
|
|
1400
|
+
return {
|
|
1401
|
+
"method": "discrete",
|
|
1402
|
+
"model": model,
|
|
1403
|
+
"trait": trait_name,
|
|
1404
|
+
"n_tips": n_tips,
|
|
1405
|
+
"log_likelihood": float(log_likelihood),
|
|
1406
|
+
"states": states,
|
|
1407
|
+
"q_matrix": q_matrix,
|
|
1408
|
+
"ancestral_states": ancestral_states,
|
|
1409
|
+
"tip_states": {k: v for k, v in sorted(tip_states.items())},
|
|
1410
|
+
}
|
|
1411
|
+
|
|
1412
|
+
def _print_discrete_text_output(
|
|
1413
|
+
self, *, model, trait_name, n_tips, log_likelihood,
|
|
1414
|
+
states, Q, node_posteriors, node_labels, tree,
|
|
1415
|
+
) -> None:
|
|
1416
|
+
k = len(states)
|
|
1417
|
+
print("Ancestral State Reconstruction (Discrete)")
|
|
1418
|
+
print(f"\nModel: Mk ({model})")
|
|
1419
|
+
print(f"Trait: {trait_name}")
|
|
1420
|
+
print(f"Number of tips: {n_tips}")
|
|
1421
|
+
print(f"Number of states: {k}")
|
|
1422
|
+
print(f"States: {', '.join(states)}")
|
|
1423
|
+
print(f"\nLog-likelihood: {log_likelihood:.4f}")
|
|
1424
|
+
|
|
1425
|
+
# Q matrix table
|
|
1426
|
+
print("\nRate matrix (Q):")
|
|
1427
|
+
col_w = max(12, max(len(s) for s in states) + 2)
|
|
1428
|
+
header = f" {'':>{col_w}}" + "".join(f"{s:>{col_w}}" for s in states)
|
|
1429
|
+
print(header)
|
|
1430
|
+
for i, si in enumerate(states):
|
|
1431
|
+
row = f" {si:>{col_w}}"
|
|
1432
|
+
for j in range(k):
|
|
1433
|
+
row += f"{Q[i, j]:>{col_w}.6f}"
|
|
1434
|
+
print(row)
|
|
1435
|
+
|
|
1436
|
+
# Per-node table
|
|
1437
|
+
print("\nAncestral state posteriors:")
|
|
1438
|
+
root_id = id(tree.root)
|
|
1439
|
+
col_w_state = max(10, max(len(s) for s in states) + 2)
|
|
1440
|
+
header = f" {'Node':<12s}{'Desc':>6s}{'MAP':>{col_w_state}}"
|
|
1441
|
+
for s in states:
|
|
1442
|
+
header += f"{s:>{col_w_state}}"
|
|
1443
|
+
print(header)
|
|
1444
|
+
|
|
1445
|
+
for clade in tree.find_clades(order="preorder"):
|
|
1446
|
+
if clade.is_terminal():
|
|
1447
|
+
continue
|
|
1448
|
+
if id(clade) not in node_labels:
|
|
1449
|
+
continue
|
|
1450
|
+
label = node_labels[id(clade)]
|
|
1451
|
+
if id(clade) not in node_posteriors:
|
|
1452
|
+
continue
|
|
1453
|
+
|
|
1454
|
+
posterior = node_posteriors[id(clade)]
|
|
1455
|
+
map_idx = int(np.argmax(posterior))
|
|
1456
|
+
n_desc = len(self._get_descendant_tips(tree, clade))
|
|
1457
|
+
root_tag = " (root)" if id(clade) == root_id else ""
|
|
1458
|
+
|
|
1459
|
+
row = f" {label + root_tag:<12s}{n_desc:>6d}{states[map_idx]:>{col_w_state}}"
|
|
1460
|
+
for i in range(k):
|
|
1461
|
+
row += f"{posterior[i]:>{col_w_state}.4f}"
|
|
1462
|
+
print(row)
|
|
1463
|
+
|
|
1464
|
+
# ------------------------------------------------------------------
|
|
1465
|
+
# Discrete ASR plot
|
|
1466
|
+
# ------------------------------------------------------------------
|
|
1467
|
+
|
|
1468
|
+
def _plot_discrete_asr(
    self, tree, node_posteriors, node_labels, states,
    tip_states, output_path,
) -> None:
    """Draw the tree with posterior pie charts and save it as an image.

    Branches are drawn in gray as a rectangular phylogram (x is cumulative
    branch length from the root, y is tip order). Each internal node that
    has an entry in ``node_posteriors`` gets a pie chart of its per-state
    posteriors, tip labels are coloured by the tip's state, and a legend
    maps colours to states.

    Args:
        tree: Tree exposing ``root``, ``find_clades`` and ``get_terminals``.
        node_posteriors: Mapping from ``id(clade)`` to per-state posteriors.
        node_labels: Accepted for interface compatibility; not used here.
        states: Sequence of state names, in posterior order.
        tip_states: Mapping of tip name to state; tips without a known
            state are labelled in black.
        output_path: Destination passed to ``Figure.savefig``.

    Raises:
        SystemExit: With code 2 when matplotlib cannot be imported.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        from matplotlib.patches import Wedge
    except ImportError:
        print(
            "matplotlib is required for discrete ASR plotting. "
            "Install matplotlib and retry."
        )
        raise SystemExit(2)

    parent_map = self._build_parent_map(tree)
    tips = list(tree.get_terminals())
    k = len(states)

    # Color palette for states. Integer lookups past the end of a colormap
    # all clamp to one colour, so wrap the index to keep colours cycling
    # when there are more states than palette entries.
    cmap = plt.get_cmap("tab10" if k <= 10 else "tab20")
    state_colors = {
        state: cmap(i % cmap.N) for i, state in enumerate(states)
    }

    node_x = {}
    node_y = {}

    # Assign tip y-positions
    for i, tip in enumerate(tips):
        node_y[id(tip)] = i

    # Assign x-positions via preorder traversal (parents before children)
    root = tree.root
    for clade in tree.find_clades(order="preorder"):
        if clade == root:
            node_x[id(clade)] = 0.0
        elif id(clade) in parent_map:
            parent = parent_map[id(clade)]
            # A missing (None) branch length counts as zero.
            t = clade.branch_length if clade.branch_length else 0.0
            node_x[id(clade)] = node_x[id(parent)] + t

    # Internal y-positions (mean of children), children before parents
    for clade in tree.find_clades(order="postorder"):
        if not clade.is_terminal() and id(clade) not in node_y:
            child_ys = [
                node_y[id(c)] for c in clade.clades if id(c) in node_y
            ]
            node_y[id(clade)] = np.mean(child_ys) if child_ys else 0.0

    # The root is always placed, so node_x is non-empty in practice; the
    # fallback only guards the max() call.
    max_x = max(node_x.values()) if node_x else 1.0

    fig, ax = plt.subplots(figsize=(10, max(4, len(tips) * 0.4)))
    try:
        # Draw branches (gray)
        for clade in tree.find_clades(order="preorder"):
            if clade == root:
                continue
            if id(clade) not in parent_map:
                continue
            parent = parent_map[id(clade)]
            if id(parent) not in node_x or id(clade) not in node_x:
                continue

            x0 = node_x[id(parent)]
            x1 = node_x[id(clade)]
            y0 = node_y[id(parent)]
            y1 = node_y[id(clade)]

            # Horizontal segment
            ax.plot(
                [x0, x1], [y1, y1],
                color="gray", lw=2, solid_capstyle="butt",
            )
            # Vertical connector
            ax.plot(
                [x0, x0], [y0, y1],
                color="gray", lw=2, solid_capstyle="butt",
            )

        # Pie charts at internal nodes
        pie_radius = max_x * 0.015
        for clade in tree.find_clades(order="preorder"):
            if clade.is_terminal():
                continue
            if id(clade) not in node_posteriors:
                continue

            cx = node_x[id(clade)]
            cy = node_y[id(clade)]
            posterior = node_posteriors[id(clade)]

            # Draw pie chart using Wedge patches, starting at 12 o'clock
            start_angle = 90.0
            for i in range(k):
                # Skip slivers too small to be visible.
                if posterior[i] < 1e-6:
                    continue
                sweep = posterior[i] * 360.0
                wedge = Wedge(
                    (cx, cy), pie_radius, start_angle, start_angle + sweep,
                    facecolor=state_colors[states[i]], edgecolor="black",
                    linewidth=0.5,
                )
                ax.add_patch(wedge)
                start_angle += sweep

        # Tip labels with state color
        offset = max_x * 0.02
        for tip in tips:
            color = state_colors.get(tip_states.get(tip.name, ""), "black")
            ax.text(
                node_x[id(tip)] + offset, node_y[id(tip)],
                tip.name, va="center", fontsize=9, color=color,
            )

        # Legend
        legend_handles = [
            plt.Line2D(
                [0], [0], marker="o", color="w",
                markerfacecolor=state_colors[s], markersize=10,
                label=s,
            )
            for s in states
        ]
        ax.legend(handles=legend_handles, loc="upper left", framealpha=0.9)

        ax.set_xlabel("Branch length")
        ax.set_yticks([])
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.spines["left"].set_visible(False)
        ax.set_title("Discrete Ancestral State Reconstruction")
        ax.set_aspect("auto")
        fig.tight_layout()
        fig.savefig(output_path, dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    print(f"Saved discrete ASR plot: {output_path}")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.1.35"
|
phykit-2.1.34/phykit/version.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "2.1.34"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|