phykit 2.1.34__tar.gz → 2.1.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. {phykit-2.1.34 → phykit-2.1.35}/PKG-INFO +1 -1
  2. {phykit-2.1.34 → phykit-2.1.35}/phykit/phykit.py +34 -15
  3. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/ancestral_reconstruction.py +701 -1
  4. phykit-2.1.35/phykit/version.py +1 -0
  5. {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/PKG-INFO +1 -1
  6. phykit-2.1.34/phykit/version.py +0 -1
  7. {phykit-2.1.34 → phykit-2.1.35}/LICENSE.md +0 -0
  8. {phykit-2.1.34 → phykit-2.1.35}/README.md +0 -0
  9. {phykit-2.1.34 → phykit-2.1.35}/phykit/__init__.py +0 -0
  10. {phykit-2.1.34 → phykit-2.1.35}/phykit/__main__.py +0 -0
  11. {phykit-2.1.34 → phykit-2.1.35}/phykit/cli_registry.py +0 -0
  12. {phykit-2.1.34 → phykit-2.1.35}/phykit/errors.py +0 -0
  13. {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/__init__.py +0 -0
  14. {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/boolean_argument_parsing.py +0 -0
  15. {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/caching.py +0 -0
  16. {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/files.py +0 -0
  17. {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/json_output.py +0 -0
  18. {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/parallel.py +0 -0
  19. {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/stats_summary.py +0 -0
  20. {phykit-2.1.34 → phykit-2.1.35}/phykit/helpers/streaming.py +0 -0
  21. {phykit-2.1.34 → phykit-2.1.35}/phykit/service_factories.py +0 -0
  22. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/__init__.py +0 -0
  23. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/__init__.py +0 -0
  24. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_entropy.py +0 -0
  25. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_length.py +0 -0
  26. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  27. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
  28. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/alignment_recoding.py +0 -0
  29. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/base.py +0 -0
  30. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/column_score.py +0 -0
  31. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/composition_per_taxon.py +0 -0
  32. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  33. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  34. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/dna_threader.py +0 -0
  35. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  36. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/faidx.py +0 -0
  37. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/gc_content.py +0 -0
  38. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/mask_alignment.py +0 -0
  39. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
  40. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/pairwise_identity.py +0 -0
  41. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  42. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/plot_alignment_qc.py +0 -0
  43. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/rcv.py +0 -0
  44. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/rcvt.py +0 -0
  45. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  46. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  47. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/alignment/variable_sites.py +0 -0
  48. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/base.py +0 -0
  49. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/__init__.py +0 -0
  50. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/base.py +0 -0
  51. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/bipartition_support_stats.py +0 -0
  52. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/branch_length_multiplier.py +0 -0
  53. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/collapse_branches.py +0 -0
  54. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/concordance_asr.py +0 -0
  55. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/consensus_network.py +0 -0
  56. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/consensus_tree.py +0 -0
  57. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/cont_map.py +0 -0
  58. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/cophylo.py +0 -0
  59. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  60. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/density_map.py +0 -0
  61. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/discordance_asymmetry.py +0 -0
  62. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/dvmc.py +0 -0
  63. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/evo_tempo_map.py +0 -0
  64. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/evolutionary_rate.py +0 -0
  65. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/fit_continuous.py +0 -0
  66. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  67. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/internal_branch_stats.py +0 -0
  68. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/internode_labeler.py +0 -0
  69. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  70. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/lb_score.py +0 -0
  71. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/ltt.py +0 -0
  72. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/monophyly_check.py +0 -0
  73. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  74. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/network_signal.py +0 -0
  75. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/ou_shift_detection.py +0 -0
  76. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/ouwie.py +0 -0
  77. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/patristic_distances.py +0 -0
  78. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phenogram.py +0 -0
  79. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylogenetic_glm.py +0 -0
  80. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylogenetic_ordination.py +0 -0
  81. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylogenetic_regression.py +0 -0
  82. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylogenetic_signal.py +0 -0
  83. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/phylomorphospace.py +0 -0
  84. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/polytomy_test.py +0 -0
  85. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/print_tree.py +0 -0
  86. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/prune_tree.py +0 -0
  87. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/quartet_network.py +0 -0
  88. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/rate_heterogeneity.py +0 -0
  89. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/relative_rate_test.py +0 -0
  90. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/rename_tree_tips.py +0 -0
  91. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/rf_distance.py +0 -0
  92. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/root_tree.py +0 -0
  93. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/saturation.py +0 -0
  94. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/spurious_sequence.py +0 -0
  95. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/stochastic_character_map.py +0 -0
  96. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/terminal_branch_stats.py +0 -0
  97. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/threshold_model.py +0 -0
  98. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/tip_labels.py +0 -0
  99. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  100. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  101. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/total_tree_length.py +0 -0
  102. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/treeness.py +0 -0
  103. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/treeness_over_rcv.py +0 -0
  104. {phykit-2.1.34 → phykit-2.1.35}/phykit/services/tree/vcv_utils.py +0 -0
  105. {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/SOURCES.txt +0 -0
  106. {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/dependency_links.txt +0 -0
  107. {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/entry_points.txt +0 -0
  108. {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/requires.txt +0 -0
  109. {phykit-2.1.34 → phykit-2.1.35}/phykit.egg-info/top_level.txt +0 -0
  110. {phykit-2.1.34 → phykit-2.1.35}/setup.cfg +0 -0
  111. {phykit-2.1.34 → phykit-2.1.35}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phykit
3
- Version: 2.1.34
3
+ Version: 2.1.35
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -1474,19 +1474,24 @@ class Phykit:
1474
1474
  f"""\
1475
1475
  {help_header}
1476
1476
 
1477
- Estimate ancestral states for continuous traits using
1478
- maximum likelihood, analogous to R's phytools::fastAnc()
1479
- and ape::ace(type="ML"). Optionally produce a contMap
1480
- plot showing continuous trait values mapped onto the
1481
- phylogeny.
1482
-
1483
- Two methods are available:
1484
- - fast (default): Felsenstein's pruning/contrasts shortcut,
1485
- O(n) time
1477
+ Estimate ancestral states using maximum likelihood.
1478
+
1479
+ Supports two trait types:
1480
+ - continuous (default): Brownian Motion model, analogous
1481
+ to R's phytools::fastAnc() and ape::ace(type="ML").
1482
+ Optionally produce a contMap plot.
1483
+ - discrete: Mk model with marginal posterior probabilities
1484
+ at each internal node, analogous to ape::ace(type="discrete").
1485
+ Optionally produce a pie-chart phylogeny plot.
1486
+
1487
+ Continuous methods (--type continuous):
1488
+ - fast (default): Felsenstein's pruning/contrasts, O(n)
1486
1489
  - ml: full VCV-based ML with exact conditional CIs, O(n^3)
1487
1490
 
1488
- Both methods produce identical point estimates; ml gives
1489
- exact conditional confidence intervals.
1491
+ Discrete models (--type discrete):
1492
+ - ER (default): equal rates
1493
+ - SYM: symmetric rates
1494
+ - ARD: all rates different
1490
1495
 
1491
1496
  Input trait data can be either:
1492
1497
  (1) A two-column file (taxon<tab>value) when -c is omitted
@@ -1499,7 +1504,7 @@ class Phykit:
1499
1504
  pk_ancestral_state_reconstruction, pk_asr, pk_anc_recon
1500
1505
 
1501
1506
  Usage:
1502
- phykit ancestral_state_reconstruction -t <tree> -d <trait_data> [-c <trait>] [-m <method>] [--ci] [--plot <output>] [--json]
1507
+ phykit ancestral_state_reconstruction -t <tree> -d <trait_data> [-c <trait>] [--type <type>] [-m <method>] [--model <model>] [--ci] [--plot <output>] [--json]
1503
1508
 
1504
1509
  Options
1505
1510
  =====================================================
@@ -1511,13 +1516,19 @@ class Phykit:
1511
1516
  -c/--trait trait column name (required
1512
1517
  for multi-trait files)
1513
1518
 
1519
+ --type trait type: continuous or
1520
+ discrete (default: continuous)
1521
+
1514
1522
  -m/--method method to use: fast or ml
1515
- (default: fast)
1523
+ (continuous only; default: fast)
1524
+
1525
+ --model Mk model: ER, SYM, or ARD
1526
+ (discrete only; default: ER)
1516
1527
 
1517
1528
  --ci include 95% confidence
1518
- intervals
1529
+ intervals (continuous only)
1519
1530
 
1520
- --plot output path for contMap plot
1531
+ --plot output path for plot
1521
1532
 
1522
1533
  --json output results as JSON
1523
1534
  """
@@ -1533,10 +1544,18 @@ class Phykit:
1533
1544
  "-c", "--trait", type=str, required=False, default=None,
1534
1545
  help=SUPPRESS, metavar=""
1535
1546
  )
1547
+ parser.add_argument(
1548
+ "--type", type=str, required=False, default="continuous",
1549
+ choices=["continuous", "discrete"], help=SUPPRESS, metavar=""
1550
+ )
1536
1551
  parser.add_argument(
1537
1552
  "-m", "--method", type=str, required=False, default="fast",
1538
1553
  choices=["fast", "ml"], help=SUPPRESS, metavar=""
1539
1554
  )
1555
+ parser.add_argument(
1556
+ "--model", type=str, required=False, default="ER",
1557
+ choices=["ER", "SYM", "ARD"], help=SUPPRESS, metavar=""
1558
+ )
1540
1559
  parser.add_argument(
1541
1560
  "--ci", action="store_true", required=False, help=SUPPRESS
1542
1561
  )
@@ -4,6 +4,8 @@ import sys
4
4
  from typing import Dict, List, Tuple
5
5
 
6
6
  import numpy as np
7
+ from scipy.linalg import expm
8
+ from scipy.optimize import minimize
7
9
 
8
10
  from .base import Tree
9
11
  from ...helpers.json_output import print_json
@@ -20,13 +22,19 @@ class AncestralReconstruction(Tree):
20
22
  self.ci = parsed["ci"]
21
23
  self.plot_output = parsed["plot_output"]
22
24
  self.json_output = parsed["json_output"]
25
+ self.trait_type = parsed["trait_type"]
26
+ self.model = parsed["model"]
23
27
 
24
28
  def run(self) -> None:
25
29
  tree = self.read_tree_file()
26
30
  self._validate_tree(tree)
27
-
28
31
  tree_tips = self.get_tip_names_from_tree(tree)
32
+ if self.trait_type == "discrete":
33
+ self._run_discrete(tree, tree_tips)
34
+ else:
35
+ self._run_continuous(tree, tree_tips)
29
36
 
37
+ def _run_continuous(self, tree, tree_tips) -> None:
30
38
  if self.trait_column is not None:
31
39
  trait_values = self._parse_multi_trait_data(
32
40
  self.trait_data_path, tree_tips, self.trait_column
@@ -106,6 +114,8 @@ class AncestralReconstruction(Tree):
106
114
  ci=getattr(args, "ci", False),
107
115
  plot_output=getattr(args, "plot", None),
108
116
  json_output=getattr(args, "json", False),
117
+ trait_type=getattr(args, "type", "continuous"),
118
+ model=getattr(args, "model", "ER"),
109
119
  )
110
120
 
111
121
  def _validate_tree(self, tree) -> None:
@@ -904,3 +914,693 @@ class AncestralReconstruction(Tree):
904
914
  fig.savefig(output_path, dpi=300, bbox_inches="tight")
905
915
  plt.close(fig)
906
916
  print(f"Saved contMap plot: {output_path}")
917
+
918
+ # ------------------------------------------------------------------
919
+ # Discrete trait parsers
920
+ # ------------------------------------------------------------------
921
+
922
+ def _parse_discrete_trait_data_single(
923
+ self, path: str, tree_tips: List[str]
924
+ ) -> Dict[str, str]:
925
+ try:
926
+ with open(path) as f:
927
+ lines = f.readlines()
928
+ except FileNotFoundError:
929
+ raise PhykitUserError(
930
+ [
931
+ f"{path} corresponds to no such file or directory.",
932
+ "Please check filename and pathing",
933
+ ],
934
+ code=2,
935
+ )
936
+
937
+ traits: Dict[str, str] = {}
938
+ for line_num, line in enumerate(lines, 1):
939
+ line = line.strip()
940
+ if not line or line.startswith("#"):
941
+ continue
942
+ parts = line.split("\t")
943
+ if len(parts) != 2:
944
+ raise PhykitUserError(
945
+ [
946
+ f"Line {line_num} in trait file has {len(parts)} columns; expected 2.",
947
+ "Each line should be: taxon_name<tab>trait_value",
948
+ ],
949
+ code=2,
950
+ )
951
+ taxon, value = parts
952
+ traits[taxon] = value
953
+
954
+ tree_tip_set = set(tree_tips)
955
+ trait_taxa_set = set(traits.keys())
956
+ shared = tree_tip_set & trait_taxa_set
957
+
958
+ tree_only = tree_tip_set - trait_taxa_set
959
+ trait_only = trait_taxa_set - tree_tip_set
960
+
961
+ if tree_only:
962
+ print(
963
+ f"Warning: {len(tree_only)} taxa in tree but not in trait file: "
964
+ f"{', '.join(sorted(tree_only))}",
965
+ file=sys.stderr,
966
+ )
967
+ if trait_only:
968
+ print(
969
+ f"Warning: {len(trait_only)} taxa in trait file but not in tree: "
970
+ f"{', '.join(sorted(trait_only))}",
971
+ file=sys.stderr,
972
+ )
973
+
974
+ if len(shared) < 3:
975
+ raise PhykitUserError(
976
+ [
977
+ f"Only {len(shared)} shared taxa between tree and trait file.",
978
+ "At least 3 shared taxa are required.",
979
+ ],
980
+ code=2,
981
+ )
982
+
983
+ return {taxon: traits[taxon] for taxon in shared}
984
+
985
+ def _parse_discrete_trait_data_multi(
986
+ self, path: str, tree_tips: List[str], trait_column: str
987
+ ) -> Dict[str, str]:
988
+ try:
989
+ with open(path) as f:
990
+ lines = f.readlines()
991
+ except FileNotFoundError:
992
+ raise PhykitUserError(
993
+ [
994
+ f"{path} corresponds to no such file or directory.",
995
+ "Please check filename and pathing",
996
+ ],
997
+ code=2,
998
+ )
999
+
1000
+ data_lines = []
1001
+ for line in lines:
1002
+ stripped = line.strip()
1003
+ if not stripped or stripped.startswith("#"):
1004
+ continue
1005
+ data_lines.append(stripped)
1006
+
1007
+ if len(data_lines) < 2:
1008
+ raise PhykitUserError(
1009
+ [
1010
+ "Multi-trait file must have a header row and at least one data row.",
1011
+ ],
1012
+ code=2,
1013
+ )
1014
+
1015
+ header_parts = data_lines[0].split("\t")
1016
+ if len(header_parts) < 2:
1017
+ raise PhykitUserError(
1018
+ [
1019
+ "Header must have at least 2 columns (taxon + at least 1 trait).",
1020
+ ],
1021
+ code=2,
1022
+ )
1023
+ trait_names = header_parts[1:]
1024
+
1025
+ if trait_column not in trait_names:
1026
+ raise PhykitUserError(
1027
+ [
1028
+ f"Column '{trait_column}' not found in trait file.",
1029
+ f"Available columns: {', '.join(trait_names)}",
1030
+ ],
1031
+ code=2,
1032
+ )
1033
+
1034
+ col_idx = trait_names.index(trait_column)
1035
+
1036
+ traits: Dict[str, str] = {}
1037
+ for line_idx, line in enumerate(data_lines[1:], 2):
1038
+ parts = line.split("\t")
1039
+ if len(parts) != len(header_parts):
1040
+ raise PhykitUserError(
1041
+ [
1042
+ f"Line {line_idx} has {len(parts)} columns; expected {len(header_parts)}.",
1043
+ ],
1044
+ code=2,
1045
+ )
1046
+ taxon = parts[0]
1047
+ traits[taxon] = parts[1 + col_idx]
1048
+
1049
+ tree_tip_set = set(tree_tips)
1050
+ trait_taxa_set = set(traits.keys())
1051
+ shared = tree_tip_set & trait_taxa_set
1052
+
1053
+ tree_only = tree_tip_set - trait_taxa_set
1054
+ trait_only = trait_taxa_set - tree_tip_set
1055
+
1056
+ if tree_only:
1057
+ print(
1058
+ f"Warning: {len(tree_only)} taxa in tree but not in trait file: "
1059
+ f"{', '.join(sorted(tree_only))}",
1060
+ file=sys.stderr,
1061
+ )
1062
+ if trait_only:
1063
+ print(
1064
+ f"Warning: {len(trait_only)} taxa in trait file but not in tree: "
1065
+ f"{', '.join(sorted(trait_only))}",
1066
+ file=sys.stderr,
1067
+ )
1068
+
1069
+ if len(shared) < 3:
1070
+ raise PhykitUserError(
1071
+ [
1072
+ f"Only {len(shared)} shared taxa between tree and trait file.",
1073
+ "At least 3 shared taxa are required.",
1074
+ ],
1075
+ code=2,
1076
+ )
1077
+
1078
+ return {taxon: traits[taxon] for taxon in shared}
1079
+
1080
+ # ------------------------------------------------------------------
1081
+ # Mk model primitives (shared with StochasticCharacterMap)
1082
+ # ------------------------------------------------------------------
1083
+
1084
+ def _build_q_matrix(
1085
+ self, params: np.ndarray, k: int, model: str
1086
+ ) -> np.ndarray:
1087
+ Q = np.zeros((k, k))
1088
+ if model == "ER":
1089
+ rate = params[0]
1090
+ Q[:] = rate
1091
+ np.fill_diagonal(Q, 0.0)
1092
+ elif model == "SYM":
1093
+ idx = 0
1094
+ for i in range(k):
1095
+ for j in range(i + 1, k):
1096
+ Q[i, j] = params[idx]
1097
+ Q[j, i] = params[idx]
1098
+ idx += 1
1099
+ elif model == "ARD":
1100
+ idx = 0
1101
+ for i in range(k):
1102
+ for j in range(k):
1103
+ if i != j:
1104
+ Q[i, j] = params[idx]
1105
+ idx += 1
1106
+ # Set diagonal
1107
+ for i in range(k):
1108
+ Q[i, i] = -np.sum(Q[i, :])
1109
+ return Q
1110
+
1111
+ def _matrix_exp(self, Q: np.ndarray, t: float) -> np.ndarray:
1112
+ return expm(Q * t)
1113
+
1114
+ def _felsenstein_pruning(
1115
+ self, tree, tip_states: Dict[str, str], Q: np.ndarray,
1116
+ pi: np.ndarray, states: List[str]
1117
+ ) -> Tuple[Dict, float]:
1118
+ k = len(states)
1119
+ state_idx = {s: i for i, s in enumerate(states)}
1120
+ cond_liks: Dict[int, np.ndarray] = {}
1121
+
1122
+ for clade in tree.find_clades(order="postorder"):
1123
+ if clade.is_terminal():
1124
+ lik = np.zeros(k)
1125
+ if clade.name in tip_states:
1126
+ lik[state_idx[tip_states[clade.name]]] = 1.0
1127
+ cond_liks[id(clade)] = lik
1128
+ else:
1129
+ lik = np.ones(k)
1130
+ for child in clade.clades:
1131
+ t = child.branch_length if child.branch_length else 1e-8
1132
+ P = self._matrix_exp(Q, t)
1133
+ child_lik = cond_liks[id(child)]
1134
+ lik *= P @ child_lik
1135
+ cond_liks[id(clade)] = lik
1136
+
1137
+ root_lik = cond_liks[id(tree.root)]
1138
+ total_lik = np.sum(pi * root_lik)
1139
+ if total_lik <= 0:
1140
+ loglik = -1e20
1141
+ else:
1142
+ loglik = np.log(total_lik)
1143
+
1144
+ return cond_liks, loglik
1145
+
1146
+ def _fit_q_matrix(
1147
+ self, tree, tip_states: Dict[str, str],
1148
+ states: List[str], model: str
1149
+ ) -> Tuple[np.ndarray, float]:
1150
+ k = len(states)
1151
+
1152
+ if model == "ER":
1153
+ n_params = 1
1154
+ elif model == "SYM":
1155
+ n_params = k * (k - 1) // 2
1156
+ elif model == "ARD":
1157
+ n_params = k * (k - 1)
1158
+ else:
1159
+ raise PhykitUserError(
1160
+ [f"Unknown model '{model}'. Use ER, SYM, or ARD."],
1161
+ code=2,
1162
+ )
1163
+
1164
+ pi = np.ones(k) / k
1165
+
1166
+ def neg_loglik(params):
1167
+ Q = self._build_q_matrix(np.abs(params), k, model)
1168
+ _, ll = self._felsenstein_pruning(tree, tip_states, Q, pi, states)
1169
+ return -ll
1170
+
1171
+ bounds = [(1e-8, 100.0)] * n_params
1172
+
1173
+ # Multi-start optimization for robustness
1174
+ starting_values = [0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
1175
+ best_negll = np.inf
1176
+ best_params = np.ones(n_params) * 0.1
1177
+
1178
+ for sv in starting_values:
1179
+ x0 = np.ones(n_params) * sv
1180
+ for opt_method in ["L-BFGS-B", "Nelder-Mead"]:
1181
+ try:
1182
+ kwargs = {"method": opt_method}
1183
+ if opt_method == "L-BFGS-B":
1184
+ kwargs["bounds"] = bounds
1185
+ result = minimize(neg_loglik, x0, **kwargs)
1186
+ if result.fun < best_negll:
1187
+ best_negll = result.fun
1188
+ best_params = np.abs(result.x)
1189
+ except (ValueError, np.linalg.LinAlgError):
1190
+ continue
1191
+
1192
+ # Refine best result with Nelder-Mead
1193
+ try:
1194
+ result = minimize(
1195
+ neg_loglik, best_params, method="Nelder-Mead",
1196
+ options={"maxiter": 10000, "xatol": 1e-10, "fatol": 1e-10},
1197
+ )
1198
+ if result.fun < best_negll:
1199
+ best_params = np.abs(result.x)
1200
+ except (ValueError, np.linalg.LinAlgError):
1201
+ pass
1202
+
1203
+ Q = self._build_q_matrix(best_params, k, model)
1204
+ _, loglik = self._felsenstein_pruning(tree, tip_states, Q, pi, states)
1205
+
1206
+ return Q, loglik
1207
+
1208
+ # ------------------------------------------------------------------
1209
+ # Discrete marginal posteriors (upward-downward belief propagation)
1210
+ # ------------------------------------------------------------------
1211
+
1212
+ def _discrete_marginal_posteriors(
1213
+ self, tree, tip_states: Dict[str, str], Q: np.ndarray,
1214
+ states: List[str],
1215
+ ) -> Dict[int, np.ndarray]:
1216
+ """Compute marginal posterior probabilities for all internal nodes.
1217
+
1218
+ Uses upward-downward (inside-outside) belief propagation:
1219
+ 1. Downward pass (postorder): Felsenstein pruning for conditional likelihoods
1220
+ 2. Upward pass (preorder): propagate information from rest of tree
1221
+ 3. Combine: posterior_v[s] proportional to L_v[s] * U_v[s]
1222
+ """
1223
+ k = len(states)
1224
+ pi = np.ones(k) / k
1225
+
1226
+ # Downward pass
1227
+ cond_liks, _ = self._felsenstein_pruning(tree, tip_states, Q, pi, states)
1228
+
1229
+ # Build parent map
1230
+ parent_map = self._build_parent_map(tree)
1231
+
1232
+ # Upward pass (preorder)
1233
+ upward = {}
1234
+ root = tree.root
1235
+ upward[id(root)] = pi.copy()
1236
+
1237
+ for clade in tree.find_clades(order="preorder"):
1238
+ if clade == root:
1239
+ continue
1240
+ if id(clade) not in parent_map:
1241
+ continue
1242
+
1243
+ parent = parent_map[id(clade)]
1244
+ U_parent = upward[id(parent)]
1245
+
1246
+ # Product of sibling messages
1247
+ sibling_product = np.ones(k)
1248
+ for sibling in parent.clades:
1249
+ if id(sibling) == id(clade):
1250
+ continue
1251
+ t_sib = sibling.branch_length if sibling.branch_length else 1e-8
1252
+ P_sib = self._matrix_exp(Q, t_sib)
1253
+ sibling_product *= P_sib @ cond_liks[id(sibling)]
1254
+
1255
+ # Upward message for this node
1256
+ t_v = clade.branch_length if clade.branch_length else 1e-8
1257
+ P_v = self._matrix_exp(Q, t_v)
1258
+ upward[id(clade)] = P_v.T @ (U_parent * sibling_product)
1259
+
1260
+ # Normalize to prevent underflow
1261
+ s = np.sum(upward[id(clade)])
1262
+ if s > 0:
1263
+ upward[id(clade)] /= s
1264
+
1265
+ # Combine downward and upward for posteriors
1266
+ posteriors: Dict[int, np.ndarray] = {}
1267
+ for clade in tree.find_clades(order="preorder"):
1268
+ if clade.is_terminal():
1269
+ continue
1270
+ L = cond_liks[id(clade)]
1271
+ U = upward.get(id(clade), pi)
1272
+ raw = L * U
1273
+ total = np.sum(raw)
1274
+ if total > 0:
1275
+ posteriors[id(clade)] = raw / total
1276
+ else:
1277
+ posteriors[id(clade)] = np.ones(k) / k
1278
+
1279
+ return posteriors
1280
+
1281
+ # ------------------------------------------------------------------
1282
+ # Discrete ASR orchestration
1283
+ # ------------------------------------------------------------------
1284
+
1285
+ def _run_discrete(self, tree, tree_tips) -> None:
1286
+ # Parse discrete traits
1287
+ if self.trait_column is not None:
1288
+ tip_states = self._parse_discrete_trait_data_multi(
1289
+ self.trait_data_path, tree_tips, self.trait_column
1290
+ )
1291
+ trait_name = self.trait_column
1292
+ else:
1293
+ tip_states = self._parse_discrete_trait_data_single(
1294
+ self.trait_data_path, tree_tips
1295
+ )
1296
+ trait_name = "trait"
1297
+
1298
+ # Prune tree to shared taxa
1299
+ tree_copy = copy.deepcopy(tree)
1300
+ tip_names_in_tree = [t.name for t in tree_copy.get_terminals()]
1301
+ tips_to_prune = [t for t in tip_names_in_tree if t not in tip_states]
1302
+ if tips_to_prune:
1303
+ tree_copy = self.prune_tree_using_taxa_list(tree_copy, tips_to_prune)
1304
+
1305
+ # Label internal nodes
1306
+ node_labels = self._label_internal_nodes(tree_copy)
1307
+
1308
+ # Get sorted unique states
1309
+ states = sorted(set(tip_states.values()))
1310
+
1311
+ # Fit Q matrix
1312
+ Q, log_likelihood = self._fit_q_matrix(
1313
+ tree_copy, tip_states, states, self.model
1314
+ )
1315
+
1316
+ # Compute marginal posteriors
1317
+ node_posteriors = self._discrete_marginal_posteriors(
1318
+ tree_copy, tip_states, Q, states
1319
+ )
1320
+
1321
+ n_tips = len(tip_states)
1322
+
1323
+ # Build result
1324
+ result = self._format_discrete_result(
1325
+ model=self.model,
1326
+ trait_name=trait_name,
1327
+ n_tips=n_tips,
1328
+ log_likelihood=log_likelihood,
1329
+ states=states,
1330
+ Q=Q,
1331
+ node_posteriors=node_posteriors,
1332
+ node_labels=node_labels,
1333
+ tree=tree_copy,
1334
+ tip_states=tip_states,
1335
+ )
1336
+
1337
+ if self.plot_output:
1338
+ self._plot_discrete_asr(
1339
+ tree_copy, node_posteriors, node_labels,
1340
+ states, tip_states, self.plot_output
1341
+ )
1342
+ result["plot_output"] = self.plot_output
1343
+
1344
+ if self.json_output:
1345
+ print_json(result)
1346
+ else:
1347
+ self._print_discrete_text_output(
1348
+ model=self.model,
1349
+ trait_name=trait_name,
1350
+ n_tips=n_tips,
1351
+ log_likelihood=log_likelihood,
1352
+ states=states,
1353
+ Q=Q,
1354
+ node_posteriors=node_posteriors,
1355
+ node_labels=node_labels,
1356
+ tree=tree_copy,
1357
+ )
1358
+
1359
+ # ------------------------------------------------------------------
1360
+ # Discrete output formatting
1361
+ # ------------------------------------------------------------------
1362
+
1363
+ def _format_discrete_result(
1364
+ self, *, model, trait_name, n_tips, log_likelihood,
1365
+ states, Q, node_posteriors, node_labels, tree, tip_states,
1366
+ ) -> Dict:
1367
+ k = len(states)
1368
+ q_matrix = {}
1369
+ for i in range(k):
1370
+ row = {}
1371
+ for j in range(k):
1372
+ row[states[j]] = float(Q[i, j])
1373
+ q_matrix[states[i]] = row
1374
+
1375
+ ancestral_states = {}
1376
+ root_id = id(tree.root)
1377
+
1378
+ for clade in tree.find_clades(order="preorder"):
1379
+ if clade.is_terminal():
1380
+ continue
1381
+ if id(clade) not in node_labels:
1382
+ continue
1383
+ label = node_labels[id(clade)]
1384
+ if id(clade) not in node_posteriors:
1385
+ continue
1386
+
1387
+ posterior = node_posteriors[id(clade)]
1388
+ map_idx = int(np.argmax(posterior))
1389
+ state_probs = {
1390
+ states[i]: float(posterior[i]) for i in range(k)
1391
+ }
1392
+
1393
+ ancestral_states[label] = {
1394
+ "map_state": states[map_idx],
1395
+ "posteriors": state_probs,
1396
+ "descendants": self._get_descendant_tips(tree, clade),
1397
+ "is_root": id(clade) == root_id,
1398
+ }
1399
+
1400
+ return {
1401
+ "method": "discrete",
1402
+ "model": model,
1403
+ "trait": trait_name,
1404
+ "n_tips": n_tips,
1405
+ "log_likelihood": float(log_likelihood),
1406
+ "states": states,
1407
+ "q_matrix": q_matrix,
1408
+ "ancestral_states": ancestral_states,
1409
+ "tip_states": {k: v for k, v in sorted(tip_states.items())},
1410
+ }
1411
+
1412
+ def _print_discrete_text_output(
1413
+ self, *, model, trait_name, n_tips, log_likelihood,
1414
+ states, Q, node_posteriors, node_labels, tree,
1415
+ ) -> None:
1416
+ k = len(states)
1417
+ print("Ancestral State Reconstruction (Discrete)")
1418
+ print(f"\nModel: Mk ({model})")
1419
+ print(f"Trait: {trait_name}")
1420
+ print(f"Number of tips: {n_tips}")
1421
+ print(f"Number of states: {k}")
1422
+ print(f"States: {', '.join(states)}")
1423
+ print(f"\nLog-likelihood: {log_likelihood:.4f}")
1424
+
1425
+ # Q matrix table
1426
+ print("\nRate matrix (Q):")
1427
+ col_w = max(12, max(len(s) for s in states) + 2)
1428
+ header = f" {'':>{col_w}}" + "".join(f"{s:>{col_w}}" for s in states)
1429
+ print(header)
1430
+ for i, si in enumerate(states):
1431
+ row = f" {si:>{col_w}}"
1432
+ for j in range(k):
1433
+ row += f"{Q[i, j]:>{col_w}.6f}"
1434
+ print(row)
1435
+
1436
+ # Per-node table
1437
+ print("\nAncestral state posteriors:")
1438
+ root_id = id(tree.root)
1439
+ col_w_state = max(10, max(len(s) for s in states) + 2)
1440
+ header = f" {'Node':<12s}{'Desc':>6s}{'MAP':>{col_w_state}}"
1441
+ for s in states:
1442
+ header += f"{s:>{col_w_state}}"
1443
+ print(header)
1444
+
1445
+ for clade in tree.find_clades(order="preorder"):
1446
+ if clade.is_terminal():
1447
+ continue
1448
+ if id(clade) not in node_labels:
1449
+ continue
1450
+ label = node_labels[id(clade)]
1451
+ if id(clade) not in node_posteriors:
1452
+ continue
1453
+
1454
+ posterior = node_posteriors[id(clade)]
1455
+ map_idx = int(np.argmax(posterior))
1456
+ n_desc = len(self._get_descendant_tips(tree, clade))
1457
+ root_tag = " (root)" if id(clade) == root_id else ""
1458
+
1459
+ row = f" {label + root_tag:<12s}{n_desc:>6d}{states[map_idx]:>{col_w_state}}"
1460
+ for i in range(k):
1461
+ row += f"{posterior[i]:>{col_w_state}.4f}"
1462
+ print(row)
1463
+
1464
+ # ------------------------------------------------------------------
1465
+ # Discrete ASR plot
1466
+ # ------------------------------------------------------------------
1467
+
1468
def _plot_discrete_asr(
    self, tree, node_posteriors, node_labels, states,
    tip_states, output_path,
) -> None:
    """Draw the discrete ancestral state reconstruction and save it to disk.

    The tree is drawn as a rectangular phylogram (x = cumulative branch
    length from the root, y = tip order). Every internal node that has an
    entry in ``node_posteriors`` gets a pie chart of its per-state
    posterior probabilities, and tip labels are colored by observed state.

    Args:
        tree: Tree object exposing ``root``, ``find_clades(order=...)`` and
            ``get_terminals()``; its clades expose ``clades``,
            ``branch_length``, ``name`` and ``is_terminal()``.
            NOTE(review): presumably a Bio.Phylo tree -- confirm.
        node_posteriors: Mapping from ``id(clade)`` to a sequence of
            probabilities, one per entry of ``states`` and in that order.
        node_labels: Accepted for signature parity with the other
            reporting helpers; not used by the plot.
        states: Sequence of state labels; defines color and wedge order.
        tip_states: Mapping from tip name to its state label. Tips whose
            name or state is unknown are labeled in black.
        output_path: Destination handed to ``Figure.savefig``.

    Raises:
        SystemExit: With code 2 when matplotlib cannot be imported.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        from matplotlib.patches import Wedge
        from matplotlib.transforms import ScaledTranslation
    except ImportError:
        print(
            "matplotlib is required for discrete ASR plotting. "
            "Install matplotlib and retry."
        )
        raise SystemExit(2)

    parent_map = self._build_parent_map(tree)
    tips = list(tree.get_terminals())
    k = len(states)

    # Qualitative palettes only hold 10 / 20 distinct colors. Integer
    # indexing past the end clamps to the last color, so larger state sets
    # sample a continuous colormap instead (i / k never reaches 1.0, which
    # would repeat the hue used at 0.0).
    if k <= 10:
        palette = plt.get_cmap("tab10")
        colors = [palette(i) for i in range(k)]
    elif k <= 20:
        palette = plt.get_cmap("tab20")
        colors = [palette(i) for i in range(k)]
    else:
        palette = plt.get_cmap("hsv")
        colors = [palette(i / k) for i in range(k)]
    state_colors = dict(zip(states, colors))

    # Tips are stacked one unit apart in traversal order.
    node_y = {id(tip): row for row, tip in enumerate(tips)}

    # Preorder guarantees a parent's x is known before its children's.
    root = tree.root
    node_x = {}
    for clade in tree.find_clades(order="preorder"):
        if clade is root:
            node_x[id(clade)] = 0.0
        elif id(clade) in parent_map:
            parent = parent_map[id(clade)]
            # Missing (None) or zero branch lengths contribute no depth.
            node_x[id(clade)] = node_x[id(parent)] + (clade.branch_length or 0.0)

    # Postorder guarantees children are placed before their parent, which
    # sits at the mean y of its already-placed children.
    for clade in tree.find_clades(order="postorder"):
        if clade.is_terminal() or id(clade) in node_y:
            continue
        child_ys = [node_y[id(c)] for c in clade.clades if id(c) in node_y]
        node_y[id(clade)] = np.mean(child_ys) if child_ys else 0.0

    fig, ax = plt.subplots(figsize=(10, max(4, len(tips) * 0.4)))

    # Branches: a horizontal run at the child's y plus a vertical connector
    # at the parent's x.
    for clade in tree.find_clades(order="preorder"):
        if clade is root or id(clade) not in parent_map:
            continue
        parent = parent_map[id(clade)]
        if id(parent) not in node_x or id(clade) not in node_x:
            continue

        x0, x1 = node_x[id(parent)], node_x[id(clade)]
        y0, y1 = node_y[id(parent)], node_y[id(clade)]
        ax.plot([x0, x1], [y1, y1], color="gray", lw=2, solid_capstyle="butt")
        ax.plot([x0, x0], [y0, y1], color="gray", lw=2, solid_capstyle="butt")

    # Pie charts at internal nodes. The x axis is branch length and the y
    # axis is tip index, so a wedge sized in data units would be stretched
    # into an ellipse (or vanish) depending on branch-length scale. Each
    # pie is therefore drawn in inches around the origin and translated to
    # the node's data position, which keeps it circular at any scale.
    pie_radius_in = 0.12
    for clade in tree.find_clades(order="preorder"):
        if clade.is_terminal() or id(clade) not in node_posteriors:
            continue

        posterior = node_posteriors[id(clade)]
        pie_transform = fig.dpi_scale_trans + ScaledTranslation(
            node_x[id(clade)], node_y[id(clade)], ax.transData
        )

        start_angle = 90.0
        for state, prob in zip(states, posterior):
            if prob < 1e-6:
                continue
            sweep = prob * 360.0
            ax.add_patch(
                Wedge(
                    (0.0, 0.0), pie_radius_in, start_angle, start_angle + sweep,
                    facecolor=state_colors[state], edgecolor="black",
                    linewidth=0.5, transform=pie_transform,
                    # Lines default to zorder 2 and patches to 1; lift the
                    # pies so branches do not cut across them.
                    zorder=3,
                )
            )
            start_angle += sweep

    # Tip labels, nudged right of the tip and colored by observed state.
    label_offset = max(node_x.values(), default=0.0) * 0.02
    for tip in tips:
        color = state_colors.get(tip_states.get(tip.name, ""), "black")
        ax.text(
            node_x[id(tip)] + label_offset, node_y[id(tip)],
            tip.name, va="center", fontsize=9, color=color,
        )

    # Legend: one round marker per state.
    legend_handles = [
        plt.Line2D(
            [0], [0], marker="o", color="w",
            markerfacecolor=state_colors[s], markersize=10, label=s,
        )
        for s in states
    ]
    ax.legend(handles=legend_handles, loc="upper left", framealpha=0.9)

    ax.set_xlabel("Branch length")
    ax.set_yticks([])
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.set_title("Discrete Ancestral State Reconstruction")
    ax.set_aspect("auto")
    fig.tight_layout()
    fig.savefig(output_path, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved discrete ASR plot: {output_path}")
@@ -0,0 +1 @@
1
+ __version__ = "2.1.35"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phykit
3
- Version: 2.1.34
3
+ Version: 2.1.35
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -1 +0,0 @@
1
- __version__ = "2.1.34"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes