phykit 2.1.83__tar.gz → 2.1.85__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. {phykit-2.1.83 → phykit-2.1.85}/PKG-INFO +1 -1
  2. {phykit-2.1.83 → phykit-2.1.85}/phykit/cli_registry.py +6 -0
  3. phykit-2.1.85/phykit/helpers/pgls_utils.py +181 -0
  4. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/plot_config.py +116 -0
  5. phykit-2.1.85/phykit/helpers/trait_parsing.py +133 -0
  6. {phykit-2.1.83 → phykit-2.1.85}/phykit/phykit.py +128 -0
  7. {phykit-2.1.83 → phykit-2.1.85}/phykit/service_factories.py +2 -0
  8. phykit-2.1.85/phykit/services/alignment/occupancy_filter.py +299 -0
  9. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/__init__.py +1 -0
  10. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/ancestral_reconstruction.py +1 -15
  11. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/base.py +40 -0
  12. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/concordance_asr.py +1 -1
  13. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/cont_map.py +1 -15
  14. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/density_map.py +1 -1
  15. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/fit_continuous.py +3 -44
  16. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/fit_discrete.py +1 -11
  17. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/independent_contrasts.py +1 -11
  18. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/ltt.py +1 -15
  19. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/ou_shift_detection.py +1 -15
  20. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/ouwie.py +1 -15
  21. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/parsimony_score.py +1 -8
  22. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phenogram.py +1 -15
  23. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylo_anova.py +1 -6
  24. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylo_heatmap.py +11 -78
  25. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylo_impute.py +1 -15
  26. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylo_logistic.py +3 -117
  27. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylo_path.py +14 -91
  28. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylogenetic_glm.py +3 -116
  29. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylogenetic_ordination.py +5 -143
  30. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylogenetic_regression.py +14 -244
  31. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylogenetic_signal.py +5 -160
  32. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/phylomorphospace.py +3 -116
  33. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/quartet_pie.py +17 -73
  34. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/rate_heterogeneity.py +1 -15
  35. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/simmap_summary.py +10 -43
  36. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/stochastic_character_map.py +1 -15
  37. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/threshold_model.py +1 -15
  38. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/trait_correlation.py +5 -119
  39. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/trait_rate_map.py +1 -19
  40. phykit-2.1.85/phykit/services/tree/transfer_annotations.py +166 -0
  41. phykit-2.1.85/phykit/version.py +1 -0
  42. {phykit-2.1.83 → phykit-2.1.85}/phykit.egg-info/PKG-INFO +1 -1
  43. {phykit-2.1.83 → phykit-2.1.85}/phykit.egg-info/SOURCES.txt +4 -0
  44. {phykit-2.1.83 → phykit-2.1.85}/phykit.egg-info/entry_points.txt +9 -89
  45. phykit-2.1.85/setup.py +63 -0
  46. phykit-2.1.83/phykit/version.py +0 -1
  47. phykit-2.1.83/setup.py +0 -356
  48. {phykit-2.1.83 → phykit-2.1.85}/LICENSE.md +0 -0
  49. {phykit-2.1.83 → phykit-2.1.85}/README.md +0 -0
  50. {phykit-2.1.83 → phykit-2.1.85}/phykit/__init__.py +0 -0
  51. {phykit-2.1.83 → phykit-2.1.85}/phykit/__main__.py +0 -0
  52. {phykit-2.1.83 → phykit-2.1.85}/phykit/errors.py +0 -0
  53. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/__init__.py +0 -0
  54. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/boolean_argument_parsing.py +0 -0
  55. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/caching.py +0 -0
  56. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/circular_layout.py +0 -0
  57. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/color_annotations.py +0 -0
  58. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/discrete_models.py +0 -0
  59. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/files.py +0 -0
  60. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/json_output.py +0 -0
  61. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/parallel.py +0 -0
  62. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/parsimony_utils.py +0 -0
  63. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/quartet_utils.py +0 -0
  64. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/stats_summary.py +0 -0
  65. {phykit-2.1.83 → phykit-2.1.85}/phykit/helpers/streaming.py +0 -0
  66. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/__init__.py +0 -0
  67. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/__init__.py +0 -0
  68. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/alignment_entropy.py +0 -0
  69. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/alignment_length.py +0 -0
  70. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  71. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
  72. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/alignment_recoding.py +0 -0
  73. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/alignment_subsample.py +0 -0
  74. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/base.py +0 -0
  75. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/column_score.py +0 -0
  76. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/composition_per_taxon.py +0 -0
  77. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  78. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  79. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/dfoil.py +0 -0
  80. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/dna_threader.py +0 -0
  81. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/dstatistic.py +0 -0
  82. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  83. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/faidx.py +0 -0
  84. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/gc_content.py +0 -0
  85. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/identity_matrix.py +0 -0
  86. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/mask_alignment.py +0 -0
  87. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
  88. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/pairwise_identity.py +0 -0
  89. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  90. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/phylo_gwas.py +0 -0
  91. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/plot_alignment_qc.py +0 -0
  92. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/rcv.py +0 -0
  93. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/rcvt.py +0 -0
  94. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  95. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  96. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/taxon_groups.py +0 -0
  97. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/alignment/variable_sites.py +0 -0
  98. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/base.py +0 -0
  99. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/bipartition_support_stats.py +0 -0
  100. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/branch_length_multiplier.py +0 -0
  101. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/character_map.py +0 -0
  102. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/collapse_branches.py +0 -0
  103. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/consensus_network.py +0 -0
  104. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/consensus_tree.py +0 -0
  105. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/cophylo.py +0 -0
  106. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  107. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/discordance_asymmetry.py +0 -0
  108. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/dvmc.py +0 -0
  109. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/evo_tempo_map.py +0 -0
  110. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/evolutionary_rate.py +0 -0
  111. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  112. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/hybridization.py +0 -0
  113. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/internal_branch_stats.py +0 -0
  114. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/internode_labeler.py +0 -0
  115. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/kf_distance.py +0 -0
  116. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  117. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/lb_score.py +0 -0
  118. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/monophyly_check.py +0 -0
  119. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  120. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/neighbor_net.py +0 -0
  121. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/network_signal.py +0 -0
  122. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/patristic_distances.py +0 -0
  123. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/polytomy_test.py +0 -0
  124. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/print_tree.py +0 -0
  125. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/prune_tree.py +0 -0
  126. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/quartet_network.py +0 -0
  127. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/relative_rate_test.py +0 -0
  128. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/rename_tree_tips.py +0 -0
  129. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/rf_distance.py +0 -0
  130. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/root_tree.py +0 -0
  131. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/saturation.py +0 -0
  132. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/spectral_discordance.py +0 -0
  133. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/spr.py +0 -0
  134. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/spurious_sequence.py +0 -0
  135. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/terminal_branch_stats.py +0 -0
  136. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/tip_labels.py +0 -0
  137. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  138. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  139. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/total_tree_length.py +0 -0
  140. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/tree_space.py +0 -0
  141. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/treeness.py +0 -0
  142. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/treeness_over_rcv.py +0 -0
  143. {phykit-2.1.83 → phykit-2.1.85}/phykit/services/tree/vcv_utils.py +0 -0
  144. {phykit-2.1.83 → phykit-2.1.85}/phykit.egg-info/dependency_links.txt +0 -0
  145. {phykit-2.1.83 → phykit-2.1.85}/phykit.egg-info/requires.txt +0 -0
  146. {phykit-2.1.83 → phykit-2.1.85}/phykit.egg-info/top_level.txt +0 -0
  147. {phykit-2.1.83 → phykit-2.1.85}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phykit
3
- Version: 2.1.83
3
+ Version: 2.1.85
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -215,8 +215,14 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
215
215
  "tree_space": "tree_space",
216
216
  "tspace": "tree_space",
217
217
  "tree_landscape": "tree_space",
218
+ "transfer_annotations": "transfer_annotations",
219
+ "transfer_annot": "transfer_annotations",
220
+ "annotate_tree": "transfer_annotations",
218
221
  "tgroups": "taxon_groups",
219
222
  "shared_taxa": "taxon_groups",
223
+ "occupancy_filter": "occupancy_filter",
224
+ "occ_filter": "occupancy_filter",
225
+ "filter_occupancy": "occupancy_filter",
220
226
  # Helper aliases
221
227
  "create_concat": "create_concatenation_matrix",
222
228
  "cc": "create_concatenation_matrix",
@@ -0,0 +1,181 @@
1
+ """
2
+ Shared PGLS (Phylogenetic Generalized Least Squares) utilities.
3
+
4
+ Provides reusable functions for:
5
+ - Pagel's lambda estimation via ML
6
+ - Concentrated PGLS log-likelihood
7
+ - GLS model fitting
8
+ - Lambda upper bound computation
9
+
10
+ Used by phylogenetic_regression, phylo_path, phylogenetic_signal,
11
+ phylogenetic_ordination, fit_continuous, and other comparative methods.
12
+ """
13
+ from typing import Tuple
14
+
15
+ import numpy as np
16
+ from scipy.optimize import minimize_scalar
17
+
18
+ from ..errors import PhykitUserError
19
+
20
+
21
def max_lambda(tree) -> float:
    """Compute the upper bound for Pagel's lambda.

    For ultrametric trees the bound is ``max_tip_height / max_parent_height``,
    where ``max_parent_height`` is the largest root-to-parent distance over
    all non-root clades. For non-ultrametric trees the bound is 1.0.

    Degenerate trees also yield 1.0 instead of raising: a tree with no tips,
    a tree whose tips all sit at (or below) height zero, and a tree in which
    every clade hangs directly off the root.

    Parameters
    ----------
    tree : tree object exposing ``root``, ``get_terminals()``,
        ``distance(a, b)`` and ``find_clades(order=...)``
        (presumably a Bio.Phylo tree -- confirm against callers)

    Returns
    -------
    Upper bound for lambda as a float.
    """
    root = tree.root
    tip_heights = [tree.distance(root, tip) for tip in tree.get_terminals()]
    if not tip_heights:
        return 1.0

    max_tip_height = max(tip_heights)
    min_tip_height = min(tip_heights)

    # The ultrametricity test below is a *relative* spread, which is
    # undefined when the tallest tip has no positive height (e.g. all branch
    # lengths are zero or missing). Fall back to the conventional bound.
    if max_tip_height <= 0:
        return 1.0

    is_ultrametric = (max_tip_height - min_tip_height) / max_tip_height < 1e-6
    if not is_ultrametric:
        return 1.0

    max_parent_height = 0.0
    for clade in tree.find_clades(order="level"):
        if clade == root:
            continue
        # Height of the clade's parent = clade height minus its own branch.
        parent_height = tree.distance(root, clade) - (clade.branch_length or 0.0)
        max_parent_height = max(max_parent_height, parent_height)

    # Star-like tree: every parent is the root, so the ratio is undefined.
    if max_parent_height == 0.0:
        return 1.0

    return max_tip_height / max_parent_height
51
+
52
+
53
def pgls_log_likelihood(
    y: np.ndarray, X: np.ndarray, C: np.ndarray
) -> float:
    """Concentrated log-likelihood with beta and sigma^2 profiled out.

    The GLS coefficients and the ML variance estimate are computed from
    ``C`` and substituted back into the multivariate normal log-likelihood.

    Parameters
    ----------
    y : response vector (n,)
    X : design matrix (n, p)
    C : phylogenetic VCV matrix (n, n), possibly lambda-transformed

    Returns
    -------
    The log-likelihood as a float, or the sentinel ``-1e20`` when it cannot
    be evaluated: empty input, singular ``C`` or ``X' C^-1 X``, a
    non-positive determinant sign or variance estimate, or any NaN/inf
    intermediate result.
    """
    failure = -1e20
    n = len(y)
    if n == 0:
        return failure

    try:
        C_inv = np.linalg.inv(C)
        XtCiX_inv = np.linalg.inv(X.T @ C_inv @ X)
    except np.linalg.LinAlgError:
        return failure

    beta_hat = XtCiX_inv @ X.T @ C_inv @ y
    resid = y - X @ beta_hat
    sigma2_ml = float(resid @ C_inv @ resid) / n

    sign, logdet_C = np.linalg.slogdet(C)
    # Written as `not (x > 0)` so that NaN is rejected too; a plain
    # `x <= 0` comparison is False for NaN and would let it through.
    if not (sign > 0) or not (sigma2_ml > 0):
        return failure

    ll = -0.5 * (
        n * np.log(2 * np.pi) + n * np.log(sigma2_ml) + logdet_C + n
    )
    # Never hand NaN/inf to an optimizer comparing likelihoods.
    if not np.isfinite(ll):
        return failure
    return float(ll)
84
+
85
+
86
def estimate_lambda(
    y: np.ndarray,
    X: np.ndarray,
    vcv: np.ndarray,
    max_lam: float = 1.0,
) -> Tuple[float, float]:
    """Find the ML estimate of Pagel's lambda with a piecewise bounded search.

    The range ``[0, max_lam]`` is cut into ten equal sub-intervals; a bounded
    scalar minimization of the negative log-likelihood is run on each one and
    the best optimum across all sub-intervals is kept.

    Parameters
    ----------
    y : response vector (n,)
    X : design matrix (n, p)
    vcv : phylogenetic VCV matrix (n, n)
    max_lam : upper bound for lambda (default 1.0)

    Returns
    -------
    (lambda_hat, log_likelihood_at_lambda)
    """
    n_intervals = 10
    original_diag = np.diag(vcv).copy()

    def transformed(lam):
        # Scale every entry, then restore the untouched diagonal.
        scaled = vcv * lam
        np.fill_diagonal(scaled, original_diag)
        return scaled

    def objective(lam):
        try:
            return -pgls_log_likelihood(y, X, transformed(lam))
        except (np.linalg.LinAlgError, FloatingPointError, ValueError):
            return 1e10

    lower_edges = np.linspace(0, max_lam - max_lam / n_intervals, n_intervals)
    upper_edges = np.linspace(max_lam / n_intervals, max_lam, n_intervals)

    lambda_hat = 0.0
    best_ll = -np.inf
    for lo, hi in zip(lower_edges, upper_edges):
        result = minimize_scalar(objective, bounds=(lo, hi), method="bounded")
        candidate_ll = -result.fun
        if candidate_ll > best_ll:
            best_ll = candidate_ll
            lambda_hat = result.x

    # Re-evaluate the likelihood at the selected lambda for the return value.
    ll_fitted = pgls_log_likelihood(y, X, transformed(lambda_hat))

    return float(lambda_hat), float(ll_fitted)
135
+
136
+
137
def fit_gls(
    y: np.ndarray, X: np.ndarray, C_inv: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, float, np.ndarray]:
    """Fit a GLS model from a pre-inverted covariance matrix.

    Coefficients are beta_hat = (X' C_inv X)^{-1} X' C_inv y. The variance
    estimate divides the weighted residual sum of squares by the residual
    degrees of freedom ``n - p``, floored at 1.

    Parameters
    ----------
    y : response vector (n,)
    X : design matrix (n, p)
    C_inv : inverse of phylogenetic VCV matrix (n, n)

    Returns
    -------
    (beta_hat, residuals, sigma2_reml, var_beta)

    Raises
    ------
    PhykitUserError
        If ``X' C_inv X`` cannot be inverted.
    """
    n_obs, n_coef = X.shape

    try:
        bread = np.linalg.inv(X.T @ C_inv @ X)
    except np.linalg.LinAlgError:
        raise PhykitUserError(
            [
                "Singular design matrix: cannot estimate coefficients.",
                "Check that predictors are not collinear.",
            ],
            code=2,
        )

    beta_hat = bread @ X.T @ C_inv @ y
    residuals = y - X @ beta_hat

    # Floor the denominator at 1 so a saturated model does not divide by zero.
    denom = max(n_obs - n_coef, 1)
    sigma2 = float(residuals @ C_inv @ residuals) / denom

    return beta_hat, residuals, sigma2, sigma2 * bread
174
+
175
+
176
def apply_lambda(vcv: np.ndarray, lambda_val: float) -> np.ndarray:
    """Return a lambda-transformed copy of a VCV matrix.

    Off-diagonal entries are multiplied by ``lambda_val``; the diagonal keeps
    its original values. The input matrix is not modified.
    """
    transformed = vcv * lambda_val
    # np.diag(vcv) reads from the untouched input, so no copy is required.
    np.fill_diagonal(transformed, np.diag(vcv))
    return transformed
@@ -248,3 +248,119 @@ def compute_node_x_cladogram(tree, parent_map):
248
248
  else:
249
249
  node_x[cid] = float(node_depth.get(cid, 0)) * step_size
250
250
  return node_x
251
+
252
+
253
+ # ---- Shared rectangular tree plotting utilities ----
254
+
255
+
256
def build_parent_map(tree):
    """Return a dict keyed by ``id(child)`` whose value is the parent clade.

    Clades are visited in preorder; the root has no entry because it is
    nobody's child.
    """
    return {
        id(child): parent
        for parent in tree.find_clades(order="preorder")
        for child in parent.clades
    }
263
+
264
+
265
def compute_node_positions(tree, parent_map, cladogram=False):
    """Lay out a tree for rectangular plotting.

    Tips get consecutive integer y-coordinates in ``get_terminals()`` order;
    each internal node gets the mean y of its children. The x-coordinate is
    the cumulative branch length from the root, or the equal-depth layout
    from ``compute_node_x_cladogram`` when ``cladogram`` is true.

    Parameters
    ----------
    tree : Bio.Phylo tree
    parent_map : dict from build_parent_map()
    cladogram : if True, use equal-depth x-positions (tips aligned)

    Returns
    -------
    (node_x, node_y) : dicts mapping node id -> coordinate
    """
    import numpy as np

    root = tree.root
    node_y = {id(tip): idx for idx, tip in enumerate(tree.get_terminals())}

    if cladogram:
        node_x = compute_node_x_cladogram(tree, parent_map)
    else:
        node_x = {}
        for node in tree.find_clades(order="preorder"):
            key = id(node)
            if node == root:
                node_x[key] = 0.0
                continue
            if key not in parent_map:
                continue
            # Missing or zero branch lengths contribute nothing.
            length = node.branch_length if node.branch_length else 0.0
            node_x[key] = node_x.get(id(parent_map[key]), 0.0) + length

    # Postorder guarantees children are placed before their parent.
    for node in tree.find_clades(order="postorder"):
        if node.is_terminal() or id(node) in node_y:
            continue
        placed = [node_y[id(kid)] for kid in node.clades if id(kid) in node_y]
        node_y[id(node)] = float(np.mean(placed)) if placed else 0.0

    return node_x, node_y
310
+
311
+
312
def draw_tree_branches(
    ax, tree, node_x, node_y, parent_map,
    color="black", lw=1.5, vertical_color="black", vertical_lw=0.8,
):
    """Plot every branch of a rectangular tree onto ``ax``.

    Each non-root clade contributes two line segments: a horizontal one from
    its parent's x to its own x at the clade's y, and a vertical connector at
    the parent's x joining the parent's y to the clade's y. Clades missing
    from ``parent_map``, or whose endpoints are missing from ``node_x``, are
    skipped.

    ``color`` may be a fixed colour or a callable taking the clade and
    returning a colour, e.g.
        color=lambda clade: "red" if ... else "black"
    """
    root = tree.root
    per_branch = callable(color)

    for node in tree.find_clades(order="preorder"):
        key = id(node)
        if node == root or key not in parent_map:
            continue

        parent_key = id(parent_map[key])
        if parent_key not in node_x or key not in node_x:
            continue

        parent_x, child_x = node_x[parent_key], node_x[key]
        parent_y, child_y = node_y.get(parent_key, 0), node_y.get(key, 0)

        horizontal_color = color(node) if per_branch else color
        ax.plot([parent_x, child_x], [child_y, child_y], color=horizontal_color, lw=lw)
        ax.plot([parent_x, parent_x], [parent_y, child_y], color=vertical_color, lw=vertical_lw)
339
+
340
+
341
def draw_tip_labels(
    ax, tree, node_x, node_y, fontsize=9, offset_fraction=0.03,
):
    """Draw taxon name labels at tree tips.

    Each label is placed at the tip's y-coordinate, shifted right of the
    tip's x-coordinate by ``offset_fraction`` of the largest x in ``node_x``.

    Parameters
    ----------
    ax : axes-like object providing ``text(x, y, s, **kwargs)``
    tree : tree providing ``get_terminals()``
    node_x, node_y : dicts mapping node id -> coordinate
    fontsize : label font size; a value <= 0 disables labels entirely
    offset_fraction : horizontal gap as a fraction of the maximum x
    """
    # Labels disabled: return before touching the tree or the layout.
    if fontsize <= 0:
        return

    max_x = max(node_x.values()) if node_x else 1.0
    offset = max_x * offset_fraction

    for tip in tree.get_terminals():
        ax.text(
            node_x[id(tip)] + offset, node_y[id(tip)],
            tip.name, va="center", fontsize=fontsize,
        )
357
+
358
+
359
def cleanup_tree_axes(ax, show_xlabel=True):
    """Apply the standard cosmetic cleanup for rectangular tree plots.

    Removes the y ticks, hides the top, right and left spines, and labels
    the x axis "Branch length" unless ``show_xlabel`` is false.
    """
    ax.set_yticks([])
    for side in ("top", "right", "left"):
        ax.spines[side].set_visible(False)
    if not show_xlabel:
        return
    ax.set_xlabel("Branch length")
@@ -0,0 +1,133 @@
1
+ """
2
+ Shared trait file parsing utilities.
3
+
4
+ Provides a single implementation for parsing tab-delimited multi-trait
5
+ files with a header row, used across phylogenetic regression, signal,
6
+ ordination, path analysis, ANOVA, and other comparative methods.
7
+ """
8
+ import sys
9
+ from typing import Dict, List, Tuple
10
+
11
+ from ..errors import PhykitUserError
12
+
13
+
14
def parse_multi_trait_file(
    path: str,
    tree_tips: List[str],
    min_shared: int = 3,
    min_columns: int = 2,
) -> Tuple[List[str], Dict[str, List[float]]]:
    """Parse a tab-delimited multi-trait file with a header row.

    Format:
        taxon<tab>trait1<tab>trait2<tab>...
        species_A<tab>1.2<tab>3.4<tab>...

    Blank lines and lines starting with ``#`` are ignored. The first
    remaining line is the header. If a taxon appears more than once, the
    last row wins. Taxa present on only one side (tree or file) are reported
    as warnings on stderr and dropped from the result.

    Parameters
    ----------
    path : path to TSV file
    tree_tips : list of tip names from the tree
    min_shared : minimum shared taxa between tree and file (default 3)
    min_columns : minimum columns in header (default 2: taxon + 1 trait)

    Returns
    -------
    (trait_names, traits_dict) where traits_dict maps taxon -> [float values].
    Keys follow the order in which the taxa appear in the file.

    Raises
    ------
    PhykitUserError
        If the file is missing, has no data rows, has too few header
        columns, contains a row with the wrong column count or a
        non-numeric value, or shares fewer than ``min_shared`` taxa with
        the tree. Line numbers in messages refer to the actual file.
    """
    try:
        with open(path) as f:
            raw_lines = f.readlines()
    except FileNotFoundError:
        raise PhykitUserError(
            [
                f"{path} corresponds to no such file or directory.",
                "Please check filename and pathing",
            ],
            code=2,
        ) from None

    # Drop comments and blank lines, but remember each surviving line's real
    # 1-based position so error messages point at the right place.
    data_lines = []
    for file_line_no, raw in enumerate(raw_lines, 1):
        stripped = raw.strip()
        if stripped and not stripped.startswith("#"):
            data_lines.append((file_line_no, stripped))

    if len(data_lines) < 2:
        raise PhykitUserError(
            ["Multi-trait file must have a header row and at least one data row."],
            code=2,
        )

    # First data line is the header
    header_parts = data_lines[0][1].split("\t")
    n_cols = len(header_parts)
    if n_cols < min_columns:
        raise PhykitUserError(
            [
                f"Header must have at least {min_columns} columns "
                f"(taxon + at least {min_columns - 1} trait(s)).",
            ],
            code=2,
        )
    trait_names = header_parts[1:]

    traits = {}
    for line_idx, line in data_lines[1:]:
        parts = line.split("\t")
        if len(parts) != n_cols:
            raise PhykitUserError(
                [
                    f"Line {line_idx} has {len(parts)} columns; expected {n_cols}.",
                    f"Each line should have: taxon_name<tab>"
                    f"{'<tab>'.join(['trait'] * len(trait_names))}",
                ],
                code=2,
            )
        taxon = parts[0]
        values = []
        for trait_name, val_str in zip(trait_names, parts[1:]):
            try:
                values.append(float(val_str))
            except ValueError:
                raise PhykitUserError(
                    [
                        f"Non-numeric trait value '{val_str}' for taxon '{taxon}' "
                        f"(trait '{trait_name}') on line {line_idx}.",
                    ],
                    code=2,
                ) from None
        traits[taxon] = values

    tree_tip_set = set(tree_tips)
    trait_taxa_set = set(traits)
    shared = tree_tip_set & trait_taxa_set

    tree_only = tree_tip_set - trait_taxa_set
    trait_only = trait_taxa_set - tree_tip_set

    if tree_only:
        print(
            f"Warning: {len(tree_only)} taxa in tree but not in trait file: "
            f"{', '.join(sorted(tree_only))}",
            file=sys.stderr,
        )
    if trait_only:
        print(
            f"Warning: {len(trait_only)} taxa in trait file but not in tree: "
            f"{', '.join(sorted(trait_only))}",
            file=sys.stderr,
        )

    if len(shared) < min_shared:
        raise PhykitUserError(
            [
                f"Only {len(shared)} shared taxa between tree and trait file.",
                f"At least {min_shared} shared taxa are required.",
            ],
            code=2,
        )

    # Iterate the dict (file order) rather than the set so the result is
    # reproducible from run to run.
    filtered = {
        taxon: values for taxon, values in traits.items() if taxon in tree_tip_set
    }
    return trait_names, filtered
@@ -175,6 +175,8 @@ class Phykit:
175
175
  in an alignment
176
176
  taxon_groups (alias: tgroups; shared_taxa)
177
177
  - group tree or FASTA files by their taxon set
178
+ occupancy_filter (alias: occ_filter; filter_occupancy)
179
+ - filter alignments/trees by cross-file taxon occupancy
178
180
 
179
181
  Tree-based commands
180
182
  ===================
@@ -306,6 +308,8 @@ class Phykit:
306
308
  - prune taxa from a phylogeny
307
309
  subtree_prune_regraft (alias: spr)
308
310
  - generate all SPR rearrangements for a specified subtree
311
+ transfer_annotations (alias: transfer_annot; annotate_tree)
312
+ - transfer node annotations between trees (e.g., wASTRAL to RAxML/IQ-TREE)
309
313
  relative_rate_test (alias: rrt; tajima_rrt)
310
314
  - Tajima's relative rate test for equal evolutionary
311
315
  rates between two ingroup lineages
@@ -6819,6 +6823,59 @@ class Phykit:
6819
6823
  _add_json_argument(parser)
6820
6824
  _run_service(parser, argv, Spr)
6821
6825
 
6826
+ @staticmethod
6827
+ def transfer_annotations(argv):
# CLI handler for the `transfer_annotations` command: builds the argument
# parser (the full help text lives in the parser description below) and
# hands it, together with argv, to _run_service with the
# TransferAnnotations service.
6828
+ parser = _new_parser(
6829
+ description=textwrap.dedent(
6830
+ f"""\
6831
+ {help_header}
6832
+
6833
+ Transfer internal node annotations from one tree onto
6834
+ another. Matches nodes by bipartition (descendant taxa
6835
+ set) and copies the annotation labels.
6836
+
6837
+ Typical use case: transfer wASTRAL support annotations
6838
+ (q1/q2/q3, pp1, f1, etc.) from an annotated ASTRAL
6839
+ tree onto a branch-length-optimized topology from
6840
+ RAxML-NG, IQ-TREE, or any other tool. The output tree
6841
+ has the target's branch lengths with the source's
6842
+ annotations.
6843
+
6844
+ Aliases:
6845
+ transfer_annotations, transfer_annot, annotate_tree
6846
+ Command line interfaces:
6847
+ pk_transfer_annotations, pk_transfer_annot, pk_annotate_tree
6848
+
6849
+ Usage:
6850
+ phykit transfer_annotations --source <annotated_tree>
6851
+ --target <branch_length_tree> [-o/--output <file>]
6852
+ [--json]
6853
+
6854
+ Options
6855
+ =====================================================
6856
+ --source annotated tree file (e.g.,
6857
+ wASTRAL output with
6858
+ --support 3)
6859
+
6860
+ --target target tree file with
6861
+ branch lengths to keep
6862
+ (e.g., RAxML-NG or
6863
+ IQ-TREE output)
6864
+
6865
+ -o/--output output file for the
6866
+ annotated tree (default:
6867
+ target file + ".annotated")
6868
+
6869
+ --json output results as JSON
6870
+ """
6871
+ ),
6872
+ )
# Per-option help is suppressed (help=SUPPRESS, empty metavar); the
# description above is the only user-facing documentation. --source and
# --target are required; --output defaults to None.
6873
+ parser.add_argument("--source", type=str, required=True, help=SUPPRESS, metavar="")
6874
+ parser.add_argument("--target", type=str, required=True, help=SUPPRESS, metavar="")
6875
+ parser.add_argument("-o", "--output", type=str, default=None, help=SUPPRESS, metavar="")
# NOTE(review): presumably registers the --json flag described in the help
# text, and _run_service presumably parses argv and runs the service --
# confirm against the helper definitions, which are not visible here.
6876
+ _add_json_argument(parser)
6877
+ _run_service(parser, argv, TransferAnnotations)
6878
+
6822
6879
  @staticmethod
6823
6880
  def relative_rate_test(argv):
6824
6881
  parser = _new_parser(
@@ -8606,6 +8663,69 @@ class Phykit:
8606
8663
  _add_json_argument(parser)
8607
8664
  _run_service(parser, argv, TaxonGroups)
8608
8665
 
8666
+ @staticmethod
8667
+ def occupancy_filter(argv):
# CLI handler for the `occupancy_filter` command: builds the argument
# parser (the full help text lives in the parser description below) and
# hands it, together with argv, to _run_service with the OccupancyFilter
# service.
8668
+ parser = _new_parser(
8669
+ description=textwrap.dedent(
8670
+ f"""\
8671
+ {help_header}
8672
+
8673
+ Filter alignments and/or trees by cross-file taxon
8674
+ occupancy. Counts how many files each taxon appears in
8675
+ and retains only taxa meeting a minimum threshold.
8676
+ Outputs filtered copies of each input file.
8677
+
8678
+ For FASTA files, removes sequences of filtered taxa.
8679
+ For tree files, prunes tips of filtered taxa.
8680
+
8681
+ Aliases:
8682
+ occupancy_filter, occ_filter, filter_occupancy
8683
+ Command line interfaces:
8684
+ pk_occupancy_filter, pk_occ_filter, pk_filter_occupancy
8685
+
8686
+ Usage:
8687
+ phykit occupancy_filter -l <file_list>
8688
+ [-f/--format fasta|trees] [-t/--threshold <int>]
8689
+ [-o/--output-dir <dir>] [--suffix <str>] [--json]
8690
+
8691
+ Options
8692
+ =====================================================
8693
+ -l/--list file listing paths to
8694
+ alignment or tree files,
8695
+ one per line (required)
8696
+
8697
+ -f/--format input file format: fasta
8698
+ or trees (default: fasta)
8699
+
8700
+ -t/--threshold minimum occupancy to retain
8701
+ a taxon. Values between 0
8702
+ and 1 are treated as a
8703
+ fraction (e.g., 0.5 = 50%
8704
+ of files). Values >= 1 are
8705
+ treated as an absolute
8706
+ count. (default: 0.5)
8707
+
8708
+ -o/--output-dir directory for filtered
8709
+ output files (default:
8710
+ same directory as input)
8711
+
8712
+ --suffix suffix added to output
8713
+ filenames before the
8714
+ extension (default:
8715
+ ".filtered")
8716
+
8717
+ --json output results as JSON
8718
+ """
8719
+ ),
8720
+ )
# Per-option help is suppressed (help=SUPPRESS, empty metavar); the
# description above is the only user-facing documentation.
# NOTE(review): the usage line advertises `-t/--threshold <int>`, but the
# argument is parsed as a float and defaults to 0.5 -- the usage text looks
# out of date; confirm and correct the help string.
8721
+ parser.add_argument("-l", "--list", type=str, required=True, help=SUPPRESS, metavar="")
8722
+ parser.add_argument("-f", "--format", type=str, default="fasta", choices=["fasta", "trees"], help=SUPPRESS, metavar="")
8723
+ parser.add_argument("-t", "--threshold", type=float, default=0.5, help=SUPPRESS, metavar="")
8724
+ parser.add_argument("-o", "--output-dir", type=str, default=None, help=SUPPRESS, metavar="")
8725
+ parser.add_argument("--suffix", type=str, default=".filtered", help=SUPPRESS, metavar="")
# NOTE(review): presumably registers the --json flag described in the help
# text, and _run_service presumably parses argv and runs the service --
# confirm against the helper definitions, which are not visible here.
8726
+ _add_json_argument(parser)
8727
+ _run_service(parser, argv, OccupancyFilter)
8728
+
8609
8729
  ### Helper commands
8610
8730
  @staticmethod
8611
8731
  def create_concatenation_matrix(argv):
@@ -9125,6 +9245,10 @@ def subtree_prune_regraft(argv=None):
9125
9245
  Phykit.subtree_prune_regraft(sys.argv[1:])
9126
9246
 
9127
9247
 
9248
def transfer_annotations(argv=None):
    """Module-level wrapper around ``Phykit.transfer_annotations``.

    Presumably the target of the ``pk_transfer_annotations`` family of
    console scripts named in the command's help text.

    Parameters
    ----------
    argv : optional list of command-line arguments, without the program
        name. When omitted (``None``), ``sys.argv[1:]`` is used.
    """
    # Honour an explicitly supplied argv instead of silently ignoring it.
    Phykit.transfer_annotations(sys.argv[1:] if argv is None else argv)
9250
+
9251
+
9128
9252
  def relative_rate_test(argv=None):
9129
9253
  Phykit.relative_rate_test(sys.argv[1:])
9130
9254
 
@@ -9225,3 +9349,7 @@ def trait_rate_map(argv=None):
9225
9349
 
9226
9350
  def taxon_groups(argv=None):
9227
9351
  Phykit.taxon_groups(sys.argv[1:])
9352
+
9353
+
9354
def occupancy_filter(argv=None):
    """Module-level wrapper around ``Phykit.occupancy_filter``.

    Presumably the target of the ``pk_occupancy_filter`` family of console
    scripts named in the command's help text.

    Parameters
    ----------
    argv : optional list of command-line arguments, without the program
        name. When omitted (``None``), ``sys.argv[1:]`` is used.
    """
    # Honour an explicitly supplied argv instead of silently ignoring it.
    Phykit.occupancy_filter(sys.argv[1:] if argv is None else argv)
@@ -47,6 +47,7 @@ SumOfPairsScore = _LazyServiceFactory("phykit.services.alignment.sum_of_pairs_sc
47
47
  PhyloAnova = _LazyServiceFactory("phykit.services.tree.phylo_anova", "PhyloAnova")
48
48
  PhyloPath = _LazyServiceFactory("phykit.services.tree.phylo_path", "PhyloPath")
49
49
  PhyloGwas = _LazyServiceFactory("phykit.services.alignment.phylo_gwas", "PhyloGwas")
50
+ OccupancyFilter = _LazyServiceFactory("phykit.services.alignment.occupancy_filter", "OccupancyFilter")
50
51
  TaxonGroups = _LazyServiceFactory("phykit.services.alignment.taxon_groups", "TaxonGroups")
51
52
  VariableSites = _LazyServiceFactory("phykit.services.alignment.variable_sites", "VariableSites")
52
53
 
@@ -107,6 +108,7 @@ RobinsonFouldsDistance = _LazyServiceFactory("phykit.services.tree.rf_distance",
107
108
  RootTree = _LazyServiceFactory("phykit.services.tree.root_tree", "RootTree")
108
109
  Saturation = _LazyServiceFactory("phykit.services.tree.saturation", "Saturation")
109
110
  Spr = _LazyServiceFactory("phykit.services.tree.spr", "Spr")
111
+ TransferAnnotations = _LazyServiceFactory("phykit.services.tree.transfer_annotations", "TransferAnnotations")
110
112
  SpuriousSequence = _LazyServiceFactory("phykit.services.tree.spurious_sequence", "SpuriousSequence")
111
113
  TerminalBranchStats = _LazyServiceFactory("phykit.services.tree.terminal_branch_stats", "TerminalBranchStats")
112
114
  TipLabels = _LazyServiceFactory("phykit.services.tree.tip_labels", "TipLabels")