phykit 2.1.82__tar.gz → 2.1.84__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. {phykit-2.1.82 → phykit-2.1.84}/PKG-INFO +1 -1
  2. {phykit-2.1.82 → phykit-2.1.84}/phykit/cli_registry.py +6 -0
  3. phykit-2.1.84/phykit/helpers/pgls_utils.py +181 -0
  4. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/plot_config.py +116 -0
  5. phykit-2.1.84/phykit/helpers/trait_parsing.py +133 -0
  6. {phykit-2.1.82 → phykit-2.1.84}/phykit/phykit.py +134 -0
  7. {phykit-2.1.82 → phykit-2.1.84}/phykit/service_factories.py +2 -0
  8. phykit-2.1.84/phykit/services/alignment/occupancy_filter.py +299 -0
  9. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/__init__.py +1 -0
  10. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/ancestral_reconstruction.py +1 -15
  11. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/base.py +40 -0
  12. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/concordance_asr.py +1 -1
  13. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/cont_map.py +1 -15
  14. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/density_map.py +1 -1
  15. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/fit_continuous.py +3 -44
  16. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/fit_discrete.py +1 -11
  17. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/independent_contrasts.py +1 -11
  18. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/ltt.py +1 -15
  19. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/ou_shift_detection.py +1 -15
  20. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/ouwie.py +1 -15
  21. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/parsimony_score.py +1 -8
  22. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phenogram.py +1 -15
  23. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylo_anova.py +1 -6
  24. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylo_heatmap.py +11 -78
  25. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylo_impute.py +1 -15
  26. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylo_logistic.py +3 -117
  27. phykit-2.1.84/phykit/services/tree/phylo_path.py +744 -0
  28. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylogenetic_glm.py +3 -116
  29. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylogenetic_ordination.py +5 -143
  30. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylogenetic_regression.py +14 -244
  31. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylogenetic_signal.py +5 -160
  32. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/phylomorphospace.py +3 -116
  33. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/quartet_pie.py +17 -73
  34. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/rate_heterogeneity.py +1 -15
  35. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/simmap_summary.py +10 -43
  36. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/stochastic_character_map.py +1 -15
  37. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/threshold_model.py +1 -15
  38. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/trait_correlation.py +5 -119
  39. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/trait_rate_map.py +1 -19
  40. phykit-2.1.84/phykit/version.py +1 -0
  41. {phykit-2.1.82 → phykit-2.1.84}/phykit.egg-info/PKG-INFO +1 -1
  42. {phykit-2.1.82 → phykit-2.1.84}/phykit.egg-info/SOURCES.txt +4 -0
  43. {phykit-2.1.82 → phykit-2.1.84}/phykit.egg-info/entry_points.txt +9 -89
  44. phykit-2.1.84/setup.py +63 -0
  45. phykit-2.1.82/phykit/version.py +0 -1
  46. phykit-2.1.82/setup.py +0 -353
  47. {phykit-2.1.82 → phykit-2.1.84}/LICENSE.md +0 -0
  48. {phykit-2.1.82 → phykit-2.1.84}/README.md +0 -0
  49. {phykit-2.1.82 → phykit-2.1.84}/phykit/__init__.py +0 -0
  50. {phykit-2.1.82 → phykit-2.1.84}/phykit/__main__.py +0 -0
  51. {phykit-2.1.82 → phykit-2.1.84}/phykit/errors.py +0 -0
  52. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/__init__.py +0 -0
  53. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/boolean_argument_parsing.py +0 -0
  54. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/caching.py +0 -0
  55. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/circular_layout.py +0 -0
  56. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/color_annotations.py +0 -0
  57. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/discrete_models.py +0 -0
  58. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/files.py +0 -0
  59. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/json_output.py +0 -0
  60. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/parallel.py +0 -0
  61. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/parsimony_utils.py +0 -0
  62. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/quartet_utils.py +0 -0
  63. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/stats_summary.py +0 -0
  64. {phykit-2.1.82 → phykit-2.1.84}/phykit/helpers/streaming.py +0 -0
  65. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/__init__.py +0 -0
  66. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/__init__.py +0 -0
  67. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/alignment_entropy.py +0 -0
  68. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/alignment_length.py +0 -0
  69. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  70. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
  71. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/alignment_recoding.py +0 -0
  72. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/alignment_subsample.py +0 -0
  73. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/base.py +0 -0
  74. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/column_score.py +0 -0
  75. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/composition_per_taxon.py +0 -0
  76. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  77. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  78. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/dfoil.py +0 -0
  79. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/dna_threader.py +0 -0
  80. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/dstatistic.py +0 -0
  81. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  82. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/faidx.py +0 -0
  83. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/gc_content.py +0 -0
  84. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/identity_matrix.py +0 -0
  85. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/mask_alignment.py +0 -0
  86. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
  87. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/pairwise_identity.py +0 -0
  88. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  89. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/phylo_gwas.py +0 -0
  90. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/plot_alignment_qc.py +0 -0
  91. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/rcv.py +0 -0
  92. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/rcvt.py +0 -0
  93. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  94. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  95. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/taxon_groups.py +0 -0
  96. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/alignment/variable_sites.py +0 -0
  97. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/base.py +0 -0
  98. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/bipartition_support_stats.py +0 -0
  99. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/branch_length_multiplier.py +0 -0
  100. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/character_map.py +0 -0
  101. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/collapse_branches.py +0 -0
  102. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/consensus_network.py +0 -0
  103. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/consensus_tree.py +0 -0
  104. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/cophylo.py +0 -0
  105. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  106. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/discordance_asymmetry.py +0 -0
  107. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/dvmc.py +0 -0
  108. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/evo_tempo_map.py +0 -0
  109. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/evolutionary_rate.py +0 -0
  110. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  111. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/hybridization.py +0 -0
  112. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/internal_branch_stats.py +0 -0
  113. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/internode_labeler.py +0 -0
  114. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/kf_distance.py +0 -0
  115. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  116. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/lb_score.py +0 -0
  117. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/monophyly_check.py +0 -0
  118. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  119. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/neighbor_net.py +0 -0
  120. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/network_signal.py +0 -0
  121. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/patristic_distances.py +0 -0
  122. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/polytomy_test.py +0 -0
  123. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/print_tree.py +0 -0
  124. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/prune_tree.py +0 -0
  125. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/quartet_network.py +0 -0
  126. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/relative_rate_test.py +0 -0
  127. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/rename_tree_tips.py +0 -0
  128. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/rf_distance.py +0 -0
  129. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/root_tree.py +0 -0
  130. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/saturation.py +0 -0
  131. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/spectral_discordance.py +0 -0
  132. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/spr.py +0 -0
  133. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/spurious_sequence.py +0 -0
  134. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/terminal_branch_stats.py +0 -0
  135. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/tip_labels.py +0 -0
  136. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  137. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  138. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/total_tree_length.py +0 -0
  139. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/tree_space.py +0 -0
  140. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/treeness.py +0 -0
  141. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/treeness_over_rcv.py +0 -0
  142. {phykit-2.1.82 → phykit-2.1.84}/phykit/services/tree/vcv_utils.py +0 -0
  143. {phykit-2.1.82 → phykit-2.1.84}/phykit.egg-info/dependency_links.txt +0 -0
  144. {phykit-2.1.82 → phykit-2.1.84}/phykit.egg-info/requires.txt +0 -0
  145. {phykit-2.1.82 → phykit-2.1.84}/phykit.egg-info/top_level.txt +0 -0
  146. {phykit-2.1.82 → phykit-2.1.84}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phykit
3
- Version: 2.1.82
3
+ Version: 2.1.84
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -55,6 +55,9 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
55
55
  "sum_of_pairs_score": "sum_of_pairs_score",
56
56
  "sops": "sum_of_pairs_score",
57
57
  "sop": "sum_of_pairs_score",
58
+ "phylo_path": "phylo_path",
59
+ "ppath": "phylo_path",
60
+ "phylopath": "phylo_path",
58
61
  "phylo_anova": "phylo_anova",
59
62
  "panova": "phylo_anova",
60
63
  "phylo_manova": "phylo_anova",
@@ -214,6 +217,9 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
214
217
  "tree_landscape": "tree_space",
215
218
  "tgroups": "taxon_groups",
216
219
  "shared_taxa": "taxon_groups",
220
+ "occupancy_filter": "occupancy_filter",
221
+ "occ_filter": "occupancy_filter",
222
+ "filter_occupancy": "occupancy_filter",
217
223
  # Helper aliases
218
224
  "create_concat": "create_concatenation_matrix",
219
225
  "cc": "create_concatenation_matrix",
@@ -0,0 +1,181 @@
1
+ """
2
+ Shared PGLS (Phylogenetic Generalized Least Squares) utilities.
3
+
4
+ Provides reusable functions for:
5
+ - Pagel's lambda estimation via ML
6
+ - Concentrated PGLS log-likelihood
7
+ - GLS model fitting
8
+ - Lambda upper bound computation
9
+
10
+ Used by phylogenetic_regression, phylo_path, phylogenetic_signal,
11
+ phylogenetic_ordination, fit_continuous, and other comparative methods.
12
+ """
13
+ from typing import Tuple
14
+
15
+ import numpy as np
16
+ from scipy.optimize import minimize_scalar
17
+
18
+ from ..errors import PhykitUserError
19
+
20
+
21
def max_lambda(tree) -> float:
    """Compute the upper bound for Pagel's lambda.

    For ultrametric trees, returns max_tip_height / max_parent_height, where
    a node's parent height is its root-to-node distance minus its own branch
    length. Returns 1.0 for non-ultrametric trees and for degenerate trees
    in which no tip sits above the root or every parent sits at the root.

    Parameters
    ----------
    tree : tree exposing ``get_terminals()``, ``root``, ``distance()`` and
        ``find_clades()`` (presumably a Bio.Phylo tree -- confirm with callers)

    Returns
    -------
    Upper bound for lambda (1.0 whenever no larger bound can be derived)
    """
    root = tree.root
    tip_heights = [tree.distance(root, tip) for tip in tree.get_terminals()]
    max_tip_height = max(tip_heights)

    # A tree whose tips all sit at height zero (e.g. missing branch lengths)
    # would otherwise divide by zero in the ultrametricity check below.
    if max_tip_height <= 0.0:
        return 1.0

    relative_spread = (max_tip_height - min(tip_heights)) / max_tip_height
    is_ultrametric = relative_spread < 1e-6
    if not is_ultrametric:
        return 1.0

    # Height of the deepest parent node, i.e. the start of the branch that
    # begins furthest from the root.
    max_parent_height = 0.0
    for clade in tree.find_clades(order="level"):
        if clade == root:
            continue
        node_height = tree.distance(root, clade)
        parent_height = node_height - (clade.branch_length or 0.0)
        if parent_height > max_parent_height:
            max_parent_height = parent_height

    # Star tree: every branch starts at the root, so no bound above 1.0.
    if max_parent_height == 0.0:
        return 1.0

    return max_tip_height / max_parent_height
51
+
52
+
53
def pgls_log_likelihood(
    y: np.ndarray, X: np.ndarray, C: np.ndarray
) -> float:
    """Concentrated log-likelihood with beta and sigma^2 profiled out.

    Parameters
    ----------
    y : response vector (n,)
    X : design matrix (n, p)
    C : phylogenetic VCV matrix (n, n), possibly lambda-transformed

    Returns
    -------
    The log-likelihood, or the sentinel -1e20 when it cannot be evaluated
    (singular matrices, non-positive determinant sign or variance estimate,
    or a non-finite intermediate result).
    """
    n = len(y)
    try:
        C_inv = np.linalg.inv(C)
        XtCiX_inv = np.linalg.inv(X.T @ C_inv @ X)
    except np.linalg.LinAlgError:
        return -1e20

    # GLS coefficients and the ML (divide-by-n) residual variance.
    beta_hat = XtCiX_inv @ X.T @ C_inv @ y
    e = y - X @ beta_hat
    sigma2_ml = float(e @ C_inv @ e) / n

    sign, logdet_C = np.linalg.slogdet(C)
    # NaN compares False against 0, so non-finite values need their own
    # check; otherwise NaN would be handed back to the optimizer.
    if sign <= 0 or not np.isfinite(sigma2_ml) or sigma2_ml <= 0:
        return -1e20

    ll = -0.5 * (
        n * np.log(2 * np.pi) + n * np.log(sigma2_ml) + logdet_C + n
    )
    if not np.isfinite(ll):
        return -1e20
    return float(ll)
84
+
85
+
86
def estimate_lambda(
    y: np.ndarray,
    X: np.ndarray,
    vcv: np.ndarray,
    max_lam: float = 1.0,
    n_intervals: int = 10,
) -> Tuple[float, float]:
    """Optimize Pagel's lambda via ML using multi-interval bounded search.

    The range [0, max_lam] is split into ``n_intervals`` equal sub-intervals,
    a bounded scalar minimization is run in each, and the best optimum across
    all sub-intervals is kept.

    Parameters
    ----------
    y : response vector (n,)
    X : design matrix (n, p)
    vcv : phylogenetic VCV matrix (n, n)
    max_lam : upper bound for lambda (default 1.0)
    n_intervals : number of sub-intervals searched (default 10)

    Returns
    -------
    (lambda_hat, log_likelihood_at_lambda)

    Raises
    ------
    ValueError if ``n_intervals`` is smaller than 1.
    """
    if n_intervals < 1:
        raise ValueError("n_intervals must be at least 1.")

    diag_vals = np.diag(vcv).copy()

    def lambda_transformed(lam):
        # Scale the off-diagonal entries by lambda; restore the diagonal.
        scaled = vcv * lam
        np.fill_diagonal(scaled, diag_vals)
        return scaled

    def neg_ll(lam):
        try:
            return -pgls_log_likelihood(y, X, lambda_transformed(lam))
        except (np.linalg.LinAlgError, FloatingPointError, ValueError):
            # Penalize lambda values where the likelihood cannot be computed.
            return 1e10

    width = max_lam / n_intervals
    bounds_lo = np.linspace(0, max_lam - width, n_intervals)
    bounds_hi = np.linspace(width, max_lam, n_intervals)

    best_ll = -np.inf
    lambda_hat = 0.0
    for lo, hi in zip(bounds_lo, bounds_hi):
        res = minimize_scalar(neg_ll, bounds=(lo, hi), method="bounded")
        ll_val = -res.fun
        if ll_val > best_ll:
            best_ll = ll_val
            lambda_hat = res.x

    # Compute log-likelihood at fitted lambda
    ll_fitted = pgls_log_likelihood(y, X, lambda_transformed(lambda_hat))

    return float(lambda_hat), float(ll_fitted)
135
+
136
+
137
def fit_gls(
    y: np.ndarray, X: np.ndarray, C_inv: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, float, np.ndarray]:
    """GLS estimation: beta_hat = (X' C_inv X)^{-1} X' C_inv y.

    Parameters
    ----------
    y : response vector (n,)
    X : design matrix (n, p)
    C_inv : inverse of phylogenetic VCV matrix (n, n)

    Returns
    -------
    (beta_hat, residuals, sigma2_reml, var_beta)

    ``sigma2_reml`` divides the weighted residual sum of squares by the
    residual degrees of freedom ``n - p``, floored at 1 so that saturated
    designs do not divide by zero. ``var_beta`` is ``sigma2_reml`` times
    ``(X' C_inv X)^{-1}``.

    Raises
    ------
    PhykitUserError if ``X' C_inv X`` cannot be inverted.
    """
    n, n_coef = X.shape
    XtCiX = X.T @ C_inv @ X
    try:
        XtCiX_inv = np.linalg.inv(XtCiX)
    except np.linalg.LinAlgError as err:
        # Chain the numerical error so the root cause stays in tracebacks.
        raise PhykitUserError(
            [
                "Singular design matrix: cannot estimate coefficients.",
                "Check that predictors are not collinear.",
            ],
            code=2,
        ) from err

    beta_hat = XtCiX_inv @ X.T @ C_inv @ y
    residuals = y - X @ beta_hat

    df_resid = n - n_coef
    sigma2 = float(residuals @ C_inv @ residuals) / max(df_resid, 1)

    var_beta = sigma2 * XtCiX_inv

    return beta_hat, residuals, sigma2, var_beta
174
+
175
+
176
def apply_lambda(vcv: np.ndarray, lambda_val: float) -> np.ndarray:
    """Apply Pagel's lambda to a VCV matrix (scale off-diagonals, keep diagonal).

    Parameters
    ----------
    vcv : square VCV matrix (n, n); any array-like is accepted
    lambda_val : multiplier applied to the off-diagonal entries

    Returns
    -------
    A new array; the input is never modified.
    """
    # asarray is a no-op for ndarrays and lets nested lists work too.
    vcv = np.asarray(vcv)
    diag_vals = np.diag(vcv).copy()
    # The multiplication allocates a fresh array, so filling its diagonal
    # below cannot touch the caller's matrix.
    vcv_lam = vcv * lambda_val
    np.fill_diagonal(vcv_lam, diag_vals)
    return vcv_lam
@@ -248,3 +248,119 @@ def compute_node_x_cladogram(tree, parent_map):
248
248
  else:
249
249
  node_x[cid] = float(node_depth.get(cid, 0)) * step_size
250
250
  return node_x
251
+
252
+
253
+ # ---- Shared rectangular tree plotting utilities ----
254
+
255
+
256
def build_parent_map(tree):
    """Return a lookup from ``id(child)`` to that child's parent node."""
    return {
        id(kid): node
        for node in tree.find_clades(order="preorder")
        for kid in node.clades
    }
263
+
264
+
265
def compute_node_positions(tree, parent_map, cladogram=False):
    """Lay out a tree for rectangular plotting.

    Tips receive consecutive integer y-values in ``get_terminals()`` order;
    each internal node is placed at the mean y of its already-placed
    children (0.0 if it has none). X-values are cumulative branch lengths
    from the root, or come from ``compute_node_x_cladogram`` when
    ``cladogram`` is true.

    Parameters
    ----------
    tree : Bio.Phylo tree
    parent_map : dict from build_parent_map()
    cladogram : if True, use equal-depth x-positions (tips aligned)

    Returns
    -------
    (node_x, node_y) : dicts mapping node id -> coordinate
    """
    import numpy as np

    root = tree.root
    node_y = {id(tip): row for row, tip in enumerate(tree.get_terminals())}

    if cladogram:
        node_x = compute_node_x_cladogram(tree, parent_map)
    else:
        node_x = {}
        for clade in tree.find_clades(order="preorder"):
            key = id(clade)
            if clade == root:
                node_x[key] = 0.0
                continue
            if key not in parent_map:
                continue
            # Parents are visited first in preorder, so their x is known.
            parent_x = node_x.get(id(parent_map[key]), 0.0)
            node_x[key] = parent_x + (clade.branch_length or 0.0)

    # Postorder guarantees children are placed before their parent.
    for clade in tree.find_clades(order="postorder"):
        key = id(clade)
        if clade.is_terminal() or key in node_y:
            continue
        placed = [node_y[id(kid)] for kid in clade.clades if id(kid) in node_y]
        node_y[key] = float(np.mean(placed)) if placed else 0.0

    return node_x, node_y
310
+
311
+
312
def draw_tree_branches(
    ax, tree, node_x, node_y, parent_map,
    color="black", lw=1.5, vertical_color="black", vertical_lw=0.8,
):
    """Draw rectangular tree branches (horizontal + vertical connectors).

    For every non-root node with a known parent and known x-positions, a
    horizontal segment is drawn at the node's y from the parent's x to the
    node's x, followed by a vertical segment at the parent's x joining the
    parent's y to the node's y.

    ``color`` may be a fixed colour or a callable evaluated per branch:
        color=lambda clade: "red" if ... else "black"
    """
    root = tree.root
    colour_per_branch = callable(color)

    for clade in tree.find_clades(order="preorder"):
        child_key = id(clade)
        if clade == root or child_key not in parent_map:
            continue

        parent_key = id(parent_map[child_key])
        if parent_key not in node_x or child_key not in node_x:
            continue

        parent_x, child_x = node_x[parent_key], node_x[child_key]
        # Missing y-values fall back to 0 rather than skipping the branch.
        parent_y = node_y.get(parent_key, 0)
        child_y = node_y.get(child_key, 0)

        branch_color = color(clade) if colour_per_branch else color
        ax.plot([parent_x, child_x], [child_y, child_y], color=branch_color, lw=lw)
        ax.plot(
            [parent_x, parent_x], [parent_y, child_y],
            color=vertical_color, lw=vertical_lw,
        )
339
+
340
+
341
def draw_tip_labels(
    ax, tree, node_x, node_y, fontsize=9, offset_fraction=0.03,
):
    """Write each tip's name just to the right of its tip position.

    The horizontal gap is ``offset_fraction`` of the largest x-value in
    ``node_x`` (1.0 is used when ``node_x`` is empty). Nothing is drawn
    when ``fontsize`` is zero or negative.
    """
    leaves = list(tree.get_terminals())
    widest = max(node_x.values()) if node_x else 1.0
    gap = widest * offset_fraction

    if fontsize > 0:
        for leaf in leaves:
            leaf_key = id(leaf)
            ax.text(
                node_x[leaf_key] + gap,
                node_y[leaf_key],
                leaf.name,
                va="center",
                fontsize=fontsize,
            )
357
+
358
+
359
def cleanup_tree_axes(ax, show_xlabel=True):
    """Tidy an axis holding a rectangular tree plot.

    Removes the y ticks, hides the top, right and left spines, and labels
    the x axis "Branch length" unless ``show_xlabel`` is false.
    """
    ax.set_yticks([])
    for side in ("top", "right", "left"):
        ax.spines[side].set_visible(False)
    if not show_xlabel:
        return
    ax.set_xlabel("Branch length")
@@ -0,0 +1,133 @@
1
+ """
2
+ Shared trait file parsing utilities.
3
+
4
+ Provides a single implementation for parsing tab-delimited multi-trait
5
+ files with a header row, used across phylogenetic regression, signal,
6
+ ordination, path analysis, ANOVA, and other comparative methods.
7
+ """
8
+ import sys
9
+ from typing import Dict, List, Tuple
10
+
11
+ from ..errors import PhykitUserError
12
+
13
+
14
def parse_multi_trait_file(
    path: str,
    tree_tips: List[str],
    min_shared: int = 3,
    min_columns: int = 2,
) -> Tuple[List[str], Dict[str, List[float]]]:
    """Parse a tab-delimited multi-trait file with a header row.

    Format:
        taxon<tab>trait1<tab>trait2<tab>...
        species_A<tab>1.2<tab>3.4<tab>...

    Blank lines and lines starting with "#" are ignored; the first remaining
    line is the header. Taxa present on only one side (tree or file) are
    reported on stderr and dropped. A taxon listed more than once keeps the
    values of its last row, and a warning is written to stderr.

    Parameters
    ----------
    path : path to TSV file
    tree_tips : list of tip names from the tree
    min_shared : minimum shared taxa between tree and file (default 3)
    min_columns : minimum columns in header (default 2: taxon + 1 trait)

    Returns
    -------
    (trait_names, traits_dict) where traits_dict maps taxon -> [float values],
    restricted to taxa shared with the tree and ordered as in the file.

    Raises
    ------
    PhykitUserError if the file is missing, has no data rows, has too few
    header columns, has a row with the wrong number of columns or a
    non-numeric value, or shares fewer than ``min_shared`` taxa with the tree.
    """
    # Keep every retained line together with its 1-based position in the
    # file, so error messages point at the line the user actually sees even
    # when comments or blank lines precede it.
    rows = []
    try:
        with open(path) as f:
            for file_line_no, raw in enumerate(f, 1):
                text = raw.strip()
                if text and not text.startswith("#"):
                    rows.append((file_line_no, text))
    except FileNotFoundError as err:
        raise PhykitUserError(
            [
                f"{path} corresponds to no such file or directory.",
                "Please check filename and pathing",
            ],
            code=2,
        ) from err

    if len(rows) < 2:
        raise PhykitUserError(
            ["Multi-trait file must have a header row and at least one data row."],
            code=2,
        )

    # First retained line is the header
    header_parts = rows[0][1].split("\t")
    n_cols = len(header_parts)
    if n_cols < min_columns:
        raise PhykitUserError(
            [
                f"Header must have at least {min_columns} columns "
                f"(taxon + at least {min_columns - 1} trait(s)).",
            ],
            code=2,
        )
    trait_names = header_parts[1:]

    traits = {}
    for line_idx, line in rows[1:]:
        parts = line.split("\t")
        if len(parts) != n_cols:
            raise PhykitUserError(
                [
                    f"Line {line_idx} has {len(parts)} columns; expected {n_cols}.",
                    f"Each line should have: taxon_name<tab>"
                    f"{'<tab>'.join(['trait'] * len(trait_names))}",
                ],
                code=2,
            )
        taxon = parts[0]
        values = []
        for i, val_str in enumerate(parts[1:]):
            try:
                values.append(float(val_str))
            except ValueError as err:
                raise PhykitUserError(
                    [
                        f"Non-numeric trait value '{val_str}' for taxon '{taxon}' "
                        f"(trait '{trait_names[i]}') on line {line_idx}.",
                    ],
                    code=2,
                ) from err
        if taxon in traits:
            # The later row still wins, but the overwrite is no longer silent.
            print(
                f"Warning: taxon '{taxon}' appears more than once in trait "
                f"file; using values from line {line_idx}.",
                file=sys.stderr,
            )
        traits[taxon] = values

    tree_tip_set = set(tree_tips)
    trait_taxa_set = set(traits.keys())
    shared = tree_tip_set & trait_taxa_set

    tree_only = tree_tip_set - trait_taxa_set
    trait_only = trait_taxa_set - tree_tip_set

    if tree_only:
        print(
            f"Warning: {len(tree_only)} taxa in tree but not in trait file: "
            f"{', '.join(sorted(tree_only))}",
            file=sys.stderr,
        )
    if trait_only:
        print(
            f"Warning: {len(trait_only)} taxa in trait file but not in tree: "
            f"{', '.join(sorted(trait_only))}",
            file=sys.stderr,
        )

    if len(shared) < min_shared:
        raise PhykitUserError(
            [
                f"Only {len(shared)} shared taxa between tree and trait file.",
                f"At least {min_shared} shared taxa are required.",
            ],
            code=2,
        )

    # Walk the parsed dict (file order) instead of the set so the result's
    # key order is the same on every run.
    filtered = {
        taxon: values for taxon, values in traits.items() if taxon in shared
    }
    return trait_names, filtered
@@ -175,6 +175,8 @@ class Phykit:
175
175
  in an alignment
176
176
  taxon_groups (alias: tgroups; shared_taxa)
177
177
  - group tree or FASTA files by their taxon set
178
+ occupancy_filter (alias: occ_filter; filter_occupancy)
179
+ - filter alignments/trees by cross-file taxon occupancy
178
180
 
179
181
  Tree-based commands
180
182
  ===================
@@ -250,6 +252,8 @@ class Phykit:
250
252
  - fit phylogenetic GLM for binary (logistic) or count (Poisson) data
251
253
  phylo_anova (alias: panova; phylo_manova; pmanova)
252
254
  - phylogenetic ANOVA / MANOVA using RRPP (Adams & Collyer 2018)
255
+ phylo_path (alias: ppath; phylopath)
256
+ - phylogenetic path analysis (von Hardenberg & Gonzalez-Voyer 2013)
253
257
  phylo_logistic (alias: phylo_logreg; plogreg)
254
258
  - fit phylogenetic logistic regression (Ives & Garland 2010)
255
259
  stochastic_character_map (alias: simmap; scm)
@@ -2089,6 +2093,65 @@ class Phykit:
2089
2093
  _add_json_argument(parser)
2090
2094
  _run_service(parser, argv, PhyloAnova)
2091
2095
 
2096
+ @staticmethod
2097
+ def phylo_path(argv):
2098
+ parser = _new_parser(
2099
+ description=textwrap.dedent(
2100
+ f"""\
2101
+ {help_header}
2102
+
2103
+ Phylogenetic path analysis (von Hardenberg & Gonzalez-Voyer
2104
+ 2013). Compare competing causal DAGs using d-separation
2105
+ tests via PGLS with Pagel's lambda, rank models by CICc,
2106
+ and estimate model-averaged path coefficients.
2107
+
2108
+ Aliases:
2109
+ phylo_path, ppath, phylopath
2110
+ Command line interfaces:
2111
+ pk_phylo_path, pk_ppath, pk_phylopath
2112
+
2113
+ Usage:
2114
+ phykit phylo_path -t <tree> --traits <traits_file>
2115
+ --models <models_file> [--best-only]
2116
+ [--plot-output <file>] [--csv <file>] [--json]
2117
+
2118
+ Options
2119
+ =====================================================
2120
+ -t/--tree species tree (required)
2121
+
2122
+ --traits TSV file with taxon and
2123
+ continuous trait columns
2124
+ (required)
2125
+
2126
+ --models model definition file
2127
+ with candidate DAGs
2128
+ (required). Format:
2129
+ name: A->B, B->C, ...
2130
+
2131
+ --best-only report only best model
2132
+ coefficients (default:
2133
+ model averaging)
2134
+
2135
+ --plot-output output DAG plot file
2136
+
2137
+ --csv output CSV with model
2138
+ comparison and path
2139
+ coefficients
2140
+
2141
+ --json output results as JSON
2142
+ """
2143
+ ),
2144
+ )
2145
+ parser.add_argument("-t", "--tree", type=str, required=True, help=SUPPRESS, metavar="")
2146
+ parser.add_argument("--traits", type=str, required=True, help=SUPPRESS, metavar="")
2147
+ parser.add_argument("--models", type=str, required=True, help=SUPPRESS, metavar="")
2148
+ parser.add_argument("--best-only", action="store_true", help=SUPPRESS)
2149
+ parser.add_argument("--plot-output", type=str, default=None, help=SUPPRESS, metavar="")
2150
+ parser.add_argument("--csv", type=str, default=None, help=SUPPRESS, metavar="")
2151
+ add_plot_arguments(parser)
2152
+ _add_json_argument(parser)
2153
+ _run_service(parser, argv, PhyloPath)
2154
+
2092
2155
  @staticmethod
2093
2156
  def alignment_subsample(argv):
2094
2157
  parser = _new_parser(
@@ -8545,6 +8608,69 @@ class Phykit:
8545
8608
  _add_json_argument(parser)
8546
8609
  _run_service(parser, argv, TaxonGroups)
8547
8610
 
8611
+ @staticmethod
8612
+ def occupancy_filter(argv):
8613
+ parser = _new_parser(
8614
+ description=textwrap.dedent(
8615
+ f"""\
8616
+ {help_header}
8617
+
8618
+ Filter alignments and/or trees by cross-file taxon
8619
+ occupancy. Counts how many files each taxon appears in
8620
+ and retains only taxa meeting a minimum threshold.
8621
+ Outputs filtered copies of each input file.
8622
+
8623
+ For FASTA files, removes sequences of filtered taxa.
8624
+ For tree files, prunes tips of filtered taxa.
8625
+
8626
+ Aliases:
8627
+ occupancy_filter, occ_filter, filter_occupancy
8628
+ Command line interfaces:
8629
+ pk_occupancy_filter, pk_occ_filter, pk_filter_occupancy
8630
+
8631
+ Usage:
8632
+ phykit occupancy_filter -l <file_list>
8633
+ [-f/--format fasta|trees] [-t/--threshold <int>]
8634
+ [-o/--output-dir <dir>] [--suffix <str>] [--json]
8635
+
8636
+ Options
8637
+ =====================================================
8638
+ -l/--list file listing paths to
8639
+ alignment or tree files,
8640
+ one per line (required)
8641
+
8642
+ -f/--format input file format: fasta
8643
+ or trees (default: fasta)
8644
+
8645
+ -t/--threshold minimum occupancy to retain
8646
+ a taxon. Values between 0
8647
+ and 1 are treated as a
8648
+ fraction (e.g., 0.5 = 50%
8649
+ of files). Values >= 1 are
8650
+ treated as an absolute
8651
+ count. (default: 0.5)
8652
+
8653
+ -o/--output-dir directory for filtered
8654
+ output files (default:
8655
+ same directory as input)
8656
+
8657
+ --suffix suffix added to output
8658
+ filenames before the
8659
+ extension (default:
8660
+ ".filtered")
8661
+
8662
+ --json output results as JSON
8663
+ """
8664
+ ),
8665
+ )
8666
+ parser.add_argument("-l", "--list", type=str, required=True, help=SUPPRESS, metavar="")
8667
+ parser.add_argument("-f", "--format", type=str, default="fasta", choices=["fasta", "trees"], help=SUPPRESS, metavar="")
8668
+ parser.add_argument("-t", "--threshold", type=float, default=0.5, help=SUPPRESS, metavar="")
8669
+ parser.add_argument("-o", "--output-dir", type=str, default=None, help=SUPPRESS, metavar="")
8670
+ parser.add_argument("--suffix", type=str, default=".filtered", help=SUPPRESS, metavar="")
8671
+ _add_json_argument(parser)
8672
+ _run_service(parser, argv, OccupancyFilter)
8673
+
8548
8674
  ### Helper commands
8549
8675
  @staticmethod
8550
8676
  def create_concatenation_matrix(argv):
@@ -8847,6 +8973,10 @@ def phylo_anova(argv=None):
8847
8973
  Phykit.phylo_anova(sys.argv[1:])
8848
8974
 
8849
8975
 
8976
def phylo_path(argv=None):
    """Entry point for the phylo_path command.

    Uses ``argv`` when given; falls back to ``sys.argv[1:]`` otherwise, so
    calling it with no arguments behaves as before.
    """
    Phykit.phylo_path(sys.argv[1:] if argv is None else argv)
8978
+
8979
+
8850
8980
  def dfoil(argv=None):
8851
8981
  Phykit.dfoil(sys.argv[1:])
8852
8982
 
@@ -9160,3 +9290,7 @@ def trait_rate_map(argv=None):
9160
9290
 
9161
9291
  def taxon_groups(argv=None):
9162
9292
  Phykit.taxon_groups(sys.argv[1:])
9293
+
9294
+
9295
def occupancy_filter(argv=None):
    """Entry point for the occupancy_filter command.

    Uses ``argv`` when given; falls back to ``sys.argv[1:]`` otherwise, so
    calling it with no arguments behaves as before.
    """
    Phykit.occupancy_filter(sys.argv[1:] if argv is None else argv)
@@ -45,7 +45,9 @@ RelativeCompositionVariabilityTaxon = _LazyServiceFactory("phykit.services.align
45
45
  RenameFastaEntries = _LazyServiceFactory("phykit.services.alignment.rename_fasta_entries", "RenameFastaEntries")
46
46
  SumOfPairsScore = _LazyServiceFactory("phykit.services.alignment.sum_of_pairs_score", "SumOfPairsScore")
47
47
  PhyloAnova = _LazyServiceFactory("phykit.services.tree.phylo_anova", "PhyloAnova")
48
+ PhyloPath = _LazyServiceFactory("phykit.services.tree.phylo_path", "PhyloPath")
48
49
  PhyloGwas = _LazyServiceFactory("phykit.services.alignment.phylo_gwas", "PhyloGwas")
50
+ OccupancyFilter = _LazyServiceFactory("phykit.services.alignment.occupancy_filter", "OccupancyFilter")
49
51
  TaxonGroups = _LazyServiceFactory("phykit.services.alignment.taxon_groups", "TaxonGroups")
50
52
  VariableSites = _LazyServiceFactory("phykit.services.alignment.variable_sites", "VariableSites")
51
53