phykit 2.1.30__tar.gz → 2.1.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. {phykit-2.1.30 → phykit-2.1.32}/PKG-INFO +1 -1
  2. {phykit-2.1.30 → phykit-2.1.32}/phykit/cli_registry.py +3 -0
  3. {phykit-2.1.30 → phykit-2.1.32}/phykit/phykit.py +130 -0
  4. {phykit-2.1.30 → phykit-2.1.32}/phykit/service_factories.py +2 -0
  5. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/__init__.py +2 -0
  6. phykit-2.1.32/phykit/services/tree/discordance_asymmetry.py +520 -0
  7. phykit-2.1.32/phykit/services/tree/evo_tempo_map.py +658 -0
  8. phykit-2.1.32/phykit/version.py +1 -0
  9. {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/PKG-INFO +1 -1
  10. {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/SOURCES.txt +2 -0
  11. phykit-2.1.30/phykit/version.py +0 -1
  12. {phykit-2.1.30 → phykit-2.1.32}/LICENSE.md +0 -0
  13. {phykit-2.1.30 → phykit-2.1.32}/README.md +0 -0
  14. {phykit-2.1.30 → phykit-2.1.32}/phykit/__init__.py +0 -0
  15. {phykit-2.1.30 → phykit-2.1.32}/phykit/__main__.py +0 -0
  16. {phykit-2.1.30 → phykit-2.1.32}/phykit/errors.py +0 -0
  17. {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/__init__.py +0 -0
  18. {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/boolean_argument_parsing.py +0 -0
  19. {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/caching.py +0 -0
  20. {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/files.py +0 -0
  21. {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/json_output.py +0 -0
  22. {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/parallel.py +0 -0
  23. {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/stats_summary.py +0 -0
  24. {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/streaming.py +0 -0
  25. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/__init__.py +0 -0
  26. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/__init__.py +0 -0
  27. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_entropy.py +0 -0
  28. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_length.py +0 -0
  29. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  30. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
  31. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_recoding.py +0 -0
  32. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/base.py +0 -0
  33. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/column_score.py +0 -0
  34. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/composition_per_taxon.py +0 -0
  35. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  36. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  37. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/dna_threader.py +0 -0
  38. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  39. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/faidx.py +0 -0
  40. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/gc_content.py +0 -0
  41. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/mask_alignment.py +0 -0
  42. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
  43. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/pairwise_identity.py +0 -0
  44. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  45. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/plot_alignment_qc.py +0 -0
  46. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/rcv.py +0 -0
  47. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/rcvt.py +0 -0
  48. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  49. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  50. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/variable_sites.py +0 -0
  51. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/base.py +0 -0
  52. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/ancestral_reconstruction.py +0 -0
  53. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/base.py +0 -0
  54. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/bipartition_support_stats.py +0 -0
  55. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/branch_length_multiplier.py +0 -0
  56. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/collapse_branches.py +0 -0
  57. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/concordance_asr.py +0 -0
  58. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/consensus_network.py +0 -0
  59. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/consensus_tree.py +0 -0
  60. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/cont_map.py +0 -0
  61. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/cophylo.py +0 -0
  62. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  63. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/density_map.py +0 -0
  64. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/dvmc.py +0 -0
  65. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/evolutionary_rate.py +0 -0
  66. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/fit_continuous.py +0 -0
  67. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  68. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/internal_branch_stats.py +0 -0
  69. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/internode_labeler.py +0 -0
  70. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  71. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/lb_score.py +0 -0
  72. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/ltt.py +0 -0
  73. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/monophyly_check.py +0 -0
  74. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  75. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/network_signal.py +0 -0
  76. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/ou_shift_detection.py +0 -0
  77. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/ouwie.py +0 -0
  78. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/patristic_distances.py +0 -0
  79. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phenogram.py +0 -0
  80. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylogenetic_glm.py +0 -0
  81. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylogenetic_ordination.py +0 -0
  82. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylogenetic_regression.py +0 -0
  83. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylogenetic_signal.py +0 -0
  84. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylomorphospace.py +0 -0
  85. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/polytomy_test.py +0 -0
  86. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/print_tree.py +0 -0
  87. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/prune_tree.py +0 -0
  88. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/quartet_network.py +0 -0
  89. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/rate_heterogeneity.py +0 -0
  90. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/relative_rate_test.py +0 -0
  91. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/rename_tree_tips.py +0 -0
  92. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/rf_distance.py +0 -0
  93. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/root_tree.py +0 -0
  94. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/saturation.py +0 -0
  95. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/spurious_sequence.py +0 -0
  96. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/stochastic_character_map.py +0 -0
  97. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/terminal_branch_stats.py +0 -0
  98. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/threshold_model.py +0 -0
  99. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/tip_labels.py +0 -0
  100. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  101. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  102. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/total_tree_length.py +0 -0
  103. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/treeness.py +0 -0
  104. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/treeness_over_rcv.py +0 -0
  105. {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/vcv_utils.py +0 -0
  106. {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/dependency_links.txt +0 -0
  107. {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/entry_points.txt +0 -0
  108. {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/requires.txt +0 -0
  109. {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/top_level.txt +0 -0
  110. {phykit-2.1.30 → phykit-2.1.32}/setup.cfg +0 -0
  111. {phykit-2.1.30 → phykit-2.1.32}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phykit
3
- Version: 2.1.30
3
+ Version: 2.1.32
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -154,6 +154,9 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
154
154
  "thresh_bayes": "threshold_model",
155
155
  "toverr": "treeness_over_rcv",
156
156
  "tor": "treeness_over_rcv",
157
+ "etm": "evo_tempo_map",
158
+ "disc_asym": "discordance_asymmetry",
159
+ "da": "discordance_asymmetry",
157
160
  # Helper aliases
158
161
  "create_concat": "create_concatenation_matrix",
159
162
  "cc": "create_concatenation_matrix",
@@ -4851,6 +4851,136 @@ class Phykit:
4851
4851
  _add_json_argument(parser)
4852
4852
  _run_service(parser, argv, TreenessOverRCV)
4853
4853
 
4854
@staticmethod
def evo_tempo_map(argv):
    """Build the ``evo_tempo_map`` argument parser and hand off to the service.

    The parser requires ``-t/--tree`` and ``-g/--gene-trees``, and accepts
    an optional ``--plot`` path (stored as ``plot_output``), a
    ``-v/--verbose`` flag, and whatever ``_add_json_argument`` registers.
    ``argv`` is passed unchanged to ``_run_service`` together with the
    parser and the ``EvoTempoMap`` service factory.
    """
    # NOTE(review): whitespace inside the help text below is reproduced as it
    # appears in this diff rendering, which drops leading indentation and
    # column alignment -- confirm against phykit.py before relying on layout.
    parser = _new_parser(
        description=textwrap.dedent(
            f"""\
            {help_header}

            Detect rate-topology associations by comparing branch length
            distributions between concordant and discordant gene trees at
            each species tree branch.

            Under the multispecies coalescent, discordant gene trees should
            have shorter internal branches near the discordant node. Deviations
            suggest substitution rate heterogeneity correlated with topology
            (adaptive evolution, different selective pressures, or model
            misspecification).

            For each internal branch of the species tree, gene trees are
            classified as concordant or discordant via bipartition matching.
            The homologous branch length is extracted from each gene tree
            and the two groups are compared using Mann-Whitney U and
            permutation tests. P-values are corrected for multiple testing
            using Benjamini-Hochberg FDR.

            A global treeness (internal/total branch length ratio) comparison
            is also reported.

            Aliases:
            evo_tempo_map, etm
            Command line interfaces:
            pk_evo_tempo_map, pk_etm

            Usage:
            phykit evo_tempo_map -t/--tree <tree> -g/--gene-trees <gene_trees>
            [--plot <output>] [-v/--verbose] [--json]

            Options
            =====================================================
            -t/--tree a species tree file

            -g/--gene-trees multi-Newick file of gene trees
            with branch lengths

            --plot optional output path for
            box/strip plot (PNG)

            -v/--verbose print per-gene-tree details

            --json optional argument to output
            results as JSON
            """
        ),
    )
    # Every option hides its argparse help (help=SUPPRESS); the description
    # above is the only usage text the parser carries.
    parser.add_argument(
        "-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-g", "--gene-trees", type=str, required=True, help=SUPPRESS, metavar=""
    )
    # dest="plot_output" so the parsed value is exposed as args.plot_output.
    parser.add_argument(
        "--plot", dest="plot_output", type=str, required=False,
        default=None, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    _add_json_argument(parser)
    # NOTE(review): _run_service is defined outside this excerpt; presumably it
    # parses argv with the parser and runs the EvoTempoMap service -- confirm.
    _run_service(parser, argv, EvoTempoMap)
4922
+
4923
@staticmethod
def discordance_asymmetry(argv):
    """Build the ``discordance_asymmetry`` argument parser and hand off to the service.

    The parser requires ``-t/--tree`` and ``-g/--gene-trees``, and accepts
    an optional ``--plot`` path (stored as ``plot_output``), a
    ``-v/--verbose`` flag, and whatever ``_add_json_argument`` registers.
    ``argv`` is passed unchanged to ``_run_service`` together with the
    parser and the ``DiscordanceAsymmetry`` service factory.
    """
    # NOTE(review): whitespace inside the help text below is reproduced as it
    # appears in this diff rendering, which drops leading indentation and
    # column alignment -- confirm against phykit.py before relying on layout.
    parser = _new_parser(
        description=textwrap.dedent(
            f"""\
            {help_header}

            Test whether the two discordant NNI alternative topologies
            at each species tree branch are equally frequent, detecting
            gene flow direction from asymmetric discordance.

            Under incomplete lineage sorting (ILS) alone, the two minor
            NNI alternatives (gDF1 and gDF2) should be equally frequent.
            When they are significantly asymmetric, it suggests
            introgression or gene flow between specific lineages.

            For each internal branch, a two-sided binomial test (H0:
            P(alt1) = 0.5) is applied. P-values are corrected for
            multiple testing using Benjamini-Hochberg FDR.

            Aliases:
            discordance_asymmetry, disc_asym, da
            Command line interfaces:
            pk_discordance_asymmetry, pk_disc_asym, pk_da

            Usage:
            phykit discordance_asymmetry -t/--tree <tree> -g/--gene-trees <gene_trees>
            [--plot <output>] [-v/--verbose] [--json]

            Options
            =====================================================
            -t/--tree a species tree file

            -g/--gene-trees multi-Newick file of gene trees

            --plot optional output path for
            asymmetry phylogram (PNG)

            -v/--verbose print per-branch details

            --json optional argument to output
            results as JSON
            """
        ),
    )
    # Every option hides its argparse help (help=SUPPRESS); the description
    # above is the only usage text the parser carries.
    parser.add_argument(
        "-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-g", "--gene-trees", type=str, required=True, help=SUPPRESS, metavar=""
    )
    # dest="plot_output" so the parsed value is exposed as args.plot_output.
    parser.add_argument(
        "--plot", dest="plot_output", type=str, required=False,
        default=None, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    _add_json_argument(parser)
    # NOTE(review): _run_service is defined outside this excerpt; presumably it
    # parses argv with the parser and runs the DiscordanceAsymmetry service --
    # confirm.
    _run_service(parser, argv, DiscordanceAsymmetry)
4983
+
4854
4984
  ### Helper commands
4855
4985
  @staticmethod
4856
4986
  def create_concatenation_matrix(argv):
@@ -96,6 +96,8 @@ TipToTipNodeDistance = _LazyServiceFactory("phykit.services.tree.tip_to_tip_node
96
96
  TotalTreeLength = _LazyServiceFactory("phykit.services.tree.total_tree_length", "TotalTreeLength")
97
97
  Treeness = _LazyServiceFactory("phykit.services.tree.treeness", "Treeness")
98
98
  TreenessOverRCV = _LazyServiceFactory("phykit.services.tree.treeness_over_rcv", "TreenessOverRCV")
99
+ EvoTempoMap = _LazyServiceFactory("phykit.services.tree.evo_tempo_map", "EvoTempoMap")
100
+ DiscordanceAsymmetry = _LazyServiceFactory("phykit.services.tree.discordance_asymmetry", "DiscordanceAsymmetry")
99
101
 
100
102
  SERVICE_FACTORIES: Dict[str, _LazyServiceFactory] = {
101
103
  name: value
@@ -9,7 +9,9 @@ _EXPORTS = {
9
9
  "ConsensusNetwork": "consensus_network",
10
10
  "ConsensusTree": "consensus_tree",
11
11
  "DVMC": "dvmc",
12
+ "DiscordanceAsymmetry": "discordance_asymmetry",
12
13
  "EvolutionaryRate": "evolutionary_rate",
14
+ "EvoTempoMap": "evo_tempo_map",
13
15
  "HiddenParalogyCheck": "hidden_paralogy_check",
14
16
  "InternalBranchStats": "internal_branch_stats",
15
17
  "InternodeLabeler": "internode_labeler",
@@ -0,0 +1,520 @@
1
+ from io import StringIO
2
+ from pathlib import Path
3
+ from typing import Dict, List
4
+
5
+ from Bio import Phylo
6
+ from scipy.stats import binomtest
7
+
8
+ from .base import Tree
9
+ from ...helpers.json_output import print_json
10
+ from ...errors import PhykitUserError
11
+
12
+
13
+ class DiscordanceAsymmetry(Tree):
14
def __init__(self, args) -> None:
    """Set up the service from parsed command-line arguments.

    The species tree path is handed to the base-class initialiser; the
    remaining options (gene tree list, verbosity, JSON flag, plot path)
    are stored on the instance.
    """
    options = self.process_args(args)
    super().__init__(tree_file_path=options["tree_file_path"])
    # Copy the remaining options onto the instance, in declaration order.
    for attr in ("gene_trees_path", "verbose", "json_output", "plot_output"):
        setattr(self, attr, options[attr])
21
+
22
def process_args(self, args) -> Dict:
    """Translate the argparse namespace into this service's option dict.

    ``tree`` and ``gene_trees`` must be present on ``args``; ``verbose``,
    ``json`` and ``plot_output`` are optional and fall back to ``False``,
    ``False`` and ``None`` respectively.
    """
    return {
        "tree_file_path": args.tree,
        "gene_trees_path": args.gene_trees,
        "verbose": getattr(args, "verbose", False),
        "json_output": getattr(args, "json", False),
        "plot_output": getattr(args, "plot_output", None),
    }
30
+
31
def run(self) -> None:
    """Run the full analysis: count topologies, test, correct, report.

    For every species-tree branch returned by ``_count_topologies`` a
    binomial asymmetry test is run; p-values of testable branches are
    FDR-corrected together, results are printed as text or JSON, and a
    plot is written when a plot path was supplied.
    """
    species_tree = self.read_tree_file()
    gene_trees = self._parse_gene_trees(self.gene_trees_path)
    counts_by_branch = self._count_topologies(species_tree, gene_trees)

    # One result row per branch, in sorted branch-label order.
    branch_results = []
    for label in sorted(counts_by_branch):
        counts = counts_by_branch[label]
        row = {
            "split": counts["split"],
            "n_concordant": counts["n_concordant"],
            "n_alt1": counts["n_alt1"],
            "n_alt2": counts["n_alt2"],
        }
        row.update(self._test_asymmetry(counts["n_alt1"], counts["n_alt2"]))
        branch_results.append(row)

    # Only branches with at least one discordant tree have a p-value and
    # take part in the multiple-testing correction.
    testable = [
        position
        for position, row in enumerate(branch_results)
        if row["p_value"] is not None
    ]
    adjusted = self._fdr([branch_results[position]["p_value"] for position in testable])

    # Untestable branches keep fdr_p = None; testable ones are overwritten.
    for row in branch_results:
        row["fdr_p"] = None
    for position, fdr_p in zip(testable, adjusted):
        branch_results[position]["fdr_p"] = fdr_p

    n_significant = 0
    for row in branch_results:
        if row["fdr_p"] is not None and row["fdr_p"] < 0.05:
            n_significant += 1
    summary = {
        "n_gene_trees": len(gene_trees),
        "n_branches_tested": len(testable),
        "n_significant_fdr05": n_significant,
    }

    report = self._output_json if self.json_output else self._output_text
    report(branch_results, summary)

    if self.plot_output:
        self._plot(species_tree, branch_results, self.plot_output)
86
+
87
+ # ------------------------------------------------------------------
88
+ # Output methods
89
+ # ------------------------------------------------------------------
90
+
91
+ def _output_text(self, branch_results, summary) -> None:
92
+ try:
93
+ header = (
94
+ f"{'branch':<30}"
95
+ f"{'n_conc':>8}"
96
+ f"{'n_alt1':>8}"
97
+ f"{'n_alt2':>8}"
98
+ f"{'asym_ratio':>12}"
99
+ f"{'binom_p':>12}"
100
+ f"{'fdr_p':>12}"
101
+ f"{'gene_flow':>12}"
102
+ )
103
+ print(header)
104
+ print("-" * len(header))
105
+
106
+ for entry in branch_results:
107
+ branch_label = ",".join(entry["split"])
108
+ asym = (
109
+ f"{entry['asymmetry_ratio']:.3f}"
110
+ if entry["asymmetry_ratio"] is not None
111
+ else "NA"
112
+ )
113
+ binom_p = (
114
+ f"{entry['p_value']:.4f}"
115
+ if entry["p_value"] is not None
116
+ else "NA"
117
+ )
118
+ fdr_p = (
119
+ f"{entry['fdr_p']:.4f}"
120
+ if entry["fdr_p"] is not None
121
+ else "NA"
122
+ )
123
+ gene_flow = "-"
124
+ if (entry["fdr_p"] is not None and entry["fdr_p"] < 0.05
125
+ and entry["favored_alt"] is not None):
126
+ gene_flow = entry["favored_alt"]
127
+ print(
128
+ f"{branch_label:<30}"
129
+ f"{entry['n_concordant']:>8}"
130
+ f"{entry['n_alt1']:>8}"
131
+ f"{entry['n_alt2']:>8}"
132
+ f"{asym:>12}"
133
+ f"{binom_p:>12}"
134
+ f"{fdr_p:>12}"
135
+ f"{gene_flow:>12}"
136
+ )
137
+
138
+ print("---")
139
+ print(
140
+ f"Summary: {summary['n_branches_tested']} branches tested, "
141
+ f"{summary['n_significant_fdr05']} significant (FDR<0.05)"
142
+ )
143
+
144
+ if self.verbose:
145
+ print()
146
+ for entry in branch_results:
147
+ branch_label = ",".join(entry["split"])
148
+ total = entry["n_concordant"] + entry["n_alt1"] + entry["n_alt2"]
149
+ gcf = entry["n_concordant"] / total if total > 0 else 1.0
150
+ print(f"Branch: {branch_label}")
151
+ print(
152
+ f" gCF={gcf:.3f} gDF1={entry['n_alt1']}/{total} "
153
+ f"gDF2={entry['n_alt2']}/{total}"
154
+ )
155
+ except BrokenPipeError:
156
+ pass
157
+
158
def _output_json(self, branch_results, summary) -> None:
    """Emit the per-branch results and the summary as one JSON document."""
    payload = {"branches": branch_results, "summary": summary}
    print_json(payload)
164
+
165
def _plot(self, species_tree, branch_results, output_path) -> None:
    """Save a phylogram of the species tree shaded by asymmetry ratio.

    Branches leading to internal nodes that have a result with a defined
    asymmetry ratio are drawn thicker and coloured on a 0.5-1.0 scale;
    all other branches are gray. Each matched internal node is annotated
    with its gCF, and nodes significant at FDR < 0.05 get a red star.
    The figure is written to ``output_path`` at 300 dpi.
    """
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    from matplotlib.colors import Normalize
    import numpy as np

    # Results indexed by their comma-joined split label.
    results_by_label = {",".join(row["split"]): row for row in branch_results}

    parent_of = self._build_parent_map(species_tree)
    leaves = list(species_tree.get_terminals())
    every_taxon = frozenset(leaf.name for leaf in leaves)
    root = species_tree.root

    # Vertical layout: leaves sit on consecutive integer rows.
    y_at = {id(leaf): row_index for row_index, leaf in enumerate(leaves)}

    # Horizontal layout: cumulative branch length from the root
    # (a missing or zero branch length contributes 0).
    x_at = {}
    for clade in species_tree.find_clades(order="preorder"):
        if clade == root:
            x_at[id(clade)] = 0.0
        elif id(clade) in parent_of:
            above = parent_of[id(clade)]
            x_at[id(clade)] = x_at.get(id(above), 0.0) + (clade.branch_length or 0.0)

    # Internal nodes are centred on the mean row of their placed children.
    for clade in species_tree.find_clades(order="postorder"):
        if clade.is_terminal() or id(clade) in y_at:
            continue
        child_rows = [y_at[id(kid)] for kid in clade.clades if id(kid) in y_at]
        y_at[id(clade)] = np.mean(child_rows) if child_rows else 0.0

    # Attach a result row to every internal node whose split label matches.
    result_at = {}
    for clade in species_tree.find_clades(order="preorder"):
        if clade.is_terminal():
            continue
        below = frozenset(t.name for t in clade.get_terminals())
        if len(below) <= len(every_taxon) - len(below):
            label_taxa = sorted(below)
        else:
            label_taxa = sorted(every_taxon - below)
        label = ",".join(label_taxa)
        if label in results_by_label:
            result_at[id(clade)] = results_by_label[label]

    # Colour scale: 0.5 (symmetric, blue end) to 1.0 (asymmetric, red end).
    cmap = plt.cm.RdYlBu_r
    norm = Normalize(vmin=0.5, vmax=1.0)

    fig, ax = plt.subplots(figsize=(10, max(4, len(leaves) * 0.4)))

    # Branches: a horizontal segment per clade plus its vertical connector.
    for clade in species_tree.find_clades(order="preorder"):
        if clade == root or id(clade) not in parent_of:
            continue
        above = parent_of[id(clade)]
        if id(above) not in x_at or id(clade) not in x_at:
            continue

        x_parent = x_at[id(above)]
        x_child = x_at[id(clade)]
        y_parent = y_at.get(id(above), 0)
        y_child = y_at.get(id(clade), 0)

        stroke, width = "gray", 2
        if id(clade) in result_at:
            ratio = result_at[id(clade)]["asymmetry_ratio"]
            if ratio is not None:
                stroke, width = cmap(norm(ratio)), 3

        ax.plot([x_parent, x_child], [y_child, y_child], color=stroke, lw=width)
        ax.plot([x_parent, x_parent], [y_parent, y_child], color="gray", lw=1.5)

    # Node annotations: gCF label, plus a star on significant branches.
    for clade in species_tree.find_clades(order="preorder"):
        if clade.is_terminal() or id(clade) not in result_at:
            continue
        row = result_at[id(clade)]
        x = x_at.get(id(clade), 0)
        y = y_at.get(id(clade), 0)

        total = row["n_concordant"] + row["n_alt1"] + row["n_alt2"]
        gcf = row["n_concordant"] / total if total > 0 else 1.0
        ax.annotate(
            f"gCF={gcf:.2f}",
            (x, y),
            textcoords="offset points",
            xytext=(5, 5),
            fontsize=7,
        )

        significant = (
            row["fdr_p"] is not None
            and row["fdr_p"] < 0.05
            and row["favored_alt"] is not None
        )
        if significant:
            ax.scatter(x, y, s=100, c="red", marker="*", zorder=5)

    # Tip labels, nudged right by 2% of the tree depth.
    deepest = max(x_at.values()) if x_at else 0
    nudge = deepest * 0.02
    for leaf in leaves:
        ax.text(
            x_at[id(leaf)] + nudge, y_at[id(leaf)],
            leaf.name, va="center", fontsize=9,
        )

    # Colour bar for the asymmetry scale.
    mappable = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    mappable.set_array([])
    colorbar = fig.colorbar(mappable, ax=ax, pad=0.15)
    colorbar.set_label("Asymmetry ratio")

    ax.set_xlabel("Branch length (subs/site)")
    ax.set_yticks([])
    for side in ("top", "right", "left"):
        ax.spines[side].set_visible(False)
    ax.set_title("Discordance Asymmetry")
    fig.tight_layout()
    fig.savefig(output_path, dpi=300, bbox_inches="tight")
    plt.close(fig)
308
+
309
+ # ------------------------------------------------------------------
310
+ # Gene tree parsing
311
+ # ------------------------------------------------------------------
312
+
313
def _parse_gene_trees(self, path: str) -> list:
    """Read the gene trees listed in ``path``.

    Blank lines and lines starting with ``#`` are ignored. A line that
    starts with ``(`` is parsed as an inline Newick tree; any other line
    is treated as the path of a Newick file, resolved relative to the
    directory containing ``path``.

    Raises:
        PhykitUserError: if ``path`` itself, or a tree file it refers to,
            does not exist.
    """
    list_path = Path(path)
    try:
        raw_lines = list_path.read_text().splitlines()
    except FileNotFoundError:
        raise PhykitUserError(
            [
                f"{path} corresponds to no such file or directory.",
                "Please check filename and pathing",
            ],
            code=2,
        )

    entries = [line.strip() for line in raw_lines]
    entries = [entry for entry in entries if entry and not entry.startswith("#")]

    trees = []
    for entry in entries:
        if entry.startswith("("):
            trees.append(Phylo.read(StringIO(entry), "newick"))
            continue

        tree_path = list_path.parent / entry
        try:
            trees.append(Phylo.read(str(tree_path), "newick"))
        except FileNotFoundError:
            # Report a missing referenced tree the same way as a missing
            # list file instead of leaking a raw traceback to the user.
            raise PhykitUserError(
                [
                    f"{tree_path} corresponds to no such file or directory.",
                    "Please check filename and pathing",
                ],
                code=2,
            )
    return trees
334
+
335
+ # ------------------------------------------------------------------
336
+ # Bipartition extraction and topology counting
337
+ # ------------------------------------------------------------------
338
+
339
+ @staticmethod
340
+ def _canonical_split(taxa_side, all_taxa):
341
+ """Normalize a bipartition to canonical form.
342
+
343
+ Returns the smaller side as a frozenset; ties are broken
344
+ lexicographically.
345
+ """
346
+ complement = all_taxa - taxa_side
347
+ if len(taxa_side) < len(complement):
348
+ return frozenset(taxa_side)
349
+ elif len(taxa_side) > len(complement):
350
+ return frozenset(complement)
351
+ else:
352
+ return min(frozenset(taxa_side), frozenset(complement),
353
+ key=lambda s: sorted(s))
354
+
355
+ @staticmethod
356
+ def _build_parent_map(tree) -> Dict:
357
+ """Build a dict mapping child id -> parent clade."""
358
+ parent_map = {}
359
+ for clade in tree.find_clades(order="preorder"):
360
+ for child in clade.clades:
361
+ parent_map[id(child)] = clade
362
+ return parent_map
363
+
364
+ def _get_four_groups(self, tree, node, parent_map, all_taxa_fs):
365
+ """Identify the four subtree groups around an internal branch.
366
+
367
+ For the branch connecting *node* to its parent:
368
+ C1 = tips of node's first child
369
+ C2 = tips of node's second child (extra children merged for polytomies)
370
+ S = tips of node's sibling under parent
371
+ D = remaining tips (everything above parent)
372
+
373
+ Returns (C1, C2, S, D) as frozensets, or None if decomposition
374
+ is not possible (e.g., node is root, leaf, or has <2 children).
375
+ """
376
+ if node.is_terminal() or len(node.clades) < 2:
377
+ return None
378
+
379
+ C1 = frozenset(t.name for t in node.clades[0].get_terminals())
380
+ C2 = frozenset(t.name for t in node.clades[1].get_terminals())
381
+ # If node has >2 children (polytomy), merge extras into C2
382
+ for extra_child in node.clades[2:]:
383
+ C2 = C2 | frozenset(t.name for t in extra_child.get_terminals())
384
+
385
+ parent = parent_map.get(id(node))
386
+ if parent is None:
387
+ # node is root — no branch above it
388
+ return None
389
+
390
+ # Get siblings of node under parent
391
+ siblings = [c for c in parent.clades if id(c) != id(node)]
392
+ if not siblings:
393
+ return None
394
+
395
+ S = frozenset(t.name for t in siblings[0].get_terminals())
396
+ # D = everything else (other siblings + above parent)
397
+ D = all_taxa_fs - C1 - C2 - S
398
+
399
+ return C1, C2, S, D
400
+
401
def _count_topologies(self, species_tree, gene_trees) -> Dict:
    """Count concordant and NNI-alternative gene trees per species branch.

    Bipartitions are compared on the taxa shared between the species tree
    and the gene trees. For each internal species-tree branch with a valid
    four-group decomposition, every gene tree is checked for the
    concordant bipartition (C1+C2 | S+D) and the two NNI alternatives
    (S+C2 | C1+D and C1+S | C2+D).

    Branches whose decomposition has an empty group once restricted to the
    shared taxa are skipped: with an empty group the "alternatives" are not
    real NNI rearrangements and their counts would be meaningless.

    Returns a dict keyed by branch label (comma-joined sorted taxa of the
    smaller side of the split) with:
        split: list of sorted taxon names
        n_concordant: int
        n_alt1: int
        n_alt2: int
    """
    species_taxa = {t.name for t in species_tree.get_terminals()}
    gene_taxa = set()
    for gt in gene_trees:
        gene_taxa.update(t.name for t in gt.get_terminals())
    all_taxa_fs = frozenset(species_taxa & gene_taxa)
    n_taxa = len(all_taxa_fs)
    parent_map = self._build_parent_map(species_tree)

    # Bipartitions of every gene tree (topology only), restricted to the
    # shared taxa without mutating the gene tree objects. Splits with fewer
    # than two taxa on either side carry no topological information.
    gene_tree_splits = []
    for gt in gene_trees:
        splits = set()
        for clade in gt.get_nonterminals():
            tips = frozenset(
                t.name for t in clade.get_terminals()
                if t.name in all_taxa_fs
            )
            if len(tips) < 2 or n_taxa - len(tips) < 2:
                continue
            splits.add(self._canonical_split(tips, all_taxa_fs))
        gene_tree_splits.append(splits)

    result = {}
    for clade in species_tree.find_clades(order="preorder"):
        if clade.is_terminal():
            continue
        groups = self._get_four_groups(
            species_tree, clade, parent_map, all_taxa_fs
        )
        if groups is None:
            continue

        # Compare only on shared taxa, and require all four groups to be
        # present so both alternatives are genuine NNI rearrangements.
        C1, C2, S, D = (frozenset(group) & all_taxa_fs for group in groups)
        if not (C1 and C2 and S and D):
            continue

        concordant_bp = self._canonical_split(C1 | C2, all_taxa_fs)
        nni_alt1_bp = self._canonical_split(S | C2, all_taxa_fs)
        nni_alt2_bp = self._canonical_split(C1 | S, all_taxa_fs)

        n_concordant = n_alt1 = n_alt2 = 0
        for splits in gene_tree_splits:
            if concordant_bp in splits:
                n_concordant += 1
            if nni_alt1_bp in splits:
                n_alt1 += 1
            if nni_alt2_bp in splits:
                n_alt2 += 1

        # Label by the smaller side of the split below this node.
        node_tips = frozenset(t.name for t in clade.get_terminals())
        split_label = (
            sorted(node_tips)
            if len(node_tips) <= len(all_taxa_fs) - len(node_tips)
            else sorted(all_taxa_fs - node_tips)
        )
        branch_key = ",".join(split_label)
        result[branch_key] = dict(
            split=split_label,
            n_concordant=n_concordant,
            n_alt1=n_alt1,
            n_alt2=n_alt2,
        )
    return result
469
+
470
+ # ------------------------------------------------------------------
471
+ # Statistical testing
472
+ # ------------------------------------------------------------------
473
+
474
+ def _test_asymmetry(self, n_alt1: int, n_alt2: int) -> Dict:
475
+ """Run a two-sided binomial test on the two NNI alternatives.
476
+
477
+ Returns a dict with asymmetry_ratio, p_value, and favored_alt.
478
+ """
479
+ total = n_alt1 + n_alt2
480
+ if total == 0:
481
+ return dict(
482
+ asymmetry_ratio=None,
483
+ p_value=None,
484
+ favored_alt=None,
485
+ )
486
+
487
+ asymmetry_ratio = max(n_alt1, n_alt2) / total
488
+ result = binomtest(n_alt1, total, p=0.5, alternative='two-sided')
489
+ p_value = result.pvalue
490
+
491
+ if n_alt1 > n_alt2:
492
+ favored_alt = "alt1"
493
+ elif n_alt2 > n_alt1:
494
+ favored_alt = "alt2"
495
+ else:
496
+ favored_alt = None
497
+
498
+ return dict(
499
+ asymmetry_ratio=asymmetry_ratio,
500
+ p_value=p_value,
501
+ favored_alt=favored_alt,
502
+ )
503
+
504
+ @staticmethod
505
+ def _fdr(p_values: List[float]) -> List[float]:
506
+ """Benjamini-Hochberg FDR correction."""
507
+ n = len(p_values)
508
+ if n == 0:
509
+ return []
510
+ indexed = sorted(enumerate(p_values), key=lambda x: x[1])
511
+ corrected = [0.0] * n
512
+ prev = 1.0
513
+ for rank_minus_1 in range(n - 1, -1, -1):
514
+ orig_idx, p = indexed[rank_minus_1]
515
+ rank = rank_minus_1 + 1
516
+ adjusted = min(p * n / rank, prev)
517
+ adjusted = min(adjusted, 1.0)
518
+ corrected[orig_idx] = adjusted
519
+ prev = adjusted
520
+ return corrected