phykit 2.1.71__tar.gz → 2.1.73__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. {phykit-2.1.71 → phykit-2.1.73}/PKG-INFO +1 -1
  2. {phykit-2.1.71 → phykit-2.1.73}/phykit/cli_registry.py +2 -0
  3. {phykit-2.1.71 → phykit-2.1.73}/phykit/phykit.py +76 -2
  4. {phykit-2.1.71 → phykit-2.1.73}/phykit/service_factories.py +1 -0
  5. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/__init__.py +1 -0
  6. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/consensus_network.py +91 -11
  7. phykit-2.1.73/phykit/services/tree/phylo_impute.py +438 -0
  8. phykit-2.1.73/phykit/version.py +1 -0
  9. {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/PKG-INFO +1 -1
  10. {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/SOURCES.txt +1 -0
  11. {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/entry_points.txt +3 -0
  12. {phykit-2.1.71 → phykit-2.1.73}/setup.py +3 -0
  13. phykit-2.1.71/phykit/version.py +0 -1
  14. {phykit-2.1.71 → phykit-2.1.73}/LICENSE.md +0 -0
  15. {phykit-2.1.71 → phykit-2.1.73}/README.md +0 -0
  16. {phykit-2.1.71 → phykit-2.1.73}/phykit/__init__.py +0 -0
  17. {phykit-2.1.71 → phykit-2.1.73}/phykit/__main__.py +0 -0
  18. {phykit-2.1.71 → phykit-2.1.73}/phykit/errors.py +0 -0
  19. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/__init__.py +0 -0
  20. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/boolean_argument_parsing.py +0 -0
  21. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/caching.py +0 -0
  22. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/circular_layout.py +0 -0
  23. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/color_annotations.py +0 -0
  24. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/discrete_models.py +0 -0
  25. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/files.py +0 -0
  26. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/json_output.py +0 -0
  27. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/parallel.py +0 -0
  28. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/parsimony_utils.py +0 -0
  29. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/plot_config.py +0 -0
  30. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/quartet_utils.py +0 -0
  31. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/stats_summary.py +0 -0
  32. {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/streaming.py +0 -0
  33. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/__init__.py +0 -0
  34. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/__init__.py +0 -0
  35. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_entropy.py +0 -0
  36. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_length.py +0 -0
  37. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  38. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
  39. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_recoding.py +0 -0
  40. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_subsample.py +0 -0
  41. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/base.py +0 -0
  42. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/column_score.py +0 -0
  43. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/composition_per_taxon.py +0 -0
  44. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  45. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  46. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/dfoil.py +0 -0
  47. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/dna_threader.py +0 -0
  48. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/dstatistic.py +0 -0
  49. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  50. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/faidx.py +0 -0
  51. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/gc_content.py +0 -0
  52. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/identity_matrix.py +0 -0
  53. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/mask_alignment.py +0 -0
  54. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
  55. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/pairwise_identity.py +0 -0
  56. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  57. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/phylo_gwas.py +0 -0
  58. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/plot_alignment_qc.py +0 -0
  59. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/rcv.py +0 -0
  60. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/rcvt.py +0 -0
  61. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  62. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  63. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/variable_sites.py +0 -0
  64. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/base.py +0 -0
  65. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/ancestral_reconstruction.py +0 -0
  66. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/base.py +0 -0
  67. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/bipartition_support_stats.py +0 -0
  68. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/branch_length_multiplier.py +0 -0
  69. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/character_map.py +0 -0
  70. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/collapse_branches.py +0 -0
  71. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/concordance_asr.py +0 -0
  72. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/consensus_tree.py +0 -0
  73. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/cont_map.py +0 -0
  74. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/cophylo.py +0 -0
  75. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  76. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/density_map.py +0 -0
  77. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/discordance_asymmetry.py +0 -0
  78. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/dvmc.py +0 -0
  79. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/evo_tempo_map.py +0 -0
  80. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/evolutionary_rate.py +0 -0
  81. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/fit_continuous.py +0 -0
  82. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/fit_discrete.py +0 -0
  83. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  84. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/independent_contrasts.py +0 -0
  85. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/internal_branch_stats.py +0 -0
  86. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/internode_labeler.py +0 -0
  87. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/kf_distance.py +0 -0
  88. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  89. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/lb_score.py +0 -0
  90. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/ltt.py +0 -0
  91. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/monophyly_check.py +0 -0
  92. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  93. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/network_signal.py +0 -0
  94. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/ou_shift_detection.py +0 -0
  95. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/ouwie.py +0 -0
  96. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/parsimony_score.py +0 -0
  97. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/patristic_distances.py +0 -0
  98. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phenogram.py +0 -0
  99. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylo_heatmap.py +0 -0
  100. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylo_logistic.py +0 -0
  101. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylogenetic_glm.py +0 -0
  102. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylogenetic_ordination.py +0 -0
  103. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylogenetic_regression.py +0 -0
  104. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylogenetic_signal.py +0 -0
  105. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylomorphospace.py +0 -0
  106. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/polytomy_test.py +0 -0
  107. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/print_tree.py +0 -0
  108. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/prune_tree.py +0 -0
  109. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/quartet_network.py +0 -0
  110. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/quartet_pie.py +0 -0
  111. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/rate_heterogeneity.py +0 -0
  112. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/relative_rate_test.py +0 -0
  113. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/rename_tree_tips.py +0 -0
  114. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/rf_distance.py +0 -0
  115. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/root_tree.py +0 -0
  116. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/saturation.py +0 -0
  117. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/spectral_discordance.py +0 -0
  118. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/spurious_sequence.py +0 -0
  119. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/stochastic_character_map.py +0 -0
  120. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/terminal_branch_stats.py +0 -0
  121. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/threshold_model.py +0 -0
  122. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/tip_labels.py +0 -0
  123. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  124. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  125. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/total_tree_length.py +0 -0
  126. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/trait_correlation.py +0 -0
  127. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/trait_rate_map.py +0 -0
  128. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/tree_space.py +0 -0
  129. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/treeness.py +0 -0
  130. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/treeness_over_rcv.py +0 -0
  131. {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/vcv_utils.py +0 -0
  132. {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/dependency_links.txt +0 -0
  133. {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/requires.txt +0 -0
  134. {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/top_level.txt +0 -0
  135. {phykit-2.1.71 → phykit-2.1.73}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phykit
3
- Version: 2.1.71
3
+ Version: 2.1.73
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -188,6 +188,8 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
188
188
  "da": "discordance_asymmetry",
189
189
  "spec_disc": "spectral_discordance",
190
190
  "sd": "spectral_discordance",
191
+ "impute": "phylo_impute",
192
+ "phylo_imp": "phylo_impute",
191
193
  "trait_correlation": "trait_correlation",
192
194
  "trait_corr": "trait_correlation",
193
195
  "phylo_corr": "trait_correlation",
@@ -227,6 +227,9 @@ class Phykit:
227
227
  trait_correlation (alias: trait_corr; phylo_corr)
228
228
  - compute phylogenetic correlations between all pairs
229
229
  of traits and display as a heatmap
230
+ phylo_impute (alias: impute; phylo_imp)
231
+ - impute missing trait values using phylogenetic
232
+ relationships and between-trait correlations
230
233
  phylogenetic_ordination (alias: phylo_ordination; ordination; ord;
231
234
  phylo_pca; phyl_pca; ppca; phylo_dimreduce; dimreduce; pdr)
232
235
  - phylogenetic ordination (PCA, t-SNE, or UMAP) on
@@ -3681,6 +3684,73 @@ class Phykit:
3681
3684
  _add_json_argument(parser)
3682
3685
  _run_service(parser, argv, TraitCorrelation)
3683
3686
 
3687
+ @staticmethod
3688
+ def phylo_impute(argv):
3689
+ parser = _new_parser(
3690
+ description=textwrap.dedent(
3691
+ f"""\
3692
+ {help_header}
3693
+
3694
+ Phylogenetic imputation of missing trait values using
3695
+ conditional multivariate normal distributions.
3696
+
3697
+ Captures both phylogenetic relationships (via the
3698
+ tree's variance-covariance matrix) and between-trait
3699
+ correlations to predict missing values. Reports
3700
+ imputed values with standard errors and 95% CIs.
3701
+
3702
+ Missing values in the input trait file may be marked
3703
+ as NA, na, ?, or left empty.
3704
+
3705
+ Input is a phylogenetic tree and a tab-delimited
3706
+ multi-trait file with a header row:
3707
+ taxon<tab>trait1<tab>trait2<tab>...
3708
+
3709
+ Aliases:
3710
+ phylo_impute, impute, phylo_imp
3711
+ Command line interfaces:
3712
+ pk_phylo_impute, pk_impute, pk_phylo_imp
3713
+
3714
+ Usage:
3715
+ phykit phylo_impute -t <tree> -d <trait_data> -o <output>
3716
+ [-g <gene_trees>] [--json]
3717
+
3718
+ Options
3719
+ =====================================================
3720
+ -t/--tree tree file (required)
3721
+
3722
+ -d/--trait-data multi-trait TSV with header
3723
+ row; missing values marked
3724
+ as NA, ?, or empty
3725
+ (required)
3726
+
3727
+ -o/--output output TSV file with
3728
+ imputed values (required)
3729
+
3730
+ -g/--gene-trees optional multi-Newick file
3731
+ of gene trees for
3732
+ discordance-aware VCV
3733
+
3734
+ --json optional argument to output
3735
+ results as JSON
3736
+ """
3737
+ ),
3738
+ )
3739
+ parser.add_argument(
3740
+ "-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
3741
+ )
3742
+ parser.add_argument(
3743
+ "-d", "--trait-data", type=str, required=True, help=SUPPRESS, metavar=""
3744
+ )
3745
+ parser.add_argument(
3746
+ "-o", "--output", type=str, required=True, help=SUPPRESS, metavar=""
3747
+ )
3748
+ parser.add_argument(
3749
+ "-g", "--gene-trees", type=str, default=None, help=SUPPRESS, metavar=""
3750
+ )
3751
+ _add_json_argument(parser)
3752
+ _run_service(parser, argv, PhyloImpute)
3753
+
3684
3754
  @staticmethod
3685
3755
  def phylogenetic_ordination(argv):
3686
3756
  parser = _new_parser(
@@ -5751,8 +5821,8 @@ class Phykit:
5751
5821
  parser.add_argument(
5752
5822
  "--missing-taxa",
5753
5823
  type=str,
5754
- choices=["error", "shared"],
5755
- default="error",
5824
+ choices=["allow", "error", "shared"],
5825
+ default="allow",
5756
5826
  required=False,
5757
5827
  help=SUPPRESS,
5758
5828
  )
@@ -8562,5 +8632,9 @@ def tree_space(argv=None):
8562
8632
  Phykit.tree_space(sys.argv[1:])
8563
8633
 
8564
8634
 
8635
+ def phylo_impute(argv=None):
8636
+ Phykit.phylo_impute(sys.argv[1:])
8637
+
8638
+
8565
8639
  def trait_rate_map(argv=None):
8566
8640
  Phykit.trait_rate_map(sys.argv[1:])
@@ -112,6 +112,7 @@ TreenessOverRCV = _LazyServiceFactory("phykit.services.tree.treeness_over_rcv",
112
112
  EvoTempoMap = _LazyServiceFactory("phykit.services.tree.evo_tempo_map", "EvoTempoMap")
113
113
  DiscordanceAsymmetry = _LazyServiceFactory("phykit.services.tree.discordance_asymmetry", "DiscordanceAsymmetry")
114
114
  SpectralDiscordance = _LazyServiceFactory("phykit.services.tree.spectral_discordance", "SpectralDiscordance")
115
+ PhyloImpute = _LazyServiceFactory("phykit.services.tree.phylo_impute", "PhyloImpute")
115
116
  TraitCorrelation = _LazyServiceFactory("phykit.services.tree.trait_correlation", "TraitCorrelation")
116
117
  TraitRateMap = _LazyServiceFactory("phykit.services.tree.trait_rate_map", "TraitRateMap")
117
118
  TreeSpace = _LazyServiceFactory("phykit.services.tree.tree_space", "TreeSpace")
@@ -49,6 +49,7 @@ _EXPORTS = {
49
49
  "TreenessOverRCV": "treeness_over_rcv",
50
50
  "ConcordanceAsr": "concordance_asr",
51
51
  "PhyloLogistic": "phylo_logistic",
52
+ "PhyloImpute": "phylo_impute",
52
53
  "TraitCorrelation": "trait_correlation",
53
54
  "TraitRateMap": "trait_rate_map",
54
55
  "TreeSpace": "tree_space",
@@ -103,16 +103,29 @@ class ConsensusNetwork(Tree):
103
103
  raise PhykitUserError(
104
104
  [
105
105
  "Input trees do not share an identical taxon set.",
106
- "Use --missing-taxa shared to prune all trees to their shared taxa.",
106
+ "Use --missing-taxa allow or --missing-taxa shared.",
107
107
  ],
108
108
  code=2,
109
109
  )
110
110
 
111
+ if self.missing_taxa == "allow":
112
+ # Use the union of all taxa; each tree contributes splits
113
+ # using its own taxon set. Split frequencies are normalized
114
+ # by how many trees could contain each split.
115
+ union_taxa = set.union(*tip_sets)
116
+ if len(union_taxa) < 3:
117
+ raise PhykitUserError(
118
+ ["Fewer than 3 taxa found across all trees."], code=2
119
+ )
120
+ return trees, False, union_taxa
121
+
122
+ # shared mode
111
123
  if len(shared_taxa) < 3:
112
124
  raise PhykitUserError(
113
125
  [
114
126
  "Unable to compute network after pruning to shared taxa.",
115
127
  "At least 3 shared taxa are required.",
128
+ "Consider using --missing-taxa allow instead.",
116
129
  ],
117
130
  code=2,
118
131
  )
@@ -159,19 +172,80 @@ class ConsensusNetwork(Tree):
159
172
  return splits
160
173
 
161
174
  @staticmethod
162
- def _count_splits(trees: List, all_taxa: frozenset) -> Counter:
175
+ def _count_splits(trees: List, all_taxa: frozenset,
176
+ allow_mode: bool = False) -> Tuple[Counter, Counter]:
177
+ """Count splits across trees.
178
+
179
+ Returns (split_counts, split_possible) where split_possible[s]
180
+ is the number of trees that contain ALL taxa in split s (and
181
+ its complement). In allow mode, each tree uses its own taxon
182
+ set; in shared mode, all trees use all_taxa.
183
+ """
163
184
  counter = Counter()
164
- for tree in trees:
165
- tree_splits = ConsensusNetwork._extract_splits_from_tree(tree, all_taxa)
166
- for split in tree_splits:
167
- counter[split] += 1
168
- return counter
185
+ possible = Counter()
186
+
187
+ if allow_mode:
188
+ # Precompute taxon sets for all trees
189
+ tree_taxa_list = [
190
+ frozenset(t.name for t in tree.get_terminals())
191
+ for tree in trees
192
+ ]
193
+
194
+ # Extract splits from each tree using its own taxon set
195
+ for tree, tree_taxa in zip(trees, tree_taxa_list):
196
+ tree_splits = ConsensusNetwork._extract_splits_from_tree(
197
+ tree, tree_taxa
198
+ )
199
+ for split in tree_splits:
200
+ counter[split] += 1
201
+
202
+ # For normalization: each split was found in counter[split]
203
+ # trees. The "possible" count is the number of trees that
204
+ # contain ALL taxa on both sides. Since we extracted each
205
+ # split from a tree that had all its taxa, the split count
206
+ # IS the possible count (a tree can only produce a split if
207
+ # it contains all the relevant taxa).
208
+ for split in counter:
209
+ possible[split] = counter[split]
210
+
211
+ # Actually, we should count how many trees COULD have
212
+ # produced the split but didn't. A more accurate approach:
213
+ # possible = number of trees containing all taxa in the
214
+ # split's smaller side. But since splits are defined
215
+ # relative to each tree's own taxon set, the split IS
216
+ # the canonical smaller side from that tree. Different
217
+ # trees may have different "all_taxa" so the same
218
+ # bipartition in two trees means different things.
219
+ #
220
+ # The simplest correct normalization for incomplete
221
+ # taxon sampling: frequency = count / n_trees.
222
+ # This is what most software does.
223
+ for split in counter:
224
+ possible[split] = len(trees)
225
+ else:
226
+ for tree in trees:
227
+ tree_splits = ConsensusNetwork._extract_splits_from_tree(
228
+ tree, all_taxa
229
+ )
230
+ for split in tree_splits:
231
+ counter[split] += 1
232
+ for split in counter:
233
+ possible[split] = len(trees)
234
+
235
+ return counter, possible
169
236
 
170
237
  @staticmethod
171
- def _filter_splits(split_counts: Counter, n_trees: int, threshold: float) -> List[Tuple[frozenset, int, float]]:
238
+ def _filter_splits(
239
+ split_counts: Counter, n_trees: int, threshold: float,
240
+ split_possible: Counter = None,
241
+ ) -> List[Tuple[frozenset, int, float]]:
172
242
  results = []
173
243
  for split, count in split_counts.items():
174
- freq = count / n_trees
244
+ if split_possible and split in split_possible:
245
+ denom = split_possible[split]
246
+ else:
247
+ denom = n_trees
248
+ freq = count / denom if denom > 0 else 0.0
175
249
  if freq >= threshold:
176
250
  results.append((split, count, freq))
177
251
  results.sort(key=lambda x: (-x[2], sorted(x[0])))
@@ -435,8 +509,14 @@ class ConsensusNetwork(Tree):
435
509
  all_taxa = frozenset(all_taxa_set)
436
510
  n_trees = len(trees)
437
511
 
438
- split_counts = self._count_splits(trees, all_taxa)
439
- filtered = self._filter_splits(split_counts, n_trees, self.threshold)
512
+ allow_mode = (self.missing_taxa == "allow")
513
+ split_counts, split_possible = self._count_splits(
514
+ trees, all_taxa, allow_mode=allow_mode
515
+ )
516
+ filtered = self._filter_splits(
517
+ split_counts, n_trees, self.threshold,
518
+ split_possible=split_possible,
519
+ )
440
520
 
441
521
  if self.json_output:
442
522
  splits_list = [