phykit 2.1.71__tar.gz → 2.1.73__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phykit-2.1.71 → phykit-2.1.73}/PKG-INFO +1 -1
- {phykit-2.1.71 → phykit-2.1.73}/phykit/cli_registry.py +2 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/phykit.py +76 -2
- {phykit-2.1.71 → phykit-2.1.73}/phykit/service_factories.py +1 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/__init__.py +1 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/consensus_network.py +91 -11
- phykit-2.1.73/phykit/services/tree/phylo_impute.py +438 -0
- phykit-2.1.73/phykit/version.py +1 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/PKG-INFO +1 -1
- {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/SOURCES.txt +1 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/entry_points.txt +3 -0
- {phykit-2.1.71 → phykit-2.1.73}/setup.py +3 -0
- phykit-2.1.71/phykit/version.py +0 -1
- {phykit-2.1.71 → phykit-2.1.73}/LICENSE.md +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/README.md +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/__init__.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/__main__.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/errors.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/__init__.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/boolean_argument_parsing.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/caching.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/circular_layout.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/color_annotations.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/discrete_models.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/files.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/json_output.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/parallel.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/parsimony_utils.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/plot_config.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/quartet_utils.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/stats_summary.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/helpers/streaming.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/__init__.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/__init__.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_entropy.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_length.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_recoding.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/alignment_subsample.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/base.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/column_score.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/composition_per_taxon.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/dfoil.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/dna_threader.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/dstatistic.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/faidx.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/gc_content.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/identity_matrix.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/mask_alignment.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/pairwise_identity.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/phylo_gwas.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/plot_alignment_qc.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/rcv.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/rcvt.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/rename_fasta_entries.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/alignment/variable_sites.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/base.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/ancestral_reconstruction.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/base.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/bipartition_support_stats.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/branch_length_multiplier.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/character_map.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/collapse_branches.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/concordance_asr.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/consensus_tree.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/cont_map.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/cophylo.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/density_map.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/discordance_asymmetry.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/dvmc.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/evo_tempo_map.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/evolutionary_rate.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/fit_continuous.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/fit_discrete.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/hidden_paralogy_check.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/independent_contrasts.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/internal_branch_stats.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/internode_labeler.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/kf_distance.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/lb_score.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/ltt.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/monophyly_check.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/network_signal.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/ou_shift_detection.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/ouwie.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/parsimony_score.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/patristic_distances.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phenogram.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylo_heatmap.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylo_logistic.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylogenetic_glm.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylogenetic_ordination.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylogenetic_regression.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylogenetic_signal.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/phylomorphospace.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/polytomy_test.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/print_tree.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/prune_tree.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/quartet_network.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/quartet_pie.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/rate_heterogeneity.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/relative_rate_test.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/rename_tree_tips.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/rf_distance.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/root_tree.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/saturation.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/spectral_discordance.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/spurious_sequence.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/stochastic_character_map.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/terminal_branch_stats.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/threshold_model.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/tip_labels.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/tip_to_tip_distance.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/total_tree_length.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/trait_correlation.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/trait_rate_map.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/tree_space.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/treeness.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/treeness_over_rcv.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit/services/tree/vcv_utils.py +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/dependency_links.txt +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/requires.txt +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/phykit.egg-info/top_level.txt +0 -0
- {phykit-2.1.71 → phykit-2.1.73}/setup.cfg +0 -0
|
@@ -188,6 +188,8 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
|
|
|
188
188
|
"da": "discordance_asymmetry",
|
|
189
189
|
"spec_disc": "spectral_discordance",
|
|
190
190
|
"sd": "spectral_discordance",
|
|
191
|
+
"impute": "phylo_impute",
|
|
192
|
+
"phylo_imp": "phylo_impute",
|
|
191
193
|
"trait_correlation": "trait_correlation",
|
|
192
194
|
"trait_corr": "trait_correlation",
|
|
193
195
|
"phylo_corr": "trait_correlation",
|
|
@@ -227,6 +227,9 @@ class Phykit:
|
|
|
227
227
|
trait_correlation (alias: trait_corr; phylo_corr)
|
|
228
228
|
- compute phylogenetic correlations between all pairs
|
|
229
229
|
of traits and display as a heatmap
|
|
230
|
+
phylo_impute (alias: impute; phylo_imp)
|
|
231
|
+
- impute missing trait values using phylogenetic
|
|
232
|
+
relationships and between-trait correlations
|
|
230
233
|
phylogenetic_ordination (alias: phylo_ordination; ordination; ord;
|
|
231
234
|
phylo_pca; phyl_pca; ppca; phylo_dimreduce; dimreduce; pdr)
|
|
232
235
|
- phylogenetic ordination (PCA, t-SNE, or UMAP) on
|
|
@@ -3681,6 +3684,73 @@ class Phykit:
|
|
|
3681
3684
|
_add_json_argument(parser)
|
|
3682
3685
|
_run_service(parser, argv, TraitCorrelation)
|
|
3683
3686
|
|
|
3687
|
+
@staticmethod
|
|
3688
|
+
def phylo_impute(argv):
|
|
3689
|
+
parser = _new_parser(
|
|
3690
|
+
description=textwrap.dedent(
|
|
3691
|
+
f"""\
|
|
3692
|
+
{help_header}
|
|
3693
|
+
|
|
3694
|
+
Phylogenetic imputation of missing trait values using
|
|
3695
|
+
conditional multivariate normal distributions.
|
|
3696
|
+
|
|
3697
|
+
Captures both phylogenetic relationships (via the
|
|
3698
|
+
tree's variance-covariance matrix) and between-trait
|
|
3699
|
+
correlations to predict missing values. Reports
|
|
3700
|
+
imputed values with standard errors and 95% CIs.
|
|
3701
|
+
|
|
3702
|
+
Missing values in the input trait file may be marked
|
|
3703
|
+
as NA, na, ?, or left empty.
|
|
3704
|
+
|
|
3705
|
+
Input is a phylogenetic tree and a tab-delimited
|
|
3706
|
+
multi-trait file with a header row:
|
|
3707
|
+
taxon<tab>trait1<tab>trait2<tab>...
|
|
3708
|
+
|
|
3709
|
+
Aliases:
|
|
3710
|
+
phylo_impute, impute, phylo_imp
|
|
3711
|
+
Command line interfaces:
|
|
3712
|
+
pk_phylo_impute, pk_impute, pk_phylo_imp
|
|
3713
|
+
|
|
3714
|
+
Usage:
|
|
3715
|
+
phykit phylo_impute -t <tree> -d <trait_data> -o <output>
|
|
3716
|
+
[-g <gene_trees>] [--json]
|
|
3717
|
+
|
|
3718
|
+
Options
|
|
3719
|
+
=====================================================
|
|
3720
|
+
-t/--tree tree file (required)
|
|
3721
|
+
|
|
3722
|
+
-d/--trait-data multi-trait TSV with header
|
|
3723
|
+
row; missing values marked
|
|
3724
|
+
as NA, ?, or empty
|
|
3725
|
+
(required)
|
|
3726
|
+
|
|
3727
|
+
-o/--output output TSV file with
|
|
3728
|
+
imputed values (required)
|
|
3729
|
+
|
|
3730
|
+
-g/--gene-trees optional multi-Newick file
|
|
3731
|
+
of gene trees for
|
|
3732
|
+
discordance-aware VCV
|
|
3733
|
+
|
|
3734
|
+
--json optional argument to output
|
|
3735
|
+
results as JSON
|
|
3736
|
+
"""
|
|
3737
|
+
),
|
|
3738
|
+
)
|
|
3739
|
+
parser.add_argument(
|
|
3740
|
+
"-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
|
|
3741
|
+
)
|
|
3742
|
+
parser.add_argument(
|
|
3743
|
+
"-d", "--trait-data", type=str, required=True, help=SUPPRESS, metavar=""
|
|
3744
|
+
)
|
|
3745
|
+
parser.add_argument(
|
|
3746
|
+
"-o", "--output", type=str, required=True, help=SUPPRESS, metavar=""
|
|
3747
|
+
)
|
|
3748
|
+
parser.add_argument(
|
|
3749
|
+
"-g", "--gene-trees", type=str, default=None, help=SUPPRESS, metavar=""
|
|
3750
|
+
)
|
|
3751
|
+
_add_json_argument(parser)
|
|
3752
|
+
_run_service(parser, argv, PhyloImpute)
|
|
3753
|
+
|
|
3684
3754
|
@staticmethod
|
|
3685
3755
|
def phylogenetic_ordination(argv):
|
|
3686
3756
|
parser = _new_parser(
|
|
@@ -5751,8 +5821,8 @@ class Phykit:
|
|
|
5751
5821
|
parser.add_argument(
|
|
5752
5822
|
"--missing-taxa",
|
|
5753
5823
|
type=str,
|
|
5754
|
-
choices=["error", "shared"],
|
|
5755
|
-
default="
|
|
5824
|
+
choices=["allow", "error", "shared"],
|
|
5825
|
+
default="allow",
|
|
5756
5826
|
required=False,
|
|
5757
5827
|
help=SUPPRESS,
|
|
5758
5828
|
)
|
|
@@ -8562,5 +8632,9 @@ def tree_space(argv=None):
|
|
|
8562
8632
|
Phykit.tree_space(sys.argv[1:])
|
|
8563
8633
|
|
|
8564
8634
|
|
|
8635
|
+
def phylo_impute(argv=None):
|
|
8636
|
+
Phykit.phylo_impute(sys.argv[1:])
|
|
8637
|
+
|
|
8638
|
+
|
|
8565
8639
|
def trait_rate_map(argv=None):
|
|
8566
8640
|
Phykit.trait_rate_map(sys.argv[1:])
|
|
@@ -112,6 +112,7 @@ TreenessOverRCV = _LazyServiceFactory("phykit.services.tree.treeness_over_rcv",
|
|
|
112
112
|
EvoTempoMap = _LazyServiceFactory("phykit.services.tree.evo_tempo_map", "EvoTempoMap")
|
|
113
113
|
DiscordanceAsymmetry = _LazyServiceFactory("phykit.services.tree.discordance_asymmetry", "DiscordanceAsymmetry")
|
|
114
114
|
SpectralDiscordance = _LazyServiceFactory("phykit.services.tree.spectral_discordance", "SpectralDiscordance")
|
|
115
|
+
PhyloImpute = _LazyServiceFactory("phykit.services.tree.phylo_impute", "PhyloImpute")
|
|
115
116
|
TraitCorrelation = _LazyServiceFactory("phykit.services.tree.trait_correlation", "TraitCorrelation")
|
|
116
117
|
TraitRateMap = _LazyServiceFactory("phykit.services.tree.trait_rate_map", "TraitRateMap")
|
|
117
118
|
TreeSpace = _LazyServiceFactory("phykit.services.tree.tree_space", "TreeSpace")
|
|
@@ -49,6 +49,7 @@ _EXPORTS = {
|
|
|
49
49
|
"TreenessOverRCV": "treeness_over_rcv",
|
|
50
50
|
"ConcordanceAsr": "concordance_asr",
|
|
51
51
|
"PhyloLogistic": "phylo_logistic",
|
|
52
|
+
"PhyloImpute": "phylo_impute",
|
|
52
53
|
"TraitCorrelation": "trait_correlation",
|
|
53
54
|
"TraitRateMap": "trait_rate_map",
|
|
54
55
|
"TreeSpace": "tree_space",
|
|
@@ -103,16 +103,29 @@ class ConsensusNetwork(Tree):
|
|
|
103
103
|
raise PhykitUserError(
|
|
104
104
|
[
|
|
105
105
|
"Input trees do not share an identical taxon set.",
|
|
106
|
-
"Use --missing-taxa
|
|
106
|
+
"Use --missing-taxa allow or --missing-taxa shared.",
|
|
107
107
|
],
|
|
108
108
|
code=2,
|
|
109
109
|
)
|
|
110
110
|
|
|
111
|
+
if self.missing_taxa == "allow":
|
|
112
|
+
# Use the union of all taxa; each tree contributes splits
|
|
113
|
+
# using its own taxon set. Split frequencies are normalized
|
|
114
|
+
# by the total number of input trees.
|
|
115
|
+
union_taxa = set.union(*tip_sets)
|
|
116
|
+
if len(union_taxa) < 3:
|
|
117
|
+
raise PhykitUserError(
|
|
118
|
+
["Fewer than 3 taxa found across all trees."], code=2
|
|
119
|
+
)
|
|
120
|
+
return trees, False, union_taxa
|
|
121
|
+
|
|
122
|
+
# shared mode
|
|
111
123
|
if len(shared_taxa) < 3:
|
|
112
124
|
raise PhykitUserError(
|
|
113
125
|
[
|
|
114
126
|
"Unable to compute network after pruning to shared taxa.",
|
|
115
127
|
"At least 3 shared taxa are required.",
|
|
128
|
+
"Consider using --missing-taxa allow instead.",
|
|
116
129
|
],
|
|
117
130
|
code=2,
|
|
118
131
|
)
|
|
@@ -159,19 +172,80 @@ class ConsensusNetwork(Tree):
|
|
|
159
172
|
return splits
|
|
160
173
|
|
|
161
174
|
@staticmethod
|
|
162
|
-
def _count_splits(trees: List, all_taxa: frozenset
|
|
175
|
+
def _count_splits(trees: List, all_taxa: frozenset,
|
|
176
|
+
allow_mode: bool = False) -> Tuple[Counter, Counter]:
|
|
177
|
+
"""Count splits across trees.
|
|
178
|
+
|
|
179
|
+
Returns (split_counts, split_possible) where split_possible[s]
|
|
180
|
+
is the denominator used to normalize split s (currently the
|
|
181
|
+
total number of trees). In allow mode, each tree uses its own taxon
|
|
182
|
+
set; in shared mode, all trees use all_taxa.
|
|
183
|
+
"""
|
|
163
184
|
counter = Counter()
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
185
|
+
possible = Counter()
|
|
186
|
+
|
|
187
|
+
if allow_mode:
|
|
188
|
+
# Precompute taxon sets for all trees
|
|
189
|
+
tree_taxa_list = [
|
|
190
|
+
frozenset(t.name for t in tree.get_terminals())
|
|
191
|
+
for tree in trees
|
|
192
|
+
]
|
|
193
|
+
|
|
194
|
+
# Extract splits from each tree using its own taxon set
|
|
195
|
+
for tree, tree_taxa in zip(trees, tree_taxa_list):
|
|
196
|
+
tree_splits = ConsensusNetwork._extract_splits_from_tree(
|
|
197
|
+
tree, tree_taxa
|
|
198
|
+
)
|
|
199
|
+
for split in tree_splits:
|
|
200
|
+
counter[split] += 1
|
|
201
|
+
|
|
202
|
+
# For normalization: each split was found in counter[split]
|
|
203
|
+
# trees. The "possible" count is the number of trees that
|
|
204
|
+
# contain ALL taxa on both sides. Since we extracted each
|
|
205
|
+
# split from a tree that had all its taxa, the split count
|
|
206
|
+
# IS the possible count (a tree can only produce a split if
|
|
207
|
+
# it contains all the relevant taxa).
|
|
208
|
+
for split in counter:
|
|
209
|
+
possible[split] = counter[split]
|
|
210
|
+
|
|
211
|
+
# Actually, we should count how many trees COULD have
|
|
212
|
+
# produced the split but didn't. A more accurate approach:
|
|
213
|
+
# possible = number of trees containing all taxa in the
|
|
214
|
+
# split's smaller side. But since splits are defined
|
|
215
|
+
# relative to each tree's own taxon set, the split IS
|
|
216
|
+
# the canonical smaller side from that tree. Different
|
|
217
|
+
# trees may have different "all_taxa" so the same
|
|
218
|
+
# bipartition in two trees means different things.
|
|
219
|
+
#
|
|
220
|
+
# The simplest correct normalization for incomplete
|
|
221
|
+
# taxon sampling: frequency = count / n_trees.
|
|
222
|
+
# This is what most software does.
|
|
223
|
+
for split in counter:
|
|
224
|
+
possible[split] = len(trees)
|
|
225
|
+
else:
|
|
226
|
+
for tree in trees:
|
|
227
|
+
tree_splits = ConsensusNetwork._extract_splits_from_tree(
|
|
228
|
+
tree, all_taxa
|
|
229
|
+
)
|
|
230
|
+
for split in tree_splits:
|
|
231
|
+
counter[split] += 1
|
|
232
|
+
for split in counter:
|
|
233
|
+
possible[split] = len(trees)
|
|
234
|
+
|
|
235
|
+
return counter, possible
|
|
169
236
|
|
|
170
237
|
@staticmethod
|
|
171
|
-
def _filter_splits(
|
|
238
|
+
def _filter_splits(
|
|
239
|
+
split_counts: Counter, n_trees: int, threshold: float,
|
|
240
|
+
split_possible: Counter = None,
|
|
241
|
+
) -> List[Tuple[frozenset, int, float]]:
|
|
172
242
|
results = []
|
|
173
243
|
for split, count in split_counts.items():
|
|
174
|
-
|
|
244
|
+
if split_possible and split in split_possible:
|
|
245
|
+
denom = split_possible[split]
|
|
246
|
+
else:
|
|
247
|
+
denom = n_trees
|
|
248
|
+
freq = count / denom if denom > 0 else 0.0
|
|
175
249
|
if freq >= threshold:
|
|
176
250
|
results.append((split, count, freq))
|
|
177
251
|
results.sort(key=lambda x: (-x[2], sorted(x[0])))
|
|
@@ -435,8 +509,14 @@ class ConsensusNetwork(Tree):
|
|
|
435
509
|
all_taxa = frozenset(all_taxa_set)
|
|
436
510
|
n_trees = len(trees)
|
|
437
511
|
|
|
438
|
-
|
|
439
|
-
|
|
512
|
+
allow_mode = (self.missing_taxa == "allow")
|
|
513
|
+
split_counts, split_possible = self._count_splits(
|
|
514
|
+
trees, all_taxa, allow_mode=allow_mode
|
|
515
|
+
)
|
|
516
|
+
filtered = self._filter_splits(
|
|
517
|
+
split_counts, n_trees, self.threshold,
|
|
518
|
+
split_possible=split_possible,
|
|
519
|
+
)
|
|
440
520
|
|
|
441
521
|
if self.json_output:
|
|
442
522
|
splits_list = [
|