phykit 2.1.30__tar.gz → 2.1.32__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phykit-2.1.30 → phykit-2.1.32}/PKG-INFO +1 -1
- {phykit-2.1.30 → phykit-2.1.32}/phykit/cli_registry.py +3 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/phykit.py +130 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/service_factories.py +2 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/__init__.py +2 -0
- phykit-2.1.32/phykit/services/tree/discordance_asymmetry.py +520 -0
- phykit-2.1.32/phykit/services/tree/evo_tempo_map.py +658 -0
- phykit-2.1.32/phykit/version.py +1 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/PKG-INFO +1 -1
- {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/SOURCES.txt +2 -0
- phykit-2.1.30/phykit/version.py +0 -1
- {phykit-2.1.30 → phykit-2.1.32}/LICENSE.md +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/README.md +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/__init__.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/__main__.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/errors.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/__init__.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/boolean_argument_parsing.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/caching.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/files.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/json_output.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/parallel.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/stats_summary.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/helpers/streaming.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/__init__.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/__init__.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_entropy.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_length.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/alignment_recoding.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/base.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/column_score.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/composition_per_taxon.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/dna_threader.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/faidx.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/gc_content.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/mask_alignment.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/pairwise_identity.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/plot_alignment_qc.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/rcv.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/rcvt.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/rename_fasta_entries.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/alignment/variable_sites.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/base.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/ancestral_reconstruction.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/base.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/bipartition_support_stats.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/branch_length_multiplier.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/collapse_branches.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/concordance_asr.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/consensus_network.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/consensus_tree.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/cont_map.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/cophylo.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/density_map.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/dvmc.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/evolutionary_rate.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/fit_continuous.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/hidden_paralogy_check.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/internal_branch_stats.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/internode_labeler.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/lb_score.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/ltt.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/monophyly_check.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/network_signal.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/ou_shift_detection.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/ouwie.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/patristic_distances.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phenogram.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylogenetic_glm.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylogenetic_ordination.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylogenetic_regression.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylogenetic_signal.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/phylomorphospace.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/polytomy_test.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/print_tree.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/prune_tree.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/quartet_network.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/rate_heterogeneity.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/relative_rate_test.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/rename_tree_tips.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/rf_distance.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/root_tree.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/saturation.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/spurious_sequence.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/stochastic_character_map.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/terminal_branch_stats.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/threshold_model.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/tip_labels.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/tip_to_tip_distance.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/total_tree_length.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/treeness.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/treeness_over_rcv.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit/services/tree/vcv_utils.py +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/dependency_links.txt +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/entry_points.txt +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/requires.txt +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/phykit.egg-info/top_level.txt +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/setup.cfg +0 -0
- {phykit-2.1.30 → phykit-2.1.32}/setup.py +0 -0
|
@@ -154,6 +154,9 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
|
|
|
154
154
|
"thresh_bayes": "threshold_model",
|
|
155
155
|
"toverr": "treeness_over_rcv",
|
|
156
156
|
"tor": "treeness_over_rcv",
|
|
157
|
+
"etm": "evo_tempo_map",
|
|
158
|
+
"disc_asym": "discordance_asymmetry",
|
|
159
|
+
"da": "discordance_asymmetry",
|
|
157
160
|
# Helper aliases
|
|
158
161
|
"create_concat": "create_concatenation_matrix",
|
|
159
162
|
"cc": "create_concatenation_matrix",
|
|
@@ -4851,6 +4851,136 @@ class Phykit:
|
|
|
4851
4851
|
_add_json_argument(parser)
|
|
4852
4852
|
_run_service(parser, argv, TreenessOverRCV)
|
|
4853
4853
|
|
|
4854
|
+
@staticmethod
def evo_tempo_map(argv):
    # CLI entry point for the `evo_tempo_map` command (alias `etm`): builds
    # the argument parser with its help text, registers the options, and
    # hands parser + argv to _run_service together with the EvoTempoMap
    # service.
    #
    # NOTE(review): the help text below is reproduced exactly as it appears
    # in this diff rendering, which seems to have collapsed leading and
    # repeated whitespace. Confirm the indentation and column alignment of
    # the help text against the packaged phykit.py.
    parser = _new_parser(
        description=textwrap.dedent(
            f"""\
{help_header}

Detect rate-topology associations by comparing branch length
distributions between concordant and discordant gene trees at
each species tree branch.

Under the multispecies coalescent, discordant gene trees should
have shorter internal branches near the discordant node. Deviations
suggest substitution rate heterogeneity correlated with topology
(adaptive evolution, different selective pressures, or model
misspecification).

For each internal branch of the species tree, gene trees are
classified as concordant or discordant via bipartition matching.
The homologous branch length is extracted from each gene tree
and the two groups are compared using Mann-Whitney U and
permutation tests. P-values are corrected for multiple testing
using Benjamini-Hochberg FDR.

A global treeness (internal/total branch length ratio) comparison
is also reported.

Aliases:
evo_tempo_map, etm
Command line interfaces:
pk_evo_tempo_map, pk_etm

Usage:
phykit evo_tempo_map -t/--tree <tree> -g/--gene-trees <gene_trees>
[--plot <output>] [-v/--verbose] [--json]

Options
=====================================================
-t/--tree a species tree file

-g/--gene-trees multi-Newick file of gene trees
with branch lengths

--plot optional output path for
box/strip plot (PNG)

-v/--verbose print per-gene-tree details

--json optional argument to output
results as JSON
"""
        ),
    )
    # Both tree inputs are mandatory; help=SUPPRESS keeps argparse from
    # listing them a second time below the hand-written description.
    parser.add_argument(
        "-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-g", "--gene-trees", type=str, required=True, help=SUPPRESS, metavar=""
    )
    # --plot is stored as `plot_output`; None (the default) means no plot.
    parser.add_argument(
        "--plot", dest="plot_output", type=str, required=False,
        default=None, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    _add_json_argument(parser)
    _run_service(parser, argv, EvoTempoMap)
|
|
4922
|
+
|
|
4923
|
+
@staticmethod
def discordance_asymmetry(argv):
    # CLI entry point for the `discordance_asymmetry` command (aliases
    # `disc_asym`, `da`): builds the argument parser with its help text,
    # registers the options, and hands parser + argv to _run_service
    # together with the DiscordanceAsymmetry service.
    #
    # NOTE(review): the help text below is reproduced exactly as it appears
    # in this diff rendering, which seems to have collapsed leading and
    # repeated whitespace. Confirm the indentation and column alignment of
    # the help text against the packaged phykit.py.
    parser = _new_parser(
        description=textwrap.dedent(
            f"""\
{help_header}

Test whether the two discordant NNI alternative topologies
at each species tree branch are equally frequent, detecting
gene flow direction from asymmetric discordance.

Under incomplete lineage sorting (ILS) alone, the two minor
NNI alternatives (gDF1 and gDF2) should be equally frequent.
When they are significantly asymmetric, it suggests
introgression or gene flow between specific lineages.

For each internal branch, a two-sided binomial test (H0:
P(alt1) = 0.5) is applied. P-values are corrected for
multiple testing using Benjamini-Hochberg FDR.

Aliases:
discordance_asymmetry, disc_asym, da
Command line interfaces:
pk_discordance_asymmetry, pk_disc_asym, pk_da

Usage:
phykit discordance_asymmetry -t/--tree <tree> -g/--gene-trees <gene_trees>
[--plot <output>] [-v/--verbose] [--json]

Options
=====================================================
-t/--tree a species tree file

-g/--gene-trees multi-Newick file of gene trees

--plot optional output path for
asymmetry phylogram (PNG)

-v/--verbose print per-branch details

--json optional argument to output
results as JSON
"""
        ),
    )
    # Both tree inputs are mandatory; help=SUPPRESS keeps argparse from
    # listing them a second time below the hand-written description.
    parser.add_argument(
        "-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-g", "--gene-trees", type=str, required=True, help=SUPPRESS, metavar=""
    )
    # --plot is stored as `plot_output`; None (the default) means no plot.
    parser.add_argument(
        "--plot", dest="plot_output", type=str, required=False,
        default=None, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    _add_json_argument(parser)
    _run_service(parser, argv, DiscordanceAsymmetry)
|
|
4983
|
+
|
|
4854
4984
|
### Helper commands
|
|
4855
4985
|
@staticmethod
|
|
4856
4986
|
def create_concatenation_matrix(argv):
|
|
@@ -96,6 +96,8 @@ TipToTipNodeDistance = _LazyServiceFactory("phykit.services.tree.tip_to_tip_node
|
|
|
96
96
|
TotalTreeLength = _LazyServiceFactory("phykit.services.tree.total_tree_length", "TotalTreeLength")
|
|
97
97
|
Treeness = _LazyServiceFactory("phykit.services.tree.treeness", "Treeness")
|
|
98
98
|
TreenessOverRCV = _LazyServiceFactory("phykit.services.tree.treeness_over_rcv", "TreenessOverRCV")
|
|
99
|
+
EvoTempoMap = _LazyServiceFactory("phykit.services.tree.evo_tempo_map", "EvoTempoMap")
|
|
100
|
+
DiscordanceAsymmetry = _LazyServiceFactory("phykit.services.tree.discordance_asymmetry", "DiscordanceAsymmetry")
|
|
99
101
|
|
|
100
102
|
SERVICE_FACTORIES: Dict[str, _LazyServiceFactory] = {
|
|
101
103
|
name: value
|
|
@@ -9,7 +9,9 @@ _EXPORTS = {
|
|
|
9
9
|
"ConsensusNetwork": "consensus_network",
|
|
10
10
|
"ConsensusTree": "consensus_tree",
|
|
11
11
|
"DVMC": "dvmc",
|
|
12
|
+
"DiscordanceAsymmetry": "discordance_asymmetry",
|
|
12
13
|
"EvolutionaryRate": "evolutionary_rate",
|
|
14
|
+
"EvoTempoMap": "evo_tempo_map",
|
|
13
15
|
"HiddenParalogyCheck": "hidden_paralogy_check",
|
|
14
16
|
"InternalBranchStats": "internal_branch_stats",
|
|
15
17
|
"InternodeLabeler": "internode_labeler",
|
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
from io import StringIO
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Dict, List
|
|
4
|
+
|
|
5
|
+
from Bio import Phylo
|
|
6
|
+
from scipy.stats import binomtest
|
|
7
|
+
|
|
8
|
+
from .base import Tree
|
|
9
|
+
from ...helpers.json_output import print_json
|
|
10
|
+
from ...errors import PhykitUserError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DiscordanceAsymmetry(Tree):
|
|
14
|
+
def __init__(self, args) -> None:
    """Initialise the service from parsed command-line arguments.

    The species tree path goes to the ``Tree`` base class; the remaining
    options returned by ``process_args`` are stored as attributes of the
    same name.
    """
    options = self.process_args(args)
    super().__init__(tree_file_path=options["tree_file_path"])
    for attr in ("gene_trees_path", "verbose", "json_output", "plot_output"):
        setattr(self, attr, options[attr])
|
|
21
|
+
|
|
22
|
+
def process_args(self, args) -> Dict:
    """Translate the argparse namespace into the option dict used by __init__.

    ``tree`` and ``gene_trees`` must be present on *args*. ``verbose`` and
    ``json`` fall back to ``False`` and ``plot_output`` to ``None`` when the
    namespace does not carry them.
    """
    return {
        "tree_file_path": args.tree,
        "gene_trees_path": args.gene_trees,
        "verbose": getattr(args, "verbose", False),
        "json_output": getattr(args, "json", False),
        "plot_output": getattr(args, "plot_output", None),
    }
|
|
30
|
+
|
|
31
|
+
def run(self) -> None:
    """Run the analysis: count topologies, test each branch, adjust, report.

    Reads the species tree and the gene trees, collects per-branch
    topology counts, tests every branch with ``_test_asymmetry``, passes
    the non-None p-values through ``_fdr``, then emits JSON or text and
    optionally writes the plot.
    """
    species_tree = self.read_tree_file()
    gene_trees = self._parse_gene_trees(self.gene_trees_path)
    counts_by_branch = self._count_topologies(species_tree, gene_trees)

    # One result row per branch, in sorted branch-label order.
    branch_results = []
    for label in sorted(counts_by_branch):
        counts = counts_by_branch[label]
        outcome = self._test_asymmetry(counts["n_alt1"], counts["n_alt2"])
        row = {
            "split": counts["split"],
            "n_concordant": counts["n_concordant"],
            "n_alt1": counts["n_alt1"],
            "n_alt2": counts["n_alt2"],
        }
        row.update(outcome)
        branch_results.append(row)

    # Only rows that produced a p-value take part in the FDR correction.
    testable = [row for row in branch_results if row["p_value"] is not None]
    adjusted = self._fdr([row["p_value"] for row in testable])
    for row, fdr_p in zip(testable, adjusted):
        row["fdr_p"] = fdr_p

    # Rows that still lack an adjusted value get an explicit None.
    for row in branch_results:
        row.setdefault("fdr_p", None)

    n_significant = sum(
        1
        for row in branch_results
        if row["fdr_p"] is not None and row["fdr_p"] < 0.05
    )
    summary = {
        "n_gene_trees": len(gene_trees),
        "n_branches_tested": len(testable),
        "n_significant_fdr05": n_significant,
    }

    emit = self._output_json if self.json_output else self._output_text
    emit(branch_results, summary)

    if self.plot_output:
        self._plot(species_tree, branch_results, self.plot_output)
|
|
86
|
+
|
|
87
|
+
# ------------------------------------------------------------------
|
|
88
|
+
# Output methods
|
|
89
|
+
# ------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
def _output_text(self, branch_results, summary) -> None:
|
|
92
|
+
try:
|
|
93
|
+
header = (
|
|
94
|
+
f"{'branch':<30}"
|
|
95
|
+
f"{'n_conc':>8}"
|
|
96
|
+
f"{'n_alt1':>8}"
|
|
97
|
+
f"{'n_alt2':>8}"
|
|
98
|
+
f"{'asym_ratio':>12}"
|
|
99
|
+
f"{'binom_p':>12}"
|
|
100
|
+
f"{'fdr_p':>12}"
|
|
101
|
+
f"{'gene_flow':>12}"
|
|
102
|
+
)
|
|
103
|
+
print(header)
|
|
104
|
+
print("-" * len(header))
|
|
105
|
+
|
|
106
|
+
for entry in branch_results:
|
|
107
|
+
branch_label = ",".join(entry["split"])
|
|
108
|
+
asym = (
|
|
109
|
+
f"{entry['asymmetry_ratio']:.3f}"
|
|
110
|
+
if entry["asymmetry_ratio"] is not None
|
|
111
|
+
else "NA"
|
|
112
|
+
)
|
|
113
|
+
binom_p = (
|
|
114
|
+
f"{entry['p_value']:.4f}"
|
|
115
|
+
if entry["p_value"] is not None
|
|
116
|
+
else "NA"
|
|
117
|
+
)
|
|
118
|
+
fdr_p = (
|
|
119
|
+
f"{entry['fdr_p']:.4f}"
|
|
120
|
+
if entry["fdr_p"] is not None
|
|
121
|
+
else "NA"
|
|
122
|
+
)
|
|
123
|
+
gene_flow = "-"
|
|
124
|
+
if (entry["fdr_p"] is not None and entry["fdr_p"] < 0.05
|
|
125
|
+
and entry["favored_alt"] is not None):
|
|
126
|
+
gene_flow = entry["favored_alt"]
|
|
127
|
+
print(
|
|
128
|
+
f"{branch_label:<30}"
|
|
129
|
+
f"{entry['n_concordant']:>8}"
|
|
130
|
+
f"{entry['n_alt1']:>8}"
|
|
131
|
+
f"{entry['n_alt2']:>8}"
|
|
132
|
+
f"{asym:>12}"
|
|
133
|
+
f"{binom_p:>12}"
|
|
134
|
+
f"{fdr_p:>12}"
|
|
135
|
+
f"{gene_flow:>12}"
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
print("---")
|
|
139
|
+
print(
|
|
140
|
+
f"Summary: {summary['n_branches_tested']} branches tested, "
|
|
141
|
+
f"{summary['n_significant_fdr05']} significant (FDR<0.05)"
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
if self.verbose:
|
|
145
|
+
print()
|
|
146
|
+
for entry in branch_results:
|
|
147
|
+
branch_label = ",".join(entry["split"])
|
|
148
|
+
total = entry["n_concordant"] + entry["n_alt1"] + entry["n_alt2"]
|
|
149
|
+
gcf = entry["n_concordant"] / total if total > 0 else 1.0
|
|
150
|
+
print(f"Branch: {branch_label}")
|
|
151
|
+
print(
|
|
152
|
+
f" gCF={gcf:.3f} gDF1={entry['n_alt1']}/{total} "
|
|
153
|
+
f"gDF2={entry['n_alt2']}/{total}"
|
|
154
|
+
)
|
|
155
|
+
except BrokenPipeError:
|
|
156
|
+
pass
|
|
157
|
+
|
|
158
|
+
def _output_json(self, branch_results, summary) -> None:
    """Emit the per-branch rows and the summary as a single JSON document.

    The payload has two top-level keys, ``branches`` and ``summary``, and is
    written through the shared ``print_json`` helper.
    """
    print_json({"branches": branch_results, "summary": summary})
|
|
164
|
+
|
|
165
|
+
def _plot(self, species_tree, branch_results, output_path) -> None:
    """Draw the species tree as a phylogram colored by asymmetry ratio.

    Internal branches whose result has a non-None ``asymmetry_ratio`` are
    drawn thicker and colored on a 0.5-1.0 scale; all other branches are
    gray. Every matched internal node is annotated with its gCF, and a red
    star marks nodes whose ``fdr_p`` is below 0.05 and whose
    ``favored_alt`` is set. The figure is saved to *output_path* at
    300 dpi and then closed.

    Args:
        species_tree: tree object providing ``root``, ``get_terminals()``
            and ``find_clades()``.
        branch_results: list of per-branch dicts. Keys read here are
            ``split``, ``n_concordant``, ``n_alt1``, ``n_alt2``,
            ``asymmetry_ratio``, ``fdr_p`` and ``favored_alt``.
        output_path: destination handed to ``fig.savefig``.
    """
    # Imported lazily so matplotlib is only needed when a plot is requested;
    # the Agg backend is selected before pyplot is imported.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    from matplotlib.colors import Normalize
    import numpy as np

    # Build lookup from split label -> branch result
    branch_lookup = {}
    for entry in branch_results:
        branch_lookup[",".join(entry["split"])] = entry

    parent_map = self._build_parent_map(species_tree)
    tips = list(species_tree.get_terminals())
    all_taxa_fs = frozenset(t.name for t in tips)

    # Compute node positions
    node_x = {}
    node_y = {}

    # Tips are spread evenly along y in traversal order.
    for i, tip in enumerate(tips):
        node_y[id(tip)] = i

    # x is the cumulative branch length from the root; a missing or zero
    # branch length contributes 0.
    root = species_tree.root
    for clade in species_tree.find_clades(order="preorder"):
        if clade == root:
            node_x[id(clade)] = 0.0
        elif id(clade) in parent_map:
            parent = parent_map[id(clade)]
            t = clade.branch_length if clade.branch_length else 0.0
            node_x[id(clade)] = node_x.get(id(parent), 0.0) + t

    # Internal nodes sit at the mean y of their already-placed children.
    for clade in species_tree.find_clades(order="postorder"):
        if not clade.is_terminal() and id(clade) not in node_y:
            child_ys = [
                node_y[id(c)] for c in clade.clades if id(c) in node_y
            ]
            if child_ys:
                node_y[id(clade)] = np.mean(child_ys)
            else:
                node_y[id(clade)] = 0.0

    # Map internal nodes to their branch result.
    # The key is derived with _canonical_split, the same helper
    # _count_topologies uses for the concordant bipartition, so equal-sized
    # splits are tie-broken the same way on both sides. Previously a tie
    # always took this node's own tip set, which could miss the lookup.
    # NOTE(review): all_taxa_fs here is every species-tree tip, whereas
    # _count_topologies canonicalises against the taxa shared with the gene
    # trees; if those sets differ the keys may still not match - confirm.
    node_to_result = {}
    for clade in species_tree.find_clades(order="preorder"):
        if clade.is_terminal():
            continue
        node_tips = frozenset(t.name for t in clade.get_terminals())
        split_label = sorted(self._canonical_split(node_tips, all_taxa_fs))
        key = ",".join(split_label)
        if key in branch_lookup:
            node_to_result[id(clade)] = branch_lookup[key]

    # Color setup: diverging from blue (0.5 = symmetric) to red (1.0 = asymmetric)
    cmap = plt.cm.RdYlBu_r
    norm = Normalize(vmin=0.5, vmax=1.0)

    fig, ax = plt.subplots(figsize=(10, max(4, len(tips) * 0.4)))

    # Draw branches
    for clade in species_tree.find_clades(order="preorder"):
        if clade == root:
            continue
        if id(clade) not in parent_map:
            continue
        parent = parent_map[id(clade)]
        if id(parent) not in node_x or id(clade) not in node_x:
            continue

        x0 = node_x[id(parent)]
        x1 = node_x[id(clade)]
        y0 = node_y.get(id(parent), 0)
        y1 = node_y.get(id(clade), 0)

        # Color the horizontal branch by asymmetry ratio if this is an internal node
        color = "gray"
        lw = 2
        if id(clade) in node_to_result:
            entry = node_to_result[id(clade)]
            if entry["asymmetry_ratio"] is not None:
                color = cmap(norm(entry["asymmetry_ratio"]))
                lw = 3

        # Horizontal segment for the branch, then the vertical connector
        # at the parent's x position.
        ax.plot([x0, x1], [y1, y1], color=color, lw=lw)
        ax.plot([x0, x0], [y0, y1], color="gray", lw=1.5)

    # Annotate internal nodes
    for clade in species_tree.find_clades(order="preorder"):
        if clade.is_terminal():
            continue
        if id(clade) not in node_to_result:
            continue
        entry = node_to_result[id(clade)]
        x = node_x.get(id(clade), 0)
        y = node_y.get(id(clade), 0)

        # Show gCF value (reported as 1.0 when no gene tree was counted)
        total = entry["n_concordant"] + entry["n_alt1"] + entry["n_alt2"]
        gcf = entry["n_concordant"] / total if total > 0 else 1.0
        ax.annotate(
            f"gCF={gcf:.2f}",
            (x, y),
            textcoords="offset points",
            xytext=(5, 5),
            fontsize=7,
        )

        # Mark significant branches (FDR < 0.05)
        if (entry["fdr_p"] is not None and entry["fdr_p"] < 0.05
                and entry["favored_alt"] is not None):
            ax.scatter(x, y, s=100, c="red", marker="*", zorder=5)

    # Tip labels, nudged right by 2% of the deepest node's x position
    max_x = max(node_x.values()) if node_x else 0
    offset = max_x * 0.02
    for tip in tips:
        ax.text(
            node_x[id(tip)] + offset, node_y[id(tip)],
            tip.name, va="center", fontsize=9,
        )

    # Colorbar
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=ax, pad=0.15)
    cbar.set_label("Asymmetry ratio")

    ax.set_xlabel("Branch length (subs/site)")
    ax.set_yticks([])
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.set_title("Discordance Asymmetry")
    fig.tight_layout()
    fig.savefig(output_path, dpi=300, bbox_inches="tight")
    plt.close(fig)
|
|
308
|
+
|
|
309
|
+
# ------------------------------------------------------------------
|
|
310
|
+
# Gene tree parsing
|
|
311
|
+
# ------------------------------------------------------------------
|
|
312
|
+
|
|
313
|
+
def _parse_gene_trees(self, path: str) -> list:
    """Load every gene tree listed in the file at *path*.

    Blank lines and lines starting with ``#`` are ignored. A line that
    starts with ``(`` is parsed as an inline Newick string; any other line
    is treated as a Newick file path relative to the directory of *path*.

    Raises:
        PhykitUserError: with code 2 when *path* does not exist.
    """
    listing = Path(path)
    try:
        raw_lines = listing.read_text().splitlines()
    except FileNotFoundError:
        raise PhykitUserError(
            [
                f"{path} corresponds to no such file or directory.",
                "Please check filename and pathing",
            ],
            code=2,
        )

    trees = []
    for raw in raw_lines:
        record = raw.strip()
        if not record or record.startswith("#"):
            continue
        if record.startswith("("):
            source = StringIO(record)
        else:
            source = str(listing.parent / record)
        trees.append(Phylo.read(source, "newick"))
    return trees
|
|
334
|
+
|
|
335
|
+
# ------------------------------------------------------------------
|
|
336
|
+
# Bipartition extraction and topology counting
|
|
337
|
+
# ------------------------------------------------------------------
|
|
338
|
+
|
|
339
|
+
@staticmethod
|
|
340
|
+
def _canonical_split(taxa_side, all_taxa):
|
|
341
|
+
"""Normalize a bipartition to canonical form.
|
|
342
|
+
|
|
343
|
+
Returns the smaller side as a frozenset; ties are broken
|
|
344
|
+
lexicographically.
|
|
345
|
+
"""
|
|
346
|
+
complement = all_taxa - taxa_side
|
|
347
|
+
if len(taxa_side) < len(complement):
|
|
348
|
+
return frozenset(taxa_side)
|
|
349
|
+
elif len(taxa_side) > len(complement):
|
|
350
|
+
return frozenset(complement)
|
|
351
|
+
else:
|
|
352
|
+
return min(frozenset(taxa_side), frozenset(complement),
|
|
353
|
+
key=lambda s: sorted(s))
|
|
354
|
+
|
|
355
|
+
@staticmethod
|
|
356
|
+
def _build_parent_map(tree) -> Dict:
|
|
357
|
+
"""Build a dict mapping child id -> parent clade."""
|
|
358
|
+
parent_map = {}
|
|
359
|
+
for clade in tree.find_clades(order="preorder"):
|
|
360
|
+
for child in clade.clades:
|
|
361
|
+
parent_map[id(child)] = clade
|
|
362
|
+
return parent_map
|
|
363
|
+
|
|
364
|
+
def _get_four_groups(self, tree, node, parent_map, all_taxa_fs):
|
|
365
|
+
"""Identify the four subtree groups around an internal branch.
|
|
366
|
+
|
|
367
|
+
For the branch connecting *node* to its parent:
|
|
368
|
+
C1 = tips of node's first child
|
|
369
|
+
C2 = tips of node's second child (extra children merged for polytomies)
|
|
370
|
+
S = tips of node's sibling under parent
|
|
371
|
+
D = remaining tips (everything above parent)
|
|
372
|
+
|
|
373
|
+
Returns (C1, C2, S, D) as frozensets, or None if decomposition
|
|
374
|
+
is not possible (e.g., node is root, leaf, or has <2 children).
|
|
375
|
+
"""
|
|
376
|
+
if node.is_terminal() or len(node.clades) < 2:
|
|
377
|
+
return None
|
|
378
|
+
|
|
379
|
+
C1 = frozenset(t.name for t in node.clades[0].get_terminals())
|
|
380
|
+
C2 = frozenset(t.name for t in node.clades[1].get_terminals())
|
|
381
|
+
# If node has >2 children (polytomy), merge extras into C2
|
|
382
|
+
for extra_child in node.clades[2:]:
|
|
383
|
+
C2 = C2 | frozenset(t.name for t in extra_child.get_terminals())
|
|
384
|
+
|
|
385
|
+
parent = parent_map.get(id(node))
|
|
386
|
+
if parent is None:
|
|
387
|
+
# node is root — no branch above it
|
|
388
|
+
return None
|
|
389
|
+
|
|
390
|
+
# Get siblings of node under parent
|
|
391
|
+
siblings = [c for c in parent.clades if id(c) != id(node)]
|
|
392
|
+
if not siblings:
|
|
393
|
+
return None
|
|
394
|
+
|
|
395
|
+
S = frozenset(t.name for t in siblings[0].get_terminals())
|
|
396
|
+
# D = everything else (other siblings + above parent)
|
|
397
|
+
D = all_taxa_fs - C1 - C2 - S
|
|
398
|
+
|
|
399
|
+
return C1, C2, S, D
|
|
400
|
+
|
|
401
|
+
def _count_topologies(self, species_tree, gene_trees) -> Dict:
|
|
402
|
+
"""Count concordant and two NNI-alternative topologies for each
|
|
403
|
+
internal branch of the species tree across gene trees.
|
|
404
|
+
|
|
405
|
+
Returns a dict keyed by branch label (comma-joined sorted taxa
|
|
406
|
+
in the smaller partition side) with:
|
|
407
|
+
split: list of sorted taxon names
|
|
408
|
+
n_concordant: int
|
|
409
|
+
n_alt1: int
|
|
410
|
+
n_alt2: int
|
|
411
|
+
"""
|
|
412
|
+
all_taxa = sorted(
|
|
413
|
+
set(t.name for t in species_tree.get_terminals())
|
|
414
|
+
& set().union(*(
|
|
415
|
+
set(t.name for t in gt.get_terminals()) for gt in gene_trees
|
|
416
|
+
))
|
|
417
|
+
)
|
|
418
|
+
all_taxa_fs = frozenset(all_taxa)
|
|
419
|
+
parent_map = self._build_parent_map(species_tree)
|
|
420
|
+
|
|
421
|
+
# Extract bipartitions from all gene trees (topology only, no lengths).
|
|
422
|
+
# Restrict bipartitions to shared taxa without mutating gene tree objects.
|
|
423
|
+
gene_tree_splits = []
|
|
424
|
+
for gt in gene_trees:
|
|
425
|
+
splits = set()
|
|
426
|
+
for clade in gt.get_nonterminals():
|
|
427
|
+
tips = frozenset(
|
|
428
|
+
t.name for t in clade.get_terminals()
|
|
429
|
+
if t.name in all_taxa_fs
|
|
430
|
+
)
|
|
431
|
+
if len(tips) <= 1 or tips == all_taxa_fs:
|
|
432
|
+
continue
|
|
433
|
+
splits.add(self._canonical_split(tips, all_taxa_fs))
|
|
434
|
+
gene_tree_splits.append(splits)
|
|
435
|
+
|
|
436
|
+
result = {}
|
|
437
|
+
for clade in species_tree.find_clades(order="preorder"):
|
|
438
|
+
if clade.is_terminal():
|
|
439
|
+
continue
|
|
440
|
+
groups = self._get_four_groups(
|
|
441
|
+
species_tree, clade, parent_map, all_taxa_fs
|
|
442
|
+
)
|
|
443
|
+
if groups is None:
|
|
444
|
+
continue
|
|
445
|
+
C1, C2, S, D = groups
|
|
446
|
+
|
|
447
|
+
concordant_bp = self._canonical_split(C1 | C2, all_taxa_fs)
|
|
448
|
+
nni_alt1_bp = self._canonical_split(S | C2, all_taxa_fs)
|
|
449
|
+
nni_alt2_bp = self._canonical_split(C1 | S, all_taxa_fs)
|
|
450
|
+
|
|
451
|
+
n_concordant = sum(1 for splits in gene_tree_splits if concordant_bp in splits)
|
|
452
|
+
n_alt1 = sum(1 for splits in gene_tree_splits if nni_alt1_bp in splits)
|
|
453
|
+
n_alt2 = sum(1 for splits in gene_tree_splits if nni_alt2_bp in splits)
|
|
454
|
+
|
|
455
|
+
node_tips = frozenset(t.name for t in clade.get_terminals())
|
|
456
|
+
split_label = (
|
|
457
|
+
sorted(node_tips)
|
|
458
|
+
if len(node_tips) <= len(all_taxa_fs) - len(node_tips)
|
|
459
|
+
else sorted(all_taxa_fs - node_tips)
|
|
460
|
+
)
|
|
461
|
+
branch_key = ",".join(split_label)
|
|
462
|
+
result[branch_key] = dict(
|
|
463
|
+
split=split_label,
|
|
464
|
+
n_concordant=n_concordant,
|
|
465
|
+
n_alt1=n_alt1,
|
|
466
|
+
n_alt2=n_alt2,
|
|
467
|
+
)
|
|
468
|
+
return result
|
|
469
|
+
|
|
470
|
+
# ------------------------------------------------------------------
|
|
471
|
+
# Statistical testing
|
|
472
|
+
# ------------------------------------------------------------------
|
|
473
|
+
|
|
474
|
+
def _test_asymmetry(self, n_alt1: int, n_alt2: int) -> Dict:
|
|
475
|
+
"""Run a two-sided binomial test on the two NNI alternatives.
|
|
476
|
+
|
|
477
|
+
Returns a dict with asymmetry_ratio, p_value, and favored_alt.
|
|
478
|
+
"""
|
|
479
|
+
total = n_alt1 + n_alt2
|
|
480
|
+
if total == 0:
|
|
481
|
+
return dict(
|
|
482
|
+
asymmetry_ratio=None,
|
|
483
|
+
p_value=None,
|
|
484
|
+
favored_alt=None,
|
|
485
|
+
)
|
|
486
|
+
|
|
487
|
+
asymmetry_ratio = max(n_alt1, n_alt2) / total
|
|
488
|
+
result = binomtest(n_alt1, total, p=0.5, alternative='two-sided')
|
|
489
|
+
p_value = result.pvalue
|
|
490
|
+
|
|
491
|
+
if n_alt1 > n_alt2:
|
|
492
|
+
favored_alt = "alt1"
|
|
493
|
+
elif n_alt2 > n_alt1:
|
|
494
|
+
favored_alt = "alt2"
|
|
495
|
+
else:
|
|
496
|
+
favored_alt = None
|
|
497
|
+
|
|
498
|
+
return dict(
|
|
499
|
+
asymmetry_ratio=asymmetry_ratio,
|
|
500
|
+
p_value=p_value,
|
|
501
|
+
favored_alt=favored_alt,
|
|
502
|
+
)
|
|
503
|
+
|
|
504
|
+
@staticmethod
|
|
505
|
+
def _fdr(p_values: List[float]) -> List[float]:
|
|
506
|
+
"""Benjamini-Hochberg FDR correction."""
|
|
507
|
+
n = len(p_values)
|
|
508
|
+
if n == 0:
|
|
509
|
+
return []
|
|
510
|
+
indexed = sorted(enumerate(p_values), key=lambda x: x[1])
|
|
511
|
+
corrected = [0.0] * n
|
|
512
|
+
prev = 1.0
|
|
513
|
+
for rank_minus_1 in range(n - 1, -1, -1):
|
|
514
|
+
orig_idx, p = indexed[rank_minus_1]
|
|
515
|
+
rank = rank_minus_1 + 1
|
|
516
|
+
adjusted = min(p * n / rank, prev)
|
|
517
|
+
adjusted = min(adjusted, 1.0)
|
|
518
|
+
corrected[orig_idx] = adjusted
|
|
519
|
+
prev = adjusted
|
|
520
|
+
return corrected
|