phykit 2.1.62__tar.gz → 2.1.64__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phykit-2.1.62 → phykit-2.1.64}/PKG-INFO +1 -1
- {phykit-2.1.62 → phykit-2.1.64}/phykit/cli_registry.py +5 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/phykit.py +203 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/service_factories.py +2 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/__init__.py +1 -0
- phykit-2.1.64/phykit/services/alignment/dstatistic.py +224 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/__init__.py +1 -0
- phykit-2.1.64/phykit/services/tree/trait_rate_map.py +658 -0
- phykit-2.1.64/phykit/version.py +1 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/PKG-INFO +1 -1
- {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/SOURCES.txt +2 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/entry_points.txt +6 -0
- {phykit-2.1.62 → phykit-2.1.64}/setup.py +6 -0
- phykit-2.1.62/phykit/version.py +0 -1
- {phykit-2.1.62 → phykit-2.1.64}/LICENSE.md +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/README.md +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/__init__.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/__main__.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/errors.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/__init__.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/boolean_argument_parsing.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/caching.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/circular_layout.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/color_annotations.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/discrete_models.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/files.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/json_output.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/parallel.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/parsimony_utils.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/plot_config.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/quartet_utils.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/stats_summary.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/streaming.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/__init__.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_entropy.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_length.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_recoding.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_subsample.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/base.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/column_score.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/composition_per_taxon.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/dna_threader.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/faidx.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/gc_content.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/identity_matrix.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/mask_alignment.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/pairwise_identity.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/plot_alignment_qc.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/rcv.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/rcvt.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/rename_fasta_entries.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/variable_sites.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/base.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/ancestral_reconstruction.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/base.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/bipartition_support_stats.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/branch_length_multiplier.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/character_map.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/collapse_branches.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/concordance_asr.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/consensus_network.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/consensus_tree.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/cont_map.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/cophylo.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/density_map.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/discordance_asymmetry.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/dvmc.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/evo_tempo_map.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/evolutionary_rate.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/fit_continuous.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/fit_discrete.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/hidden_paralogy_check.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/independent_contrasts.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/internal_branch_stats.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/internode_labeler.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/kf_distance.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/lb_score.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/ltt.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/monophyly_check.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/network_signal.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/ou_shift_detection.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/ouwie.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/parsimony_score.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/patristic_distances.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phenogram.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylo_heatmap.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylogenetic_glm.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylogenetic_ordination.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylogenetic_regression.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylogenetic_signal.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylomorphospace.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/polytomy_test.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/print_tree.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/prune_tree.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/quartet_network.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/quartet_pie.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/rate_heterogeneity.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/relative_rate_test.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/rename_tree_tips.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/rf_distance.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/root_tree.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/saturation.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/spectral_discordance.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/spurious_sequence.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/stochastic_character_map.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/terminal_branch_stats.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/threshold_model.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/tip_labels.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/tip_to_tip_distance.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/total_tree_length.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/trait_correlation.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/tree_space.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/treeness.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/treeness_over_rcv.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/vcv_utils.py +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/dependency_links.txt +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/requires.txt +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/top_level.txt +0 -0
- {phykit-2.1.62 → phykit-2.1.64}/setup.cfg +0 -0
|
@@ -21,6 +21,8 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
|
|
|
21
21
|
"recode": "alignment_recoding",
|
|
22
22
|
"outlier_taxa": "alignment_outlier_taxa",
|
|
23
23
|
"aot": "alignment_outlier_taxa",
|
|
24
|
+
"dstat": "dstatistic",
|
|
25
|
+
"abba_baba": "dstatistic",
|
|
24
26
|
"cs": "column_score",
|
|
25
27
|
"comp_bias_per_site": "compositional_bias_per_site",
|
|
26
28
|
"cbps": "compositional_bias_per_site",
|
|
@@ -182,6 +184,9 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
|
|
|
182
184
|
"trait_correlation": "trait_correlation",
|
|
183
185
|
"trait_corr": "trait_correlation",
|
|
184
186
|
"phylo_corr": "trait_correlation",
|
|
187
|
+
"trait_rate_map": "trait_rate_map",
|
|
188
|
+
"rate_map": "trait_rate_map",
|
|
189
|
+
"branch_rates": "trait_rate_map",
|
|
185
190
|
"tree_space": "tree_space",
|
|
186
191
|
"tspace": "tree_space",
|
|
187
192
|
"tree_landscape": "tree_space",
|
|
@@ -119,6 +119,9 @@ class Phykit:
|
|
|
119
119
|
- recode alignments using reduced character schemes
|
|
120
120
|
alignment_subsample (alias: aln_subsample; subsample)
|
|
121
121
|
- randomly subsample genes, partitions, or sites
|
|
122
|
+
dstatistic (alias: dstat; abba_baba)
|
|
123
|
+
- Patterson's D-statistic (ABBA-BABA test) for
|
|
124
|
+
detecting introgression/gene flow
|
|
122
125
|
alignment_outlier_taxa (alias: outlier_taxa; aot)
|
|
123
126
|
- identify potential outlier taxa and why they were flagged
|
|
124
127
|
column_score (alias: cs)
|
|
@@ -234,6 +237,8 @@ class Phykit:
|
|
|
234
237
|
- stochastic character mapping (SIMMAP) of discrete traits
|
|
235
238
|
cont_map (alias: contmap; cmap)
|
|
236
239
|
- continuous trait map (contMap) visualization on a phylogeny
|
|
240
|
+
trait_rate_map (alias: rate_map; branch_rates)
|
|
241
|
+
- per-branch evolutionary rate map for a continuous trait
|
|
237
242
|
density_map (alias: densitymap; dmap)
|
|
238
243
|
- density map of posterior state probabilities on a phylogeny
|
|
239
244
|
cophylo (alias: tanglegram; tangle)
|
|
@@ -1943,6 +1948,73 @@ class Phykit:
|
|
|
1943
1948
|
_add_json_argument(parser)
|
|
1944
1949
|
_run_service(parser, argv, AlignmentSubsample)
|
|
1945
1950
|
|
|
1951
|
+
@staticmethod
|
|
1952
|
+
def dstatistic(argv):
# CLI handler for the `dstatistic` command (aliases: dstat, abba_baba).
# Builds an argument parser whose description is the dedented help text
# below, registers the options, and passes the parser, argv and the
# Dstatistic service to _run_service.
|
|
1953
|
+
parser = _new_parser(
|
|
1954
|
+
description=textwrap.dedent(
|
|
1955
|
+
f"""\
|
|
1956
|
+
{help_header}
|
|
1957
|
+
|
|
1958
|
+
Compute Patterson's D-statistic (ABBA-BABA test) for
|
|
1959
|
+
detecting introgression or gene flow from a four-taxon
|
|
1960
|
+
alignment.
|
|
1961
|
+
|
|
1962
|
+
The assumed topology is (((P1, P2), P3), Outgroup).
|
|
1963
|
+
Under incomplete lineage sorting (ILS) alone, ABBA and
|
|
1964
|
+
BABA site patterns should be equally frequent. A
|
|
1965
|
+
significant excess of either pattern indicates gene
|
|
1966
|
+
flow.
|
|
1967
|
+
|
|
1968
|
+
D > 0 suggests gene flow between P2 and P3.
|
|
1969
|
+
D < 0 suggests gene flow between P1 and P3.
|
|
1970
|
+
D = 0 is consistent with ILS alone.
|
|
1971
|
+
|
|
1972
|
+
Significance is assessed via block jackknife (Green
|
|
1973
|
+
et al. 2010; Patterson et al. 2012).
|
|
1974
|
+
|
|
1975
|
+
Aliases:
|
|
1976
|
+
dstatistic, dstat, abba_baba
|
|
1977
|
+
Command line interfaces:
|
|
1978
|
+
pk_dstatistic, pk_dstat, pk_abba_baba
|
|
1979
|
+
|
|
1980
|
+
Usage:
|
|
1981
|
+
phykit dstatistic -a <alignment> --p1 <taxon> --p2 <taxon>
|
|
1982
|
+
--p3 <taxon> --outgroup <taxon>
|
|
1983
|
+
[--block-size 100] [--json]
|
|
1984
|
+
|
|
1985
|
+
Options
|
|
1986
|
+
=====================================================
|
|
1987
|
+
-a/--alignment FASTA alignment file
|
|
1988
|
+
|
|
1989
|
+
--p1 taxon name for P1
|
|
1990
|
+
(sister to P2)
|
|
1991
|
+
|
|
1992
|
+
--p2 taxon name for P2
|
|
1993
|
+
(sister to P1; potential
|
|
1994
|
+
recipient of gene flow)
|
|
1995
|
+
|
|
1996
|
+
--p3 taxon name for P3
|
|
1997
|
+
(donor lineage)
|
|
1998
|
+
|
|
1999
|
+
--outgroup outgroup taxon name
|
|
2000
|
+
|
|
2001
|
+
--block-size block size for jackknife
|
|
2002
|
+
estimation of standard
|
|
2003
|
+
error (default: 100)
|
|
2004
|
+
|
|
2005
|
+
--json output results as JSON
|
|
2006
|
+
"""
|
|
2007
|
+
),
|
|
2008
|
+
)
|
|
2009
|
+
# Every option is registered with help=SUPPRESS and an empty metavar; the
# option descriptions are written out by hand in the help text above.
# The alignment path and the four taxon names are all required strings.
parser.add_argument("-a", "--alignment", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2010
|
+
parser.add_argument("--p1", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2011
|
+
parser.add_argument("--p2", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2012
|
+
parser.add_argument("--p3", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2013
|
+
parser.add_argument("--outgroup", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2014
|
+
# NOTE(review): --block-size is parsed as a plain int with no lower bound
# here — confirm the service rejects zero and negative values.
parser.add_argument("--block-size", type=int, default=100, help=SUPPRESS, metavar="")
|
|
2015
|
+
# Presumably registers the --json flag documented above; the helper is
# defined outside this view — verify.
_add_json_argument(parser)
|
|
2016
|
+
# Hands the parser, the raw argv and the Dstatistic service factory to the
# shared runner.
_run_service(parser, argv, Dstatistic)
|
|
2017
|
+
|
|
1946
2018
|
## Tree functions
|
|
1947
2019
|
@staticmethod
|
|
1948
2020
|
def parsimony_score(argv):
|
|
@@ -7353,6 +7425,129 @@ class Phykit:
|
|
|
7353
7425
|
_add_json_argument(parser)
|
|
7354
7426
|
_run_service(parser, argv, SpectralDiscordance)
|
|
7355
7427
|
|
|
7428
|
+
@staticmethod
|
|
7429
|
+
def trait_rate_map(argv):
# CLI handler for the `trait_rate_map` command (aliases: rate_map,
# branch_rates). Builds an argument parser whose description is the
# dedented help text below, registers the options, and passes the parser,
# argv and the TraitRateMap service to _run_service.
|
|
7430
|
+
parser = _new_parser(
|
|
7431
|
+
description=textwrap.dedent(
|
|
7432
|
+
f"""\
|
|
7433
|
+
{help_header}
|
|
7434
|
+
|
|
7435
|
+
Trait Rate Map — estimate per-branch evolutionary
|
|
7436
|
+
rates for a continuous trait and display them as a
|
|
7437
|
+
branch-colored phylogram.
|
|
7438
|
+
|
|
7439
|
+
Ancestral states are reconstructed via Felsenstein's
|
|
7440
|
+
weighted-average method (inverse-branch-length
|
|
7441
|
+
weighting, postorder traversal). Per-branch rate is
|
|
7442
|
+
the squared standardized contrast:
|
|
7443
|
+
rate = (child_val - parent_val)^2 / branch_length
|
|
7444
|
+
|
|
7445
|
+
Input is a phylogenetic tree and either:
|
|
7446
|
+
(a) a two-column TSV (taxon<tab>value, no header), or
|
|
7447
|
+
(b) a multi-column TSV with header (use --trait to
|
|
7448
|
+
select a column)
|
|
7449
|
+
|
|
7450
|
+
Aliases:
|
|
7451
|
+
trait_rate_map, rate_map, branch_rates
|
|
7452
|
+
Command line interfaces:
|
|
7453
|
+
pk_trait_rate_map, pk_rate_map, pk_branch_rates
|
|
7454
|
+
|
|
7455
|
+
Usage:
|
|
7456
|
+
phykit trait_rate_map -t <tree> -d <trait_data> -o <output>
|
|
7457
|
+
[--trait <column>] [--json]
|
|
7458
|
+
[--fig-width <float>] [--fig-height <float>]
|
|
7459
|
+
[--dpi <int>] [--no-title] [--title <str>]
|
|
7460
|
+
[--legend-position <str>]
|
|
7461
|
+
[--ylabel-fontsize <float>] [--xlabel-fontsize <float>]
|
|
7462
|
+
[--title-fontsize <float>] [--axis-fontsize <float>]
|
|
7463
|
+
[--colors <str>] [--ladderize] [--cladogram] [--circular] [--color-file <file>]
|
|
7464
|
+
|
|
7465
|
+
Options
|
|
7466
|
+
=====================================================
|
|
7467
|
+
-t/--tree a tree file
|
|
7468
|
+
|
|
7469
|
+
-d/--trait_data tab-delimited trait file
|
|
7470
|
+
(two-column: taxon<tab>value,
|
|
7471
|
+
no header; or multi-column
|
|
7472
|
+
with header when --trait is
|
|
7473
|
+
used)
|
|
7474
|
+
|
|
7475
|
+
-o/--output output plot file path
|
|
7476
|
+
(required)
|
|
7477
|
+
|
|
7478
|
+
--trait column name to use from a
|
|
7479
|
+
multi-column trait file
|
|
7480
|
+
(if omitted, two-column
|
|
7481
|
+
format is expected)
|
|
7482
|
+
|
|
7483
|
+
--fig-width figure width in inches
|
|
7484
|
+
(auto-scaled if omitted)
|
|
7485
|
+
|
|
7486
|
+
--fig-height figure height in inches
|
|
7487
|
+
(auto-scaled if omitted)
|
|
7488
|
+
|
|
7489
|
+
--dpi resolution in DPI
|
|
7490
|
+
(default: 300)
|
|
7491
|
+
|
|
7492
|
+
--no-title hide the plot title
|
|
7493
|
+
|
|
7494
|
+
--title custom title text
|
|
7495
|
+
|
|
7496
|
+
--legend-position legend location (e.g.,
|
|
7497
|
+
"upper right", "none")
|
|
7498
|
+
|
|
7499
|
+
--ylabel-fontsize font size for y-axis labels;
|
|
7500
|
+
0 to hide
|
|
7501
|
+
|
|
7502
|
+
--xlabel-fontsize font size for x-axis labels;
|
|
7503
|
+
0 to hide
|
|
7504
|
+
|
|
7505
|
+
--title-fontsize font size for the title
|
|
7506
|
+
|
|
7507
|
+
--axis-fontsize font size for axis labels
|
|
7508
|
+
|
|
7509
|
+
--colors comma-separated colors
|
|
7510
|
+
(hex or named, e.g.,
|
|
7511
|
+
"#ff0000,blue,#00ff00")
|
|
7512
|
+
|
|
7513
|
+
--ladderize ladderize (sort) the tree
|
|
7514
|
+
before plotting
|
|
7515
|
+
|
|
7516
|
+
--cladogram draw cladogram (equal branch
|
|
7517
|
+
lengths, tips aligned)
|
|
7518
|
+
instead of phylogram
|
|
7519
|
+
|
|
7520
|
+
--circular draw circular (radial/fan)
|
|
7521
|
+
phylogram instead of
|
|
7522
|
+
rectangular
|
|
7523
|
+
|
|
7524
|
+
--color-file color annotation file for
|
|
7525
|
+
tip labels, clade ranges,
|
|
7526
|
+
and branch colors (iTOL-
|
|
7527
|
+
inspired TSV format)
|
|
7528
|
+
|
|
7529
|
+
--json optional argument to also
|
|
7530
|
+
output results as JSON
|
|
7531
|
+
"""
|
|
7532
|
+
),
|
|
7533
|
+
)
|
|
7534
|
+
# Every option is registered with help=SUPPRESS and an empty metavar; the
# option descriptions are written out by hand in the help text above.
# Tree, trait data and output path are all required strings.
parser.add_argument(
|
|
7535
|
+
"-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
|
|
7536
|
+
)
|
|
7537
|
+
parser.add_argument(
|
|
7538
|
+
"-d", "--trait_data", type=str, required=True, help=SUPPRESS, metavar=""
|
|
7539
|
+
)
|
|
7540
|
+
parser.add_argument(
|
|
7541
|
+
"-o", "--output", type=str, required=True, help=SUPPRESS, metavar=""
|
|
7542
|
+
)
|
|
7543
|
+
# --trait is optional and defaults to None.
parser.add_argument(
|
|
7544
|
+
"--trait", type=str, required=False, default=None,
|
|
7545
|
+
help=SUPPRESS, metavar=""
|
|
7546
|
+
)
|
|
7547
|
+
# Presumably registers the shared plot options listed in the help text
# (--fig-width, --dpi, --colors, ...); the helper is defined outside this
# view — verify.
add_plot_arguments(parser)
|
|
7548
|
+
# Presumably registers the --json flag documented above — verify.
_add_json_argument(parser)
|
|
7549
|
+
# Hands the parser, the raw argv and the TraitRateMap service factory to
# the shared runner.
_run_service(parser, argv, TraitRateMap)
|
|
7550
|
+
|
|
7356
7551
|
@staticmethod
|
|
7357
7552
|
def tree_space(argv):
|
|
7358
7553
|
parser = _new_parser(
|
|
@@ -7756,6 +7951,10 @@ def alignment_subsample(argv=None):
|
|
|
7756
7951
|
Phykit.alignment_subsample(sys.argv[1:])
|
|
7757
7952
|
|
|
7758
7953
|
|
|
7954
|
+
def dstatistic(argv=None):
    """Module-level wrapper that runs ``Phykit.dstatistic``.

    Args:
        argv: optional list of command-line arguments. When omitted (the
            default), the arguments are taken from ``sys.argv[1:]``.
            Previously this parameter was accepted but ignored.
    """
    Phykit.dstatistic(sys.argv[1:] if argv is None else argv)
|
|
7956
|
+
|
|
7957
|
+
|
|
7759
7958
|
# Tree-based functions
|
|
7760
7959
|
def parsimony_score(argv=None):
    """Module-level wrapper that runs ``Phykit.parsimony_score``.

    Args:
        argv: optional list of command-line arguments. When omitted (the
            default), the arguments are taken from ``sys.argv[1:]``.
            Previously this parameter was accepted but ignored.
    """
    Phykit.parsimony_score(sys.argv[1:] if argv is None else argv)
|
|
@@ -8029,3 +8228,7 @@ def spectral_discordance(argv=None):
|
|
|
8029
8228
|
|
|
8030
8229
|
def tree_space(argv=None):
    """Module-level wrapper that runs ``Phykit.tree_space``.

    Args:
        argv: optional list of command-line arguments. When omitted (the
            default), the arguments are taken from ``sys.argv[1:]``.
            Previously this parameter was accepted but ignored.
    """
    Phykit.tree_space(sys.argv[1:] if argv is None else argv)
|
|
8231
|
+
|
|
8232
|
+
|
|
8233
|
+
def trait_rate_map(argv=None):
    """Module-level wrapper that runs ``Phykit.trait_rate_map``.

    Args:
        argv: optional list of command-line arguments. When omitted (the
            default), the arguments are taken from ``sys.argv[1:]``.
            Previously this parameter was accepted but ignored.
    """
    Phykit.trait_rate_map(sys.argv[1:] if argv is None else argv)
|
|
@@ -29,6 +29,7 @@ CompositionalBiasPerSite = _LazyServiceFactory("phykit.services.alignment.compos
|
|
|
29
29
|
CompositionPerTaxon = _LazyServiceFactory("phykit.services.alignment.composition_per_taxon", "CompositionPerTaxon")
|
|
30
30
|
CreateConcatenationMatrix = _LazyServiceFactory("phykit.services.alignment.create_concatenation_matrix", "CreateConcatenationMatrix")
|
|
31
31
|
DNAThreader = _LazyServiceFactory("phykit.services.alignment.dna_threader", "DNAThreader")
|
|
32
|
+
Dstatistic = _LazyServiceFactory("phykit.services.alignment.dstatistic", "Dstatistic")
|
|
32
33
|
EvolutionaryRatePerSite = _LazyServiceFactory("phykit.services.alignment.evolutionary_rate_per_site", "EvolutionaryRatePerSite")
|
|
33
34
|
Faidx = _LazyServiceFactory("phykit.services.alignment.faidx", "Faidx")
|
|
34
35
|
GCContent = _LazyServiceFactory("phykit.services.alignment.gc_content", "GCContent")
|
|
@@ -109,6 +110,7 @@ EvoTempoMap = _LazyServiceFactory("phykit.services.tree.evo_tempo_map", "EvoTemp
|
|
|
109
110
|
DiscordanceAsymmetry = _LazyServiceFactory("phykit.services.tree.discordance_asymmetry", "DiscordanceAsymmetry")
|
|
110
111
|
SpectralDiscordance = _LazyServiceFactory("phykit.services.tree.spectral_discordance", "SpectralDiscordance")
|
|
111
112
|
TraitCorrelation = _LazyServiceFactory("phykit.services.tree.trait_correlation", "TraitCorrelation")
|
|
113
|
+
TraitRateMap = _LazyServiceFactory("phykit.services.tree.trait_rate_map", "TraitRateMap")
|
|
112
114
|
TreeSpace = _LazyServiceFactory("phykit.services.tree.tree_space", "TreeSpace")
|
|
113
115
|
|
|
114
116
|
SERVICE_FACTORIES: Dict[str, _LazyServiceFactory] = {
|
|
@@ -12,6 +12,7 @@ _EXPORTS = {
|
|
|
12
12
|
"CompositionPerTaxon": "composition_per_taxon",
|
|
13
13
|
"CreateConcatenationMatrix": "create_concatenation_matrix",
|
|
14
14
|
"DNAThreader": "dna_threader",
|
|
15
|
+
"Dstatistic": "dstatistic",
|
|
15
16
|
"EvolutionaryRatePerSite": "evolutionary_rate_per_site",
|
|
16
17
|
"Faidx": "faidx",
|
|
17
18
|
"GCContent": "gc_content",
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""Patterson's D-statistic (ABBA-BABA test) for detecting introgression."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from Bio import SeqIO
|
|
7
|
+
|
|
8
|
+
from .base import Alignment
|
|
9
|
+
from ...helpers.json_output import print_json
|
|
10
|
+
from ...errors import PhykitUserError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Dstatistic(Alignment):
    """Patterson's D-statistic (ABBA-BABA test) for a four-taxon alignment.

    The assumed topology is (((P1, P2), P3), Outgroup). Every biallelic
    column free of missing data is classified as ABBA (P2 and P3 share the
    allele that differs from the outgroup) or BABA (P1 and P3 share it), and
    D = (ABBA - BABA) / (ABBA + BABA). A delete-one block jackknife over
    consecutive blocks of ``block_size`` columns supplies the standard
    error, Z-score and two-sided p-value.
    """

    # Characters treated as missing data. Sequences are upper-cased when
    # they are read, so lower-case variants never reach the comparison.
    # NOTE(review): IUPAC ambiguity codes other than N (R, Y, ...) are
    # counted as ordinary alleles — confirm this is intended for DNA input.
    _SKIP_CHARS = frozenset("-N?X")

    # Per-site pattern codes shared by the counting helpers.
    _ABBA = 1
    _BABA = -1
    _OTHER = 0

    def __init__(self, args) -> None:
        """Store the parsed options and initialise the alignment base class."""
        parsed = self.process_args(args)
        super().__init__(alignment_file_path=parsed["alignment_path"])
        self.p1 = parsed["p1"]
        self.p2 = parsed["p2"]
        self.p3 = parsed["p3"]
        self.outgroup = parsed["outgroup"]
        self.block_size = parsed["block_size"]
        self.json_output = parsed["json_output"]

    def process_args(self, args) -> Dict[str, object]:
        """Map the argparse namespace onto the keys used by ``__init__``.

        ``block_size`` falls back to 100 and ``json`` to False when the
        namespace does not carry those attributes.
        """
        return dict(
            alignment_path=args.alignment,
            p1=args.p1,
            p2=args.p2,
            p3=args.p3,
            outgroup=args.outgroup,
            block_size=getattr(args, "block_size", 100),
            json_output=getattr(args, "json", False),
        )

    def run(self):
        """Compute the D-statistic and print a text or JSON report.

        Raises:
            PhykitUserError: if the block size is not a positive integer,
                if a requested taxon is absent from the alignment, or if
                the four sequences differ in length.
        """
        block_size = self.block_size
        # A zero block size used to crash with ZeroDivisionError and a
        # negative one silently disabled the jackknife; reject both up front.
        if not isinstance(block_size, int) or block_size < 1:
            raise PhykitUserError(
                [f"Block size must be a positive integer (got {block_size!r})."],
                code=2,
            )

        focal = self._read_focal_sequences()
        aln_length = len(focal[0])

        # Classify every alignment column exactly once; both the overall
        # counts and the per-block counts are derived from this list.
        patterns = [self._site_pattern(*column) for column in zip(*focal)]
        abba_count = patterns.count(self._ABBA)
        baba_count = patterns.count(self._BABA)
        informative_sites = abba_count + baba_count

        if informative_sites == 0:
            d_stat = 0.0
        else:
            d_stat = (abba_count - baba_count) / informative_sites

        # Only complete blocks take part in the jackknife; trailing columns
        # beyond n_blocks * block_size contribute to D but not to the SE.
        n_blocks = aln_length // block_size
        se = None
        z_score = None
        p_value = None

        if n_blocks >= 2:
            se = self._jackknife_se(patterns, n_blocks, block_size)
            if se > 0:
                z_score = d_stat / se
                from scipy.stats import norm
                p_value = float(2.0 * norm.sf(abs(z_score)))
            elif d_stat != 0:
                # Zero variance with a non-zero D: the Z-score is infinite
                # and carries the sign of D (it was always +inf before).
                z_score = float("inf") if d_stat > 0 else float("-inf")
                p_value = 0.0
            else:
                z_score = 0.0
                p_value = 1.0

        if self.json_output:
            # NOTE(review): an infinite z_score is passed through unrounded;
            # confirm print_json serialises non-finite floats acceptably.
            z_is_finite = z_score not in (None, float("inf"), float("-inf"))
            payload = {
                "p1": self.p1,
                "p2": self.p2,
                "p3": self.p3,
                "outgroup": self.outgroup,
                "alignment_length": aln_length,
                "informative_sites": informative_sites,
                "abba_count": abba_count,
                "baba_count": baba_count,
                "d_statistic": round(d_stat, 4),
                "block_size": block_size,
                "n_blocks": n_blocks,
                "standard_error": round(se, 4) if se is not None else None,
                "z_score": round(z_score, 2) if z_is_finite else z_score,
                "p_value": round(p_value, 6) if p_value is not None else None,
            }
            print_json(payload, sort_keys=False)
            return

        try:
            print("Patterson's D-statistic (ABBA-BABA Test)")
            print("=========================================")
            print(f"Topology: ((({self.p1}, {self.p2}), {self.p3}), {self.outgroup})")
            print(f"P1: {self.p1}")
            print(f"P2: {self.p2}")
            print(f"P3: {self.p3}")
            print(f"Outgroup: {self.outgroup}")
            print()
            print(f"Alignment length: {aln_length}")
            print(f"Informative sites: {informative_sites}")
            print(f"ABBA sites: {abba_count}")
            print(f"BABA sites: {baba_count}")
            print(f"D-statistic: {d_stat:.4f}")

            if se is not None:
                print(f"Block jackknife (block size: {block_size}):")
                print(f" Standard error: {se:.4f}")
                # The float format renders infinities as "inf" / "-inf".
                print(f" Z-score: {z_score:.2f}")
                print(f" p-value: {p_value:.6f}")
                print()
                print(f"Interpretation: {self._interpret(d_stat, p_value)}")
            else:
                print()
                print("Not enough blocks for jackknife significance test.")
        except BrokenPipeError:
            # Output was piped into a reader that closed early (e.g. `head`).
            pass

    def _read_focal_sequences(self):
        """Return the upper-cased P1, P2, P3 and outgroup sequences.

        Raises:
            PhykitUserError: if any of the four taxa is missing from the
                FASTA file or the four sequences are not the same length.
        """
        sequences = {}
        for record in SeqIO.parse(self.alignment_file_path, "fasta"):
            sequences[record.id] = str(record.seq).upper()

        required = {
            "p1": self.p1,
            "p2": self.p2,
            "p3": self.p3,
            "outgroup": self.outgroup,
        }
        for label, taxon in required.items():
            if taxon not in sequences:
                raise PhykitUserError(
                    [f"Taxon '{taxon}' ({label}) not found in alignment. "
                     f"Available taxa: {', '.join(sorted(sequences.keys()))}"],
                    code=2,
                )

        focal = tuple(sequences[taxon] for taxon in required.values())
        if len({len(seq) for seq in focal}) != 1:
            raise PhykitUserError(
                ["Sequences have different lengths. All sequences must be aligned."],
                code=2,
            )
        return focal

    @classmethod
    def _site_pattern(cls, p1, p2, p3, o) -> int:
        """Classify one alignment column as ABBA, BABA or uninformative."""
        column = (p1, p2, p3, o)
        # Columns with gaps or missing data are never informative.
        if any(char in cls._SKIP_CHARS for char in column):
            return cls._OTHER
        # Only biallelic columns can form an ABBA or BABA pattern.
        if len(set(column)) != 2:
            return cls._OTHER
        # ABBA: P1 carries the outgroup allele, P2 and P3 share the other.
        if p1 == o and p2 == p3 and p2 != o:
            return cls._ABBA
        # BABA: P2 carries the outgroup allele, P1 and P3 share the other.
        if p2 == o and p1 == p3 and p1 != o:
            return cls._BABA
        return cls._OTHER

    @classmethod
    def _jackknife_se(cls, patterns, n_blocks: int, block_size: int) -> float:
        """Return the delete-one block-jackknife standard error of D.

        ``patterns`` holds one pattern code per alignment column; only the
        first ``n_blocks * block_size`` columns are used.
        """
        codes = np.asarray(patterns[: n_blocks * block_size], dtype=int)
        codes = codes.reshape(n_blocks, block_size)
        block_abba = (codes == cls._ABBA).sum(axis=1).astype(float)
        block_baba = (codes == cls._BABA).sum(axis=1).astype(float)

        # Leave-one-block-out counts and the D estimate for each of them.
        loo_abba = block_abba.sum() - block_abba
        loo_baba = block_baba.sum() - block_baba
        denom = loo_abba + loo_baba
        # Replicates with no informative sites left are assigned D = 0.
        jackknife_d = np.divide(
            loo_abba - loo_baba,
            denom,
            out=np.zeros(n_blocks),
            where=denom > 0,
        )

        mean_d = np.mean(jackknife_d)
        return float(
            np.sqrt((n_blocks - 1) / n_blocks * np.sum((jackknife_d - mean_d) ** 2))
        )

    def _interpret(self, d_stat: float, p_value: float, alpha: float = 0.05) -> str:
        """Return a one-sentence reading of D at significance level ``alpha``."""
        if p_value < alpha:
            if d_stat > 0:
                return (
                    f"Significant excess of ABBA patterns (p < {alpha}) "
                    f"suggests introgression between P2 ({self.p2}) and "
                    f"P3 ({self.p3}). Note: D cannot determine the "
                    f"direction of gene flow."
                )
            else:
                return (
                    f"Significant excess of BABA patterns (p < {alpha}) "
                    f"suggests introgression between P1 ({self.p1}) and "
                    f"P3 ({self.p3}). Note: D cannot determine the "
                    f"direction of gene flow."
                )
        return "No significant evidence of introgression (consistent with ILS)."
|