phykit 2.1.64__tar.gz → 2.1.67__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phykit-2.1.64 → phykit-2.1.67}/PKG-INFO +3 -2
- {phykit-2.1.64 → phykit-2.1.67}/phykit/cli_registry.py +2 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/phykit.py +122 -16
- {phykit-2.1.64 → phykit-2.1.67}/phykit/service_factories.py +1 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/__init__.py +1 -0
- phykit-2.1.67/phykit/services/alignment/dfoil.py +271 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/dstatistic.py +200 -4
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/quartet_pie.py +4 -2
- phykit-2.1.67/phykit/version.py +1 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/PKG-INFO +3 -2
- {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/SOURCES.txt +1 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/entry_points.txt +2 -0
- {phykit-2.1.64 → phykit-2.1.67}/setup.py +4 -1
- phykit-2.1.64/phykit/version.py +0 -1
- {phykit-2.1.64 → phykit-2.1.67}/LICENSE.md +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/README.md +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/__init__.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/__main__.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/errors.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/__init__.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/boolean_argument_parsing.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/caching.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/circular_layout.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/color_annotations.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/discrete_models.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/files.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/json_output.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/parallel.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/parsimony_utils.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/plot_config.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/quartet_utils.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/stats_summary.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/streaming.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/__init__.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_entropy.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_length.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_recoding.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_subsample.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/base.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/column_score.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/composition_per_taxon.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/dna_threader.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/faidx.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/gc_content.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/identity_matrix.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/mask_alignment.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/pairwise_identity.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/plot_alignment_qc.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/rcv.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/rcvt.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/rename_fasta_entries.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/variable_sites.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/base.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/__init__.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/ancestral_reconstruction.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/base.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/bipartition_support_stats.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/branch_length_multiplier.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/character_map.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/collapse_branches.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/concordance_asr.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/consensus_network.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/consensus_tree.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/cont_map.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/cophylo.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/density_map.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/discordance_asymmetry.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/dvmc.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/evo_tempo_map.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/evolutionary_rate.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/fit_continuous.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/fit_discrete.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/hidden_paralogy_check.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/independent_contrasts.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/internal_branch_stats.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/internode_labeler.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/kf_distance.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/lb_score.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/ltt.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/monophyly_check.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/network_signal.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/ou_shift_detection.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/ouwie.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/parsimony_score.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/patristic_distances.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phenogram.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylo_heatmap.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylogenetic_glm.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylogenetic_ordination.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylogenetic_regression.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylogenetic_signal.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylomorphospace.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/polytomy_test.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/print_tree.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/prune_tree.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/quartet_network.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/rate_heterogeneity.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/relative_rate_test.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/rename_tree_tips.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/rf_distance.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/root_tree.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/saturation.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/spectral_discordance.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/spurious_sequence.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/stochastic_character_map.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/terminal_branch_stats.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/threshold_model.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/tip_labels.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/tip_to_tip_distance.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/total_tree_length.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/trait_correlation.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/trait_rate_map.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/tree_space.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/treeness.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/treeness_over_rcv.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/vcv_utils.py +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/dependency_links.txt +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/requires.txt +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/top_level.txt +0 -0
- {phykit-2.1.64 → phykit-2.1.67}/setup.cfg +0 -0
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: phykit
|
|
3
|
-
Version: 2.1.64
|
|
3
|
+
Version: 2.1.67
|
|
4
4
|
Home-page: https://github.com/jlsteenwyk/phykit
|
|
5
5
|
Author: Jacob L. Steenwyk
|
|
6
6
|
Author-email: jlsteenwyk@gmail.com
|
|
7
7
|
Classifier: Operating System :: OS Independent
|
|
8
8
|
Classifier: Intended Audience :: Science/Research
|
|
9
9
|
Classifier: Programming Language :: Python
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.13
|
|
13
14
|
Classifier: Topic :: Scientific/Engineering
|
|
14
|
-
Requires-Python: >=3.11
|
|
15
|
+
Requires-Python: >=3.10
|
|
15
16
|
Description-Content-Type: text/markdown
|
|
16
17
|
License-File: LICENSE.md
|
|
17
18
|
Requires-Dist: biopython>=1.82
|
|
@@ -23,6 +23,8 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
|
|
|
23
23
|
"aot": "alignment_outlier_taxa",
|
|
24
24
|
"dstat": "dstatistic",
|
|
25
25
|
"abba_baba": "dstatistic",
|
|
26
|
+
"dfoil": "dfoil",
|
|
27
|
+
"dfoil_test": "dfoil",
|
|
26
28
|
"cs": "column_score",
|
|
27
29
|
"comp_bias_per_site": "compositional_bias_per_site",
|
|
28
30
|
"cbps": "compositional_bias_per_site",
|
|
@@ -122,6 +122,10 @@ class Phykit:
|
|
|
122
122
|
dstatistic (alias: dstat; abba_baba)
|
|
123
123
|
- Patterson's D-statistic (ABBA-BABA test) for
|
|
124
124
|
detecting introgression/gene flow
|
|
125
|
+
dfoil (alias: dfoil_test)
|
|
126
|
+
- DFOIL test (Pease & Hahn 2015) for detecting
|
|
127
|
+
and polarizing introgression in a 5-taxon
|
|
128
|
+
symmetric phylogeny
|
|
125
129
|
alignment_outlier_taxa (alias: outlier_taxa; aot)
|
|
126
130
|
- identify potential outlier taxa and why they were flagged
|
|
127
131
|
column_score (alias: cs)
|
|
@@ -1956,21 +1960,26 @@ class Phykit:
|
|
|
1956
1960
|
{help_header}
|
|
1957
1961
|
|
|
1958
1962
|
Compute Patterson's D-statistic (ABBA-BABA test) for
|
|
1959
|
-
detecting introgression or gene flow
|
|
1960
|
-
alignment.
|
|
1963
|
+
detecting introgression or gene flow.
|
|
1961
1964
|
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
significant excess of either pattern indicates gene
|
|
1966
|
-
flow.
|
|
1965
|
+
Two input modes:
|
|
1966
|
+
1) Site patterns from an alignment (-a)
|
|
1967
|
+
2) Quartet topologies from gene trees (-g)
|
|
1967
1968
|
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1969
|
+
Species topology: (((P1, P2), P3), Outgroup).
|
|
1970
|
+
Under ILS alone, ABBA and BABA patterns (or
|
|
1971
|
+
discordant topologies) are equally frequent. A
|
|
1972
|
+
significant excess indicates introgression.
|
|
1971
1973
|
|
|
1972
|
-
|
|
1973
|
-
|
|
1974
|
+
D > 0: introgression between P2 and P3.
|
|
1975
|
+
D < 0: introgression between P1 and P3.
|
|
1976
|
+
D = 0: consistent with ILS alone.
|
|
1977
|
+
Note: D identifies which lineages exchanged genes
|
|
1978
|
+
but cannot determine direction of flow.
|
|
1979
|
+
|
|
1980
|
+
Gene trees can have any number of taxa; only the
|
|
1981
|
+
quartet induced by the four specified taxa is
|
|
1982
|
+
evaluated from each tree.
|
|
1974
1983
|
|
|
1975
1984
|
Aliases:
|
|
1976
1985
|
dstatistic, dstat, abba_baba
|
|
@@ -1978,13 +1987,22 @@ class Phykit:
|
|
|
1978
1987
|
pk_dstatistic, pk_dstat, pk_abba_baba
|
|
1979
1988
|
|
|
1980
1989
|
Usage:
|
|
1981
|
-
phykit dstatistic -a <alignment> --p1 <taxon>
|
|
1982
|
-
--p3 <taxon> --outgroup <taxon>
|
|
1990
|
+
phykit dstatistic -a <alignment> --p1 <taxon>
|
|
1991
|
+
--p2 <taxon> --p3 <taxon> --outgroup <taxon>
|
|
1983
1992
|
[--block-size 100] [--json]
|
|
1993
|
+
phykit dstatistic -g <gene_trees> --p1 <taxon>
|
|
1994
|
+
--p2 <taxon> --p3 <taxon> --outgroup <taxon>
|
|
1995
|
+
[--json]
|
|
1984
1996
|
|
|
1985
1997
|
Options
|
|
1986
1998
|
=====================================================
|
|
1987
1999
|
-a/--alignment FASTA alignment file
|
|
2000
|
+
(site-pattern mode)
|
|
2001
|
+
|
|
2002
|
+
-g/--gene-trees gene trees file, one
|
|
2003
|
+
Newick per line (gene-
|
|
2004
|
+
tree mode; trees can
|
|
2005
|
+
have any number of taxa)
|
|
1988
2006
|
|
|
1989
2007
|
--p1 taxon name for P1
|
|
1990
2008
|
(sister to P2)
|
|
@@ -2000,21 +2018,96 @@ class Phykit:
|
|
|
2000
2018
|
|
|
2001
2019
|
--block-size block size for jackknife
|
|
2002
2020
|
estimation of standard
|
|
2003
|
-
error (default: 100)
|
|
2021
|
+
error (default: 100;
|
|
2022
|
+
alignment mode only)
|
|
2023
|
+
|
|
2024
|
+
--support minimum branch support
|
|
2025
|
+
threshold for gene trees;
|
|
2026
|
+
branches below this value
|
|
2027
|
+
are collapsed (treated as
|
|
2028
|
+
unresolved). Gene-tree
|
|
2029
|
+
mode only.
|
|
2004
2030
|
|
|
2005
2031
|
--json output results as JSON
|
|
2006
2032
|
"""
|
|
2007
2033
|
),
|
|
2008
2034
|
)
|
|
2009
|
-
parser.add_argument("-a", "--alignment", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2035
|
+
parser.add_argument("-a", "--alignment", type=str, required=False, default=None, help=SUPPRESS, metavar="")
|
|
2036
|
+
parser.add_argument("-g", "--gene-trees", type=str, required=False, default=None, help=SUPPRESS, metavar="")
|
|
2010
2037
|
parser.add_argument("--p1", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2011
2038
|
parser.add_argument("--p2", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2012
2039
|
parser.add_argument("--p3", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2013
2040
|
parser.add_argument("--outgroup", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2014
2041
|
parser.add_argument("--block-size", type=int, default=100, help=SUPPRESS, metavar="")
|
|
2042
|
+
parser.add_argument("--support", type=float, default=None, help=SUPPRESS, metavar="")
|
|
2015
2043
|
_add_json_argument(parser)
|
|
2016
2044
|
_run_service(parser, argv, Dstatistic)
|
|
2017
2045
|
|
|
2046
|
+
@staticmethod
|
|
2047
|
+
def dfoil(argv):
|
|
2048
|
+
parser = _new_parser(
|
|
2049
|
+
description=textwrap.dedent(
|
|
2050
|
+
f"""\
|
|
2051
|
+
{help_header}
|
|
2052
|
+
|
|
2053
|
+
Compute DFOIL statistics (Pease & Hahn 2015) for
|
|
2054
|
+
detecting and polarizing introgression in a 5-taxon
|
|
2055
|
+
symmetric phylogeny.
|
|
2056
|
+
|
|
2057
|
+
Topology: ((P1, P2), (P3, P4), Outgroup)
|
|
2058
|
+
P1 and P2 are sister taxa; P3 and P4 are sister
|
|
2059
|
+
taxa; the two pairs are sister to each other with
|
|
2060
|
+
an outgroup rooting the tree.
|
|
2061
|
+
|
|
2062
|
+
Four D-statistics are computed:
|
|
2063
|
+
DFO (far-outer), DIL (inner-left),
|
|
2064
|
+
DFI (far-inner), DOL (outer-left)
|
|
2065
|
+
|
|
2066
|
+
The sign pattern of these four statistics maps to
|
|
2067
|
+
a specific introgression scenario via the lookup
|
|
2068
|
+
table from Pease & Hahn (2015).
|
|
2069
|
+
|
|
2070
|
+
Aliases:
|
|
2071
|
+
dfoil, dfoil_test
|
|
2072
|
+
Command line interfaces:
|
|
2073
|
+
pk_dfoil, pk_dfoil_test
|
|
2074
|
+
|
|
2075
|
+
Usage:
|
|
2076
|
+
phykit dfoil -a <alignment> --p1 <taxon>
|
|
2077
|
+
--p2 <taxon> --p3 <taxon> --p4 <taxon>
|
|
2078
|
+
--outgroup <taxon> [--json]
|
|
2079
|
+
|
|
2080
|
+
Options
|
|
2081
|
+
=====================================================
|
|
2082
|
+
-a/--alignment FASTA alignment file
|
|
2083
|
+
|
|
2084
|
+
--p1 taxon name for P1
|
|
2085
|
+
(sister to P2)
|
|
2086
|
+
|
|
2087
|
+
--p2 taxon name for P2
|
|
2088
|
+
(sister to P1)
|
|
2089
|
+
|
|
2090
|
+
--p3 taxon name for P3
|
|
2091
|
+
(sister to P4)
|
|
2092
|
+
|
|
2093
|
+
--p4 taxon name for P4
|
|
2094
|
+
(sister to P3)
|
|
2095
|
+
|
|
2096
|
+
--outgroup outgroup taxon name
|
|
2097
|
+
|
|
2098
|
+
--json output results as JSON
|
|
2099
|
+
"""
|
|
2100
|
+
),
|
|
2101
|
+
)
|
|
2102
|
+
parser.add_argument("-a", "--alignment", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2103
|
+
parser.add_argument("--p1", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2104
|
+
parser.add_argument("--p2", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2105
|
+
parser.add_argument("--p3", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2106
|
+
parser.add_argument("--p4", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2107
|
+
parser.add_argument("--outgroup", type=str, required=True, help=SUPPRESS, metavar="")
|
|
2108
|
+
_add_json_argument(parser)
|
|
2109
|
+
_run_service(parser, argv, Dfoil)
|
|
2110
|
+
|
|
2018
2111
|
## Tree functions
|
|
2019
2112
|
@staticmethod
|
|
2020
2113
|
def parsimony_score(argv):
|
|
@@ -5556,6 +5649,11 @@ class Phykit:
|
|
|
5556
5649
|
--csv output per-branch concordance
|
|
5557
5650
|
values as a CSV file
|
|
5558
5651
|
|
|
5652
|
+
--pie-size scale factor for pie chart
|
|
5653
|
+
size (default: 1.0; use
|
|
5654
|
+
2.0 for double, 0.5 for
|
|
5655
|
+
half, etc.)
|
|
5656
|
+
|
|
5559
5657
|
--json optional argument to output
|
|
5560
5658
|
per-node concordance as JSON
|
|
5561
5659
|
"""
|
|
@@ -5578,6 +5676,10 @@ class Phykit:
|
|
|
5578
5676
|
"--csv", type=str, required=False, default=None,
|
|
5579
5677
|
help=SUPPRESS, metavar=""
|
|
5580
5678
|
)
|
|
5679
|
+
parser.add_argument(
|
|
5680
|
+
"--pie-size", type=float, required=False, default=1.0,
|
|
5681
|
+
help=SUPPRESS, metavar=""
|
|
5682
|
+
)
|
|
5581
5683
|
add_plot_arguments(parser)
|
|
5582
5684
|
_add_json_argument(parser)
|
|
5583
5685
|
_run_service(parser, argv, QuartetPie)
|
|
@@ -7955,6 +8057,10 @@ def dstatistic(argv=None):
|
|
|
7955
8057
|
Phykit.dstatistic(sys.argv[1:])
|
|
7956
8058
|
|
|
7957
8059
|
|
|
8060
|
+
def dfoil(argv=None):
|
|
8061
|
+
Phykit.dfoil(sys.argv[1:])
|
|
8062
|
+
|
|
8063
|
+
|
|
7958
8064
|
# Tree-based functions
|
|
7959
8065
|
def parsimony_score(argv=None):
|
|
7960
8066
|
Phykit.parsimony_score(sys.argv[1:])
|
|
@@ -30,6 +30,7 @@ CompositionPerTaxon = _LazyServiceFactory("phykit.services.alignment.composition
|
|
|
30
30
|
CreateConcatenationMatrix = _LazyServiceFactory("phykit.services.alignment.create_concatenation_matrix", "CreateConcatenationMatrix")
|
|
31
31
|
DNAThreader = _LazyServiceFactory("phykit.services.alignment.dna_threader", "DNAThreader")
|
|
32
32
|
Dstatistic = _LazyServiceFactory("phykit.services.alignment.dstatistic", "Dstatistic")
|
|
33
|
+
Dfoil = _LazyServiceFactory("phykit.services.alignment.dfoil", "Dfoil")
|
|
33
34
|
EvolutionaryRatePerSite = _LazyServiceFactory("phykit.services.alignment.evolutionary_rate_per_site", "EvolutionaryRatePerSite")
|
|
34
35
|
Faidx = _LazyServiceFactory("phykit.services.alignment.faidx", "Faidx")
|
|
35
36
|
GCContent = _LazyServiceFactory("phykit.services.alignment.gc_content", "GCContent")
|
|
@@ -13,6 +13,7 @@ _EXPORTS = {
|
|
|
13
13
|
"CreateConcatenationMatrix": "create_concatenation_matrix",
|
|
14
14
|
"DNAThreader": "dna_threader",
|
|
15
15
|
"Dstatistic": "dstatistic",
|
|
16
|
+
"Dfoil": "dfoil",
|
|
16
17
|
"EvolutionaryRatePerSite": "evolutionary_rate_per_site",
|
|
17
18
|
"Faidx": "faidx",
|
|
18
19
|
"GCContent": "gc_content",
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
"""DFOIL test (Pease & Hahn 2015) for detecting and polarizing introgression
|
|
2
|
+
in a 5-taxon symmetric phylogeny.
|
|
3
|
+
|
|
4
|
+
Topology: ((P1, P2), (P3, P4), Outgroup)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict
|
|
8
|
+
|
|
9
|
+
from Bio import SeqIO
|
|
10
|
+
|
|
11
|
+
from .base import Alignment
|
|
12
|
+
from ...helpers.json_output import print_json
|
|
13
|
+
from ...errors import PhykitUserError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# All 16 binary site patterns for 5 taxa (P1, P2, P3, P4, Outgroup).
|
|
17
|
+
# A = matches outgroup (ancestral), B = differs (derived).
|
|
18
|
+
PATTERNS = [
|
|
19
|
+
'AAAAA', 'AAABA', 'AABAA', 'AABBA',
|
|
20
|
+
'ABAAA', 'ABABA', 'ABBAA', 'ABBBA',
|
|
21
|
+
'BAAAA', 'BAABA', 'BABAA', 'BABBA',
|
|
22
|
+
'BBAAA', 'BBABA', 'BBBAA', 'BBBBA',
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
# Invariant / uninformative patterns (all ancestral or all derived).
|
|
26
|
+
_UNINFORMATIVE = {'AAAAA', 'BBBBA'}
|
|
27
|
+
|
|
28
|
+
# Sign-pattern interpretation table (DFO, DIL, DFI, DOL).
|
|
29
|
+
INTERPRETATIONS = {
|
|
30
|
+
'+++0': 'Introgression: P1 -> P3 (or P3 -> P1)',
|
|
31
|
+
'--0+': 'Introgression: P1 -> P4 (or P4 -> P1)',
|
|
32
|
+
'++-0': 'Introgression: P2 -> P3 (or P3 -> P2)',
|
|
33
|
+
'--0-': 'Introgression: P2 -> P4 (or P4 -> P2)',
|
|
34
|
+
'+0++': 'Introgression: P3 -> P1 (or P1 -> P3)',
|
|
35
|
+
'-0++': 'Introgression: P4 -> P1 (or P1 -> P4)',
|
|
36
|
+
'0+--': 'Introgression: P3 -> P2 (or P2 -> P3)',
|
|
37
|
+
'0---': 'Introgression: P4 -> P2 (or P2 -> P4)',
|
|
38
|
+
'++00': 'Introgression: ancestor of (P1,P2) <-> P3',
|
|
39
|
+
'--00': 'Introgression: ancestor of (P1,P2) <-> P4',
|
|
40
|
+
'0000': 'No significant introgression detected',
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Dfoil(Alignment):
|
|
45
|
+
def __init__(self, args) -> None:
|
|
46
|
+
parsed = self.process_args(args)
|
|
47
|
+
super().__init__(alignment_file_path=parsed["alignment_path"])
|
|
48
|
+
self.p1 = parsed["p1"]
|
|
49
|
+
self.p2 = parsed["p2"]
|
|
50
|
+
self.p3 = parsed["p3"]
|
|
51
|
+
self.p4 = parsed["p4"]
|
|
52
|
+
self.outgroup = parsed["outgroup"]
|
|
53
|
+
self.json_output = parsed["json_output"]
|
|
54
|
+
|
|
55
|
+
def process_args(self, args) -> Dict[str, object]:
|
|
56
|
+
return dict(
|
|
57
|
+
alignment_path=args.alignment,
|
|
58
|
+
p1=args.p1,
|
|
59
|
+
p2=args.p2,
|
|
60
|
+
p3=args.p3,
|
|
61
|
+
p4=args.p4,
|
|
62
|
+
outgroup=args.outgroup,
|
|
63
|
+
json_output=getattr(args, "json", False),
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
def run(self):
|
|
67
|
+
# Read alignment sequences
|
|
68
|
+
sequences = {}
|
|
69
|
+
for record in SeqIO.parse(self.alignment_file_path, "fasta"):
|
|
70
|
+
sequences[record.id] = str(record.seq).upper()
|
|
71
|
+
|
|
72
|
+
# Validate taxa are present
|
|
73
|
+
required = {
|
|
74
|
+
"p1": self.p1,
|
|
75
|
+
"p2": self.p2,
|
|
76
|
+
"p3": self.p3,
|
|
77
|
+
"p4": self.p4,
|
|
78
|
+
"outgroup": self.outgroup,
|
|
79
|
+
}
|
|
80
|
+
for label, taxon in required.items():
|
|
81
|
+
if taxon not in sequences:
|
|
82
|
+
raise PhykitUserError(
|
|
83
|
+
[f"Taxon '{taxon}' ({label}) not found in alignment. "
|
|
84
|
+
f"Available taxa: {', '.join(sorted(sequences.keys()))}"],
|
|
85
|
+
code=2,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
seq_p1 = sequences[self.p1]
|
|
89
|
+
seq_p2 = sequences[self.p2]
|
|
90
|
+
seq_p3 = sequences[self.p3]
|
|
91
|
+
seq_p4 = sequences[self.p4]
|
|
92
|
+
seq_o = sequences[self.outgroup]
|
|
93
|
+
|
|
94
|
+
# Validate equal lengths
|
|
95
|
+
lengths = {len(seq_p1), len(seq_p2), len(seq_p3), len(seq_p4), len(seq_o)}
|
|
96
|
+
if len(lengths) != 1:
|
|
97
|
+
raise PhykitUserError(
|
|
98
|
+
["Sequences have different lengths. All sequences must be aligned."],
|
|
99
|
+
code=2,
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
aln_length = len(seq_p1)
|
|
103
|
+
skip_chars = {"-", "N", "?", "X", "n", "x"}
|
|
104
|
+
|
|
105
|
+
# Initialize pattern counts
|
|
106
|
+
counts: Dict[str, int] = {p: 0 for p in PATTERNS}
|
|
107
|
+
|
|
108
|
+
for site in range(aln_length):
|
|
109
|
+
p1 = seq_p1[site]
|
|
110
|
+
p2 = seq_p2[site]
|
|
111
|
+
p3 = seq_p3[site]
|
|
112
|
+
p4 = seq_p4[site]
|
|
113
|
+
o = seq_o[site]
|
|
114
|
+
|
|
115
|
+
# Skip sites with gaps or ambiguous characters
|
|
116
|
+
if any(c in skip_chars for c in [p1, p2, p3, p4, o]):
|
|
117
|
+
continue
|
|
118
|
+
|
|
119
|
+
# Skip sites that are not biallelic
|
|
120
|
+
alleles = {p1, p2, p3, p4, o}
|
|
121
|
+
if len(alleles) != 2:
|
|
122
|
+
continue
|
|
123
|
+
|
|
124
|
+
# Encode pattern: A if matches outgroup, B if differs
|
|
125
|
+
pattern = ''.join(
|
|
126
|
+
'A' if c == o else 'B'
|
|
127
|
+
for c in [p1, p2, p3, p4, o]
|
|
128
|
+
)
|
|
129
|
+
counts[pattern] += 1
|
|
130
|
+
|
|
131
|
+
# Count informative sites (exclude AAAAA and BBBBA)
|
|
132
|
+
informative_sites = sum(
|
|
133
|
+
v for k, v in counts.items() if k not in _UNINFORMATIVE
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# Compute the four D-statistics
|
|
137
|
+
dfo_left = counts['AAABA'] + counts['ABABA'] + counts['BABAA'] + counts['BBBAA']
|
|
138
|
+
dfo_right = counts['AABAA'] + counts['ABBAA'] + counts['BAABA'] + counts['BBABA']
|
|
139
|
+
|
|
140
|
+
dil_left = counts['AAABA'] + counts['ABBAA'] + counts['BAABA'] + counts['BBBAA']
|
|
141
|
+
dil_right = counts['AABAA'] + counts['ABABA'] + counts['BABAA'] + counts['BBABA']
|
|
142
|
+
|
|
143
|
+
dfi_left = counts['ABAAA'] + counts['ABABA'] + counts['BABAA'] + counts['BABBA']
|
|
144
|
+
dfi_right = counts['BAAAA'] + counts['ABBAA'] + counts['BAABA'] + counts['ABBBA']
|
|
145
|
+
|
|
146
|
+
dol_left = counts['ABAAA'] + counts['ABBAA'] + counts['BAABA'] + counts['BABBA']
|
|
147
|
+
dol_right = counts['BAAAA'] + counts['ABABA'] + counts['BABAA'] + counts['ABBBA']
|
|
148
|
+
|
|
149
|
+
DFO = (dfo_left - dfo_right) / (dfo_left + dfo_right) if (dfo_left + dfo_right) > 0 else 0.0
|
|
150
|
+
DIL = (dil_left - dil_right) / (dil_left + dil_right) if (dil_left + dil_right) > 0 else 0.0
|
|
151
|
+
DFI = (dfi_left - dfi_right) / (dfi_left + dfi_right) if (dfi_left + dfi_right) > 0 else 0.0
|
|
152
|
+
DOL = (dol_left - dol_right) / (dol_left + dol_right) if (dol_left + dol_right) > 0 else 0.0
|
|
153
|
+
|
|
154
|
+
# Chi-squared significance tests (1 df)
|
|
155
|
+
from scipy.stats import chi2
|
|
156
|
+
|
|
157
|
+
def _chi2_test(left, right):
|
|
158
|
+
total = left + right
|
|
159
|
+
if total == 0:
|
|
160
|
+
return 0.0, 1.0
|
|
161
|
+
chi2_stat = (left - right) ** 2 / total
|
|
162
|
+
p_value = float(chi2.sf(chi2_stat, df=1))
|
|
163
|
+
return float(chi2_stat), p_value
|
|
164
|
+
|
|
165
|
+
dfo_chi2, dfo_p = _chi2_test(dfo_left, dfo_right)
|
|
166
|
+
dil_chi2, dil_p = _chi2_test(dil_left, dil_right)
|
|
167
|
+
dfi_chi2, dfi_p = _chi2_test(dfi_left, dfi_right)
|
|
168
|
+
dol_chi2, dol_p = _chi2_test(dol_left, dol_right)
|
|
169
|
+
|
|
170
|
+
# Sign pattern
|
|
171
|
+
def _get_sign(d_value, p_value, alpha=0.05):
|
|
172
|
+
if p_value >= alpha:
|
|
173
|
+
return '0'
|
|
174
|
+
return '+' if d_value > 0 else '-'
|
|
175
|
+
|
|
176
|
+
sign_pattern = (
|
|
177
|
+
_get_sign(DFO, dfo_p)
|
|
178
|
+
+ _get_sign(DIL, dil_p)
|
|
179
|
+
+ _get_sign(DFI, dfi_p)
|
|
180
|
+
+ _get_sign(DOL, dol_p)
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
interpretation = INTERPRETATIONS.get(
|
|
184
|
+
sign_pattern, 'Ambiguous or complex introgression pattern'
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
# Significance stars helper
|
|
188
|
+
def _stars(p):
|
|
189
|
+
if p < 0.001:
|
|
190
|
+
return ' ***'
|
|
191
|
+
elif p < 0.01:
|
|
192
|
+
return ' **'
|
|
193
|
+
elif p < 0.05:
|
|
194
|
+
return ' *'
|
|
195
|
+
return ''
|
|
196
|
+
|
|
197
|
+
# Output
|
|
198
|
+
if self.json_output:
|
|
199
|
+
# Pattern counts excluding AAAAA (always 'AAAAA' key exists)
|
|
200
|
+
pattern_counts = {k: v for k, v in counts.items() if k not in _UNINFORMATIVE}
|
|
201
|
+
|
|
202
|
+
payload = {
|
|
203
|
+
"p1": self.p1,
|
|
204
|
+
"p2": self.p2,
|
|
205
|
+
"p3": self.p3,
|
|
206
|
+
"p4": self.p4,
|
|
207
|
+
"outgroup": self.outgroup,
|
|
208
|
+
"alignment_length": aln_length,
|
|
209
|
+
"informative_sites": informative_sites,
|
|
210
|
+
"pattern_counts": pattern_counts,
|
|
211
|
+
"dfo": {
|
|
212
|
+
"value": round(DFO, 4),
|
|
213
|
+
"left": dfo_left,
|
|
214
|
+
"right": dfo_right,
|
|
215
|
+
"chi2": round(dfo_chi2, 4),
|
|
216
|
+
"p_value": round(dfo_p, 6),
|
|
217
|
+
},
|
|
218
|
+
"dil": {
|
|
219
|
+
"value": round(DIL, 4),
|
|
220
|
+
"left": dil_left,
|
|
221
|
+
"right": dil_right,
|
|
222
|
+
"chi2": round(dil_chi2, 4),
|
|
223
|
+
"p_value": round(dil_p, 6),
|
|
224
|
+
},
|
|
225
|
+
"dfi": {
|
|
226
|
+
"value": round(DFI, 4),
|
|
227
|
+
"left": dfi_left,
|
|
228
|
+
"right": dfi_right,
|
|
229
|
+
"chi2": round(dfi_chi2, 4),
|
|
230
|
+
"p_value": round(dfi_p, 6),
|
|
231
|
+
},
|
|
232
|
+
"dol": {
|
|
233
|
+
"value": round(DOL, 4),
|
|
234
|
+
"left": dol_left,
|
|
235
|
+
"right": dol_right,
|
|
236
|
+
"chi2": round(dol_chi2, 4),
|
|
237
|
+
"p_value": round(dol_p, 6),
|
|
238
|
+
},
|
|
239
|
+
"sign_pattern": sign_pattern,
|
|
240
|
+
"interpretation": interpretation,
|
|
241
|
+
}
|
|
242
|
+
print_json(payload, sort_keys=False)
|
|
243
|
+
return
|
|
244
|
+
|
|
245
|
+
try:
|
|
246
|
+
print("DFOIL Test (Pease & Hahn 2015)")
|
|
247
|
+
print("================================")
|
|
248
|
+
print(f"Topology: (({self.p1}, {self.p2}), ({self.p3}, {self.p4}), {self.outgroup})")
|
|
249
|
+
print(f"P1: {self.p1}, P2: {self.p2}, P3: {self.p3}, P4: {self.p4}, Outgroup: {self.outgroup}")
|
|
250
|
+
print()
|
|
251
|
+
print(f"Alignment length: {aln_length}")
|
|
252
|
+
print(f"Informative sites: {informative_sites}")
|
|
253
|
+
print()
|
|
254
|
+
print("Site pattern counts:")
|
|
255
|
+
# Print informative patterns in a compact layout
|
|
256
|
+
informative_patterns = [p for p in PATTERNS if p not in _UNINFORMATIVE]
|
|
257
|
+
for i in range(0, len(informative_patterns), 4):
|
|
258
|
+
chunk = informative_patterns[i:i + 4]
|
|
259
|
+
parts = [f"{p}: {counts[p]}" for p in chunk]
|
|
260
|
+
print(" " + " ".join(parts))
|
|
261
|
+
print()
|
|
262
|
+
print("D-statistics:")
|
|
263
|
+
print(f" DFO: {DFO:.4f} (p = {dfo_p:.6f}{_stars(dfo_p)})")
|
|
264
|
+
print(f" DIL: {DIL:.4f} (p = {dil_p:.6f}{_stars(dil_p)})")
|
|
265
|
+
print(f" DFI: {DFI:.4f} (p = {dfi_p:.6f}{_stars(dfi_p)})")
|
|
266
|
+
print(f" DOL: {DOL:.4f} (p = {dol_p:.6f}{_stars(dol_p)})")
|
|
267
|
+
print()
|
|
268
|
+
print(f"Sign pattern: {sign_pattern}")
|
|
269
|
+
print(f"Interpretation: {interpretation}")
|
|
270
|
+
except BrokenPipeError:
|
|
271
|
+
pass
|