phykit 2.1.63__tar.gz → 2.1.66__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. {phykit-2.1.63 → phykit-2.1.66}/PKG-INFO +3 -2
  2. {phykit-2.1.63 → phykit-2.1.66}/phykit/cli_registry.py +2 -0
  3. {phykit-2.1.63 → phykit-2.1.66}/phykit/phykit.py +107 -0
  4. {phykit-2.1.63 → phykit-2.1.66}/phykit/service_factories.py +1 -0
  5. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/__init__.py +1 -0
  6. phykit-2.1.66/phykit/services/alignment/dstatistic.py +420 -0
  7. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/quartet_pie.py +4 -2
  8. phykit-2.1.66/phykit/version.py +1 -0
  9. {phykit-2.1.63 → phykit-2.1.66}/phykit.egg-info/PKG-INFO +3 -2
  10. {phykit-2.1.63 → phykit-2.1.66}/phykit.egg-info/SOURCES.txt +1 -0
  11. {phykit-2.1.63 → phykit-2.1.66}/phykit.egg-info/entry_points.txt +3 -0
  12. {phykit-2.1.63 → phykit-2.1.66}/setup.py +5 -1
  13. phykit-2.1.63/phykit/version.py +0 -1
  14. {phykit-2.1.63 → phykit-2.1.66}/LICENSE.md +0 -0
  15. {phykit-2.1.63 → phykit-2.1.66}/README.md +0 -0
  16. {phykit-2.1.63 → phykit-2.1.66}/phykit/__init__.py +0 -0
  17. {phykit-2.1.63 → phykit-2.1.66}/phykit/__main__.py +0 -0
  18. {phykit-2.1.63 → phykit-2.1.66}/phykit/errors.py +0 -0
  19. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/__init__.py +0 -0
  20. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/boolean_argument_parsing.py +0 -0
  21. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/caching.py +0 -0
  22. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/circular_layout.py +0 -0
  23. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/color_annotations.py +0 -0
  24. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/discrete_models.py +0 -0
  25. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/files.py +0 -0
  26. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/json_output.py +0 -0
  27. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/parallel.py +0 -0
  28. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/parsimony_utils.py +0 -0
  29. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/plot_config.py +0 -0
  30. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/quartet_utils.py +0 -0
  31. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/stats_summary.py +0 -0
  32. {phykit-2.1.63 → phykit-2.1.66}/phykit/helpers/streaming.py +0 -0
  33. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/__init__.py +0 -0
  34. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/alignment_entropy.py +0 -0
  35. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/alignment_length.py +0 -0
  36. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  37. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
  38. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/alignment_recoding.py +0 -0
  39. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/alignment_subsample.py +0 -0
  40. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/base.py +0 -0
  41. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/column_score.py +0 -0
  42. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/composition_per_taxon.py +0 -0
  43. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  44. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  45. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/dna_threader.py +0 -0
  46. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  47. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/faidx.py +0 -0
  48. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/gc_content.py +0 -0
  49. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/identity_matrix.py +0 -0
  50. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/mask_alignment.py +0 -0
  51. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
  52. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/pairwise_identity.py +0 -0
  53. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  54. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/plot_alignment_qc.py +0 -0
  55. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/rcv.py +0 -0
  56. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/rcvt.py +0 -0
  57. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  58. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  59. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/alignment/variable_sites.py +0 -0
  60. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/base.py +0 -0
  61. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/__init__.py +0 -0
  62. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/ancestral_reconstruction.py +0 -0
  63. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/base.py +0 -0
  64. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/bipartition_support_stats.py +0 -0
  65. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/branch_length_multiplier.py +0 -0
  66. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/character_map.py +0 -0
  67. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/collapse_branches.py +0 -0
  68. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/concordance_asr.py +0 -0
  69. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/consensus_network.py +0 -0
  70. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/consensus_tree.py +0 -0
  71. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/cont_map.py +0 -0
  72. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/cophylo.py +0 -0
  73. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  74. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/density_map.py +0 -0
  75. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/discordance_asymmetry.py +0 -0
  76. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/dvmc.py +0 -0
  77. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/evo_tempo_map.py +0 -0
  78. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/evolutionary_rate.py +0 -0
  79. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/fit_continuous.py +0 -0
  80. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/fit_discrete.py +0 -0
  81. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  82. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/independent_contrasts.py +0 -0
  83. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/internal_branch_stats.py +0 -0
  84. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/internode_labeler.py +0 -0
  85. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/kf_distance.py +0 -0
  86. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  87. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/lb_score.py +0 -0
  88. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/ltt.py +0 -0
  89. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/monophyly_check.py +0 -0
  90. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  91. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/network_signal.py +0 -0
  92. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/ou_shift_detection.py +0 -0
  93. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/ouwie.py +0 -0
  94. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/parsimony_score.py +0 -0
  95. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/patristic_distances.py +0 -0
  96. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/phenogram.py +0 -0
  97. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/phylo_heatmap.py +0 -0
  98. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/phylogenetic_glm.py +0 -0
  99. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/phylogenetic_ordination.py +0 -0
  100. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/phylogenetic_regression.py +0 -0
  101. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/phylogenetic_signal.py +0 -0
  102. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/phylomorphospace.py +0 -0
  103. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/polytomy_test.py +0 -0
  104. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/print_tree.py +0 -0
  105. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/prune_tree.py +0 -0
  106. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/quartet_network.py +0 -0
  107. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/rate_heterogeneity.py +0 -0
  108. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/relative_rate_test.py +0 -0
  109. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/rename_tree_tips.py +0 -0
  110. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/rf_distance.py +0 -0
  111. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/root_tree.py +0 -0
  112. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/saturation.py +0 -0
  113. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/spectral_discordance.py +0 -0
  114. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/spurious_sequence.py +0 -0
  115. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/stochastic_character_map.py +0 -0
  116. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/terminal_branch_stats.py +0 -0
  117. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/threshold_model.py +0 -0
  118. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/tip_labels.py +0 -0
  119. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  120. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  121. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/total_tree_length.py +0 -0
  122. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/trait_correlation.py +0 -0
  123. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/trait_rate_map.py +0 -0
  124. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/tree_space.py +0 -0
  125. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/treeness.py +0 -0
  126. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/treeness_over_rcv.py +0 -0
  127. {phykit-2.1.63 → phykit-2.1.66}/phykit/services/tree/vcv_utils.py +0 -0
  128. {phykit-2.1.63 → phykit-2.1.66}/phykit.egg-info/dependency_links.txt +0 -0
  129. {phykit-2.1.63 → phykit-2.1.66}/phykit.egg-info/requires.txt +0 -0
  130. {phykit-2.1.63 → phykit-2.1.66}/phykit.egg-info/top_level.txt +0 -0
  131. {phykit-2.1.63 → phykit-2.1.66}/setup.cfg +0 -0
@@ -1,17 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phykit
3
- Version: 2.1.63
3
+ Version: 2.1.66
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
7
7
  Classifier: Operating System :: OS Independent
8
8
  Classifier: Intended Audience :: Science/Research
9
9
  Classifier: Programming Language :: Python
10
+ Classifier: Programming Language :: Python :: 3.10
10
11
  Classifier: Programming Language :: Python :: 3.11
11
12
  Classifier: Programming Language :: Python :: 3.12
12
13
  Classifier: Programming Language :: Python :: 3.13
13
14
  Classifier: Topic :: Scientific/Engineering
14
- Requires-Python: >=3.11
15
+ Requires-Python: >=3.10
15
16
  Description-Content-Type: text/markdown
16
17
  License-File: LICENSE.md
17
18
  Requires-Dist: biopython>=1.82
@@ -21,6 +21,8 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
21
21
  "recode": "alignment_recoding",
22
22
  "outlier_taxa": "alignment_outlier_taxa",
23
23
  "aot": "alignment_outlier_taxa",
24
+ "dstat": "dstatistic",
25
+ "abba_baba": "dstatistic",
24
26
  "cs": "column_score",
25
27
  "comp_bias_per_site": "compositional_bias_per_site",
26
28
  "cbps": "compositional_bias_per_site",
@@ -119,6 +119,9 @@ class Phykit:
119
119
  - recode alignments using reduced character schemes
120
120
  alignment_subsample (alias: aln_subsample; subsample)
121
121
  - randomly subsample genes, partitions, or sites
122
+ dstatistic (alias: dstat; abba_baba)
123
+ - Patterson's D-statistic (ABBA-BABA test) for
124
+ detecting introgression/gene flow
122
125
  alignment_outlier_taxa (alias: outlier_taxa; aot)
123
126
  - identify potential outlier taxa and why they were flagged
124
127
  column_score (alias: cs)
@@ -1945,6 +1948,97 @@ class Phykit:
1945
1948
  _add_json_argument(parser)
1946
1949
  _run_service(parser, argv, AlignmentSubsample)
1947
1950
 
1951
@staticmethod
def dstatistic(argv):
    """Build the parser for the ``dstatistic`` command and run the service.

    Registers the two optional inputs (``-a/--alignment`` and
    ``-g/--gene-trees``), the four required taxon options, ``--block-size``
    (int, default 100), ``--support`` (float, default None) and the JSON
    flag, then hands the parser and ``argv`` to ``_run_service`` together
    with the ``Dstatistic`` service.

    The help text describes P2 and P3 neutrally: the same text states that
    D cannot determine the direction of gene flow, so neither taxon is
    labelled as donor or recipient.
    """
    parser = _new_parser(
        description=textwrap.dedent(
            f"""\
            {help_header}

            Compute Patterson's D-statistic (ABBA-BABA test) for
            detecting introgression or gene flow.

            Two input modes:
            1) Site patterns from an alignment (-a)
            2) Quartet topologies from gene trees (-g)

            Species topology: (((P1, P2), P3), Outgroup).
            Under ILS alone, ABBA and BABA patterns (or
            discordant topologies) are equally frequent. A
            significant excess indicates introgression.

            D > 0: introgression between P2 and P3.
            D < 0: introgression between P1 and P3.
            D = 0: consistent with ILS alone.
            Note: D identifies which lineages exchanged genes
            but cannot determine direction of flow.

            Gene trees can have any number of taxa; only the
            quartet induced by the four specified taxa is
            evaluated from each tree.

            Aliases:
            dstatistic, dstat, abba_baba
            Command line interfaces:
            pk_dstatistic, pk_dstat, pk_abba_baba

            Usage:
            phykit dstatistic -a <alignment> --p1 <taxon>
            --p2 <taxon> --p3 <taxon> --outgroup <taxon>
            [--block-size 100] [--json]
            phykit dstatistic -g <gene_trees> --p1 <taxon>
            --p2 <taxon> --p3 <taxon> --outgroup <taxon>
            [--support <float>] [--json]

            Options
            =====================================================
            -a/--alignment FASTA alignment file
            (site-pattern mode)

            -g/--gene-trees gene trees file, one
            Newick per line (gene-
            tree mode; trees can
            have any number of taxa)

            --p1 taxon name for P1
            (sister to P2)

            --p2 taxon name for P2
            (sister to P1)

            --p3 taxon name for P3
            (tested for gene flow
            with P1 or P2)

            --outgroup outgroup taxon name

            --block-size block size for jackknife
            estimation of standard
            error (default: 100;
            alignment mode only)

            --support minimum branch support
            threshold for gene trees;
            branches below this value
            are collapsed (treated as
            unresolved). Gene-tree
            mode only.

            --json output results as JSON
            """
        ),
    )
    # Exactly one of -a / -g must be given; that rule is enforced by the
    # service's own argument processing, so both are optional here.
    parser.add_argument(
        "-a", "--alignment", type=str, required=False, default=None,
        help=SUPPRESS, metavar="",
    )
    parser.add_argument(
        "-g", "--gene-trees", type=str, required=False, default=None,
        help=SUPPRESS, metavar="",
    )
    parser.add_argument("--p1", type=str, required=True, help=SUPPRESS, metavar="")
    parser.add_argument("--p2", type=str, required=True, help=SUPPRESS, metavar="")
    parser.add_argument("--p3", type=str, required=True, help=SUPPRESS, metavar="")
    parser.add_argument("--outgroup", type=str, required=True, help=SUPPRESS, metavar="")
    parser.add_argument("--block-size", type=int, default=100, help=SUPPRESS, metavar="")
    parser.add_argument("--support", type=float, default=None, help=SUPPRESS, metavar="")
    _add_json_argument(parser)
    _run_service(parser, argv, Dstatistic)
2041
+
1948
2042
  ## Tree functions
1949
2043
  @staticmethod
1950
2044
  def parsimony_score(argv):
@@ -5486,6 +5580,11 @@ class Phykit:
5486
5580
  --csv output per-branch concordance
5487
5581
  values as a CSV file
5488
5582
 
5583
+ --pie-size scale factor for pie chart
5584
+ size (default: 1.0; use
5585
+ 2.0 for double, 0.5 for
5586
+ half, etc.)
5587
+
5489
5588
  --json optional argument to output
5490
5589
  per-node concordance as JSON
5491
5590
  """
@@ -5508,6 +5607,10 @@ class Phykit:
5508
5607
  "--csv", type=str, required=False, default=None,
5509
5608
  help=SUPPRESS, metavar=""
5510
5609
  )
5610
+ parser.add_argument(
5611
+ "--pie-size", type=float, required=False, default=1.0,
5612
+ help=SUPPRESS, metavar=""
5613
+ )
5511
5614
  add_plot_arguments(parser)
5512
5615
  _add_json_argument(parser)
5513
5616
  _run_service(parser, argv, QuartetPie)
@@ -7881,6 +7984,10 @@ def alignment_subsample(argv=None):
7881
7984
  Phykit.alignment_subsample(sys.argv[1:])
7882
7985
 
7883
7986
 
7987
def dstatistic(argv=None):
    """Entry point for the ``dstatistic`` command.

    Args:
        argv: Optional argument list. When omitted (``None``) the
            process arguments ``sys.argv[1:]`` are used, which is what the
            function did unconditionally before.
    """
    Phykit.dstatistic(sys.argv[1:] if argv is None else argv)
7989
+
7990
+
7884
7991
  # Tree-based functions
7885
7992
  def parsimony_score(argv=None):
7886
7993
  Phykit.parsimony_score(sys.argv[1:])
@@ -29,6 +29,7 @@ CompositionalBiasPerSite = _LazyServiceFactory("phykit.services.alignment.compos
29
29
  CompositionPerTaxon = _LazyServiceFactory("phykit.services.alignment.composition_per_taxon", "CompositionPerTaxon")
30
30
  CreateConcatenationMatrix = _LazyServiceFactory("phykit.services.alignment.create_concatenation_matrix", "CreateConcatenationMatrix")
31
31
  DNAThreader = _LazyServiceFactory("phykit.services.alignment.dna_threader", "DNAThreader")
32
+ Dstatistic = _LazyServiceFactory("phykit.services.alignment.dstatistic", "Dstatistic")
32
33
  EvolutionaryRatePerSite = _LazyServiceFactory("phykit.services.alignment.evolutionary_rate_per_site", "EvolutionaryRatePerSite")
33
34
  Faidx = _LazyServiceFactory("phykit.services.alignment.faidx", "Faidx")
34
35
  GCContent = _LazyServiceFactory("phykit.services.alignment.gc_content", "GCContent")
@@ -12,6 +12,7 @@ _EXPORTS = {
12
12
  "CompositionPerTaxon": "composition_per_taxon",
13
13
  "CreateConcatenationMatrix": "create_concatenation_matrix",
14
14
  "DNAThreader": "dna_threader",
15
+ "Dstatistic": "dstatistic",
15
16
  "EvolutionaryRatePerSite": "evolutionary_rate_per_site",
16
17
  "Faidx": "faidx",
17
18
  "GCContent": "gc_content",
@@ -0,0 +1,420 @@
1
+ """Patterson's D-statistic (ABBA-BABA test) for detecting introgression.
2
+
3
+ Supports two modes:
4
+ 1) Site patterns from a FASTA alignment (-a)
5
+ 2) Quartet topologies from gene trees (-g)
6
+ """
7
+
8
+ from io import StringIO
9
+ from typing import Dict, List, Optional, Tuple
10
+
11
+ import numpy as np
12
+ from Bio import Phylo, SeqIO
13
+
14
+ from .base import Alignment
15
+ from ...helpers.json_output import print_json
16
+ from ...errors import PhykitUserError
17
+
18
+
19
+ class Dstatistic(Alignment):
20
def __init__(self, args) -> None:
    """Process ``args`` and keep the resulting options on the instance.

    ``process_args`` runs first so that the alignment path (``None`` when
    gene trees were supplied instead) can be passed to the parent
    initializer; the remaining options are then copied onto ``self``.
    """
    options = self.process_args(args)
    super().__init__(alignment_file_path=options["alignment_path"])
    # Every remaining option is stored under the same name it has in the
    # dictionary returned by process_args.
    for option_name in (
        "gene_trees_path",
        "p1",
        "p2",
        "p3",
        "outgroup",
        "block_size",
        "support_threshold",
        "json_output",
    ):
        setattr(self, option_name, options[option_name])
31
+
32
def process_args(self, args) -> Dict[str, object]:
    """Validate the parsed arguments and return them as a plain dictionary.

    Args:
        args: Namespace-like object. ``p1``, ``p2``, ``p3`` and
            ``outgroup`` must be present; ``alignment``, ``gene_trees``,
            ``block_size``, ``support`` and ``json`` are optional and
            default to ``None``, ``None``, ``100``, ``None`` and ``False``.

    Returns:
        Dictionary with the keys ``alignment_path``, ``gene_trees_path``,
        ``p1``, ``p2``, ``p3``, ``outgroup``, ``block_size``,
        ``support_threshold`` and ``json_output``.

    Raises:
        PhykitUserError: (code 2) if neither or both input files are
            given, if the block size is not a positive integer, or if the
            four taxa are not all different.
    """
    aln = getattr(args, "alignment", None)
    gt = getattr(args, "gene_trees", None)
    if aln is None and gt is None:
        raise PhykitUserError(
            ["Either -a/--alignment or -g/--gene-trees is required."],
            code=2,
        )
    if aln is not None and gt is not None:
        raise PhykitUserError(
            ["-a/--alignment and -g/--gene-trees are mutually exclusive."],
            code=2,
        )

    # The block size is later used as a divisor and as a block width, so
    # zero or negative values must be rejected up front.
    block_size = getattr(args, "block_size", 100)
    if not isinstance(block_size, int) or block_size < 1:
        raise PhykitUserError(
            ["--block-size must be a positive integer."],
            code=2,
        )

    # The test compares four different lineages; a repeated name can only
    # yield zero informative patterns.
    if len({args.p1, args.p2, args.p3, args.outgroup}) != 4:
        raise PhykitUserError(
            ["--p1, --p2, --p3, and --outgroup must be four different taxa."],
            code=2,
        )

    return dict(
        alignment_path=aln,
        gene_trees_path=gt,
        p1=args.p1,
        p2=args.p2,
        p3=args.p3,
        outgroup=args.outgroup,
        block_size=block_size,
        support_threshold=getattr(args, "support", None),
        json_output=getattr(args, "json", False),
    )
56
+
57
def run(self):
    """Run gene-tree mode if a gene-tree path was supplied, else alignment mode.

    The check is ``is not None`` (the same test ``process_args`` applies),
    so an empty-string path still selects gene-tree mode instead of
    falling through to alignment mode without an alignment.
    """
    if self.gene_trees_path is not None:
        self._run_gene_tree_mode()
    else:
        self._run_alignment_mode()
62
+
63
+ # ------------------------------------------------------------------
64
+ # Gene tree mode
65
+ # ------------------------------------------------------------------
66
+
67
+ def _run_gene_tree_mode(self):
68
+ """Count quartet topologies from gene trees."""
69
+ gene_trees = self._parse_gene_trees(self.gene_trees_path)
70
+ quartet = (self.p1, self.p2, self.p3, self.outgroup)
71
+
72
+ # For each gene tree, determine the quartet topology
73
+ # Species tree: (((P1,P2),P3),O)
74
+ # Concordant: P1+P2 together → ((P1,P2),(P3,O))
75
+ # ABBA: P2+P3 together → ((P2,P3),(P1,O))
76
+ # BABA: P1+P3 together → ((P1,P3),(P2,O))
77
+ concordant = 0
78
+ abba_count = 0
79
+ baba_count = 0
80
+ unresolved = 0
81
+
82
+ for gt in gene_trees:
83
+ topo = self._get_quartet_topology(gt, quartet)
84
+ if topo == "concordant":
85
+ concordant += 1
86
+ elif topo == "abba":
87
+ abba_count += 1
88
+ elif topo == "baba":
89
+ baba_count += 1
90
+ else:
91
+ unresolved += 1
92
+
93
+ n_informative = abba_count + baba_count
94
+ n_total = len(gene_trees)
95
+
96
+ # D-statistic
97
+ if n_informative == 0:
98
+ d_stat = 0.0
99
+ else:
100
+ d_stat = (abba_count - baba_count) / n_informative
101
+
102
+ # Chi-squared test: are ABBA and BABA equally frequent?
103
+ from scipy.stats import chi2
104
+
105
+ p_value = None
106
+ chi2_stat = None
107
+ if n_informative > 0:
108
+ expected = n_informative / 2.0
109
+ chi2_stat = ((abba_count - expected) ** 2 + (baba_count - expected) ** 2) / expected
110
+ p_value = float(chi2.sf(chi2_stat, df=1))
111
+
112
+ # Output
113
+ if self.json_output:
114
+ payload = {
115
+ "mode": "gene_trees",
116
+ "p1": self.p1,
117
+ "p2": self.p2,
118
+ "p3": self.p3,
119
+ "outgroup": self.outgroup,
120
+ "n_gene_trees": n_total,
121
+ "concordant": concordant,
122
+ "abba_count": abba_count,
123
+ "baba_count": baba_count,
124
+ "unresolved": unresolved,
125
+ "d_statistic": round(d_stat, 4),
126
+ "support_threshold": self.support_threshold,
127
+ "chi2_statistic": round(chi2_stat, 4) if chi2_stat is not None else None,
128
+ "p_value": round(p_value, 6) if p_value is not None else None,
129
+ }
130
+ print_json(payload, sort_keys=False)
131
+ return
132
+
133
+ try:
134
+ print("Patterson's D-statistic (Gene Tree Mode)")
135
+ print("=========================================")
136
+ print(f"Topology: ((({self.p1}, {self.p2}), {self.p3}), {self.outgroup})")
137
+ print(f"P1: {self.p1}")
138
+ print(f"P2: {self.p2}")
139
+ print(f"P3: {self.p3}")
140
+ print(f"Outgroup: {self.outgroup}")
141
+ print()
142
+ print(f"Gene trees: {n_total}")
143
+ if self.support_threshold is not None:
144
+ print(f"Support threshold: {self.support_threshold}")
145
+ print(f"Concordant ((P1,P2),P3): {concordant}")
146
+ print(f"ABBA ((P2,P3),P1): {abba_count}")
147
+ print(f"BABA ((P1,P3),P2): {baba_count}")
148
+ print(f"Unresolved: {unresolved}")
149
+ print(f"D-statistic: {d_stat:.4f}")
150
+ if chi2_stat is not None:
151
+ print(f"Chi-squared: {chi2_stat:.4f}")
152
+ print(f"p-value: {p_value:.6f}")
153
+ print()
154
+ print(f"Interpretation: {self._interpret(d_stat, p_value)}")
155
+ else:
156
+ print()
157
+ print("No informative (discordant) gene trees found.")
158
+ except BrokenPipeError:
159
+ pass
160
+
161
def _parse_gene_trees(self, path: str) -> list:
    """Parse gene trees from a file (one Newick per line).

    Args:
        path: Path of the gene-tree file.

    Returns:
        List of the trees yielded by ``Phylo.parse(path, "newick")``.

    Raises:
        PhykitUserError: (code 2) if reading or parsing fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        # list() forces the lazy parser to run inside the try block.
        return list(Phylo.parse(path, "newick"))
    except Exception as err:
        # Any reader failure is reported as a single user-facing error;
        # the original exception is chained so it is not lost.
        raise PhykitUserError(
            [f"Could not parse gene trees from {path}."],
            code=2,
        ) from err
170
+
171
+ def _get_quartet_topology(self, tree, quartet) -> str:
172
+ """Determine quartet topology from a (possibly multi-taxon) gene tree.
173
+
174
+ If support_threshold is set, branches with support below the
175
+ threshold are excluded (treated as collapsed/unresolved).
176
+
177
+ Returns 'concordant', 'abba', 'baba', or 'unresolved'.
178
+ """
179
+ p1, p2, p3, outgroup = quartet
180
+
181
+ # Get all taxa in the tree
182
+ tree_taxa = {t.name for t in tree.get_terminals()}
183
+
184
+ # Check all four taxa are present
185
+ if not all(t in tree_taxa for t in quartet):
186
+ return "unresolved"
187
+
188
+ # Extract bipartitions from the gene tree
189
+ # Skip branches with support below threshold
190
+ all_taxa = frozenset(tree_taxa)
191
+ bipartitions = []
192
+ for clade in tree.get_nonterminals():
193
+ # Check support threshold
194
+ if self.support_threshold is not None:
195
+ support = clade.confidence
196
+ if support is not None and support < self.support_threshold:
197
+ continue # collapse this branch (skip its bipartition)
198
+
199
+ tips = frozenset(t.name for t in clade.get_terminals())
200
+ if len(tips) <= 1 or tips == all_taxa:
201
+ continue
202
+ complement = all_taxa - tips
203
+ if len(complement) <= 0:
204
+ continue
205
+ bipartitions.append((tips, complement))
206
+
207
+ # Check which quartet topology the bipartitions support
208
+ quartet_set = {p1, p2, p3, outgroup}
209
+ for side_a, side_b in bipartitions:
210
+ in_a = quartet_set & side_a
211
+ in_b = quartet_set & side_b
212
+ if len(in_a) == 2 and len(in_b) == 2:
213
+ pair = frozenset(in_a)
214
+ # Concordant: P1+P2 on one side
215
+ if pair == frozenset({p1, p2}) or pair == frozenset({p3, outgroup}):
216
+ return "concordant"
217
+ # ABBA: P2+P3 on one side
218
+ if pair == frozenset({p2, p3}) or pair == frozenset({p1, outgroup}):
219
+ return "abba"
220
+ # BABA: P1+P3 on one side
221
+ if pair == frozenset({p1, p3}) or pair == frozenset({p2, outgroup}):
222
+ return "baba"
223
+
224
+ return "unresolved"
225
+
226
+ # ------------------------------------------------------------------
227
+ # Alignment mode
228
+ # ------------------------------------------------------------------
229
+
230
def _run_alignment_mode(self):
    """Count ABBA/BABA site patterns in the alignment and report Patterson's D.

    A site is used only if none of the four taxa has ``-``, ``N``, ``?``
    or ``X`` (after upper-casing) and exactly two different characters
    occur. The outgroup character is taken as ancestral:

    * ABBA: P1 matches the outgroup, P2 and P3 share the other character.
    * BABA: P2 matches the outgroup, P1 and P3 share the other character.

    D is ``(ABBA - BABA) / (ABBA + BABA)`` over all sites (0.0 if there
    are none). If the alignment holds at least two full blocks of
    ``self.block_size`` sites, a delete-one block jackknife over those
    full blocks gives a standard error, Z-score and two-sided p-value;
    sites after the last full block count towards D but not towards the
    jackknife. Output is text, or JSON when ``self.json_output`` is set.

    Raises:
        PhykitUserError: (code 2) if the block size is not positive, a
            taxon is missing from the alignment, or the four sequences
            differ in length.
    """
    # The block size is used as a divisor below.
    if not self.block_size or self.block_size < 1:
        raise PhykitUserError(
            ["--block-size must be a positive integer."],
            code=2,
        )

    sequences = {
        record.id: str(record.seq).upper()
        for record in SeqIO.parse(self.alignment_file_path, "fasta")
    }

    required = {"p1": self.p1, "p2": self.p2, "p3": self.p3, "outgroup": self.outgroup}
    for label, taxon in required.items():
        if taxon not in sequences:
            raise PhykitUserError(
                [f"Taxon '{taxon}' ({label}) not found in alignment. "
                 f"Available taxa: {', '.join(sorted(sequences.keys()))}"],
                code=2,
            )

    seq_p1 = sequences[self.p1]
    seq_p2 = sequences[self.p2]
    seq_p3 = sequences[self.p3]
    seq_outgroup = sequences[self.outgroup]

    if len({len(seq_p1), len(seq_p2), len(seq_p3), len(seq_outgroup)}) != 1:
        raise PhykitUserError(
            ["Sequences have different lengths. All sequences must be aligned."],
            code=2,
        )

    aln_length = len(seq_p1)
    block_size = self.block_size
    n_blocks = aln_length // block_size
    # Sequences are upper-cased above, so only upper-case codes can occur.
    skip_chars = frozenset("-N?X")

    # One pass feeds both the overall counts and the per-block counts.
    abba_count = 0
    baba_count = 0
    block_abba = np.zeros(n_blocks)
    block_baba = np.zeros(n_blocks)

    columns = zip(seq_p1, seq_p2, seq_p3, seq_outgroup)
    for site, (p1, p2, p3, o) in enumerate(columns):
        # Skip sites with gaps/ambiguous characters or that are not biallelic.
        if not skip_chars.isdisjoint((p1, p2, p3, o)):
            continue
        if len({p1, p2, p3, o}) != 2:
            continue

        if p1 == o and p2 == p3 != o:
            is_abba = True
        elif p2 == o and p1 == p3 != o:
            is_abba = False
        else:
            continue

        block_idx = site // block_size
        in_full_block = block_idx < n_blocks  # trailing partial block is excluded
        if is_abba:
            abba_count += 1
            if in_full_block:
                block_abba[block_idx] += 1
        else:
            baba_count += 1
            if in_full_block:
                block_baba[block_idx] += 1

    informative_sites = abba_count + baba_count
    d_stat = (
        (abba_count - baba_count) / informative_sites
        if informative_sites
        else 0.0
    )

    # Block jackknife for significance
    se = None
    z_score = None
    p_value = None

    if n_blocks >= 2:
        # Leave-one-block-out counts and the D value of each replicate
        # (0.0 where a replicate has no informative site left).
        loo_abba = np.sum(block_abba) - block_abba
        loo_baba = np.sum(block_baba) - block_baba
        denom = loo_abba + loo_baba
        jackknife_d = np.divide(
            loo_abba - loo_baba,
            denom,
            out=np.zeros(n_blocks),
            where=denom > 0,
        )

        mean_d = np.mean(jackknife_d)
        se = float(
            np.sqrt((n_blocks - 1) / n_blocks * np.sum((jackknife_d - mean_d) ** 2))
        )

        if se > 0:
            from scipy.stats import norm

            z_score = d_stat / se
            p_value = float(2.0 * norm.sf(abs(z_score)))
        else:
            # No variation between replicates: any non-zero D is reported
            # as infinitely significant.
            z_score = float('inf') if d_stat != 0 else 0.0
            p_value = 0.0 if d_stat != 0 else 1.0

    if self.json_output:
        payload = {
            "p1": self.p1,
            "p2": self.p2,
            "p3": self.p3,
            "outgroup": self.outgroup,
            "alignment_length": aln_length,
            "informative_sites": informative_sites,
            "abba_count": abba_count,
            "baba_count": baba_count,
            "d_statistic": round(d_stat, 4),
            "block_size": self.block_size,
            "n_blocks": n_blocks,
            "standard_error": round(se, 4) if se is not None else None,
            # NOTE(review): an infinite z-score is passed through as-is;
            # confirm how print_json serializes float('inf').
            "z_score": round(z_score, 2) if z_score is not None and z_score != float('inf') else z_score,
            "p_value": round(p_value, 6) if p_value is not None else None,
        }
        print_json(payload, sort_keys=False)
        return

    try:
        print("Patterson's D-statistic (ABBA-BABA Test)")
        print("=========================================")
        print(f"Topology: ((({self.p1}, {self.p2}), {self.p3}), {self.outgroup})")
        print(f"P1: {self.p1}")
        print(f"P2: {self.p2}")
        print(f"P3: {self.p3}")
        print(f"Outgroup: {self.outgroup}")
        print()
        print(f"Alignment length: {aln_length}")
        print(f"Informative sites: {informative_sites}")
        print(f"ABBA sites: {abba_count}")
        print(f"BABA sites: {baba_count}")
        print(f"D-statistic: {d_stat:.4f}")

        if se is not None:
            print(f"Block jackknife (block size: {self.block_size}):")
            print(f" Standard error: {se:.4f}")
            if z_score == float('inf'):
                print(" Z-score: inf")
            else:
                print(f" Z-score: {z_score:.2f}")
            print(f" p-value: {p_value:.6f}")
            print()
            print(f"Interpretation: {self._interpret(d_stat, p_value)}")
        else:
            print()
            print("Not enough blocks for jackknife significance test.")
    except BrokenPipeError:
        # The reader of stdout went away (e.g. output piped to `head`).
        pass
403
+
404
+ def _interpret(self, d_stat: float, p_value: float, alpha: float = 0.05) -> str:
405
+ if p_value < alpha:
406
+ if d_stat > 0:
407
+ return (
408
+ f"Significant excess of ABBA patterns (p < {alpha}) "
409
+ f"suggests introgression between P2 ({self.p2}) and "
410
+ f"P3 ({self.p3}). Note: D cannot determine the "
411
+ f"direction of gene flow."
412
+ )
413
+ else:
414
+ return (
415
+ f"Significant excess of BABA patterns (p < {alpha}) "
416
+ f"suggests introgression between P1 ({self.p1}) and "
417
+ f"P3 ({self.p3}). Note: D cannot determine the "
418
+ f"direction of gene flow."
419
+ )
420
+ return "No significant evidence of introgression (consistent with ILS)."
@@ -48,6 +48,7 @@ class QuartetPie(Tree):
48
48
  self.annotate = parsed["annotate"]
49
49
  self.json_output = parsed["json_output"]
50
50
  self.csv_output = parsed["csv_output"]
51
+ self.pie_size = parsed["pie_size"]
51
52
  self.plot_config = parsed["plot_config"]
52
53
 
53
54
  def run(self) -> None:
@@ -104,6 +105,7 @@ class QuartetPie(Tree):
104
105
  annotate=getattr(args, "annotate", False),
105
106
  json_output=getattr(args, "json", False),
106
107
  csv_output=getattr(args, "csv", None),
108
+ pie_size=getattr(args, "pie_size", 1.0),
107
109
  plot_config=PlotConfig.from_args(args),
108
110
  )
109
111
 
@@ -253,7 +255,7 @@ class QuartetPie(Tree):
253
255
  # Circular mode: scale with n_tips but stay larger than rectangular
254
256
  # since radial spacing gives more room between nodes
255
257
  n_tips = len(tips)
256
- pie_size = min(0.05, 0.6 / max(n_tips, 1))
258
+ pie_size = min(0.05, 0.6 / max(n_tips, 1)) * self.pie_size
257
259
 
258
260
  for clade in tree.find_clades(order="preorder"):
259
261
  if clade.is_terminal() or clade == root:
@@ -400,7 +402,7 @@ class QuartetPie(Tree):
400
402
  # appear as perfect circles regardless of axis scaling, and are
401
403
  # drawn above the phylogeny branches.
402
404
  n_tips = len(tips)
403
- pie_size = min(0.06, 0.8 / max(n_tips, 1))
405
+ pie_size = min(0.06, 0.8 / max(n_tips, 1)) * self.pie_size
404
406
 
405
407
  for clade in tree.find_clades(order="preorder"):
406
408
  if clade.is_terminal() or clade == root:
@@ -0,0 +1 @@
1
+ __version__ = "2.1.66"
@@ -1,17 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phykit
3
- Version: 2.1.63
3
+ Version: 2.1.66
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
7
7
  Classifier: Operating System :: OS Independent
8
8
  Classifier: Intended Audience :: Science/Research
9
9
  Classifier: Programming Language :: Python
10
+ Classifier: Programming Language :: Python :: 3.10
10
11
  Classifier: Programming Language :: Python :: 3.11
11
12
  Classifier: Programming Language :: Python :: 3.12
12
13
  Classifier: Programming Language :: Python :: 3.13
13
14
  Classifier: Topic :: Scientific/Engineering
14
- Requires-Python: >=3.11
15
+ Requires-Python: >=3.10
15
16
  Description-Content-Type: text/markdown
16
17
  License-File: LICENSE.md
17
18
  Requires-Dist: biopython>=1.82
@@ -43,6 +43,7 @@ phykit/services/alignment/composition_per_taxon.py
43
43
  phykit/services/alignment/compositional_bias_per_site.py
44
44
  phykit/services/alignment/create_concatenation_matrix.py
45
45
  phykit/services/alignment/dna_threader.py
46
+ phykit/services/alignment/dstatistic.py
46
47
  phykit/services/alignment/evolutionary_rate_per_site.py
47
48
  phykit/services/alignment/faidx.py
48
49
  phykit/services/alignment/gc_content.py
@@ -1,5 +1,6 @@
1
1
  [console_scripts]
2
2
  phykit = phykit.phykit:main
3
+ pk_abba_baba = phykit.phykit:dstatistic
3
4
  pk_al = phykit.phykit:alignment_length
4
5
  pk_alignment_entropy = phykit.phykit:alignment_entropy
5
6
  pk_alignment_length = phykit.phykit:alignment_length
@@ -63,6 +64,8 @@ pk_dimreduce = phykit.phykit:phylogenetic_ordination
63
64
  pk_disc_asym = phykit.phykit:discordance_asymmetry
64
65
  pk_discordance_asymmetry = phykit.phykit:discordance_asymmetry
65
66
  pk_dmap = phykit.phykit:density_map
67
+ pk_dstat = phykit.phykit:dstatistic
68
+ pk_dstatistic = phykit.phykit:dstatistic
66
69
  pk_dvmc = phykit.phykit:dvmc
67
70
  pk_entropy = phykit.phykit:alignment_entropy
68
71
  pk_erps = phykit.phykit:evolutionary_rate_per_site
@@ -12,6 +12,7 @@ CLASSIFIERS = [
12
12
  'Operating System :: OS Independent',
13
13
  'Intended Audience :: Science/Research',
14
14
  'Programming Language :: Python',
15
+ 'Programming Language :: Python :: 3.10',
15
16
  'Programming Language :: Python :: 3.11',
16
17
  'Programming Language :: Python :: 3.12',
17
18
  'Programming Language :: Python :: 3.13',
@@ -37,7 +38,7 @@ setup(
37
38
  author_email="jlsteenwyk@gmail.com",
38
39
  url="https://github.com/jlsteenwyk/phykit",
39
40
  packages=find_packages(),
40
- python_requires=">=3.11",
41
+ python_requires=">=3.10",
41
42
  classifiers=CLASSIFIERS,
42
43
  entry_points={
43
44
  "console_scripts": [
@@ -106,6 +107,9 @@ setup(
106
107
  "pk_alignment_subsample = phykit.phykit:alignment_subsample",
107
108
  "pk_aln_subsample = phykit.phykit:alignment_subsample",
108
109
  "pk_subsample = phykit.phykit:alignment_subsample",
110
+ "pk_dstatistic = phykit.phykit:dstatistic",
111
+ "pk_dstat = phykit.phykit:dstatistic",
112
+ "pk_abba_baba = phykit.phykit:dstatistic",
109
113
  "pk_ancestral_state_reconstruction = phykit.phykit:ancestral_state_reconstruction", # Tree-based functions
110
114
  "pk_parsimony_score = phykit.phykit:parsimony_score",
111
115
  "pk_parsimony = phykit.phykit:parsimony_score",
@@ -1 +0,0 @@
1
- __version__ = "2.1.63"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes