phykit 2.1.64__tar.gz → 2.1.67__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. {phykit-2.1.64 → phykit-2.1.67}/PKG-INFO +3 -2
  2. {phykit-2.1.64 → phykit-2.1.67}/phykit/cli_registry.py +2 -0
  3. {phykit-2.1.64 → phykit-2.1.67}/phykit/phykit.py +122 -16
  4. {phykit-2.1.64 → phykit-2.1.67}/phykit/service_factories.py +1 -0
  5. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/__init__.py +1 -0
  6. phykit-2.1.67/phykit/services/alignment/dfoil.py +271 -0
  7. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/dstatistic.py +200 -4
  8. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/quartet_pie.py +4 -2
  9. phykit-2.1.67/phykit/version.py +1 -0
  10. {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/PKG-INFO +3 -2
  11. {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/SOURCES.txt +1 -0
  12. {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/entry_points.txt +2 -0
  13. {phykit-2.1.64 → phykit-2.1.67}/setup.py +4 -1
  14. phykit-2.1.64/phykit/version.py +0 -1
  15. {phykit-2.1.64 → phykit-2.1.67}/LICENSE.md +0 -0
  16. {phykit-2.1.64 → phykit-2.1.67}/README.md +0 -0
  17. {phykit-2.1.64 → phykit-2.1.67}/phykit/__init__.py +0 -0
  18. {phykit-2.1.64 → phykit-2.1.67}/phykit/__main__.py +0 -0
  19. {phykit-2.1.64 → phykit-2.1.67}/phykit/errors.py +0 -0
  20. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/__init__.py +0 -0
  21. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/boolean_argument_parsing.py +0 -0
  22. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/caching.py +0 -0
  23. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/circular_layout.py +0 -0
  24. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/color_annotations.py +0 -0
  25. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/discrete_models.py +0 -0
  26. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/files.py +0 -0
  27. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/json_output.py +0 -0
  28. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/parallel.py +0 -0
  29. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/parsimony_utils.py +0 -0
  30. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/plot_config.py +0 -0
  31. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/quartet_utils.py +0 -0
  32. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/stats_summary.py +0 -0
  33. {phykit-2.1.64 → phykit-2.1.67}/phykit/helpers/streaming.py +0 -0
  34. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/__init__.py +0 -0
  35. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_entropy.py +0 -0
  36. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_length.py +0 -0
  37. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  38. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
  39. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_recoding.py +0 -0
  40. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/alignment_subsample.py +0 -0
  41. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/base.py +0 -0
  42. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/column_score.py +0 -0
  43. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/composition_per_taxon.py +0 -0
  44. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  45. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  46. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/dna_threader.py +0 -0
  47. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  48. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/faidx.py +0 -0
  49. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/gc_content.py +0 -0
  50. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/identity_matrix.py +0 -0
  51. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/mask_alignment.py +0 -0
  52. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
  53. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/pairwise_identity.py +0 -0
  54. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  55. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/plot_alignment_qc.py +0 -0
  56. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/rcv.py +0 -0
  57. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/rcvt.py +0 -0
  58. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  59. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  60. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/alignment/variable_sites.py +0 -0
  61. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/base.py +0 -0
  62. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/__init__.py +0 -0
  63. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/ancestral_reconstruction.py +0 -0
  64. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/base.py +0 -0
  65. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/bipartition_support_stats.py +0 -0
  66. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/branch_length_multiplier.py +0 -0
  67. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/character_map.py +0 -0
  68. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/collapse_branches.py +0 -0
  69. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/concordance_asr.py +0 -0
  70. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/consensus_network.py +0 -0
  71. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/consensus_tree.py +0 -0
  72. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/cont_map.py +0 -0
  73. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/cophylo.py +0 -0
  74. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  75. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/density_map.py +0 -0
  76. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/discordance_asymmetry.py +0 -0
  77. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/dvmc.py +0 -0
  78. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/evo_tempo_map.py +0 -0
  79. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/evolutionary_rate.py +0 -0
  80. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/fit_continuous.py +0 -0
  81. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/fit_discrete.py +0 -0
  82. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  83. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/independent_contrasts.py +0 -0
  84. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/internal_branch_stats.py +0 -0
  85. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/internode_labeler.py +0 -0
  86. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/kf_distance.py +0 -0
  87. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  88. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/lb_score.py +0 -0
  89. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/ltt.py +0 -0
  90. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/monophyly_check.py +0 -0
  91. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  92. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/network_signal.py +0 -0
  93. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/ou_shift_detection.py +0 -0
  94. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/ouwie.py +0 -0
  95. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/parsimony_score.py +0 -0
  96. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/patristic_distances.py +0 -0
  97. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phenogram.py +0 -0
  98. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylo_heatmap.py +0 -0
  99. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylogenetic_glm.py +0 -0
  100. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylogenetic_ordination.py +0 -0
  101. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylogenetic_regression.py +0 -0
  102. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylogenetic_signal.py +0 -0
  103. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/phylomorphospace.py +0 -0
  104. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/polytomy_test.py +0 -0
  105. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/print_tree.py +0 -0
  106. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/prune_tree.py +0 -0
  107. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/quartet_network.py +0 -0
  108. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/rate_heterogeneity.py +0 -0
  109. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/relative_rate_test.py +0 -0
  110. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/rename_tree_tips.py +0 -0
  111. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/rf_distance.py +0 -0
  112. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/root_tree.py +0 -0
  113. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/saturation.py +0 -0
  114. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/spectral_discordance.py +0 -0
  115. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/spurious_sequence.py +0 -0
  116. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/stochastic_character_map.py +0 -0
  117. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/terminal_branch_stats.py +0 -0
  118. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/threshold_model.py +0 -0
  119. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/tip_labels.py +0 -0
  120. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  121. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  122. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/total_tree_length.py +0 -0
  123. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/trait_correlation.py +0 -0
  124. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/trait_rate_map.py +0 -0
  125. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/tree_space.py +0 -0
  126. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/treeness.py +0 -0
  127. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/treeness_over_rcv.py +0 -0
  128. {phykit-2.1.64 → phykit-2.1.67}/phykit/services/tree/vcv_utils.py +0 -0
  129. {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/dependency_links.txt +0 -0
  130. {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/requires.txt +0 -0
  131. {phykit-2.1.64 → phykit-2.1.67}/phykit.egg-info/top_level.txt +0 -0
  132. {phykit-2.1.64 → phykit-2.1.67}/setup.cfg +0 -0
@@ -1,17 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phykit
3
- Version: 2.1.64
3
+ Version: 2.1.67
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
7
7
  Classifier: Operating System :: OS Independent
8
8
  Classifier: Intended Audience :: Science/Research
9
9
  Classifier: Programming Language :: Python
10
+ Classifier: Programming Language :: Python :: 3.10
10
11
  Classifier: Programming Language :: Python :: 3.11
11
12
  Classifier: Programming Language :: Python :: 3.12
12
13
  Classifier: Programming Language :: Python :: 3.13
13
14
  Classifier: Topic :: Scientific/Engineering
14
- Requires-Python: >=3.11
15
+ Requires-Python: >=3.10
15
16
  Description-Content-Type: text/markdown
16
17
  License-File: LICENSE.md
17
18
  Requires-Dist: biopython>=1.82
@@ -23,6 +23,8 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
23
23
  "aot": "alignment_outlier_taxa",
24
24
  "dstat": "dstatistic",
25
25
  "abba_baba": "dstatistic",
26
+ "dfoil": "dfoil",
27
+ "dfoil_test": "dfoil",
26
28
  "cs": "column_score",
27
29
  "comp_bias_per_site": "compositional_bias_per_site",
28
30
  "cbps": "compositional_bias_per_site",
@@ -122,6 +122,10 @@ class Phykit:
122
122
  dstatistic (alias: dstat; abba_baba)
123
123
  - Patterson's D-statistic (ABBA-BABA test) for
124
124
  detecting introgression/gene flow
125
+ dfoil (alias: dfoil_test)
126
+ - DFOIL test (Pease & Hahn 2015) for detecting
127
+ and polarizing introgression in a 5-taxon
128
+ symmetric phylogeny
125
129
  alignment_outlier_taxa (alias: outlier_taxa; aot)
126
130
  - identify potential outlier taxa and why they were flagged
127
131
  column_score (alias: cs)
@@ -1956,21 +1960,26 @@ class Phykit:
1956
1960
  {help_header}
1957
1961
 
1958
1962
  Compute Patterson's D-statistic (ABBA-BABA test) for
1959
- detecting introgression or gene flow from a four-taxon
1960
- alignment.
1963
+ detecting introgression or gene flow.
1961
1964
 
1962
- The assumed topology is (((P1, P2), P3), Outgroup).
1963
- Under incomplete lineage sorting (ILS) alone, ABBA and
1964
- BABA site patterns should be equally frequent. A
1965
- significant excess of either pattern indicates gene
1966
- flow.
1965
+ Two input modes:
1966
+ 1) Site patterns from an alignment (-a)
1967
+ 2) Quartet topologies from gene trees (-g)
1967
1968
 
1968
- D > 0 suggests gene flow between P2 and P3.
1969
- D < 0 suggests gene flow between P1 and P3.
1970
- D = 0 is consistent with ILS alone.
1969
+ Species topology: (((P1, P2), P3), Outgroup).
1970
+ Under ILS alone, ABBA and BABA patterns (or
1971
+ discordant topologies) are equally frequent. A
1972
+ significant excess indicates introgression.
1971
1973
 
1972
- Significance is assessed via block jackknife (Green
1973
- et al. 2010; Patterson et al. 2012).
1974
+ D > 0: introgression between P2 and P3.
1975
+ D < 0: introgression between P1 and P3.
1976
+ D = 0: consistent with ILS alone.
1977
+ Note: D identifies which lineages exchanged genes
1978
+ but cannot determine direction of flow.
1979
+
1980
+ Gene trees can have any number of taxa; only the
1981
+ quartet induced by the four specified taxa is
1982
+ evaluated from each tree.
1974
1983
 
1975
1984
  Aliases:
1976
1985
  dstatistic, dstat, abba_baba
@@ -1978,13 +1987,22 @@ class Phykit:
1978
1987
  pk_dstatistic, pk_dstat, pk_abba_baba
1979
1988
 
1980
1989
  Usage:
1981
- phykit dstatistic -a <alignment> --p1 <taxon> --p2 <taxon>
1982
- --p3 <taxon> --outgroup <taxon>
1990
+ phykit dstatistic -a <alignment> --p1 <taxon>
1991
+ --p2 <taxon> --p3 <taxon> --outgroup <taxon>
1983
1992
  [--block-size 100] [--json]
1993
+ phykit dstatistic -g <gene_trees> --p1 <taxon>
1994
+ --p2 <taxon> --p3 <taxon> --outgroup <taxon>
1995
+ [--json]
1984
1996
 
1985
1997
  Options
1986
1998
  =====================================================
1987
1999
  -a/--alignment FASTA alignment file
2000
+ (site-pattern mode)
2001
+
2002
+ -g/--gene-trees gene trees file, one
2003
+ Newick per line (gene-
2004
+ tree mode; trees can
2005
+ have any number of taxa)
1988
2006
 
1989
2007
  --p1 taxon name for P1
1990
2008
  (sister to P2)
@@ -2000,21 +2018,96 @@ class Phykit:
2000
2018
 
2001
2019
  --block-size block size for jackknife
2002
2020
  estimation of standard
2003
- error (default: 100)
2021
+ error (default: 100;
2022
+ alignment mode only)
2023
+
2024
+ --support minimum branch support
2025
+ threshold for gene trees;
2026
+ branches below this value
2027
+ are collapsed (treated as
2028
+ unresolved). Gene-tree
2029
+ mode only.
2004
2030
 
2005
2031
  --json output results as JSON
2006
2032
  """
2007
2033
  ),
2008
2034
  )
2009
- parser.add_argument("-a", "--alignment", type=str, required=True, help=SUPPRESS, metavar="")
2035
+ parser.add_argument("-a", "--alignment", type=str, required=False, default=None, help=SUPPRESS, metavar="")
2036
+ parser.add_argument("-g", "--gene-trees", type=str, required=False, default=None, help=SUPPRESS, metavar="")
2010
2037
  parser.add_argument("--p1", type=str, required=True, help=SUPPRESS, metavar="")
2011
2038
  parser.add_argument("--p2", type=str, required=True, help=SUPPRESS, metavar="")
2012
2039
  parser.add_argument("--p3", type=str, required=True, help=SUPPRESS, metavar="")
2013
2040
  parser.add_argument("--outgroup", type=str, required=True, help=SUPPRESS, metavar="")
2014
2041
  parser.add_argument("--block-size", type=int, default=100, help=SUPPRESS, metavar="")
2042
+ parser.add_argument("--support", type=float, default=None, help=SUPPRESS, metavar="")
2015
2043
  _add_json_argument(parser)
2016
2044
  _run_service(parser, argv, Dstatistic)
2017
2045
 
2046
+ @staticmethod
2047
+ def dfoil(argv):
2048
+ parser = _new_parser(
2049
+ description=textwrap.dedent(
2050
+ f"""\
2051
+ {help_header}
2052
+
2053
+ Compute DFOIL statistics (Pease & Hahn 2015) for
2054
+ detecting and polarizing introgression in a 5-taxon
2055
+ symmetric phylogeny.
2056
+
2057
+ Topology: ((P1, P2), (P3, P4), Outgroup)
2058
+ P1 and P2 are sister taxa; P3 and P4 are sister
2059
+ taxa; the two pairs are sister to each other with
2060
+ an outgroup rooting the tree.
2061
+
2062
+ Four D-statistics are computed:
2063
+ DFO (far-outer), DIL (inner-left),
2064
+ DFI (far-inner), DOL (outer-left)
2065
+
2066
+ The sign pattern of these four statistics maps to
2067
+ a specific introgression scenario via the lookup
2068
+ table from Pease & Hahn (2015).
2069
+
2070
+ Aliases:
2071
+ dfoil, dfoil_test
2072
+ Command line interfaces:
2073
+ pk_dfoil, pk_dfoil_test
2074
+
2075
+ Usage:
2076
+ phykit dfoil -a <alignment> --p1 <taxon>
2077
+ --p2 <taxon> --p3 <taxon> --p4 <taxon>
2078
+ --outgroup <taxon> [--json]
2079
+
2080
+ Options
2081
+ =====================================================
2082
+ -a/--alignment FASTA alignment file
2083
+
2084
+ --p1 taxon name for P1
2085
+ (sister to P2)
2086
+
2087
+ --p2 taxon name for P2
2088
+ (sister to P1)
2089
+
2090
+ --p3 taxon name for P3
2091
+ (sister to P4)
2092
+
2093
+ --p4 taxon name for P4
2094
+ (sister to P3)
2095
+
2096
+ --outgroup outgroup taxon name
2097
+
2098
+ --json output results as JSON
2099
+ """
2100
+ ),
2101
+ )
2102
+ parser.add_argument("-a", "--alignment", type=str, required=True, help=SUPPRESS, metavar="")
2103
+ parser.add_argument("--p1", type=str, required=True, help=SUPPRESS, metavar="")
2104
+ parser.add_argument("--p2", type=str, required=True, help=SUPPRESS, metavar="")
2105
+ parser.add_argument("--p3", type=str, required=True, help=SUPPRESS, metavar="")
2106
+ parser.add_argument("--p4", type=str, required=True, help=SUPPRESS, metavar="")
2107
+ parser.add_argument("--outgroup", type=str, required=True, help=SUPPRESS, metavar="")
2108
+ _add_json_argument(parser)
2109
+ _run_service(parser, argv, Dfoil)
2110
+
2018
2111
  ## Tree functions
2019
2112
  @staticmethod
2020
2113
  def parsimony_score(argv):
@@ -5556,6 +5649,11 @@ class Phykit:
5556
5649
  --csv output per-branch concordance
5557
5650
  values as a CSV file
5558
5651
 
5652
+ --pie-size scale factor for pie chart
5653
+ size (default: 1.0; use
5654
+ 2.0 for double, 0.5 for
5655
+ half, etc.)
5656
+
5559
5657
  --json optional argument to output
5560
5658
  per-node concordance as JSON
5561
5659
  """
@@ -5578,6 +5676,10 @@ class Phykit:
5578
5676
  "--csv", type=str, required=False, default=None,
5579
5677
  help=SUPPRESS, metavar=""
5580
5678
  )
5679
+ parser.add_argument(
5680
+ "--pie-size", type=float, required=False, default=1.0,
5681
+ help=SUPPRESS, metavar=""
5682
+ )
5581
5683
  add_plot_arguments(parser)
5582
5684
  _add_json_argument(parser)
5583
5685
  _run_service(parser, argv, QuartetPie)
@@ -7955,6 +8057,10 @@ def dstatistic(argv=None):
7955
8057
  Phykit.dstatistic(sys.argv[1:])
7956
8058
 
7957
8059
 
8060
+ def dfoil(argv=None):
8061
+ Phykit.dfoil(sys.argv[1:])
8062
+
8063
+
7958
8064
  # Tree-based functions
7959
8065
  def parsimony_score(argv=None):
7960
8066
  Phykit.parsimony_score(sys.argv[1:])
@@ -30,6 +30,7 @@ CompositionPerTaxon = _LazyServiceFactory("phykit.services.alignment.composition
30
30
  CreateConcatenationMatrix = _LazyServiceFactory("phykit.services.alignment.create_concatenation_matrix", "CreateConcatenationMatrix")
31
31
  DNAThreader = _LazyServiceFactory("phykit.services.alignment.dna_threader", "DNAThreader")
32
32
  Dstatistic = _LazyServiceFactory("phykit.services.alignment.dstatistic", "Dstatistic")
33
+ Dfoil = _LazyServiceFactory("phykit.services.alignment.dfoil", "Dfoil")
33
34
  EvolutionaryRatePerSite = _LazyServiceFactory("phykit.services.alignment.evolutionary_rate_per_site", "EvolutionaryRatePerSite")
34
35
  Faidx = _LazyServiceFactory("phykit.services.alignment.faidx", "Faidx")
35
36
  GCContent = _LazyServiceFactory("phykit.services.alignment.gc_content", "GCContent")
@@ -13,6 +13,7 @@ _EXPORTS = {
13
13
  "CreateConcatenationMatrix": "create_concatenation_matrix",
14
14
  "DNAThreader": "dna_threader",
15
15
  "Dstatistic": "dstatistic",
16
+ "Dfoil": "dfoil",
16
17
  "EvolutionaryRatePerSite": "evolutionary_rate_per_site",
17
18
  "Faidx": "faidx",
18
19
  "GCContent": "gc_content",
@@ -0,0 +1,271 @@
1
+ """DFOIL test (Pease & Hahn 2015) for detecting and polarizing introgression
2
+ in a 5-taxon symmetric phylogeny.
3
+
4
+ Topology: ((P1, P2), (P3, P4), Outgroup)
5
+ """
6
+
7
+ from typing import Dict
8
+
9
+ from Bio import SeqIO
10
+
11
+ from .base import Alignment
12
+ from ...helpers.json_output import print_json
13
+ from ...errors import PhykitUserError
14
+
15
+
16
+ # All 16 binary site patterns for 5 taxa (P1, P2, P3, P4, Outgroup).
17
+ # A = matches outgroup (ancestral), B = differs (derived).
18
+ PATTERNS = [
19
+ 'AAAAA', 'AAABA', 'AABAA', 'AABBA',
20
+ 'ABAAA', 'ABABA', 'ABBAA', 'ABBBA',
21
+ 'BAAAA', 'BAABA', 'BABAA', 'BABBA',
22
+ 'BBAAA', 'BBABA', 'BBBAA', 'BBBBA',
23
+ ]
24
+
25
+ # Invariant / uninformative patterns (all ancestral or all derived).
26
+ _UNINFORMATIVE = {'AAAAA', 'BBBBA'}
27
+
28
+ # Sign-pattern interpretation table (DFO, DIL, DFI, DOL).
29
+ INTERPRETATIONS = {
30
+ '+++0': 'Introgression: P1 -> P3 (or P3 -> P1)',
31
+ '--0+': 'Introgression: P1 -> P4 (or P4 -> P1)',
32
+ '++-0': 'Introgression: P2 -> P3 (or P3 -> P2)',
33
+ '--0-': 'Introgression: P2 -> P4 (or P4 -> P2)',
34
+ '+0++': 'Introgression: P3 -> P1 (or P1 -> P3)',
35
+ '-0++': 'Introgression: P4 -> P1 (or P1 -> P4)',
36
+ '0+--': 'Introgression: P3 -> P2 (or P2 -> P3)',
37
+ '0---': 'Introgression: P4 -> P2 (or P2 -> P4)',
38
+ '++00': 'Introgression: ancestor of (P1,P2) <-> P3',
39
+ '--00': 'Introgression: ancestor of (P1,P2) <-> P4',
40
+ '0000': 'No significant introgression detected',
41
+ }
42
+
43
+
44
+ class Dfoil(Alignment):
45
+ def __init__(self, args) -> None:
46
+ parsed = self.process_args(args)
47
+ super().__init__(alignment_file_path=parsed["alignment_path"])
48
+ self.p1 = parsed["p1"]
49
+ self.p2 = parsed["p2"]
50
+ self.p3 = parsed["p3"]
51
+ self.p4 = parsed["p4"]
52
+ self.outgroup = parsed["outgroup"]
53
+ self.json_output = parsed["json_output"]
54
+
55
+ def process_args(self, args) -> Dict[str, object]:
56
+ return dict(
57
+ alignment_path=args.alignment,
58
+ p1=args.p1,
59
+ p2=args.p2,
60
+ p3=args.p3,
61
+ p4=args.p4,
62
+ outgroup=args.outgroup,
63
+ json_output=getattr(args, "json", False),
64
+ )
65
+
66
+ def run(self):
67
+ # Read alignment sequences
68
+ sequences = {}
69
+ for record in SeqIO.parse(self.alignment_file_path, "fasta"):
70
+ sequences[record.id] = str(record.seq).upper()
71
+
72
+ # Validate taxa are present
73
+ required = {
74
+ "p1": self.p1,
75
+ "p2": self.p2,
76
+ "p3": self.p3,
77
+ "p4": self.p4,
78
+ "outgroup": self.outgroup,
79
+ }
80
+ for label, taxon in required.items():
81
+ if taxon not in sequences:
82
+ raise PhykitUserError(
83
+ [f"Taxon '{taxon}' ({label}) not found in alignment. "
84
+ f"Available taxa: {', '.join(sorted(sequences.keys()))}"],
85
+ code=2,
86
+ )
87
+
88
+ seq_p1 = sequences[self.p1]
89
+ seq_p2 = sequences[self.p2]
90
+ seq_p3 = sequences[self.p3]
91
+ seq_p4 = sequences[self.p4]
92
+ seq_o = sequences[self.outgroup]
93
+
94
+ # Validate equal lengths
95
+ lengths = {len(seq_p1), len(seq_p2), len(seq_p3), len(seq_p4), len(seq_o)}
96
+ if len(lengths) != 1:
97
+ raise PhykitUserError(
98
+ ["Sequences have different lengths. All sequences must be aligned."],
99
+ code=2,
100
+ )
101
+
102
+ aln_length = len(seq_p1)
103
+ skip_chars = {"-", "N", "?", "X", "n", "x"}
104
+
105
+ # Initialize pattern counts
106
+ counts: Dict[str, int] = {p: 0 for p in PATTERNS}
107
+
108
+ for site in range(aln_length):
109
+ p1 = seq_p1[site]
110
+ p2 = seq_p2[site]
111
+ p3 = seq_p3[site]
112
+ p4 = seq_p4[site]
113
+ o = seq_o[site]
114
+
115
+ # Skip sites with gaps or ambiguous characters
116
+ if any(c in skip_chars for c in [p1, p2, p3, p4, o]):
117
+ continue
118
+
119
+ # Skip sites that are not biallelic
120
+ alleles = {p1, p2, p3, p4, o}
121
+ if len(alleles) != 2:
122
+ continue
123
+
124
+ # Encode pattern: A if matches outgroup, B if differs
125
+ pattern = ''.join(
126
+ 'A' if c == o else 'B'
127
+ for c in [p1, p2, p3, p4, o]
128
+ )
129
+ counts[pattern] += 1
130
+
131
+ # Count informative sites (exclude AAAAA and BBBBA)
132
+ informative_sites = sum(
133
+ v for k, v in counts.items() if k not in _UNINFORMATIVE
134
+ )
135
+
136
+ # Compute the four D-statistics
137
+ dfo_left = counts['AAABA'] + counts['ABABA'] + counts['BABAA'] + counts['BBBAA']
138
+ dfo_right = counts['AABAA'] + counts['ABBAA'] + counts['BAABA'] + counts['BBABA']
139
+
140
+ dil_left = counts['AAABA'] + counts['ABBAA'] + counts['BAABA'] + counts['BBBAA']
141
+ dil_right = counts['AABAA'] + counts['ABABA'] + counts['BABAA'] + counts['BBABA']
142
+
143
+ dfi_left = counts['ABAAA'] + counts['ABABA'] + counts['BABAA'] + counts['BABBA']
144
+ dfi_right = counts['BAAAA'] + counts['ABBAA'] + counts['BAABA'] + counts['ABBBA']
145
+
146
+ dol_left = counts['ABAAA'] + counts['ABBAA'] + counts['BAABA'] + counts['BABBA']
147
+ dol_right = counts['BAAAA'] + counts['ABABA'] + counts['BABAA'] + counts['ABBBA']
148
+
149
+ DFO = (dfo_left - dfo_right) / (dfo_left + dfo_right) if (dfo_left + dfo_right) > 0 else 0.0
150
+ DIL = (dil_left - dil_right) / (dil_left + dil_right) if (dil_left + dil_right) > 0 else 0.0
151
+ DFI = (dfi_left - dfi_right) / (dfi_left + dfi_right) if (dfi_left + dfi_right) > 0 else 0.0
152
+ DOL = (dol_left - dol_right) / (dol_left + dol_right) if (dol_left + dol_right) > 0 else 0.0
153
+
154
+ # Chi-squared significance tests (1 df)
155
+ from scipy.stats import chi2
156
+
157
+ def _chi2_test(left, right):
158
+ total = left + right
159
+ if total == 0:
160
+ return 0.0, 1.0
161
+ chi2_stat = (left - right) ** 2 / total
162
+ p_value = float(chi2.sf(chi2_stat, df=1))
163
+ return float(chi2_stat), p_value
164
+
165
+ dfo_chi2, dfo_p = _chi2_test(dfo_left, dfo_right)
166
+ dil_chi2, dil_p = _chi2_test(dil_left, dil_right)
167
+ dfi_chi2, dfi_p = _chi2_test(dfi_left, dfi_right)
168
+ dol_chi2, dol_p = _chi2_test(dol_left, dol_right)
169
+
170
+ # Sign pattern
171
+ def _get_sign(d_value, p_value, alpha=0.05):
172
+ if p_value >= alpha:
173
+ return '0'
174
+ return '+' if d_value > 0 else '-'
175
+
176
+ sign_pattern = (
177
+ _get_sign(DFO, dfo_p)
178
+ + _get_sign(DIL, dil_p)
179
+ + _get_sign(DFI, dfi_p)
180
+ + _get_sign(DOL, dol_p)
181
+ )
182
+
183
+ interpretation = INTERPRETATIONS.get(
184
+ sign_pattern, 'Ambiguous or complex introgression pattern'
185
+ )
186
+
187
+ # Significance stars helper
188
+ def _stars(p):
189
+ if p < 0.001:
190
+ return ' ***'
191
+ elif p < 0.01:
192
+ return ' **'
193
+ elif p < 0.05:
194
+ return ' *'
195
+ return ''
196
+
197
+ # Output
198
+ if self.json_output:
199
+ # Pattern counts for informative patterns only (AAAAA and BBBBA are excluded)
200
+ pattern_counts = {k: v for k, v in counts.items() if k not in _UNINFORMATIVE}
201
+
202
+ payload = {
203
+ "p1": self.p1,
204
+ "p2": self.p2,
205
+ "p3": self.p3,
206
+ "p4": self.p4,
207
+ "outgroup": self.outgroup,
208
+ "alignment_length": aln_length,
209
+ "informative_sites": informative_sites,
210
+ "pattern_counts": pattern_counts,
211
+ "dfo": {
212
+ "value": round(DFO, 4),
213
+ "left": dfo_left,
214
+ "right": dfo_right,
215
+ "chi2": round(dfo_chi2, 4),
216
+ "p_value": round(dfo_p, 6),
217
+ },
218
+ "dil": {
219
+ "value": round(DIL, 4),
220
+ "left": dil_left,
221
+ "right": dil_right,
222
+ "chi2": round(dil_chi2, 4),
223
+ "p_value": round(dil_p, 6),
224
+ },
225
+ "dfi": {
226
+ "value": round(DFI, 4),
227
+ "left": dfi_left,
228
+ "right": dfi_right,
229
+ "chi2": round(dfi_chi2, 4),
230
+ "p_value": round(dfi_p, 6),
231
+ },
232
+ "dol": {
233
+ "value": round(DOL, 4),
234
+ "left": dol_left,
235
+ "right": dol_right,
236
+ "chi2": round(dol_chi2, 4),
237
+ "p_value": round(dol_p, 6),
238
+ },
239
+ "sign_pattern": sign_pattern,
240
+ "interpretation": interpretation,
241
+ }
242
+ print_json(payload, sort_keys=False)
243
+ return
244
+
245
+ try:
246
+ print("DFOIL Test (Pease & Hahn 2015)")
247
+ print("================================")
248
+ print(f"Topology: (({self.p1}, {self.p2}), ({self.p3}, {self.p4}), {self.outgroup})")
249
+ print(f"P1: {self.p1}, P2: {self.p2}, P3: {self.p3}, P4: {self.p4}, Outgroup: {self.outgroup}")
250
+ print()
251
+ print(f"Alignment length: {aln_length}")
252
+ print(f"Informative sites: {informative_sites}")
253
+ print()
254
+ print("Site pattern counts:")
255
+ # Print informative patterns in a compact layout
256
+ informative_patterns = [p for p in PATTERNS if p not in _UNINFORMATIVE]
257
+ for i in range(0, len(informative_patterns), 4):
258
+ chunk = informative_patterns[i:i + 4]
259
+ parts = [f"{p}: {counts[p]}" for p in chunk]
260
+ print(" " + " ".join(parts))
261
+ print()
262
+ print("D-statistics:")
263
+ print(f" DFO: {DFO:.4f} (p = {dfo_p:.6f}{_stars(dfo_p)})")
264
+ print(f" DIL: {DIL:.4f} (p = {dil_p:.6f}{_stars(dil_p)})")
265
+ print(f" DFI: {DFI:.4f} (p = {dfi_p:.6f}{_stars(dfi_p)})")
266
+ print(f" DOL: {DOL:.4f} (p = {dol_p:.6f}{_stars(dol_p)})")
267
+ print()
268
+ print(f"Sign pattern: {sign_pattern}")
269
+ print(f"Interpretation: {interpretation}")
270
+ except BrokenPipeError:
271
+ pass