phykit 2.1.62__tar.gz → 2.1.64__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. {phykit-2.1.62 → phykit-2.1.64}/PKG-INFO +1 -1
  2. {phykit-2.1.62 → phykit-2.1.64}/phykit/cli_registry.py +5 -0
  3. {phykit-2.1.62 → phykit-2.1.64}/phykit/phykit.py +203 -0
  4. {phykit-2.1.62 → phykit-2.1.64}/phykit/service_factories.py +2 -0
  5. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/__init__.py +1 -0
  6. phykit-2.1.64/phykit/services/alignment/dstatistic.py +224 -0
  7. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/__init__.py +1 -0
  8. phykit-2.1.64/phykit/services/tree/trait_rate_map.py +658 -0
  9. phykit-2.1.64/phykit/version.py +1 -0
  10. {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/PKG-INFO +1 -1
  11. {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/SOURCES.txt +2 -0
  12. {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/entry_points.txt +6 -0
  13. {phykit-2.1.62 → phykit-2.1.64}/setup.py +6 -0
  14. phykit-2.1.62/phykit/version.py +0 -1
  15. {phykit-2.1.62 → phykit-2.1.64}/LICENSE.md +0 -0
  16. {phykit-2.1.62 → phykit-2.1.64}/README.md +0 -0
  17. {phykit-2.1.62 → phykit-2.1.64}/phykit/__init__.py +0 -0
  18. {phykit-2.1.62 → phykit-2.1.64}/phykit/__main__.py +0 -0
  19. {phykit-2.1.62 → phykit-2.1.64}/phykit/errors.py +0 -0
  20. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/__init__.py +0 -0
  21. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/boolean_argument_parsing.py +0 -0
  22. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/caching.py +0 -0
  23. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/circular_layout.py +0 -0
  24. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/color_annotations.py +0 -0
  25. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/discrete_models.py +0 -0
  26. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/files.py +0 -0
  27. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/json_output.py +0 -0
  28. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/parallel.py +0 -0
  29. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/parsimony_utils.py +0 -0
  30. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/plot_config.py +0 -0
  31. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/quartet_utils.py +0 -0
  32. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/stats_summary.py +0 -0
  33. {phykit-2.1.62 → phykit-2.1.64}/phykit/helpers/streaming.py +0 -0
  34. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/__init__.py +0 -0
  35. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_entropy.py +0 -0
  36. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_length.py +0 -0
  37. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  38. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_outlier_taxa.py +0 -0
  39. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_recoding.py +0 -0
  40. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/alignment_subsample.py +0 -0
  41. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/base.py +0 -0
  42. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/column_score.py +0 -0
  43. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/composition_per_taxon.py +0 -0
  44. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  45. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  46. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/dna_threader.py +0 -0
  47. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  48. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/faidx.py +0 -0
  49. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/gc_content.py +0 -0
  50. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/identity_matrix.py +0 -0
  51. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/mask_alignment.py +0 -0
  52. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/occupancy_per_taxon.py +0 -0
  53. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/pairwise_identity.py +0 -0
  54. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  55. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/plot_alignment_qc.py +0 -0
  56. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/rcv.py +0 -0
  57. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/rcvt.py +0 -0
  58. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  59. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  60. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/alignment/variable_sites.py +0 -0
  61. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/base.py +0 -0
  62. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/ancestral_reconstruction.py +0 -0
  63. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/base.py +0 -0
  64. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/bipartition_support_stats.py +0 -0
  65. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/branch_length_multiplier.py +0 -0
  66. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/character_map.py +0 -0
  67. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/collapse_branches.py +0 -0
  68. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/concordance_asr.py +0 -0
  69. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/consensus_network.py +0 -0
  70. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/consensus_tree.py +0 -0
  71. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/cont_map.py +0 -0
  72. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/cophylo.py +0 -0
  73. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  74. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/density_map.py +0 -0
  75. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/discordance_asymmetry.py +0 -0
  76. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/dvmc.py +0 -0
  77. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/evo_tempo_map.py +0 -0
  78. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/evolutionary_rate.py +0 -0
  79. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/fit_continuous.py +0 -0
  80. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/fit_discrete.py +0 -0
  81. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  82. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/independent_contrasts.py +0 -0
  83. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/internal_branch_stats.py +0 -0
  84. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/internode_labeler.py +0 -0
  85. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/kf_distance.py +0 -0
  86. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  87. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/lb_score.py +0 -0
  88. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/ltt.py +0 -0
  89. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/monophyly_check.py +0 -0
  90. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  91. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/network_signal.py +0 -0
  92. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/ou_shift_detection.py +0 -0
  93. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/ouwie.py +0 -0
  94. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/parsimony_score.py +0 -0
  95. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/patristic_distances.py +0 -0
  96. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phenogram.py +0 -0
  97. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylo_heatmap.py +0 -0
  98. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylogenetic_glm.py +0 -0
  99. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylogenetic_ordination.py +0 -0
  100. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylogenetic_regression.py +0 -0
  101. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylogenetic_signal.py +0 -0
  102. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/phylomorphospace.py +0 -0
  103. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/polytomy_test.py +0 -0
  104. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/print_tree.py +0 -0
  105. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/prune_tree.py +0 -0
  106. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/quartet_network.py +0 -0
  107. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/quartet_pie.py +0 -0
  108. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/rate_heterogeneity.py +0 -0
  109. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/relative_rate_test.py +0 -0
  110. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/rename_tree_tips.py +0 -0
  111. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/rf_distance.py +0 -0
  112. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/root_tree.py +0 -0
  113. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/saturation.py +0 -0
  114. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/spectral_discordance.py +0 -0
  115. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/spurious_sequence.py +0 -0
  116. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/stochastic_character_map.py +0 -0
  117. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/terminal_branch_stats.py +0 -0
  118. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/threshold_model.py +0 -0
  119. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/tip_labels.py +0 -0
  120. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  121. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  122. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/total_tree_length.py +0 -0
  123. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/trait_correlation.py +0 -0
  124. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/tree_space.py +0 -0
  125. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/treeness.py +0 -0
  126. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/treeness_over_rcv.py +0 -0
  127. {phykit-2.1.62 → phykit-2.1.64}/phykit/services/tree/vcv_utils.py +0 -0
  128. {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/dependency_links.txt +0 -0
  129. {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/requires.txt +0 -0
  130. {phykit-2.1.62 → phykit-2.1.64}/phykit.egg-info/top_level.txt +0 -0
  131. {phykit-2.1.62 → phykit-2.1.64}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phykit
3
- Version: 2.1.62
3
+ Version: 2.1.64
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -21,6 +21,8 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
21
21
  "recode": "alignment_recoding",
22
22
  "outlier_taxa": "alignment_outlier_taxa",
23
23
  "aot": "alignment_outlier_taxa",
24
+ "dstat": "dstatistic",
25
+ "abba_baba": "dstatistic",
24
26
  "cs": "column_score",
25
27
  "comp_bias_per_site": "compositional_bias_per_site",
26
28
  "cbps": "compositional_bias_per_site",
@@ -182,6 +184,9 @@ ALIAS_TO_HANDLER: Dict[str, str] = {
182
184
  "trait_correlation": "trait_correlation",
183
185
  "trait_corr": "trait_correlation",
184
186
  "phylo_corr": "trait_correlation",
187
+ "trait_rate_map": "trait_rate_map",
188
+ "rate_map": "trait_rate_map",
189
+ "branch_rates": "trait_rate_map",
185
190
  "tree_space": "tree_space",
186
191
  "tspace": "tree_space",
187
192
  "tree_landscape": "tree_space",
@@ -119,6 +119,9 @@ class Phykit:
119
119
  - recode alignments using reduced character schemes
120
120
  alignment_subsample (alias: aln_subsample; subsample)
121
121
  - randomly subsample genes, partitions, or sites
122
+ dstatistic (alias: dstat; abba_baba)
123
+ - Patterson's D-statistic (ABBA-BABA test) for
124
+ detecting introgression/gene flow
122
125
  alignment_outlier_taxa (alias: outlier_taxa; aot)
123
126
  - identify potential outlier taxa and why they were flagged
124
127
  column_score (alias: cs)
@@ -234,6 +237,8 @@ class Phykit:
234
237
  - stochastic character mapping (SIMMAP) of discrete traits
235
238
  cont_map (alias: contmap; cmap)
236
239
  - continuous trait map (contMap) visualization on a phylogeny
240
+ trait_rate_map (alias: rate_map; branch_rates)
241
+ - per-branch evolutionary rate map for a continuous trait
237
242
  density_map (alias: densitymap; dmap)
238
243
  - density map of posterior state probabilities on a phylogeny
239
244
  cophylo (alias: tanglegram; tangle)
@@ -1943,6 +1948,73 @@ class Phykit:
1943
1948
  _add_json_argument(parser)
1944
1949
  _run_service(parser, argv, AlignmentSubsample)
1945
1950
 
1951
# CLI handler for the `dstatistic` command (aliases: dstat, abba_baba).
# Builds a parser via _new_parser whose description is the dedented help text
# below, then registers the arguments:
#   -a/--alignment, --p1, --p2, --p3, --outgroup : required strings
#   --block-size                                : int, default 100
# plus whatever _add_json_argument adds, and finally hands the parser, the
# raw argv and the Dstatistic service to _run_service.
# All arguments use help=SUPPRESS, so the hand-written "Options" table in the
# description is the only per-option help shown to the user.
# NOTE(review): the help text labels P2 as "potential recipient" and P3 as
# "donor lineage"; the D-statistic by itself does not establish direction of
# gene flow -- confirm the wording is intended.
+ @staticmethod
1952
+ def dstatistic(argv):
1953
+ parser = _new_parser(
1954
+ description=textwrap.dedent(
1955
+ f"""\
1956
+ {help_header}
1957
+
1958
+ Compute Patterson's D-statistic (ABBA-BABA test) for
1959
+ detecting introgression or gene flow from a four-taxon
1960
+ alignment.
1961
+
1962
+ The assumed topology is (((P1, P2), P3), Outgroup).
1963
+ Under incomplete lineage sorting (ILS) alone, ABBA and
1964
+ BABA site patterns should be equally frequent. A
1965
+ significant excess of either pattern indicates gene
1966
+ flow.
1967
+
1968
+ D > 0 suggests gene flow between P2 and P3.
1969
+ D < 0 suggests gene flow between P1 and P3.
1970
+ D = 0 is consistent with ILS alone.
1971
+
1972
+ Significance is assessed via block jackknife (Green
1973
+ et al. 2010; Patterson et al. 2012).
1974
+
1975
+ Aliases:
1976
+ dstatistic, dstat, abba_baba
1977
+ Command line interfaces:
1978
+ pk_dstatistic, pk_dstat, pk_abba_baba
1979
+
1980
+ Usage:
1981
+ phykit dstatistic -a <alignment> --p1 <taxon> --p2 <taxon>
1982
+ --p3 <taxon> --outgroup <taxon>
1983
+ [--block-size 100] [--json]
1984
+
1985
+ Options
1986
+ =====================================================
1987
+ -a/--alignment FASTA alignment file
1988
+
1989
+ --p1 taxon name for P1
1990
+ (sister to P2)
1991
+
1992
+ --p2 taxon name for P2
1993
+ (sister to P1; potential
1994
+ recipient of gene flow)
1995
+
1996
+ --p3 taxon name for P3
1997
+ (donor lineage)
1998
+
1999
+ --outgroup outgroup taxon name
2000
+
2001
+ --block-size block size for jackknife
2002
+ estimation of standard
2003
+ error (default: 100)
2004
+
2005
+ --json output results as JSON
2006
+ """
2007
+ ),
2008
+ )
2009
+ parser.add_argument("-a", "--alignment", type=str, required=True, help=SUPPRESS, metavar="")
2010
+ parser.add_argument("--p1", type=str, required=True, help=SUPPRESS, metavar="")
2011
+ parser.add_argument("--p2", type=str, required=True, help=SUPPRESS, metavar="")
2012
+ parser.add_argument("--p3", type=str, required=True, help=SUPPRESS, metavar="")
2013
+ parser.add_argument("--outgroup", type=str, required=True, help=SUPPRESS, metavar="")
2014
+ parser.add_argument("--block-size", type=int, default=100, help=SUPPRESS, metavar="")
2015
+ _add_json_argument(parser)
2016
+ _run_service(parser, argv, Dstatistic)
2017
+
1946
2018
  ## Tree functions
1947
2019
  @staticmethod
1948
2020
  def parsimony_score(argv):
@@ -7353,6 +7425,129 @@ class Phykit:
7353
7425
  _add_json_argument(parser)
7354
7426
  _run_service(parser, argv, SpectralDiscordance)
7355
7427
 
7428
# CLI handler for the `trait_rate_map` command (aliases: rate_map,
# branch_rates). Builds a parser via _new_parser whose description is the
# dedented help text below, then registers the arguments:
#   -t/--tree, -d/--trait_data, -o/--output : required strings
#   --trait                                 : optional string, default None
# followed by whatever add_plot_arguments and _add_json_argument add, and
# finally hands the parser, the raw argv and the TraitRateMap service to
# _run_service.
# All arguments use help=SUPPRESS, so the hand-written "Options" table in the
# description is the only per-option help shown to the user.
# NOTE(review): the figure/legend/font/color options listed in the help text
# are not declared here; presumably add_plot_arguments provides them --
# confirm the two stay in sync.
+ @staticmethod
7429
+ def trait_rate_map(argv):
7430
+ parser = _new_parser(
7431
+ description=textwrap.dedent(
7432
+ f"""\
7433
+ {help_header}
7434
+
7435
+ Trait Rate Map — estimate per-branch evolutionary
7436
+ rates for a continuous trait and display them as a
7437
+ branch-colored phylogram.
7438
+
7439
+ Ancestral states are reconstructed via Felsenstein's
7440
+ weighted-average method (inverse-branch-length
7441
+ weighting, postorder traversal). Per-branch rate is
7442
+ the squared standardized contrast:
7443
+ rate = (child_val - parent_val)^2 / branch_length
7444
+
7445
+ Input is a phylogenetic tree and either:
7446
+ (a) a two-column TSV (taxon<tab>value, no header), or
7447
+ (b) a multi-column TSV with header (use --trait to
7448
+ select a column)
7449
+
7450
+ Aliases:
7451
+ trait_rate_map, rate_map, branch_rates
7452
+ Command line interfaces:
7453
+ pk_trait_rate_map, pk_rate_map, pk_branch_rates
7454
+
7455
+ Usage:
7456
+ phykit trait_rate_map -t <tree> -d <trait_data> -o <output>
7457
+ [--trait <column>] [--json]
7458
+ [--fig-width <float>] [--fig-height <float>]
7459
+ [--dpi <int>] [--no-title] [--title <str>]
7460
+ [--legend-position <str>]
7461
+ [--ylabel-fontsize <float>] [--xlabel-fontsize <float>]
7462
+ [--title-fontsize <float>] [--axis-fontsize <float>]
7463
+ [--colors <str>] [--ladderize] [--cladogram] [--circular] [--color-file <file>]
7464
+
7465
+ Options
7466
+ =====================================================
7467
+ -t/--tree a tree file
7468
+
7469
+ -d/--trait_data tab-delimited trait file
7470
+ (two-column: taxon<tab>value,
7471
+ no header; or multi-column
7472
+ with header when --trait is
7473
+ used)
7474
+
7475
+ -o/--output output plot file path
7476
+ (required)
7477
+
7478
+ --trait column name to use from a
7479
+ multi-column trait file
7480
+ (if omitted, two-column
7481
+ format is expected)
7482
+
7483
+ --fig-width figure width in inches
7484
+ (auto-scaled if omitted)
7485
+
7486
+ --fig-height figure height in inches
7487
+ (auto-scaled if omitted)
7488
+
7489
+ --dpi resolution in DPI
7490
+ (default: 300)
7491
+
7492
+ --no-title hide the plot title
7493
+
7494
+ --title custom title text
7495
+
7496
+ --legend-position legend location (e.g.,
7497
+ "upper right", "none")
7498
+
7499
+ --ylabel-fontsize font size for y-axis labels;
7500
+ 0 to hide
7501
+
7502
+ --xlabel-fontsize font size for x-axis labels;
7503
+ 0 to hide
7504
+
7505
+ --title-fontsize font size for the title
7506
+
7507
+ --axis-fontsize font size for axis labels
7508
+
7509
+ --colors comma-separated colors
7510
+ (hex or named, e.g.,
7511
+ "#ff0000,blue,#00ff00")
7512
+
7513
+ --ladderize ladderize (sort) the tree
7514
+ before plotting
7515
+
7516
+ --cladogram draw cladogram (equal branch
7517
+ lengths, tips aligned)
7518
+ instead of phylogram
7519
+
7520
+ --circular draw circular (radial/fan)
7521
+ phylogram instead of
7522
+ rectangular
7523
+
7524
+ --color-file color annotation file for
7525
+ tip labels, clade ranges,
7526
+ and branch colors (iTOL-
7527
+ inspired TSV format)
7528
+
7529
+ --json optional argument to also
7530
+ output results as JSON
7531
+ """
7532
+ ),
7533
+ )
7534
+ parser.add_argument(
7535
+ "-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
7536
+ )
7537
+ parser.add_argument(
7538
+ "-d", "--trait_data", type=str, required=True, help=SUPPRESS, metavar=""
7539
+ )
7540
+ parser.add_argument(
7541
+ "-o", "--output", type=str, required=True, help=SUPPRESS, metavar=""
7542
+ )
7543
+ parser.add_argument(
7544
+ "--trait", type=str, required=False, default=None,
7545
+ help=SUPPRESS, metavar=""
7546
+ )
7547
+ add_plot_arguments(parser)
7548
+ _add_json_argument(parser)
7549
+ _run_service(parser, argv, TraitRateMap)
7550
+
7356
7551
  @staticmethod
7357
7552
  def tree_space(argv):
7358
7553
  parser = _new_parser(
@@ -7756,6 +7951,10 @@ def alignment_subsample(argv=None):
7756
7951
  Phykit.alignment_subsample(sys.argv[1:])
7757
7952
 
7758
7953
 
7954
def dstatistic(argv=None):
    """Module-level entry point that forwards to ``Phykit.dstatistic``.

    Args:
        argv: Argument list to parse. When ``None`` (the default, and what a
            zero-argument console-script call supplies) ``sys.argv[1:]`` is
            used. Previously this parameter was accepted but ignored.
    """
    Phykit.dstatistic(sys.argv[1:] if argv is None else argv)
7956
+
7957
+
7759
7958
  # Tree-based functions
7760
7959
  def parsimony_score(argv=None):
7761
7960
  Phykit.parsimony_score(sys.argv[1:])
@@ -8029,3 +8228,7 @@ def spectral_discordance(argv=None):
8029
8228
 
8030
8229
  def tree_space(argv=None):
8031
8230
  Phykit.tree_space(sys.argv[1:])
8231
+
8232
+
8233
def trait_rate_map(argv=None):
    """Module-level entry point that forwards to ``Phykit.trait_rate_map``.

    Args:
        argv: Argument list to parse. When ``None`` (the default, and what a
            zero-argument console-script call supplies) ``sys.argv[1:]`` is
            used. Previously this parameter was accepted but ignored.
    """
    Phykit.trait_rate_map(sys.argv[1:] if argv is None else argv)
@@ -29,6 +29,7 @@ CompositionalBiasPerSite = _LazyServiceFactory("phykit.services.alignment.compos
29
29
  CompositionPerTaxon = _LazyServiceFactory("phykit.services.alignment.composition_per_taxon", "CompositionPerTaxon")
30
30
  CreateConcatenationMatrix = _LazyServiceFactory("phykit.services.alignment.create_concatenation_matrix", "CreateConcatenationMatrix")
31
31
  DNAThreader = _LazyServiceFactory("phykit.services.alignment.dna_threader", "DNAThreader")
32
+ Dstatistic = _LazyServiceFactory("phykit.services.alignment.dstatistic", "Dstatistic")
32
33
  EvolutionaryRatePerSite = _LazyServiceFactory("phykit.services.alignment.evolutionary_rate_per_site", "EvolutionaryRatePerSite")
33
34
  Faidx = _LazyServiceFactory("phykit.services.alignment.faidx", "Faidx")
34
35
  GCContent = _LazyServiceFactory("phykit.services.alignment.gc_content", "GCContent")
@@ -109,6 +110,7 @@ EvoTempoMap = _LazyServiceFactory("phykit.services.tree.evo_tempo_map", "EvoTemp
109
110
  DiscordanceAsymmetry = _LazyServiceFactory("phykit.services.tree.discordance_asymmetry", "DiscordanceAsymmetry")
110
111
  SpectralDiscordance = _LazyServiceFactory("phykit.services.tree.spectral_discordance", "SpectralDiscordance")
111
112
  TraitCorrelation = _LazyServiceFactory("phykit.services.tree.trait_correlation", "TraitCorrelation")
113
+ TraitRateMap = _LazyServiceFactory("phykit.services.tree.trait_rate_map", "TraitRateMap")
112
114
  TreeSpace = _LazyServiceFactory("phykit.services.tree.tree_space", "TreeSpace")
113
115
 
114
116
  SERVICE_FACTORIES: Dict[str, _LazyServiceFactory] = {
@@ -12,6 +12,7 @@ _EXPORTS = {
12
12
  "CompositionPerTaxon": "composition_per_taxon",
13
13
  "CreateConcatenationMatrix": "create_concatenation_matrix",
14
14
  "DNAThreader": "dna_threader",
15
+ "Dstatistic": "dstatistic",
15
16
  "EvolutionaryRatePerSite": "evolutionary_rate_per_site",
16
17
  "Faidx": "faidx",
17
18
  "GCContent": "gc_content",
@@ -0,0 +1,224 @@
1
+ """Patterson's D-statistic (ABBA-BABA test) for detecting introgression."""
2
+
3
+ from typing import Dict
4
+
5
+ import numpy as np
6
+ from Bio import SeqIO
7
+
8
+ from .base import Alignment
9
+ from ...helpers.json_output import print_json
10
+ from ...errors import PhykitUserError
11
+
12
+
13
class Dstatistic(Alignment):
    """Patterson's D-statistic (ABBA-BABA test) for a four-taxon alignment.

    The assumed topology is (((P1, P2), P3), Outgroup). A site is counted as
    ABBA when P1 matches the outgroup while P2 and P3 share a different
    character, and as BABA when P2 matches the outgroup while P1 and P3 share
    a different character. D = (ABBA - BABA) / (ABBA + BABA).

    A standard error is estimated with a delete-one block jackknife over
    consecutive, non-overlapping blocks of ``block_size`` alignment columns;
    the z-score and two-sided normal p-value are derived from it.
    """

    # Characters treated as missing/ambiguous. Sequences are upper-cased when
    # read, so only the upper-case forms can ever occur.
    _SKIP_CHARS = frozenset("-N?X")

    def __init__(self, args) -> None:
        parsed = self.process_args(args)
        super().__init__(alignment_file_path=parsed["alignment_path"])
        self.p1 = parsed["p1"]
        self.p2 = parsed["p2"]
        self.p3 = parsed["p3"]
        self.outgroup = parsed["outgroup"]
        self.block_size = parsed["block_size"]
        self.json_output = parsed["json_output"]

    def process_args(self, args) -> Dict[str, object]:
        """Map the parsed CLI namespace onto the values this service needs.

        ``block_size`` and ``json`` are optional on the namespace and fall
        back to 100 and ``False`` respectively.
        """
        return dict(
            alignment_path=args.alignment,
            p1=args.p1,
            p2=args.p2,
            p3=args.p3,
            outgroup=args.outgroup,
            block_size=getattr(args, "block_size", 100),
            json_output=getattr(args, "json", False),
        )

    def run(self):
        """Compute D, its jackknife significance, and print the report.

        Raises:
            PhykitUserError: if the block size is not a positive integer, a
                requested taxon is missing from the alignment, or the four
                sequences differ in length.
        """
        # A block size of 0 would divide by zero below and a negative one
        # would yield a meaningless block count, so reject both up front.
        if self.block_size is None or self.block_size < 1:
            raise PhykitUserError(
                [f"Block size must be a positive integer (got {self.block_size})."],
                code=2,
            )

        # Read alignment sequences (a repeated record id keeps the last one).
        sequences = {
            record.id: str(record.seq).upper()
            for record in SeqIO.parse(self.alignment_file_path, "fasta")
        }

        # Validate taxa are present
        required = {"p1": self.p1, "p2": self.p2, "p3": self.p3, "outgroup": self.outgroup}
        for label, taxon in required.items():
            if taxon not in sequences:
                raise PhykitUserError(
                    [f"Taxon '{taxon}' ({label}) not found in alignment. "
                     f"Available taxa: {', '.join(sorted(sequences.keys()))}"],
                    code=2,
                )

        seq_p1 = sequences[self.p1]
        seq_p2 = sequences[self.p2]
        seq_p3 = sequences[self.p3]
        seq_outgroup = sequences[self.outgroup]

        # Validate equal lengths
        if len({len(seq_p1), len(seq_p2), len(seq_p3), len(seq_outgroup)}) != 1:
            raise PhykitUserError(
                ["Sequences have different lengths. All sequences must be aligned."],
                code=2,
            )

        aln_length = len(seq_p1)

        # Classify every column once; both the overall counts and the
        # per-block counts are derived from these two masks.
        is_abba, is_baba = self._classify_sites(seq_p1, seq_p2, seq_p3, seq_outgroup)
        abba_count = int(is_abba.sum())
        baba_count = int(is_baba.sum())
        informative_sites = abba_count + baba_count

        # Compute D-statistic (defined as 0 when no site is informative).
        if informative_sites == 0:
            d_stat = 0.0
        else:
            d_stat = (abba_count - baba_count) / informative_sites

        # Block jackknife for significance. Only complete blocks are used, so
        # columns in a trailing partial block contribute to D but not to the
        # standard error.
        n_blocks = aln_length // self.block_size
        se = None
        z_score = None
        p_value = None

        if n_blocks >= 2:
            se = self._jackknife_standard_error(
                is_abba, is_baba, self.block_size, n_blocks
            )
            if se > 0:
                from scipy.stats import norm

                z_score = d_stat / se
                p_value = float(2.0 * norm.sf(abs(z_score)))
            elif d_stat != 0:
                # Zero variance with a non-zero D: the z-score is infinite
                # and carries the sign of D.
                z_score = float("inf") if d_stat > 0 else float("-inf")
                p_value = 0.0
            else:
                z_score = 0.0
                p_value = 1.0

        # Output
        if self.json_output:
            # NOTE(review): a non-finite z-score is passed through unrounded,
            # as before; how print_json serializes it is not visible here.
            if z_score is not None and np.isfinite(z_score):
                z_out = round(z_score, 2)
            else:
                z_out = z_score
            payload = {
                "p1": self.p1,
                "p2": self.p2,
                "p3": self.p3,
                "outgroup": self.outgroup,
                "alignment_length": aln_length,
                "informative_sites": informative_sites,
                "abba_count": abba_count,
                "baba_count": baba_count,
                "d_statistic": round(d_stat, 4),
                "block_size": self.block_size,
                "n_blocks": n_blocks,
                "standard_error": round(se, 4) if se is not None else None,
                "z_score": z_out,
                "p_value": round(p_value, 6) if p_value is not None else None,
            }
            print_json(payload, sort_keys=False)
            return

        try:
            print("Patterson's D-statistic (ABBA-BABA Test)")
            print("=========================================")
            print(f"Topology: ((({self.p1}, {self.p2}), {self.p3}), {self.outgroup})")
            print(f"P1: {self.p1}")
            print(f"P2: {self.p2}")
            print(f"P3: {self.p3}")
            print(f"Outgroup: {self.outgroup}")
            print()
            print(f"Alignment length: {aln_length}")
            print(f"Informative sites: {informative_sites}")
            print(f"ABBA sites: {abba_count}")
            print(f"BABA sites: {baba_count}")
            print(f"D-statistic: {d_stat:.4f}")

            if se is not None:
                print(f"Block jackknife (block size: {self.block_size}):")
                print(f" Standard error: {se:.4f}")
                # The float format renders infinities as "inf" / "-inf".
                print(f" Z-score: {z_score:.2f}")
                print(f" p-value: {p_value:.6f}")
                print()
                print(f"Interpretation: {self._interpret(d_stat, p_value)}")
            else:
                print()
                print("Not enough blocks for jackknife significance test.")
        except BrokenPipeError:
            # Downstream consumer (e.g. `head`) closed the pipe; stop quietly.
            pass

    @classmethod
    def _classify_sites(cls, seq_p1, seq_p2, seq_p3, seq_outgroup):
        """Return boolean masks ``(is_abba, is_baba)``, one entry per column.

        Columns containing any skip character are never counted. The ABBA and
        BABA conditions each imply that exactly two distinct characters are
        present, so no separate biallelic check is required.
        """
        skip = cls._SKIP_CHARS
        n_sites = len(seq_outgroup)
        is_abba = np.zeros(n_sites, dtype=bool)
        is_baba = np.zeros(n_sites, dtype=bool)

        for idx, (p1, p2, p3, o) in enumerate(
            zip(seq_p1, seq_p2, seq_p3, seq_outgroup)
        ):
            if p1 in skip or p2 in skip or p3 in skip or o in skip:
                continue
            # ABBA: P1=ancestral, P2=derived, P3=derived, O=ancestral
            if p1 == o and p2 == p3 and p2 != o:
                is_abba[idx] = True
            # BABA: P1=derived, P2=ancestral, P3=derived, O=ancestral
            elif p2 == o and p1 == p3 and p1 != o:
                is_baba[idx] = True

        return is_abba, is_baba

    @staticmethod
    def _jackknife_standard_error(is_abba, is_baba, block_size, n_blocks) -> float:
        """Delete-one block jackknife standard error of D.

        Only the first ``n_blocks * block_size`` columns are used. A
        leave-one-out replicate with no informative sites contributes D = 0.
        """
        used = n_blocks * block_size
        block_abba = is_abba[:used].reshape(n_blocks, block_size).sum(axis=1).astype(float)
        block_baba = is_baba[:used].reshape(n_blocks, block_size).sum(axis=1).astype(float)

        loo_abba = block_abba.sum() - block_abba
        loo_baba = block_baba.sum() - block_baba
        denom = loo_abba + loo_baba

        jackknife_d = np.zeros(n_blocks)
        np.divide(loo_abba - loo_baba, denom, out=jackknife_d, where=denom > 0)

        mean_d = np.mean(jackknife_d)
        return float(
            np.sqrt((n_blocks - 1) / n_blocks * np.sum((jackknife_d - mean_d) ** 2))
        )

    def _interpret(self, d_stat: float, p_value: float, alpha: float = 0.05) -> str:
        """Return a one-sentence reading of D at significance level ``alpha``."""
        if p_value < alpha:
            if d_stat > 0:
                return (
                    f"Significant excess of ABBA patterns (p < {alpha}) "
                    f"suggests introgression between P2 ({self.p2}) and "
                    f"P3 ({self.p3}). Note: D cannot determine the "
                    f"direction of gene flow."
                )
            else:
                return (
                    f"Significant excess of BABA patterns (p < {alpha}) "
                    f"suggests introgression between P1 ({self.p1}) and "
                    f"P3 ({self.p3}). Note: D cannot determine the "
                    f"direction of gene flow."
                )
        return "No significant evidence of introgression (consistent with ILS)."
@@ -49,6 +49,7 @@ _EXPORTS = {
49
49
  "TreenessOverRCV": "treeness_over_rcv",
50
50
  "ConcordanceAsr": "concordance_asr",
51
51
  "TraitCorrelation": "trait_correlation",
52
+ "TraitRateMap": "trait_rate_map",
52
53
  "TreeSpace": "tree_space",
53
54
  }
54
55