phykit 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phykit/__init__.py +0 -0
- phykit/__main__.py +6 -0
- phykit/helpers/__init__.py +0 -0
- phykit/helpers/boolean_argument_parsing.py +12 -0
- phykit/helpers/caching.py +201 -0
- phykit/helpers/files.py +125 -0
- phykit/helpers/parallel.py +305 -0
- phykit/helpers/stats_summary.py +64 -0
- phykit/helpers/streaming.py +152 -0
- phykit/phykit.py +2862 -0
- phykit/services/__init__.py +0 -0
- phykit/services/alignment/__init__.py +17 -0
- phykit/services/alignment/alignment_length.py +16 -0
- phykit/services/alignment/alignment_length_no_gaps.py +69 -0
- phykit/services/alignment/alignment_recoding.py +89 -0
- phykit/services/alignment/base.py +103 -0
- phykit/services/alignment/column_score.py +66 -0
- phykit/services/alignment/compositional_bias_per_site.py +98 -0
- phykit/services/alignment/create_concatenation_matrix.py +254 -0
- phykit/services/alignment/dna_threader.py +145 -0
- phykit/services/alignment/evolutionary_rate_per_site.py +85 -0
- phykit/services/alignment/faidx.py +21 -0
- phykit/services/alignment/gc_content.py +94 -0
- phykit/services/alignment/pairwise_identity.py +159 -0
- phykit/services/alignment/parsimony_informative_sites.py +81 -0
- phykit/services/alignment/rcv.py +14 -0
- phykit/services/alignment/rcvt.py +47 -0
- phykit/services/alignment/rename_fasta_entries.py +53 -0
- phykit/services/alignment/sum_of_pairs_score.py +157 -0
- phykit/services/alignment/variable_sites.py +54 -0
- phykit/services/base.py +9 -0
- phykit/services/tree/__init__.py +29 -0
- phykit/services/tree/base.py +178 -0
- phykit/services/tree/bipartition_support_stats.py +48 -0
- phykit/services/tree/branch_length_multiplier.py +37 -0
- phykit/services/tree/collapse_branches.py +27 -0
- phykit/services/tree/covarying_evolutionary_rates.py +272 -0
- phykit/services/tree/dvmc.py +37 -0
- phykit/services/tree/evolutionary_rate.py +17 -0
- phykit/services/tree/hidden_paralogy_check.py +128 -0
- phykit/services/tree/internal_branch_stats.py +77 -0
- phykit/services/tree/internode_labeler.py +33 -0
- phykit/services/tree/last_common_ancestor_subtree.py +35 -0
- phykit/services/tree/lb_score.py +196 -0
- phykit/services/tree/monophyly_check.py +106 -0
- phykit/services/tree/nearest_neighbor_interchange.py +140 -0
- phykit/services/tree/patristic_distances.py +113 -0
- phykit/services/tree/polytomy_test.py +546 -0
- phykit/services/tree/print_tree.py +28 -0
- phykit/services/tree/prune_tree.py +40 -0
- phykit/services/tree/rename_tree_tips.py +64 -0
- phykit/services/tree/rf_distance.py +136 -0
- phykit/services/tree/root_tree.py +35 -0
- phykit/services/tree/saturation.py +209 -0
- phykit/services/tree/spurious_sequence.py +75 -0
- phykit/services/tree/terminal_branch_stats.py +87 -0
- phykit/services/tree/tip_labels.py +18 -0
- phykit/services/tree/tip_to_tip_distance.py +41 -0
- phykit/services/tree/tip_to_tip_node_distance.py +41 -0
- phykit/services/tree/total_tree_length.py +25 -0
- phykit/services/tree/treeness.py +16 -0
- phykit/services/tree/treeness_over_rcv.py +40 -0
- phykit/version.py +1 -0
- phykit-2.1.0.dist-info/METADATA +150 -0
- phykit-2.1.0.dist-info/RECORD +69 -0
- phykit-2.1.0.dist-info/WHEEL +5 -0
- phykit-2.1.0.dist-info/entry_points.txt +121 -0
- phykit-2.1.0.dist-info/licenses/LICENSE.md +7 -0
- phykit-2.1.0.dist-info/top_level.txt +1 -0
phykit/phykit.py
ADDED
|
@@ -0,0 +1,2862 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
import textwrap
|
|
6
|
+
|
|
7
|
+
from .version import __version__
|
|
8
|
+
|
|
9
|
+
from argparse import (
|
|
10
|
+
ArgumentParser,
|
|
11
|
+
SUPPRESS,
|
|
12
|
+
RawDescriptionHelpFormatter,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from .services.alignment import (
|
|
16
|
+
AlignmentLength,
|
|
17
|
+
AlignmentLengthNoGaps,
|
|
18
|
+
AlignmentRecoding,
|
|
19
|
+
ColumnScore,
|
|
20
|
+
CompositionalBiasPerSite,
|
|
21
|
+
CreateConcatenationMatrix,
|
|
22
|
+
DNAThreader,
|
|
23
|
+
EvolutionaryRatePerSite,
|
|
24
|
+
Faidx,
|
|
25
|
+
GCContent,
|
|
26
|
+
PairwiseIdentity,
|
|
27
|
+
ParsimonyInformative,
|
|
28
|
+
RelativeCompositionVariability,
|
|
29
|
+
RelativeCompositionVariabilityTaxon,
|
|
30
|
+
RenameFastaEntries,
|
|
31
|
+
SumOfPairsScore,
|
|
32
|
+
VariableSites,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
from .services.tree import (
|
|
36
|
+
BipartitionSupportStats,
|
|
37
|
+
BranchLengthMultiplier,
|
|
38
|
+
CollapseBranches,
|
|
39
|
+
CovaryingEvolutionaryRates,
|
|
40
|
+
DVMC,
|
|
41
|
+
EvolutionaryRate,
|
|
42
|
+
HiddenParalogyCheck,
|
|
43
|
+
InternalBranchStats,
|
|
44
|
+
InternodeLabeler,
|
|
45
|
+
LastCommonAncestorSubtree,
|
|
46
|
+
LBScore,
|
|
47
|
+
MonophylyCheck,
|
|
48
|
+
NearestNeighborInterchange,
|
|
49
|
+
PatristicDistances,
|
|
50
|
+
PolytomyTest,
|
|
51
|
+
PrintTree,
|
|
52
|
+
PruneTree,
|
|
53
|
+
RenameTreeTips,
|
|
54
|
+
RobinsonFouldsDistance,
|
|
55
|
+
RootTree,
|
|
56
|
+
Saturation,
|
|
57
|
+
SpuriousSequence,
|
|
58
|
+
TerminalBranchStats,
|
|
59
|
+
TipLabels,
|
|
60
|
+
TipToTipDistance,
|
|
61
|
+
TipToTipNodeDistance,
|
|
62
|
+
TotalTreeLength,
|
|
63
|
+
Treeness,
|
|
64
|
+
TreenessOverRCV,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
from .helpers.boolean_argument_parsing import str2bool
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Attach a stream handler (StreamHandler() with no argument writes to
# sys.stderr) that lets INFO-and-above records through.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
logger.addHandler(ch)
|
|
74
|
+
|
|
75
|
+
help_header = f"""
|
|
76
|
+
_____ _ _ _______ _______
|
|
77
|
+
| __ \| | | |/ /_ _|__ __|
|
|
78
|
+
| |__) | |__ _ _| ' / | | | |
|
|
79
|
+
| ___/| '_ \| | | | < | | | |
|
|
80
|
+
| | | | | | |_| | . \ _| |_ | |
|
|
81
|
+
|_| |_| |_|\__, |_|\_\_____| |_|
|
|
82
|
+
__/ |
|
|
83
|
+
|___/
|
|
84
|
+
|
|
85
|
+
Version: {__version__}
|
|
86
|
+
Citation: Steenwyk et al. 2021, Bioinformatics. doi: 10.1093/bioinformatics/btab096
|
|
87
|
+
Documentation link: https://jlsteenwyk.com/PhyKIT
|
|
88
|
+
Publication link: https://academic.oup.com/bioinformatics/article-abstract/37/16/2325/6131675
|
|
89
|
+
|
|
90
|
+
"""
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class Phykit(object):
|
|
94
|
+
help_header = f"""
|
|
95
|
+
_____ _ _ _______ _______
|
|
96
|
+
| __ \| | | |/ /_ _|__ __|
|
|
97
|
+
| |__) | |__ _ _| ' / | | | |
|
|
98
|
+
| ___/| '_ \| | | | < | | | |
|
|
99
|
+
| | | | | | |_| | . \ _| |_ | |
|
|
100
|
+
|_| |_| |_|\__, |_|\_\_____| |_|
|
|
101
|
+
__/ |
|
|
102
|
+
|___/
|
|
103
|
+
|
|
104
|
+
Version: {__version__}
|
|
105
|
+
Citation: Steenwyk et al. 2021, Bioinformatics. doi: 10.1093/bioinformatics/btab096
|
|
106
|
+
Documentation link: https://jlsteenwyk.com/PhyKIT
|
|
107
|
+
Publication link: https://academic.oup.com/bioinformatics/article-abstract/37/16/2325/6131675
|
|
108
|
+
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
def __init__(self):
    """Parse the sub-command from ``sys.argv`` and dispatch to its handler.

    Only ``sys.argv[1]`` is parsed here; everything after it
    (``sys.argv[2:]``) is handed untouched to the selected command, which
    builds its own argument parser.

    A command is treated as a long-form command only when it names a
    public, callable attribute of this object. Anything else (aliases,
    ``version``, private or non-callable attributes, unknown words) is
    routed through ``run_alias``, which either resolves it or prints an
    "invalid command" message and exits with status 1.

    Raises:
        SystemExit: propagated from argparse or from the command itself;
            also raised with status 2 if the command raises NameError.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {self.help_header}

            PhyKIT helps process and analyze multiple sequence alignments and phylogenies.

            Generally, all functions are designed to help understand the contents of alignments
            (e.g., gc content or the number of parsimony informative sites) and the shape
            of trees (e.g., treeness, degree of violation of a molecular clock).

            Some help messages indicate that summary statistics are reported (e.g.,
            bipartition_support_stats). Summary statistics include mean, median, 25th percentile,
            75th percentile, minimum, maximum, standard deviation, and variance. These functions
            typically have a verbose option that allows users to get the underlying data
            used to calculate summary statistics.

            Usage: phykit <command> [optional command arguments]

            Command specific help messages can be viewed by adding a
            -h/--help argument after the command. For example, to see the
            help message for the command 'treeness', execute
            "phykit treeness -h" or "phykit treeness --help".

            Lastly, each function comes with aliases to save the user some
            key strokes. For example, to get the help message for the 'treeness'
            function, you can type "phykit tness -h". All aliases are specified
            in parentheses after the long form of the function name.

            Alignment-based commands
            ========================
            alignment_length (alias: aln_len; al)
                - calculates alignment length
            alignment_length_no_gaps (alias: aln_len_no_gaps; alng)
                - calculates alignment length after removing sites with gaps
            alignment_recoding (alias: aln_recoding, recode)
                - recode alignments using reduced character schemes
            column_score (alias: cs)
                - calculate column score between a reference and query alignment
            compositional_bias_per_site (alias: comp_bias_per_site; cbps)
                - detects site-wise compositional biases in an alignment
            create_concatenation_matrix (alias: create_concat; cc)
                - create concatenation matrix from a set of alignments
            evolutionary_rate_per_site (alias: evo_rate_per_site; erps)
                - estimate evolutionary rate per site in an alignment
            faidx (alias: get_entry; ge)
                - extract query fasta entry from multi-fasta file
            gc_content (alias: gc)
                - calculate GC content of a fasta file or entries thereof
            pairwise_identity (alias: pairwise_id, pi)
                - calculates average pairwise identity among sequences in
                  an alignment file. This is a proxy for evolutionary rate
            parsimony_informative_sites (alias: pis)
                - calculates the number and percentage of parsimony
                  informative sites in an alignment
            relative_composition_variability (alias: rel_comp_var, rcv)
                - calculates relative composition variability in an alignment
            relative_composition_variability_taxon (alias: rel_comp_var_taxon, rcvt)
                - calculates relative composition variability of each taxa in an alignment
            rename_fasta_entries (alias: rename_fasta)
                - rename entries in a fasta file
            sum_of_pairs_score (alias: sops; sop)
                - calculate sum-of-pairs score between a reference and query alignment
            thread_dna (alias: pal2nal; p2n)
                - thread dna sequences over a protein alignment
            variable_sites (alias: vs)
                - calculates the number and percentage of variable sites
                  in an alignment

            Tree-based commands
            ===================
            bipartition_support_stats (alias: bss)
                - calculates summary statistics for bipartition support
            branch_length_multiplier (alias: blm)
                - multiply all branch lengths by a specified factor
            collapse_branches (alias: collapse; cb)
                - collapses branches according to bipartition support
            covarying_evolutionary_rates (alias: cover)
                - calculates correlation in the evolutionary rate of two trees
            degree_of_violation_of_a_molecular_clock (alias: dvmc)
                - reports the degree of violation of the molecular clock
            evolutionary_rate (alias: evo_rate)
                - reports a tree-based estimation of evolutionary rate for a gene
            hidden_paralogy_check (alias: clan_check)
                - check for monophyly of specific clades of taxa
            internal_branch_stats (alias: ibs)
                - calculates summary statistics for internal branch lengths
            internode_labeler (alias: il)
                - create labels at internodes in a phylogeny
            last_common_ancestor_subtree (alias: lca_subtree)
                - get last common ancestor of a set of taxa
            long_branch_score (alias: lb_score; lbs)
                - calculates lb (long branch) score for taxa in a phylogeny
            monophyly_check (alias: is_monophyletic)
                - determines if a set of tip names are monophyletic
            nearest_neighbor_interchange (alias: nni)
                - make nearest neighbor interchange moves on a tree
            patristic_distances (alias: pd)
                - calculate all pairwise distances between tips in a tree
            polytomy_test (alias: polyt_test; polyt; ptt)
                - conducts a polytomy test using gene
                  support frequencies
            print_tree (alias: print; pt)
                - prints ascii tree
            prune_tree (alias: prune)
                - prune taxa from a phylogeny
            rename_tree_tips (alias: rename_tree; rename_tips)
                - renames tips in a phylogeny according to a file with
                  the desired new tip names
            robinson_foulds_distance (alias: rf_distance; rf_dist; rf)
                - calculates Robinson-Foulds distance between two trees
            root_tree (alias: root; rt)
                - roots tree on user-specified taxa or taxon
            spurious_sequence (alias: spurious_seq; ss)
                - identifies putatively spurious sequences by identifying
                  branch lengths that are atypically long
            terminal_branch_stats (alias: tbs)
                - calculates summary statistics for terminal branch lengths
            tip_labels (alias: tree_labels; labels; tl)
                - print leaf names in a phylogeny
            tip_to_tip_distance (alias: t2t_dist; t2t)
                - calculate tip-to-tip distance in a phylogeny
            tip_to_tip_node_distance (alias: t2t_node_dist; t2t_nd)
                - calculate tip-to-tip node distance in a phylogeny
            total_tree_length (alias: tree_len)
                - calculates total tree length
            treeness (alias: tness)
                - reports treeness or stemminess, a measure of signal-to-
                  noise ratio in a phylogeny

            Alignment- and tree-based commands
            ==================================
            saturation (alias: sat)
                - calculates saturation by examining the slope of
                  patristic distance and uncorrected distances
            treeness_over_rcv (alias: toverr; tor)
                - calculates treeness/rcv, treeness, and rcv
            """
        ),
    )
    parser.add_argument("command", help=SUPPRESS)
    args = parser.parse_args(sys.argv[1:2])

    command = args.command
    # A bare hasattr() check would also match non-command attributes:
    # "help_header" is a string, "run_alias" needs two arguments, "version"
    # is called without an argument vector by run_alias, and dunder names
    # such as "__init__" exist on every object. Calling any of those with
    # argv ends in a TypeError traceback, so only public callables are
    # dispatched directly and the rest is left to run_alias.
    is_long_form = (
        not command.startswith("_")
        and command not in ("run_alias", "version")
        and callable(getattr(self, command, None))
    )

    try:
        if is_long_form:
            getattr(self, command)(sys.argv[2:])
        else:
            self.run_alias(command, sys.argv[2:])
    except SystemExit:
        # Re-raise SystemExit as-is to preserve exit code
        raise
    except NameError:
        sys.exit(2)
|
|
269
|
+
|
|
270
|
+
## Aliases
|
|
271
|
+
def run_alias(self, command, argv):
    """Resolve a command alias and run the matching command method.

    Args:
        command: the word given on the command line after ``phykit``.
        argv: remaining command-line arguments, forwarded to the command.

    Returns:
        Whatever the resolved command method returns.

    Raises:
        SystemExit: with status 1 when ``command`` matches no alias.
    """
    # version is the only target that is called without argv
    if command in ("version", "v"):
        return self.version()

    # (method name, accepted aliases), scanned in order
    alias_table = (
        # Alignment aliases
        ("alignment_length", ("aln_len", "al")),
        ("alignment_length_no_gaps", ("aln_len_no_gaps", "alng")),
        ("alignment_recoding", ("aln_recoding", "recode")),
        ("column_score", ("cs",)),
        ("compositional_bias_per_site", ("comp_bias_per_site", "cbps")),
        ("evolutionary_rate_per_site", ("evo_rate_per_site", "erps")),
        ("faidx", ("get_entry", "ge")),
        ("gc_content", ("gc",)),
        ("pairwise_identity", ("pairwise_id", "pi")),
        ("parsimony_informative_sites", ("pis",)),
        ("rcv", ("rel_comp_var", "relative_composition_variability")),
        (
            "rcvt",
            ("relative_composition_variability_taxon", "rel_comp_var_taxon"),
        ),
        ("rename_fasta_entries", ("rename_fasta",)),
        ("sum_of_pairs_score", ("sum_of_pairs_score", "sops", "sop")),
        ("variable_sites", ("vs",)),
        # Tree aliases
        ("bipartition_support_stats", ("bss",)),
        ("branch_length_multiplier", ("blm",)),
        ("collapse_branches", ("collapse", "cb")),
        ("covarying_evolutionary_rates", ("cover",)),
        ("dvmc", ("degree_of_violation_of_a_molecular_clock",)),
        ("evolutionary_rate", ("evo_rate",)),
        ("hidden_paralogy_check", ("clan_check",)),
        ("internal_branch_stats", ("ibs",)),
        ("internode_labeler", ("il",)),
        ("last_common_ancestor_subtree", ("lca_subtree",)),
        ("lb_score", ("long_branch_score", "lbs")),
        ("monophyly_check", ("is_monophyletic",)),
        ("nearest_neighbor_interchange", ("nni",)),
        ("patristic_distances", ("pd",)),
        ("polytomy_test", ("polyt_test", "ptt", "polyt")),
        ("print_tree", ("print", "pt")),
        ("prune_tree", ("prune",)),
        ("rename_tree_tips", ("rename_tree", "rename_tips")),
        ("rf_distance", ("robinson_foulds_distance", "rf_dist", "rf")),
        ("root_tree", ("root", "rt")),
        ("spurious_sequence", ("spurious_seq", "ss")),
        ("terminal_branch_stats", ("tbs",)),
        ("tip_labels", ("labels", "tree_labels", "tl")),
        ("tip_to_tip_distance", ("t2t_dist", "t2t")),
        ("tip_to_tip_node_distance", ("t2t_node_dist", "t2t_nd")),
        ("total_tree_length", ("tree_len",)),
        ("treeness", ("tness",)),
        # Alignment- and tree-based aliases
        ("saturation", ("sat",)),
        ("treeness_over_rcv", ("toverr", "tor")),
        # Helper aliases
        ("create_concatenation_matrix", ("create_concat", "cc")),
        ("thread_dna", ("pal2nal", "p2n")),
    )

    for method_name, accepted in alias_table:
        if command in accepted:
            return getattr(self, method_name)(argv)

    # nothing matched: show the banner, explain, and exit non-zero
    print(textwrap.dedent(help_header))
    print(
        "Invalid command option. See help for a complete list of commands and aliases."
    )
    sys.exit(1)
|
|
377
|
+
|
|
378
|
+
## print version
|
|
379
|
+
def version(self, argv=None):
    """Print the PhyKIT banner stored in ``self.help_header``.

    Args:
        argv: ignored. Accepted so the method can be called the same way as
            every other command (with the remaining command-line arguments);
            without it, being called with an argument vector raises
            TypeError.
    """
    print(
        textwrap.dedent(
            f"""\
            {self.help_header}
            """
        )
    )
|
|
387
|
+
|
|
388
|
+
## Alignment functions
|
|
389
|
+
@staticmethod
def alignment_length(argv):
    """Parse the ``alignment_length`` arguments and run ``AlignmentLength``.

    Args:
        argv: command-line arguments that follow the command name; expects
            one positional alignment path.
    """
    parser = ArgumentParser(
        description=textwrap.dedent(
            f"""\
            {help_header}

            Length of an input alignment is calculated using this function.

            Longer alignments are associated with strong phylogenetic signal.

            Association between alignment length and phylogenetic signal
            was determined by Shen et al., Genome Biology and Evolution (2016),
            doi: 10.1093/gbe/evw179.

            Aliases:
              alignment_length, aln_len, al
            Command line interfaces:
              pk_alignment_length, pk_aln_len, pk_al

            Usage:
            phykit alignment_length <alignment>

            Options
            =====================================================
            <alignment>                 first argument after
                                        function name should be
                                        an alignment file
            """
        ),
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    parser.add_argument("alignment", help=SUPPRESS, type=str)
    # hand the parsed namespace straight to the service object
    AlignmentLength(parser.parse_args(argv)).run()
|
|
426
|
+
|
|
427
|
+
@staticmethod
def alignment_length_no_gaps(argv):
    """Parse the ``alignment_length_no_gaps`` arguments and run the service.

    Args:
        argv: command-line arguments that follow the command name; expects
            one positional alignment path.
    """
    cli = ArgumentParser(
        description=textwrap.dedent(
            f"""\
            {help_header}

            Calculate alignment length excluding sites with gaps.

            Longer alignments when excluding sites with gaps is
            associated with strong phylogenetic signal.

            PhyKIT reports three tab delimited values:
            col1: number of sites without gaps
            col2: total number of sites
            col3: percentage of sites without gaps

            Association between alignment length when excluding sites
            with gaps and phylogenetic signal was determined by Shen
            et al., Genome Biology and Evolution (2016),
            doi: 10.1093/gbe/evw179.

            Aliases:
              alignment_length_no_gaps, aln_len_no_gaps, alng
            Command line interfaces:
              pk_alignment_length_no_gaps, pk_aln_len_no_gaps, pk_alng

            Usage:
            phykit alignment_length_no_gaps <alignment>

            Options
            =====================================================
            <alignment>                 first argument after
                                        function name should be
                                        an alignment file
            """
        ),
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("alignment", help=SUPPRESS, type=str)
    namespace = cli.parse_args(argv)
    service = AlignmentLengthNoGaps(namespace)
    service.run()
|
|
471
|
+
|
|
472
|
+
@staticmethod
def alignment_recoding(argv):
    """Parse the ``alignment_recoding`` arguments and run ``AlignmentRecoding``.

    Args:
        argv: command-line arguments that follow the command name; expects
            one positional alignment path and an optional ``-c/--code``
            naming the recoding scheme.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Recode alignments using reduced character states.

            Alignments can be recoded using established or
            custom recoding schemes. Recoding schemes are
            specified using the -c/--code argument. Custom
            recoding schemes can be used and should be formatted
            as a two column file wherein the first column is the
            recoded character and the second column is the character
            in the alignment.

            Aliases:
              alignment_recoding, aln_recoding, recode
            Command line interfaces:
              pk_alignment_recoding, pk_aln_recoding, pk_recode

            Usage:
            phykit alignment_recoding <fasta> -c/--code <code>

            Options
            =====================================================
            <fasta>                     first argument after
                                        function name should be
                                        a fasta file

            -c/--code                   recoding scheme to use

            Codes for which recoding scheme to use
            =====================================================
            RY-nucleotide
                R = purines (i.e., A and G)
                Y = pyrimidines (i.e., T and C)

            SandR-6
                0 = A, P, S, and T
                1 = D, E, N, and G
                2 = Q, K, and R
                3 = M, I, V, and L
                4 = W and C
                5 = F, Y, and H

            KGB-6
                0 = A, G, P, and S
                1 = D, E, N, Q, H, K, R, and T
                2 = M, I, and L
                3 = W
                4 = F and Y
                5 = C and V

            Dayhoff-6
                0 = A, G, P, S, and T
                1 = D, E, N, and Q
                2 = H, K, and R
                3 = I, L, M, and V
                4 = F, W, and Y
                5 = C

            Dayhoff-9
                0 = D, E, H, N, and Q
                1 = I, L, M, and V
                2 = F and Y
                3 = A, S, and T
                4 = K and R
                5 = G
                6 = P
                7 = C
                8 = W

            Dayhoff-12
                0 = D, E, and Q
                1 = M, L, I, and V
                2 = F and Y
                3 = K, H, and R
                4 = G
                5 = A
                6 = P
                7 = S
                8 = T
                9 = N
                A = W
                B = C

            Dayhoff-15
                0 = D, E, and Q
                1 = M and L
                2 = I and V
                3 = F and Y
                4 = G
                5 = A
                6 = P
                7 = S
                8 = T
                9 = N
                A = K
                B = H
                C = R
                D = W
                E = C

            Dayhoff-18
                0 = F and Y
                1 = M and L
                2 = I
                3 = V
                4 = G
                5 = A
                6 = P
                7 = S
                8 = T
                9 = D
                A = E
                B = Q
                C = N
                D = K
                E = H
                F = R
                G = W
                H = C
            """  # noqa
        ),
    )

    # The help text calls the positional <fasta>; the parsed attribute is
    # "alignment".
    parser.add_argument("alignment", type=str, help=SUPPRESS)
    parser.add_argument("-c", "--code", type=str, help=SUPPRESS)
    args = parser.parse_args(argv)
    AlignmentRecoding(args).run()
|
|
607
|
+
|
|
608
|
+
@staticmethod
def column_score(argv):
    """Parse the ``column_score`` arguments and run ``ColumnScore``.

    Args:
        argv: command-line arguments that follow the command name; expects
            one positional query fasta path and an optional
            ``-r/--reference`` alignment path.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Calculates column score.

            Column score is an accuracy metric for a multiple alignment relative
            to a reference alignment. It is calculated by summing the correctly
            aligned columns over all columns in an alignment. Thus, values range
            from 0 to 1 and higher values indicate more accurate alignments.

            Aliases:
              column_score, cs
            Command line interfaces:
              pk_column_score, pk_cs

            Usage:
            phykit column_score <fasta> -r/--reference <ref.aln>

            Options
            =====================================================
            <fasta>                     first argument after
                                        function name should be a
                                        query fasta alignment file
                                        to be scored for accuracy

            -r/--reference              reference fasta alignment to
                                        compare query alignment to
            """
        ),
    )
    parser.add_argument("fasta", type=str, help=SUPPRESS)
    parser.add_argument("-r", "--reference", type=str, help=SUPPRESS)
    args = parser.parse_args(argv)
    ColumnScore(args).run()
|
|
649
|
+
|
|
650
|
+
@staticmethod
def compositional_bias_per_site(argv):
    """Parse the ``compositional_bias_per_site`` arguments and run the service.

    Args:
        argv: command-line arguments that follow the command name; expects
            one positional alignment path.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Calculates compositional bias per site in an alignment.

            Site-wise chi-squared tests are conducted in an alignment to
            detect compositional biases. PhyKIT outputs four columns:
            col 1: index in alignment
            col 2: chi-squared statistic (higher values indicate greater bias)
            col 3: multi-test corrected p-value (Benjamini-Hochberg false discovery rate procedure)
            col 4: uncorrected p-value

            Aliases:
              compositional_bias_per_site; comp_bias_per_site; cbps
            Command line interfaces:
              pk_compositional_bias_per_site; pk_comp_bias_per_site; pk_cbps

            Usage:
            phykit compositional_bias_per_site <alignment>

            Options
            =====================================================
            <alignment>                 first argument after the
                                        function name should be a
                                        fasta alignment file
            """
        ),
    )
    parser.add_argument("alignment", type=str, help=SUPPRESS)
    args = parser.parse_args(argv)
    CompositionalBiasPerSite(args).run()
|
|
688
|
+
|
|
689
|
+
@staticmethod
def evolutionary_rate_per_site(argv):
    """Parse the ``evolutionary_rate_per_site`` arguments and run the service.

    Args:
        argv: command-line arguments that follow the command name; expects
            one positional alignment path.
    """
    cli = ArgumentParser(
        description=textwrap.dedent(
            f"""\
            {help_header}

            Estimate evolutionary rate per site.

            Evolutionary rate per site is one minus the sum of squared
            frequency of different characters at a given site. Values
            may range from 0 (slow evolving; no diversity at the given
            site) to 1 (fast evolving; all characters appear only once).

            PhyKIT prints out two columns of information.
            col 1: site in alignment
            col 2: estimated evolutionary rate

            Aliases:
              evolutionary_rate_per_site; evo_rate_per_site; erps
            Command line interfaces:
              pk_evolutionary_rate_per_site; pk_evo_rate_per_site; pk_erps


            Usage:
            phykit evo_rate_per_site <fasta>

            Options
            =====================================================
            <fasta>                     first argument after
                                        function name should be a
                                        query fasta file
            """
        ),
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("alignment", help=SUPPRESS, type=str)
    # parse, build the service object, and run it in one step
    EvolutionaryRatePerSite(cli.parse_args(argv)).run()
|
|
730
|
+
|
|
731
|
+
@staticmethod
|
|
732
|
+
def faidx(argv):
    """Parse ``argv`` for the ``faidx`` command and run ``Faidx``.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``fasta`` and ``entry``) is passed to ``Faidx``
    and its ``run()`` method is invoked.

    argparse exits with a usage error when the positional fasta path or
    the ``-e/--entry`` option is missing.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Extracts sequence entry from fasta file.

            This function works similarly to the faidx function
            in samtools, but does not require an indexing step.

            To obtain multiple entries, input multiple entries separated
            by a comma (,). For example, if you want entries
            named "seq_0" and "seq_1", the string "seq_0,seq_1"
            should be associated with the -e argument.

            Aliases:
            faidx, get_entry, ge
            Command line interfaces:
            pk_faidx, pk_get_entry, pk_ge


            Usage:
            phykit faidx <fasta> -e/--entry <fasta entry>

            Options
            =====================================================
            <fasta> first argument after
            function name should be a
            query fasta file

            -e/--entry entry name to be extracted
            from the inputted fasta file
            """
        ),
    )
    parser.add_argument("fasta", type=str, help=SUPPRESS)
    # The usage text documents -e/--entry as mandatory, so let argparse
    # enforce it instead of passing entry=None on to the service.
    parser.add_argument("-e", "--entry", type=str, required=True, help=SUPPRESS)
    args = parser.parse_args(argv)
    Faidx(args).run()
|
|
775
|
+
|
|
776
|
+
@staticmethod
|
|
777
|
+
def gc_content(argv):
    """Parse ``argv`` for the ``gc_content`` command and run ``GCContent``.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``fasta`` and ``verbose``) is passed to
    ``GCContent`` and its ``run()`` method is invoked.
    """
    # The hand-written description is the only help shown: argparse's own
    # usage line and per-argument help are suppressed below.
    description = textwrap.dedent(
        f"""\
        {help_header}

        Calculate GC content of a fasta file.

        GC content is negatively correlated with phylogenetic signal.

        If there are multiple entries, use the -v/--verbose option
        to determine the GC content of each fasta entry separately.

        Association between GC content and phylogenetic signal was
        determined by Shen et al., Genome Biology and Evolution (2016),
        doi: 10.1093/gbe/evw179.

        Aliases:
        gc_content, gc
        Command line interfaces:
        pk_gc_content, pk_gc

        Usage:
        phykit gc_content <fasta> [-v/--verbose]

        Options
        =====================================================
        <fasta> first argument after
        function name should be
        a fasta file

        -v/--verbose optional argument to print
        the GC content of each fasta
        entry
        """
    )
    cli = ArgumentParser(
        description=description,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("fasta", type=str, help=SUPPRESS)
    cli.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    GCContent(cli.parse_args(argv)).run()
|
|
823
|
+
|
|
824
|
+
@staticmethod
|
|
825
|
+
def pairwise_identity(argv):
    """Parse ``argv`` for ``pairwise_identity`` and run ``PairwiseIdentity``.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``alignment``, ``verbose`` and ``exclude_gaps``)
    is passed to ``PairwiseIdentity`` and its ``run()`` method is invoked.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        # The usage line lists -e/--exclude_gaps so that it matches both the
        # options table and the arguments registered on the parser.
        description=textwrap.dedent(
            f"""\
            {help_header}

            Calculate the average pairwise identity among sequences.

            Pairwise identities can be used as proxies for the
            evolutionary rate of sequences.

            Pairwise identity is defined as the number of identical
            columns (including gaps) between two aligned sequences divided
            by the number of columns in the alignment. Summary statistics
            are reported unless used with the verbose option in which
            all pairwise identities will be reported.

            An example of pairwise identities being used as a proxy
            for evolutionary rate can be found here: Chen et al.
            Genome Biology and Evolution (2017), doi: 10.1093/gbe/evx147.

            Aliases:
            pairwise_identity, pairwise_id, pi
            Command line interfaces:
            pk_pairwise_identity, pk_pairwise_id, pk_pi

            Usage:
            phykit pairwise_identity <alignment> [-v/--verbose] [-e/--exclude_gaps]

            Options
            =====================================================
            <alignment> first argument after
            function name should be
            an alignment file

            -v/--verbose optional argument to print
            identity per pair

            -e/--exclude_gaps if a site has a gap, ignore it
            """
        ),
    )
    parser.add_argument("alignment", type=str, help=SUPPRESS)
    parser.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    parser.add_argument(
        "-e", "--exclude_gaps", action="store_true", required=False, help=SUPPRESS
    )
    args = parser.parse_args(argv)
    PairwiseIdentity(args).run()
|
|
879
|
+
|
|
880
|
+
@staticmethod
|
|
881
|
+
def parsimony_informative_sites(argv):
    """Parse ``argv`` for ``parsimony_informative_sites`` and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attribute ``alignment``) is passed to
    ``ParsimonyInformative`` and its ``run()`` method is invoked.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Calculate the number and percentage of parsimony
            informative sites in an alignment.

            The number of parsimony informative sites in an alignment
            is associated with strong phylogenetic signal.

            PhyKIT reports three tab delimited values:
            col1: number of parsimony informative sites
            col2: total number of sites
            col3: percentage of parsimony informative sites

            Association between the number of parsimony informative
            sites and phylogenetic signal was determined by Shen
            et al., Genome Biology and Evolution (2016),
            doi: 10.1093/gbe/evw179 and Steenwyk et al., bioRxiv
            (2020), doi: 10.1101/2020.06.08.140384.

            Aliases:
            parsimony_informative_sites, pis
            Command line interfaces:
            pk_parsimony_informative_sites, pk_pis

            Usage:
            phykit parsimony_informative_sites <alignment>

            Options
            =====================================================
            <alignment> first argument after
            function name should be
            an alignment file
            """
        ),
    )
    parser.add_argument("alignment", type=str, help=SUPPRESS)
    args = parser.parse_args(argv)
    ParsimonyInformative(args).run()
|
|
926
|
+
|
|
927
|
+
@staticmethod
|
|
928
|
+
def rcv(argv):
    """Parse ``argv`` for the RCV command and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attribute ``alignment``) is passed to
    ``RelativeCompositionVariability`` and its ``run()`` method is invoked.
    """
    # The hand-written description is the only help shown: argparse's own
    # usage line and per-argument help are suppressed below.
    description = textwrap.dedent(
        f"""\
        {help_header}

        Calculate RCV (relative composition variability) for an alignment.

        Lower RCV values are thought to be desirable because they represent
        a lower composition bias in an alignment. Statistically, RCV describes
        the average variability in sequence composition among taxa.

        RCV is calculated following Phillips and Penny, Molecular Phylogenetics
        and Evolution (2003), doi: 10.1016/S1055-7903(03)00057-5.

        Aliases:
        relative_composition_variability, rel_comp_var, rcv
        Command line interfaces:
        pk_relative_composition_variability, pk_rel_comp_var, pk_rcv

        Usage:
        phykit relative_composition_variability <alignment>

        Options
        =====================================================
        <alignment> first argument after
        function name should be
        an alignment file
        """
    )
    cli = ArgumentParser(
        description=description,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("alignment", type=str, help=SUPPRESS)
    RelativeCompositionVariability(cli.parse_args(argv)).run()
|
|
965
|
+
|
|
966
|
+
@staticmethod
|
|
967
|
+
def rcvt(argv):
    """Parse ``argv`` for the RCVT command and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attribute ``alignment``) is passed to
    ``RelativeCompositionVariabilityTaxon`` and its ``run()`` method is
    invoked.
    """
    # The hand-written description is the only help shown: argparse's own
    # usage line and per-argument help are suppressed below.
    description = textwrap.dedent(
        f"""\
        {help_header}

        Calculate RCVT (relative composition variability, taxon) for an alignment.

        RCVT is the relative composition variability metric for individual taxa.
        This facilitates identifying specific taxa that may have compositional
        biases. Lower RCVT values are more desirable because they indicate
        a lower composition bias for a given taxon in an alignment.

        Aliases:
        relative_composition_variability_taxon, rel_comp_var_taxon, rcvt
        Command line interfaces:
        pk_relative_composition_variability_taxon, pk_rel_comp_var_taxon, pk_rcvt

        Usage:
        phykit relative_composition_variability_taxon <alignment>

        Options
        =====================================================
        <alignment> first argument after
        function name should be
        an alignment file
        """
    )
    cli = ArgumentParser(
        description=description,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("alignment", type=str, help=SUPPRESS)
    RelativeCompositionVariabilityTaxon(cli.parse_args(argv)).run()
|
|
1002
|
+
|
|
1003
|
+
@staticmethod
|
|
1004
|
+
def rename_fasta_entries(argv):
    """Parse ``argv`` for ``rename_fasta_entries`` and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``fasta``, ``idmap`` and ``output``) is passed to
    ``RenameFastaEntries`` and its ``run()`` method is invoked.

    argparse exits with a usage error when the positional fasta path or
    the ``-i/--idmap`` option is missing; ``-o/--output`` stays optional
    and defaults to ``None``.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Renames fasta entries.

            Renaming fasta entries will follow the scheme of a tab-delimited
            file wherein the first column is the current fasta entry name and
            the second column is the new fasta entry name in the resulting
            output alignment.

            Aliases:
            rename_fasta_entries, rename_fasta
            Command line interfaces:
            pk_rename_fasta_entries, pk_rename_fasta

            Usage:
            phykit rename_fasta_entries <fasta> -i/--idmap <idmap>
            [-o/--output <output_file>]

            Options
            =====================================================
            <fasta> first argument after
            function name should be
            a fasta file

            -i/--idmap identifier map of current FASTA
            names (col1) and desired FASTA
            names (col2)

            -o/--output optional argument to write
            the renamed fasta file to.
            Default output has the same
            name as the input file with
            the suffix ".renamed.fa" added
            to it.
            """
        ),
    )
    parser.add_argument("fasta", type=str, help=SUPPRESS)
    # The usage text shows -i/--idmap without brackets (mandatory), so let
    # argparse enforce it instead of passing idmap=None on to the service.
    parser.add_argument("-i", "--idmap", type=str, required=True, help=SUPPRESS)
    parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
    args = parser.parse_args(argv)
    RenameFastaEntries(args).run()
|
|
1053
|
+
|
|
1054
|
+
@staticmethod
|
|
1055
|
+
def sum_of_pairs_score(argv):
    """Parse ``argv`` for ``sum_of_pairs_score`` and run ``SumOfPairsScore``.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``fasta`` and ``reference``) is passed to
    ``SumOfPairsScore`` and its ``run()`` method is invoked.

    argparse exits with a usage error when the positional query alignment
    or the ``-r/--reference`` option is missing.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Calculates sum-of-pairs score.

            Sum-of-pairs is an accuracy metric for a multiple alignment relative
            to a reference alignment. It is calculated by summing the correctly
            aligned residue pairs over all pairs of sequences. Thus, values range
            from 0 to 1 and higher values indicate more accurate alignments.

            Aliases:
            sum_of_pairs_score, sops, sop
            Command line interfaces:
            pk_sum_of_pairs_score, pk_sops, pk_sop

            Usage:
            phykit sum_of_pairs_score <fasta> -r/--reference <ref.aln>

            Options
            =====================================================
            <fasta> first argument after
            function name should be a
            query fasta alignment file
            to be scored for accuracy

            -r/--reference reference fasta alignment to
            compare query alignment to
            """
        ),
    )
    parser.add_argument("fasta", type=str, help=SUPPRESS)
    # The usage text shows -r/--reference without brackets (mandatory), so
    # let argparse enforce it instead of passing reference=None on.
    parser.add_argument(
        "-r", "--reference", type=str, required=True, help=SUPPRESS
    )
    args = parser.parse_args(argv)
    SumOfPairsScore(args).run()
|
|
1095
|
+
|
|
1096
|
+
@staticmethod
|
|
1097
|
+
def variable_sites(argv):
    """Parse ``argv`` for the ``variable_sites`` command and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attribute ``alignment``) is passed to ``VariableSites`` and
    its ``run()`` method is invoked.
    """
    # The hand-written description is the only help shown: argparse's own
    # usage line and per-argument help are suppressed below.
    description = textwrap.dedent(
        f"""\
        {help_header}

        Calculate the number of variable sites in an alignment.

        The number of variable sites in an alignment is
        associated with strong phylogenetic signal.

        PhyKIT reports three tab delimited values:
        col1: number of variable sites
        col2: total number of sites
        col3: percentage of variable sites

        Association between the number of variable sites and
        phylogenetic signal was determined by Shen et al.,
        Genome Biology and Evolution (2016),
        doi: 10.1093/gbe/evw179.

        Aliases:
        variable_sites, vs
        Command line interfaces:
        pk_variable_sites, pk_vs

        Usage:
        phykit variable_sites <alignment>

        Options
        =====================================================
        <alignment> first argument after
        function name should be
        an alignment file
        """
    )
    cli = ArgumentParser(
        description=description,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("alignment", type=str, help=SUPPRESS)
    VariableSites(cli.parse_args(argv)).run()
|
|
1140
|
+
|
|
1141
|
+
## Tree functions
|
|
1142
|
+
@staticmethod
|
|
1143
|
+
def bipartition_support_stats(argv):
    """Parse ``argv`` for ``bipartition_support_stats`` and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``tree`` and ``verbose``) is passed to
    ``BipartitionSupportStats`` and its ``run()`` method is invoked.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}
            Calculate summary statistics for bipartition support.

            High bipartition support values are thought to be desirable because
            they are indicative of greater certainty in tree topology.

            To obtain all bipartition support values, use the -v/--verbose option.
            In addition to support values for each node, the names of all terminal
            branch tips are also included. Each terminal branch name is separated
            with a semi-colon (;).

            Aliases:
            bipartition_support_stats, bss
            Command line interfaces:
            pk_bipartition_support_stats, pk_bss

            Usage:
            phykit bipartition_support_stats <tree> [-v/--verbose]

            Options
            =====================================================
            <tree> first argument after
            function name should be
            a tree file

            -v/--verbose optional argument to print
            all bipartition support
            values
            """
        ),
    )
    parser.add_argument("tree", type=str, help=SUPPRESS)
    parser.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    args = parser.parse_args(argv)
    BipartitionSupportStats(args).run()
|
|
1187
|
+
|
|
1188
|
+
@staticmethod
|
|
1189
|
+
def branch_length_multiplier(argv):
    """Parse ``argv`` for ``branch_length_multiplier`` and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``tree``, ``factor`` and ``output``) is passed to
    ``BranchLengthMultiplier`` and its ``run()`` method is invoked.

    ``-f/--factor`` is required and converted with ``float``; argparse
    exits with a usage error if it is missing or not a number.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        # The usage line spells the flag as -f/--factor so that it matches
        # the options table below it.
        description=textwrap.dedent(
            f"""\
            {help_header}

            Multiply branch lengths in a phylogeny by a given factor.

            This can help modify reference trees when conducting simulations
            or other analyses.

            Alias:
            branch_length_multiplier, blm
            Command line interfaces:
            pk_branch_length_multiplier, pk_blm

            Usage:
            phykit branch_length_multiplier <tree> -f/--factor n [-o/--output <output_file>]

            Options
            =====================================================
            <tree> first argument after
            function name should be
            a tree file

            -f/--factor factor to multiply branch
            lengths by

            -o/--output optional argument to name
            the outputted tree file.
            Default output will have
            the same name as the input
            file but with the suffix
            ".factor_(n).tre"
            """
        ),
    )
    parser.add_argument("tree", type=str, help=SUPPRESS)
    parser.add_argument("-f", "--factor", type=float, required=True, help=SUPPRESS)
    parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
    args = parser.parse_args(argv)
    BranchLengthMultiplier(args).run()
|
|
1234
|
+
|
|
1235
|
+
@staticmethod
|
|
1236
|
+
def collapse_branches(argv):
    """Parse ``argv`` for ``collapse_branches`` and run ``CollapseBranches``.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``tree``, ``support`` and ``output``) is passed
    to ``CollapseBranches`` and its ``run()`` method is invoked.

    ``-s/--support`` is required and converted with ``float``; argparse
    exits with a usage error if it is missing or not a number.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Collapse branches on a phylogeny according to bipartition support.
            Bipartitions will be collapsed if they are less than the user specified
            value.

            Aliases:
            collapse_branches, collapse, cb
            Command line interfaces:
            pk_collapse_branches, pk_collapse, pk_cb

            Usage:
            phykit collapse_branches <tree> -s/--support n [-o/--output <output_file>]

            Options
            =====================================================
            <tree> first argument after
            function name should be
            a tree file

            -s/--support bipartitions with support less
            than this value will be collapsed

            -o/--output optional argument to name
            the outputted tree file.
            Default output will have
            the same name as the input
            file but with the suffix
            ".collapsed_(support).tre"

            """
        ),
    )
    parser.add_argument("tree", type=str, help=SUPPRESS)
    parser.add_argument("-s", "--support", type=float, required=True, help=SUPPRESS)
    parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
    args = parser.parse_args(argv)
    CollapseBranches(args).run()
|
|
1281
|
+
|
|
1282
|
+
@staticmethod
|
|
1283
|
+
def covarying_evolutionary_rates(argv):
    """Parse ``argv`` for ``covarying_evolutionary_rates`` and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``tree_zero``, ``tree_one``, ``reference`` and
    ``verbose``) is passed to ``CovaryingEvolutionaryRates`` and its
    ``run()`` method is invoked.

    Both tree positionals and ``-r/--reference`` are required; argparse
    exits with a usage error if any of them is missing.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        # The options table describes the two positionals as the first and
        # second tree files, matching the usage line and the prose above it.
        description=textwrap.dedent(
            f"""\
            {help_header}

            Determine if two genes have a signature of covariation with one another.

            Genes that have covarying evolutionary histories tend to have
            similar functions and expression levels.

            Input two phylogenies and calculate the correlation among relative
            evolutionary rates between the two phylogenies. The two input trees
            do not have to have the same taxa. This function will first prune both
            trees to have the same tips. To transform branch lengths into relative
            rates, PhyKIT uses the putative species tree's branch lengths, which is
            inputted by the user. As recommended by the original method developers,
            outlier branch lengths are removed. Outlier branches have a relative
            evolutionary rate greater than five.

            PhyKIT reports two tab delimited values:
            col1: correlation coefficient
            col2: p-value

            Method is empirically evaluated by Clark et al., Genome Research
            (2012), doi: 10.1101/gr.132647.111. Normalization method using a
            species tree follows Sato et al., Bioinformatics (2005), doi:
            10.1093/bioinformatics/bti564.


            Aliases:
            covarying_evolutionary_rates, cover
            Command line interfaces:
            pk_covarying_evolutionary_rates, pk_cover

            Usage:
            phykit covarying_evolutionary_rates <tree_file_zero> <tree_file_one>
            -r/--reference <reference_tree_file> [-v/--verbose]

            Options
            =====================================================
            <tree_file_zero> first argument after
            function name should be
            a tree file

            <tree_file_one> second argument after
            function name should be
            a tree file

            -r/--reference a tree to correct branch
            lengths by in the two input
            trees. Typically, this is a
            putative species tree.

            -v/--verbose print out corrected branch
            lengths shared between
            tree 0 and tree 1
            """
        ),
    )
    parser.add_argument("tree_zero", type=str, help=SUPPRESS)
    parser.add_argument("tree_one", type=str, help=SUPPRESS)
    parser.add_argument(
        "-r", "--reference", type=str, required=True, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    args = parser.parse_args(argv)
    CovaryingEvolutionaryRates(args).run()
|
|
1356
|
+
|
|
1357
|
+
@staticmethod
|
|
1358
|
+
def dvmc(argv):
    """Parse ``argv`` for the DVMC command and run ``DVMC``.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attribute ``tree``) is passed to ``DVMC`` and its ``run()``
    method is invoked.
    """
    # The hand-written description is the only help shown: argparse's own
    # usage line and per-argument help are suppressed below.
    description = textwrap.dedent(
        f"""\
        {help_header}

        Calculate degree of violation of a molecular clock (or DVMC) in a phylogeny.

        Lower DVMC values are thought to be desirable because they are indicative
        of a lower degree of violation in the molecular clock assumption.

        Typically, outgroup taxa are not included in molecular clock analysis. Thus,
        prior to calculating DVMC from a single gene tree, users may want to prune
        outgroup taxa from the phylogeny. To prune tips from a phylogeny, see the
        prune_tree function.

        Calculate DVMC in a tree following Liu et al., PNAS (2017), doi: 10.1073/pnas.1616744114.

        Aliases:
        degree_of_violation_of_a_molecular_clock, dvmc
        Command line interfaces:
        pk_degree_of_violation_of_a_molecular_clock, pk_dvmc

        Usage:
        phykit degree_of_violation_of_a_molecular_clock <tree>

        Options
        =====================================================
        <tree> first argument after
        function name should be
        a tree file
        """
    )
    cli = ArgumentParser(
        description=description,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("tree", type=str, help=SUPPRESS)
    DVMC(cli.parse_args(argv)).run()
|
|
1398
|
+
|
|
1399
|
+
@staticmethod
|
|
1400
|
+
def evolutionary_rate(argv):
    """Parse ``argv`` for ``evolutionary_rate`` and run ``EvolutionaryRate``.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attribute ``tree``) is passed to ``EvolutionaryRate`` and
    its ``run()`` method is invoked.
    """
    # The hand-written description is the only help shown: argparse's own
    # usage line and per-argument help are suppressed below.
    description = textwrap.dedent(
        f"""\
        {help_header}
        Calculate a tree-based estimation of the evolutionary rate of a gene.

        Evolutionary rate is the total tree length divided by the number
        of terminals.

        Calculate evolutionary rate following Telford et al., Proceedings
        of the Royal Society B (2014).

        Aliases:
        evolutionary_rate, evo_rate
        Command line interfaces:
        pk_evolutionary_rate, pk_evo_rate

        Usage:
        phykit evolutionary_rate <tree>

        Options
        =====================================================
        <tree> first argument after
        function name should be
        a tree file
        """
    )
    cli = ArgumentParser(
        description=description,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("tree", type=str, help=SUPPRESS)
    EvolutionaryRate(cli.parse_args(argv)).run()
|
|
1435
|
+
|
|
1436
|
+
@staticmethod
|
|
1437
|
+
def hidden_paralogy_check(argv):
    """Parse ``argv`` for ``hidden_paralogy_check`` and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``tree`` and ``clade``) is passed to
    ``HiddenParalogyCheck`` and its ``run()`` method is invoked.

    argparse exits with a usage error when the positional tree path or
    the ``-c/--clade`` option is missing.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        # The options table lists the clade file under its actual flag,
        # -c/--clade, as the usage line already does.
        description=textwrap.dedent(
            f"""\
            {help_header}
            Scan tree for evidence of hidden paralogy.

            This analysis can be used to identify hidden paralogy.
            Specifically, this method will examine if a set of
            well known monophyletic taxa are, in fact, monophyletic.
            If they are not, the evolutionary history of the gene may
            be subject to hidden paralogy. This analysis is typically
            done with single-copy orthologous genes.

            Requires a clade file, which specifies which monophyletic
            lineages to check for. Multiple monophyletic
            lineages can be specified. Each lineage should
            be specified on a single line and each tip name
            (or taxon name) should be separated by a space.
            For example, if it is anticipated that tips
            "A", "B", and "C" are monophyletic and "D",
            "E", and "F" are expected to be monophyletic, the
            clade file should be formatted as follows:
            "
            A B C
            D E F
            "
            Tip names not present in the tree will not be considered
            when assessing hidden paralogy.

            The output will report if the specified taxa were monophyletic
            or not. The number of rows will reflect how many groups of taxa
            were checked for monophyly. For example, if there were three
            rows of clades in the -c file, there will be three rows in the
            output where the first row in the output corresponds to the
            results of the first row in the clade file.

            The concept behind this analysis follows
            Siu-Ting et al., Molecular Biology and Evolution (2019).

            Aliases:
            hidden_paralogy_check, clan_check
            Command line interfaces:
            pk_hidden_paralogy_check, pk_clan_check

            Usage:
            phykit hidden_paralogy_check <tree> -c/--clade <clade_file>

            Options
            =====================================================
            <tree> first argument after
            function name should be
            a tree file

            -c/--clade clade file that specifies
            what monophyletic clades
            to expect
            """
        ),
    )
    parser.add_argument("tree", type=str, help=SUPPRESS)
    # The help text states that a clade file is required and the usage line
    # shows -c/--clade without brackets, so let argparse enforce it.
    parser.add_argument("-c", "--clade", type=str, required=True, help=SUPPRESS)
    args = parser.parse_args(argv)
    HiddenParalogyCheck(args).run()
|
|
1504
|
+
|
|
1505
|
+
@staticmethod
|
|
1506
|
+
def internal_branch_stats(argv):
    """Parse ``argv`` for ``internal_branch_stats`` and run the service.

    ``argv`` is handed to ``ArgumentParser.parse_args``; the resulting
    namespace (attributes ``tree`` and ``verbose``) is passed to
    ``InternalBranchStats`` and its ``run()`` method is invoked.
    """
    # The hand-written description is the only help shown: argparse's own
    # usage line and per-argument help are suppressed below.
    description = textwrap.dedent(
        f"""\
        {help_header}

        Calculate summary statistics for internal branch lengths in a phylogeny.

        Internal branch lengths can be useful for phylogeny diagnostics.

        To obtain all internal branch lengths, use the -v/--verbose option.

        Aliases:
        internal_branch_stats, ibs
        Command line interfaces:
        pk_internal_branch_stats, pk_ibs

        Usage:
        phykit internal_branch_stats <tree> [-v/--verbose]

        Options
        =====================================================
        <tree> first argument after
        function name should be
        a tree file

        -v/--verbose optional argument to print
        all internal branch lengths
        """
    )
    cli = ArgumentParser(
        description=description,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
        add_help=True,
    )
    cli.add_argument("tree", type=str, help=SUPPRESS)
    cli.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    InternalBranchStats(cli.parse_args(argv)).run()
|
|
1546
|
+
|
|
1547
|
+
@staticmethod
|
|
1548
|
+
def internode_labeler(argv):
|
|
1549
|
+
parser = ArgumentParser(
|
|
1550
|
+
add_help=True,
|
|
1551
|
+
usage=SUPPRESS,
|
|
1552
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1553
|
+
description=textwrap.dedent(
|
|
1554
|
+
f"""\
|
|
1555
|
+
{help_header}
|
|
1556
|
+
|
|
1557
|
+
Appends numerical identifiers to bipartitions in place
|
|
1558
|
+
of support values. This is helpful for pointing to
|
|
1559
|
+
specific internodes in supplementary files or otherwise.
|
|
1560
|
+
|
|
1561
|
+
Aliases:
|
|
1562
|
+
internode_labeler, il
|
|
1563
|
+
Command line interfaces:
|
|
1564
|
+
pk_internode_labeler, pk_il
|
|
1565
|
+
|
|
1566
|
+
Usage:
|
|
1567
|
+
phykit internode_labeler <tree> [-o/--output <file>]
|
|
1568
|
+
|
|
1569
|
+
Options
|
|
1570
|
+
=====================================================
|
|
1571
|
+
<tree> first argument after
|
|
1572
|
+
function name should be
|
|
1573
|
+
a tree file
|
|
1574
|
+
|
|
1575
|
+
-o/--output optional argument to name
|
|
1576
|
+
the outputted tree file
|
|
1577
|
+
"""
|
|
1578
|
+
),
|
|
1579
|
+
)
|
|
1580
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
1581
|
+
parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
|
|
1582
|
+
args = parser.parse_args(argv)
|
|
1583
|
+
InternodeLabeler(args).run()
|
|
1584
|
+
|
|
1585
|
+
@staticmethod
|
|
1586
|
+
def last_common_ancestor_subtree(argv):
|
|
1587
|
+
parser = ArgumentParser(
|
|
1588
|
+
add_help=True,
|
|
1589
|
+
usage=SUPPRESS,
|
|
1590
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1591
|
+
description=textwrap.dedent(
|
|
1592
|
+
f"""\
|
|
1593
|
+
{help_header}
|
|
1594
|
+
|
|
1595
|
+
Obtains subtree from a phylogeny by getting
|
|
1596
|
+
the last common ancestor from a list of taxa.
|
|
1597
|
+
|
|
1598
|
+
Aliases:
|
|
1599
|
+
last_common_ancestor_subtree, lca_subtree
|
|
1600
|
+
Command line interfaces:
|
|
1601
|
+
pk_last_common_ancestor_subtree, pk_lca_subtree
|
|
1602
|
+
|
|
1603
|
+
Usage:
|
|
1604
|
+
phykit last_common_ancestor_subtree <tree> <list_of_taxa>
|
|
1605
|
+
[-o/--output <file>]
|
|
1606
|
+
|
|
1607
|
+
Options
|
|
1608
|
+
=====================================================
|
|
1609
|
+
<tree> first argument after
|
|
1610
|
+
function name should be
|
|
1611
|
+
a tree file
|
|
1612
|
+
|
|
1613
|
+
<list_of_taxa> list of taxa to get the last
|
|
1614
|
+
common ancestor subtree for
|
|
1615
|
+
|
|
1616
|
+
-o/--output optional argument to name
|
|
1617
|
+
the outputted tree file
|
|
1618
|
+
"""
|
|
1619
|
+
),
|
|
1620
|
+
)
|
|
1621
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
1622
|
+
parser.add_argument("list_of_taxa", type=str, help=SUPPRESS)
|
|
1623
|
+
parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
|
|
1624
|
+
args = parser.parse_args(argv)
|
|
1625
|
+
LastCommonAncestorSubtree(args).run()
|
|
1626
|
+
|
|
1627
|
+
@staticmethod
|
|
1628
|
+
def lb_score(argv):
|
|
1629
|
+
parser = ArgumentParser(
|
|
1630
|
+
add_help=True,
|
|
1631
|
+
usage=SUPPRESS,
|
|
1632
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1633
|
+
description=textwrap.dedent(
|
|
1634
|
+
f"""\
|
|
1635
|
+
{help_header}
|
|
1636
|
+
|
|
1637
|
+
Calculate long branch (LB) scores in a phylogeny.
|
|
1638
|
+
|
|
1639
|
+
Lower LB scores are thought to be desirable because
|
|
1640
|
+
they are indicative of taxa or trees that likely do
|
|
1641
|
+
not have issues with long branch attraction.
|
|
1642
|
+
|
|
1643
|
+
LB score is the mean pairwise patristic distance of
|
|
1644
|
+
taxon i compared to all other taxa over the average
|
|
1645
|
+
pairwise patristic distance.
|
|
1646
|
+
|
|
1647
|
+
PhyKIT reports summary statistics. To obtain LB scores
|
|
1648
|
+
for each taxon, use the -v/--verbose option.
|
|
1649
|
+
|
|
1650
|
+
LB scores are calculated following Struck, Evolutionary
|
|
1651
|
+
Bioinformatics (2014), doi: 10.4137/EBO.S14239.
|
|
1652
|
+
|
|
1653
|
+
Aliases:
|
|
1654
|
+
long_branch_score, lb_score, lbs
|
|
1655
|
+
Command line interfaces:
|
|
1656
|
+
pk_long_branch_score, pk_lb_score, pk_lbs
|
|
1657
|
+
|
|
1658
|
+
Usage:
|
|
1659
|
+
phykit long_branch_score <tree> [-v/--verbose]
|
|
1660
|
+
|
|
1661
|
+
Options
|
|
1662
|
+
=====================================================
|
|
1663
|
+
<tree> first argument after
|
|
1664
|
+
function name should be
|
|
1665
|
+
a tree file
|
|
1666
|
+
|
|
1667
|
+
-v/--verbose optional argument to print
|
|
1668
|
+
all LB score values
|
|
1669
|
+
"""
|
|
1670
|
+
),
|
|
1671
|
+
)
|
|
1672
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
1673
|
+
parser.add_argument(
|
|
1674
|
+
"-v", "--verbose", action="store_true", required=False, help=SUPPRESS
|
|
1675
|
+
)
|
|
1676
|
+
args = parser.parse_args(argv)
|
|
1677
|
+
LBScore(args).run()
|
|
1678
|
+
|
|
1679
|
+
@staticmethod
|
|
1680
|
+
def monophyly_check(argv):
|
|
1681
|
+
parser = ArgumentParser(
|
|
1682
|
+
add_help=True,
|
|
1683
|
+
usage=SUPPRESS,
|
|
1684
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1685
|
+
description=textwrap.dedent(
|
|
1686
|
+
f"""\
|
|
1687
|
+
{help_header}
|
|
1688
|
+
Check for monophyly of a lineage.
|
|
1689
|
+
|
|
1690
|
+
This analysis can be used to determine if a set of
|
|
1691
|
+
taxa are monophyletic.
|
|
1692
|
+
|
|
1693
|
+
Requires a taxa file, which specifies which tip names
|
|
1694
|
+
are expected to be monophyletic. File format is a
|
|
1695
|
+
single column file with tip names. Tip names not
|
|
1696
|
+
present in the tree will not be considered when
|
|
1697
|
+
examining monophyly.
|
|
1698
|
+
|
|
1699
|
+
The output will have six columns.
|
|
1700
|
+
col 1: if the clade was or wasn't monophyletic
|
|
1701
|
+
col 2: average bipartition support value in the clade of interest
|
|
1702
|
+
col 3: maximum bipartition support value in the clade of interest
|
|
1703
|
+
col 4: minimum bipartition support value in the clade of interest
|
|
1704
|
+
col 5: standard deviation of bipartition support values in the clade of interest
|
|
1705
|
+
col 6: tip names of taxa monophyletic with the lineage of interest
|
|
1706
|
+
excluding those that are listed in the taxa_of_interest file
|
|
1707
|
+
|
|
1708
|
+
Aliases:
|
|
1709
|
+
monophyly_check, is_monophyletic
|
|
1710
|
+
Command line interfaces:
|
|
1711
|
+
pk_monophyly_check, pk_is_monophyletic
|
|
1712
|
+
|
|
1713
|
+
Usage:
|
|
1714
|
+
phykit monophyly_check <tree> <list_of_taxa>
|
|
1715
|
+
|
|
1716
|
+
Options
|
|
1717
|
+
=====================================================
|
|
1718
|
+
<tree> first argument after
|
|
1719
|
+
function name should be
|
|
1720
|
+
a tree file
|
|
1721
|
+
|
|
1722
|
+
<list_of_taxa> single column file with
|
|
1723
|
+
list of tip names to
|
|
1724
|
+
examine the monophyly of
|
|
1725
|
+
"""
|
|
1726
|
+
),
|
|
1727
|
+
)
|
|
1728
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
1729
|
+
parser.add_argument("list_of_taxa", type=str, help=SUPPRESS)
|
|
1730
|
+
args = parser.parse_args(argv)
|
|
1731
|
+
MonophylyCheck(args).run()
|
|
1732
|
+
|
|
1733
|
+
@staticmethod
|
|
1734
|
+
def nearest_neighbor_interchange(argv):
|
|
1735
|
+
parser = ArgumentParser(
|
|
1736
|
+
add_help=True,
|
|
1737
|
+
usage=SUPPRESS,
|
|
1738
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1739
|
+
description=textwrap.dedent(
|
|
1740
|
+
f"""\
|
|
1741
|
+
{help_header}
|
|
1742
|
+
|
|
1743
|
+
Generate all nearest neighbor interchange moves for a binary
|
|
1744
|
+
rooted tree.
|
|
1745
|
+
|
|
1746
|
+
The output file will also include the original phylogeny.
|
|
1747
|
+
|
|
1748
|
+
Aliases:
|
|
1749
|
+
nearest_neighbor_interchange, nni
|
|
1750
|
+
Command line interfaces:
|
|
1751
|
+
pk_nearest_neighbor_interchange, pk_nni
|
|
1752
|
+
|
|
1753
|
+
Usage:
|
|
1754
|
+
phykit nearest_neighbor_interchange <tree> [-o/--output <output_file>]
|
|
1755
|
+
|
|
1756
|
+
Options
|
|
1757
|
+
=====================================================
|
|
1758
|
+
<tree> first argument after
|
|
1759
|
+
function name should be
|
|
1760
|
+
a tree file
|
|
1761
|
+
|
|
1762
|
+
-o/--output name of output file that will
|
|
1763
|
+
contain all trees with the
|
|
1764
|
+
nearest neighbor interchange
|
|
1765
|
+
moves.
|
|
1766
|
+
Default output will have
|
|
1767
|
+
the same name as the input
|
|
1768
|
+
file but with the suffix
|
|
1769
|
+
".NNIs"
|
|
1770
|
+
"""
|
|
1771
|
+
),
|
|
1772
|
+
)
|
|
1773
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
1774
|
+
parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
|
|
1775
|
+
args = parser.parse_args(argv)
|
|
1776
|
+
NearestNeighborInterchange(args).run()
|
|
1777
|
+
|
|
1778
|
+
@staticmethod
|
|
1779
|
+
def patristic_distances(argv):
|
|
1780
|
+
parser = ArgumentParser(
|
|
1781
|
+
add_help=True,
|
|
1782
|
+
usage=SUPPRESS,
|
|
1783
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1784
|
+
description=textwrap.dedent(
|
|
1785
|
+
f"""\
|
|
1786
|
+
{help_header}
|
|
1787
|
+
|
|
1788
|
+
Calculate summary statistics among patristic distances in a phylogeny.
|
|
1789
|
+
|
|
1790
|
+
Patristic distances are all tip-to-tip distances in a phylogeny.
|
|
1791
|
+
|
|
1792
|
+
To obtain all patristic distances, use the -v/--verbose option.
|
|
1793
|
+
With the -v option, the first column will have two taxon names
|
|
1794
|
+
separated by a '-' followed by the patristic distance. Features
|
|
1795
|
+
will be tab separated.
|
|
1796
|
+
|
|
1797
|
+
Aliases:
|
|
1798
|
+
patristic_distances, pd
|
|
1799
|
+
Command line interfaces:
|
|
1800
|
+
pk_patristic_distances, pk_pd
|
|
1801
|
+
|
|
1802
|
+
Usage:
|
|
1803
|
+
phykit patristic_distances <tree> [-v/--verbose]
|
|
1804
|
+
|
|
1805
|
+
Options
|
|
1806
|
+
=====================================================
|
|
1807
|
+
<tree> first argument after
|
|
1808
|
+
function name should be
|
|
1809
|
+
a tree file
|
|
1810
|
+
|
|
1811
|
+
-v/--verbose optional argument to print
|
|
1812
|
+
all patristic distances between
|
|
1813
|
+
taxa
|
|
1814
|
+
"""
|
|
1815
|
+
),
|
|
1816
|
+
)
|
|
1817
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
1818
|
+
parser.add_argument(
|
|
1819
|
+
"-v", "--verbose", action="store_true", required=False, help=SUPPRESS
|
|
1820
|
+
)
|
|
1821
|
+
args = parser.parse_args(argv)
|
|
1822
|
+
PatristicDistances(args).run()
|
|
1823
|
+
|
|
1824
|
+
@staticmethod
|
|
1825
|
+
def polytomy_test(argv):
|
|
1826
|
+
parser = ArgumentParser(
|
|
1827
|
+
add_help=True,
|
|
1828
|
+
usage=SUPPRESS,
|
|
1829
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1830
|
+
description=textwrap.dedent(
|
|
1831
|
+
f"""\
|
|
1832
|
+
{help_header}
|
|
1833
|
+
|
|
1834
|
+
Conduct a polytomy test for three clades in a phylogeny.
|
|
1835
|
+
|
|
1836
|
+
Polytomy tests can be used to identify putative radiations
|
|
1837
|
+
as well as identify well supported alternative topologies.
|
|
1838
|
+
|
|
1839
|
+
The polytomy testing function takes as input a file with
|
|
1840
|
+
the three groups of taxa to test the relationships for and
|
|
1841
|
+
a single column file with the names of the desired tree files
|
|
1842
|
+
to use for polytomy testing. Next, the script examines
|
|
1843
|
+
support for the grouping of the three taxa using triplets
|
|
1844
|
+
and gene support frequencies.
|
|
1845
|
+
|
|
1846
|
+
This function can account for uncertainty in gene trees -
|
|
1847
|
+
that is, the input phylogenies can have collapsed bipartitions.
|
|
1848
|
+
|
|
1849
|
+
Thereafter, a chi-squared test is conducted to determine if there
|
|
1850
|
+
is evidence to reject the null hypothesis wherein the null
|
|
1851
|
+
hypothesis is that the three possible topologies among the three
|
|
1852
|
+
groups are equally supported. This test is done using gene support
|
|
1853
|
+
frequencies.
|
|
1854
|
+
|
|
1855
|
+
Aliases:
|
|
1856
|
+
polytomy_test, polyt_test, polyt, ptt
|
|
1857
|
+
Command line interfaces:
|
|
1858
|
+
pk_polytomy_test, pk_polyt_test, pk_polyt, pk_ptt
|
|
1859
|
+
|
|
1860
|
+
Usage:
|
|
1861
|
+
phykit polytomy_test -t/--trees <trees> -g/--groups <groups>
|
|
1862
|
+
|
|
1863
|
+
Options
|
|
1864
|
+
=====================================================
|
|
1865
|
+
-t/--trees single column file with names
|
|
1866
|
+
of phylogenies to use for
|
|
1867
|
+
polytomy testing
|
|
1868
|
+
|
|
1869
|
+
-g/--groups a tab-delimited file with the
|
|
1870
|
+
grouping designations to test.
|
|
1871
|
+
Lines starting with comments
|
|
1872
|
+
are not considered. Names
|
|
1873
|
+
of individual taxa should be
|
|
1874
|
+
separated by a semi-colon ';'
|
|
1875
|
+
|
|
1876
|
+
For example, the groups file could look like the following:
|
|
1877
|
+
#labels group0 group1 group2
|
|
1878
|
+
name_of_test tip_name_A;tip_name_B tip_name_C tip_name_D;tip_name_E
|
|
1879
|
+
"""
|
|
1880
|
+
),
|
|
1881
|
+
)
|
|
1882
|
+
parser.add_argument("-t", "--trees", type=str, help=SUPPRESS)
|
|
1883
|
+
parser.add_argument("-g", "--groups", type=str, help=SUPPRESS)
|
|
1884
|
+
args = parser.parse_args(argv)
|
|
1885
|
+
PolytomyTest(args).run()
|
|
1886
|
+
|
|
1887
|
+
@staticmethod
|
|
1888
|
+
def print_tree(argv):
|
|
1889
|
+
parser = ArgumentParser(
|
|
1890
|
+
add_help=True,
|
|
1891
|
+
usage=SUPPRESS,
|
|
1892
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1893
|
+
description=textwrap.dedent(
|
|
1894
|
+
f"""\
|
|
1895
|
+
{help_header}
|
|
1896
|
+
|
|
1897
|
+
Print ascii tree of input phylogeny.
|
|
1898
|
+
|
|
1899
|
+
Phylogeny can be printed with or without branch lengths.
|
|
1900
|
+
By default, the phylogeny will be printed with branch lengths
|
|
1901
|
+
but branch lengths can be removed using the -r/--remove argument.
|
|
1902
|
+
|
|
1903
|
+
Aliases:
|
|
1904
|
+
print_tree, print, pt
|
|
1905
|
+
Command line interfaces:
|
|
1906
|
+
pk_print_tree, pk_print, pk_pt
|
|
1907
|
+
|
|
1908
|
+
Usage:
|
|
1909
|
+
phykit print_tree <tree> [-r/--remove]
|
|
1910
|
+
|
|
1911
|
+
Options
|
|
1912
|
+
=====================================================
|
|
1913
|
+
<tree> first argument after
|
|
1914
|
+
function name should be
|
|
1915
|
+
a tree file
|
|
1916
|
+
|
|
1917
|
+
-r/--remove optional argument to print
|
|
1918
|
+
the phylogeny without branch
|
|
1919
|
+
lengths
|
|
1920
|
+
"""
|
|
1921
|
+
),
|
|
1922
|
+
)
|
|
1923
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
1924
|
+
parser.add_argument(
|
|
1925
|
+
"-r", "--remove", action="store_true", required=False, help=SUPPRESS
|
|
1926
|
+
)
|
|
1927
|
+
args = parser.parse_args(argv)
|
|
1928
|
+
PrintTree(args).run()
|
|
1929
|
+
|
|
1930
|
+
@staticmethod
|
|
1931
|
+
def prune_tree(argv):
|
|
1932
|
+
parser = ArgumentParser(
|
|
1933
|
+
add_help=True,
|
|
1934
|
+
usage=SUPPRESS,
|
|
1935
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1936
|
+
description=textwrap.dedent(
|
|
1937
|
+
f"""\
|
|
1938
|
+
{help_header}
|
|
1939
|
+
|
|
1940
|
+
Prune tips from a phylogeny.
|
|
1941
|
+
|
|
1942
|
+
Provide a single column file with the names of the tips
|
|
1943
|
+
in the input phylogeny you would like to prune from the
|
|
1944
|
+
tree.
|
|
1945
|
+
|
|
1946
|
+
Aliases:
|
|
1947
|
+
prune_tree, prune
|
|
1948
|
+
Command line interfaces:
|
|
1949
|
+
pk_prune_tree, pk_prune
|
|
1950
|
+
|
|
1951
|
+
Usage:
|
|
1952
|
+
phykit prune_tree <tree> <list_of_taxa> [-o/--output <output_file>
|
|
1953
|
+
-k/--keep]
|
|
1954
|
+
|
|
1955
|
+
Options
|
|
1956
|
+
=====================================================
|
|
1957
|
+
<tree> first argument after
|
|
1958
|
+
function name should be
|
|
1959
|
+
a tree file
|
|
1960
|
+
|
|
1961
|
+
<list_of_taxa> single column file with the
|
|
1962
|
+
names of the tips to remove
|
|
1963
|
+
from the phylogeny
|
|
1964
|
+
|
|
1965
|
+
-o/--output name of output file for the
|
|
1966
|
+
pruned phylogeny.
|
|
1967
|
+
Default output will have
|
|
1968
|
+
the same name as the input
|
|
1969
|
+
file but with the suffix
|
|
1970
|
+
".pruned"
|
|
1971
|
+
|
|
1972
|
+
-k/--keep optional argument. If used
|
|
1973
|
+
instead of pruning taxa in
|
|
1974
|
+
<list_of_taxa>, keep them
|
|
1975
|
+
"""
|
|
1976
|
+
),
|
|
1977
|
+
)
|
|
1978
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
1979
|
+
parser.add_argument("list_of_taxa", type=str, help=SUPPRESS)
|
|
1980
|
+
parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
|
|
1981
|
+
parser.add_argument(
|
|
1982
|
+
"-k", "--keep", type=str2bool, nargs="?", default=False, help=SUPPRESS
|
|
1983
|
+
)
|
|
1984
|
+
args = parser.parse_args(argv)
|
|
1985
|
+
PruneTree(args).run()
|
|
1986
|
+
|
|
1987
|
+
@staticmethod
|
|
1988
|
+
def rename_tree_tips(argv):
|
|
1989
|
+
parser = ArgumentParser(
|
|
1990
|
+
add_help=True,
|
|
1991
|
+
usage=SUPPRESS,
|
|
1992
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
1993
|
+
description=textwrap.dedent(
|
|
1994
|
+
f"""\
|
|
1995
|
+
{help_header}
|
|
1996
|
+
|
|
1997
|
+
Renames tips in a phylogeny.
|
|
1998
|
+
|
|
1999
|
+
Renaming tip files will follow the scheme of a tab-delimited
|
|
2000
|
+
file wherein the first column is the current tip name and the
|
|
2001
|
+
second column is the desired tip name in the resulting
|
|
2002
|
+
phylogeny.
|
|
2003
|
+
|
|
2004
|
+
Aliases:
|
|
2005
|
+
rename_tree_tips, rename_tree, rename_tips
|
|
2006
|
+
Command line interfaces:
|
|
2007
|
+
pk_rename_tree_tips, pk_rename_tree, pk_rename_tips
|
|
2008
|
+
|
|
2009
|
+
Usage:
|
|
2010
|
+
phykit rename_tree_tips <tree> -i/--idmap <idmap.txt>
|
|
2011
|
+
[-o/--output <output_file>]
|
|
2012
|
+
|
|
2013
|
+
Options
|
|
2014
|
+
=====================================================
|
|
2015
|
+
<tree> first argument after
|
|
2016
|
+
function name should be
|
|
2017
|
+
a tree file
|
|
2018
|
+
|
|
2019
|
+
-i/--idmap identifier map of current tip
|
|
2020
|
+
names (col1) and desired tip
|
|
2021
|
+
names (col2)
|
|
2022
|
+
|
|
2023
|
+
-o/--output optional argument to write
|
|
2024
|
+
the renamed tree files to.
|
|
2025
|
+
Default output will have
|
|
2026
|
+
the same name as the input
|
|
2027
|
+
file but with the suffix
|
|
2028
|
+
".renamed"
|
|
2029
|
+
"""
|
|
2030
|
+
),
|
|
2031
|
+
)
|
|
2032
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
2033
|
+
parser.add_argument("-i", "--idmap", type=str, help=SUPPRESS)
|
|
2034
|
+
parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
|
|
2035
|
+
args = parser.parse_args(argv)
|
|
2036
|
+
RenameTreeTips(args).run()
|
|
2037
|
+
|
|
2038
|
+
@staticmethod
|
|
2039
|
+
def rf_distance(argv):
|
|
2040
|
+
parser = ArgumentParser(
|
|
2041
|
+
add_help=True,
|
|
2042
|
+
usage=SUPPRESS,
|
|
2043
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
2044
|
+
description=textwrap.dedent(
|
|
2045
|
+
f"""\
|
|
2046
|
+
{help_header}
|
|
2047
|
+
|
|
2048
|
+
Calculate Robinson-Foulds (RF) distance between two trees.
|
|
2049
|
+
|
|
2050
|
+
Low RF distances reflect greater similarity between two phylogenies.
|
|
2051
|
+
This function prints out two values, the plain RF value and the
|
|
2052
|
+
normalized RF value, which are separated by a tab. Normalized RF values
|
|
2053
|
+
are calculated by taking the plain RF value and dividing it by 2(n-3)
|
|
2054
|
+
where n is the number of tips in the phylogeny. Prior to calculating
|
|
2055
|
+
an RF value, PhyKIT will first determine the number of shared tips
|
|
2056
|
+
between the two input phylogenies and prune them to a common set of
|
|
2057
|
+
tips. Thus, users can input trees with different topologies and
|
|
2058
|
+
infer an RF value among subtrees with shared tips.
|
|
2059
|
+
|
|
2060
|
+
PhyKIT will print out
|
|
2061
|
+
col 1: the plain RF distance and
|
|
2062
|
+
col 2: the normalized RF distance.
|
|
2063
|
+
|
|
2064
|
+
RF distances are calculated following Robinson & Foulds, Mathematical
|
|
2065
|
+
Biosciences (1981), doi: 10.1016/0025-5564(81)90043-2.
|
|
2066
|
+
|
|
2067
|
+
Aliases:
|
|
2068
|
+
robinson_foulds_distance, rf_distance, rf_dist, rf
|
|
2069
|
+
Command line interfaces:
|
|
2070
|
+
pk_robinson_foulds_distance, pk_rf_distance, pk_rf_dist, pk_rf
|
|
2071
|
+
|
|
2072
|
+
Usage:
|
|
2073
|
+
phykit robinson_foulds_distance <tree_file_zero> <tree_file_one>
|
|
2074
|
+
|
|
2075
|
+
Options
|
|
2076
|
+
=====================================================
|
|
2077
|
+
<tree_file_zero> first argument after
|
|
2078
|
+
function name should be
|
|
2079
|
+
a tree file
|
|
2080
|
+
|
|
2081
|
+
<tree_file_one> second argument after
|
|
2082
|
+
function name should be
|
|
2083
|
+
a tree file
|
|
2084
|
+
"""
|
|
2085
|
+
),
|
|
2086
|
+
)
|
|
2087
|
+
parser.add_argument("tree_zero", type=str, help=SUPPRESS)
|
|
2088
|
+
parser.add_argument("tree_one", type=str, help=SUPPRESS)
|
|
2089
|
+
args = parser.parse_args(argv)
|
|
2090
|
+
RobinsonFouldsDistance(args).run()
|
|
2091
|
+
|
|
2092
|
+
@staticmethod
|
|
2093
|
+
def root_tree(argv):
|
|
2094
|
+
parser = ArgumentParser(
|
|
2095
|
+
add_help=True,
|
|
2096
|
+
usage=SUPPRESS,
|
|
2097
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
2098
|
+
description=textwrap.dedent(
|
|
2099
|
+
f"""\
|
|
2100
|
+
{help_header}
|
|
2101
|
+
|
|
2102
|
+
Roots phylogeny using user-specified taxa.
|
|
2103
|
+
|
|
2104
|
+
A list of taxa to root the phylogeny on should be
|
|
2105
|
+
specified using the -r argument. The root_taxa file
|
|
2106
|
+
should be a single-column file with taxa names. The
|
|
2107
|
+
outputted file will have the same name as the inputted
|
|
2108
|
+
tree file but with the suffix ".rooted".
|
|
2109
|
+
|
|
2110
|
+
Aliases:
|
|
2111
|
+
root_tree, root, rt
|
|
2112
|
+
Command line interfaces:
|
|
2113
|
+
pk_root_tree, pk_root, pk_rt
|
|
2114
|
+
|
|
2115
|
+
Usage:
|
|
2116
|
+
phykit root_tree <tree> -r/--root <root_taxa>
|
|
2117
|
+
[-o/--output <output_file>]
|
|
2118
|
+
|
|
2119
|
+
Options
|
|
2120
|
+
=====================================================
|
|
2121
|
+
<tree> first argument after
|
|
2122
|
+
function name should be
|
|
2123
|
+
a tree file
|
|
2124
|
+
|
|
2125
|
+
-r/--root single column file with
|
|
2126
|
+
tip names of root taxa
|
|
2127
|
+
|
|
2128
|
+
-o/--output optional argument to write
|
|
2129
|
+
the rooted tree file to.
|
|
2130
|
+
Default output will have
|
|
2131
|
+
the same name as the input
|
|
2132
|
+
file but with the suffix
|
|
2133
|
+
".rooted"
|
|
2134
|
+
"""
|
|
2135
|
+
),
|
|
2136
|
+
)
|
|
2137
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
2138
|
+
parser.add_argument("-r", "--root", type=str, required=True, help=SUPPRESS)
|
|
2139
|
+
parser.add_argument("-o", "--output", type=str, required=False, help=SUPPRESS)
|
|
2140
|
+
args = parser.parse_args(argv)
|
|
2141
|
+
RootTree(args).run()
|
|
2142
|
+
|
|
2143
|
+
@staticmethod
|
|
2144
|
+
def spurious_sequence(argv):
|
|
2145
|
+
parser = ArgumentParser(
|
|
2146
|
+
add_help=True,
|
|
2147
|
+
usage=SUPPRESS,
|
|
2148
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
2149
|
+
description=textwrap.dedent(
|
|
2150
|
+
f"""\
|
|
2151
|
+
{help_header}
|
|
2152
|
+
|
|
2153
|
+
Determines potentially spurious homologs using branch lengths.
|
|
2154
|
+
|
|
2155
|
+
Identifies potentially spurious sequences and reports
|
|
2156
|
+
tips in the phylogeny that could possibly be removed
|
|
2157
|
+
from the associated multiple sequence alignment. PhyKIT
|
|
2158
|
+
does so by identifying and reporting long terminal branches
|
|
2159
|
+
defined as branches that are equal to or greater than 20 times the median
|
|
2160
|
+
length of all branches.
|
|
2161
|
+
|
|
2162
|
+
PhyKIT reports the following information
|
|
2163
|
+
col1: name of tip that is a putatively spurious sequence
|
|
2164
|
+
col2: length of branch leading to putatively spurious sequence
|
|
2165
|
+
col3: threshold used to identify putatively spurious sequences
|
|
2166
|
+
col4: median branch length in the phylogeny
|
|
2167
|
+
|
|
2168
|
+
If there are no putatively spurious sequences, "None" is reported.
|
|
2169
|
+
|
|
2170
|
+
Using this method to identify potentially spurious sequences
|
|
2171
|
+
was, to my knowledge, first introduced by Shen et al., (2018)
|
|
2172
|
+
Cell doi: 10.1016/j.cell.2018.10.023.
|
|
2173
|
+
|
|
2174
|
+
Aliases:
|
|
2175
|
+
spurious_sequence, spurious_seq, ss
|
|
2176
|
+
Command line interfaces:
|
|
2177
|
+
pk_spurious_sequence, pk_spurious_seq, pk_ss
|
|
2178
|
+
|
|
2179
|
+
Usage:
|
|
2180
|
+
phykit spurious_sequence <file> [-f 20]
|
|
2181
|
+
|
|
2182
|
+
Options
|
|
2183
|
+
=====================================================
|
|
2184
|
+
<file> first argument after
|
|
2185
|
+
function name should be
|
|
2186
|
+
a tree file
|
|
2187
|
+
|
|
2188
|
+
-f/--factor factor to multiply median
|
|
2189
|
+
branch length by to calculate
|
|
2190
|
+
the threshold of long branches.
|
|
2191
|
+
(Default: 20)
|
|
2192
|
+
"""
|
|
2193
|
+
),
|
|
2194
|
+
)
|
|
2195
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
2196
|
+
parser.add_argument("-f", "--factor", type=float, required=False, help=SUPPRESS)
|
|
2197
|
+
args = parser.parse_args(argv)
|
|
2198
|
+
SpuriousSequence(args).run()
|
|
2199
|
+
|
|
2200
|
+
@staticmethod
|
|
2201
|
+
def terminal_branch_stats(argv):
|
|
2202
|
+
parser = ArgumentParser(
|
|
2203
|
+
add_help=True,
|
|
2204
|
+
usage=SUPPRESS,
|
|
2205
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
2206
|
+
description=textwrap.dedent(
|
|
2207
|
+
f"""\
|
|
2208
|
+
{help_header}
|
|
2209
|
+
|
|
2210
|
+
Calculate summary statistics for terminal branch lengths in a phylogeny.
|
|
2211
|
+
|
|
2212
|
+
Terminal branch lengths can be useful for phylogeny diagnostics.
|
|
2213
|
+
|
|
2214
|
+
To obtain all terminal branch lengths, use the -v/--verbose option.
|
|
2215
|
+
|
|
2216
|
+
Aliases:
|
|
2217
|
+
terminal_branch_stats, tbs
|
|
2218
|
+
Command line interfaces:
|
|
2219
|
+
pk_terminal_branch_stats, pk_tbs
|
|
2220
|
+
|
|
2221
|
+
Usage:
|
|
2222
|
+
phykit terminal_branch_stats <tree> [-v/--verbose]
|
|
2223
|
+
|
|
2224
|
+
Options
|
|
2225
|
+
=====================================================
|
|
2226
|
+
<tree> first argument after
|
|
2227
|
+
function name should be
|
|
2228
|
+
a tree file
|
|
2229
|
+
|
|
2230
|
+
-v/--verbose optional argument to print
|
|
2231
|
+
all terminal branch lengths
|
|
2232
|
+
"""
|
|
2233
|
+
),
|
|
2234
|
+
)
|
|
2235
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
2236
|
+
parser.add_argument(
|
|
2237
|
+
"-v", "--verbose", action="store_true", required=False, help=SUPPRESS
|
|
2238
|
+
)
|
|
2239
|
+
args = parser.parse_args(argv)
|
|
2240
|
+
TerminalBranchStats(args).run()
|
|
2241
|
+
|
|
2242
|
+
@staticmethod
|
|
2243
|
+
def tip_labels(argv):
|
|
2244
|
+
parser = ArgumentParser(
|
|
2245
|
+
add_help=True,
|
|
2246
|
+
usage=SUPPRESS,
|
|
2247
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
2248
|
+
description=textwrap.dedent(
|
|
2249
|
+
f"""\
|
|
2250
|
+
{help_header}
|
|
2251
|
+
|
|
2252
|
+
Prints the tip labels (or names) of a phylogeny.
|
|
2253
|
+
|
|
2254
|
+
Aliases:
|
|
2255
|
+
tip_labels, tree_labels, labels, tl
|
|
2256
|
+
Command line interfaces:
|
|
2257
|
+
pk_tip_labels, pk_tree_labels, pk_labels, pk_tl
|
|
2258
|
+
|
|
2259
|
+
Usage:
|
|
2260
|
+
phykit tip_labels <tree>
|
|
2261
|
+
|
|
2262
|
+
Options
|
|
2263
|
+
=====================================================
|
|
2264
|
+
<tree> first argument after
|
|
2265
|
+
function name should be
|
|
2266
|
+
a tree file
|
|
2267
|
+
"""
|
|
2268
|
+
),
|
|
2269
|
+
)
|
|
2270
|
+
parser.add_argument("tree", type=str, help=SUPPRESS)
|
|
2271
|
+
args = parser.parse_args(argv)
|
|
2272
|
+
TipLabels(args).run()
|
|
2273
|
+
|
|
2274
|
+
@staticmethod
|
|
2275
|
+
def tip_to_tip_distance(argv):
|
|
2276
|
+
parser = ArgumentParser(
|
|
2277
|
+
add_help=True,
|
|
2278
|
+
usage=SUPPRESS,
|
|
2279
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
2280
|
+
description=textwrap.dedent(
|
|
2281
|
+
f"""\
|
|
2282
|
+
{help_header}
|
|
2283
|
+
|
|
2284
|
+
Calculate distance between two tips (or leaves) in a phylogeny.
|
|
2285
|
+
|
|
2286
|
+
Distances are in substitutions per site.
|
|
2287
|
+
|
|
2288
|
+
Aliases:
|
|
2289
|
+
tip_to_tip_distance, t2t_dist, t2t
|
|
2290
|
+
Command line interfaces:
|
|
2291
|
+
pk_tip_to_tip_distance, pk_t2t_dist, pk_t2t
|
|
2292
|
+
|
|
2293
|
+
Usage:
|
|
2294
|
+
phykit tip_to_tip_distance <tree_file> <tip_1> <tip_2>
|
|
2295
|
+
|
|
2296
|
+
Options
|
|
2297
|
+
=====================================================
|
|
2298
|
+
<tree_file> first argument after
|
|
2299
|
+
function name should be
|
|
2300
|
+
a tree file
|
|
2301
|
+
|
|
2302
|
+
<tip_1> second argument after
|
|
2303
|
+
function name should be
|
|
2304
|
+
one of the tip names
|
|
2305
|
+
|
|
2306
|
+
<tip_2> third argument after
|
|
2307
|
+
function name should be
|
|
2308
|
+
the second tip name
|
|
2309
|
+
"""
|
|
2310
|
+
),
|
|
2311
|
+
)
|
|
2312
|
+
parser.add_argument("tree_zero", type=str, help=SUPPRESS)
|
|
2313
|
+
parser.add_argument("tip_1", type=str, help=SUPPRESS)
|
|
2314
|
+
parser.add_argument("tip_2", type=str, help=SUPPRESS)
|
|
2315
|
+
args = parser.parse_args(argv)
|
|
2316
|
+
TipToTipDistance(args).run()
|
|
2317
|
+
|
|
2318
|
+
@staticmethod
|
|
2319
|
+
def tip_to_tip_node_distance(argv):
|
|
2320
|
+
parser = ArgumentParser(
|
|
2321
|
+
add_help=True,
|
|
2322
|
+
usage=SUPPRESS,
|
|
2323
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
2324
|
+
description=textwrap.dedent(
|
|
2325
|
+
f"""\
|
|
2326
|
+
{help_header}
|
|
2327
|
+
|
|
2328
|
+
Calculate distance between two tips (or leaves) in a phylogeny.
|
|
2329
|
+
|
|
2330
|
+
Distance is measured by the number of nodes between one tip
|
|
2331
|
+
and another.
|
|
2332
|
+
|
|
2333
|
+
Aliases:
|
|
2334
|
+
tip_to_tip_node_distance, t2t_node_dist, t2t_nd
|
|
2335
|
+
Command line interfaces:
|
|
2336
|
+
pk_tip_to_tip_node_distance, pk_t2t_node_dist, pk_t2t_nd
|
|
2337
|
+
|
|
2338
|
+
Usage:
|
|
2339
|
+
phykit tip_to_tip_node_distance <tree_file> <tip_1> <tip_2>
|
|
2340
|
+
|
|
2341
|
+
Options
|
|
2342
|
+
=====================================================
|
|
2343
|
+
<tree_file> first argument after
|
|
2344
|
+
function name should be
|
|
2345
|
+
a tree file
|
|
2346
|
+
|
|
2347
|
+
<tip_1> second argument after
|
|
2348
|
+
function name should be
|
|
2349
|
+
one of the tip names
|
|
2350
|
+
|
|
2351
|
+
<tip_2> third argument after
|
|
2352
|
+
function name should be
|
|
2353
|
+
the second tip name
|
|
2354
|
+
"""
|
|
2355
|
+
),
|
|
2356
|
+
)
|
|
2357
|
+
parser.add_argument("tree_zero", type=str, help=SUPPRESS)
|
|
2358
|
+
parser.add_argument("tip_1", type=str, help=SUPPRESS)
|
|
2359
|
+
parser.add_argument("tip_2", type=str, help=SUPPRESS)
|
|
2360
|
+
args = parser.parse_args(argv)
|
|
2361
|
+
TipToTipNodeDistance(args).run()
|
|
2362
|
+
|
|
2363
|
+
@staticmethod
def total_tree_length(argv):
    """Parse ``argv`` for the total_tree_length command and run it.

    The parser accepts a single positional argument, ``tree``. The
    auto-generated usage line and per-argument help are suppressed, so the
    description text below carries the user-facing documentation.

    Args:
        argv: list of command-line tokens handed to ``parse_args``.
    """
    description = textwrap.dedent(
        f"""\
        {help_header}

        Calculate total tree length, which is a sum of all branches.

        Aliases:
        total_tree_length, tree_len
        Command line interfaces:
        pk_total_tree_length, pk_tree_len

        Usage:
        phykit total_tree_length <tree>

        Options
        =====================================================
        <tree> first argument after
        function name should be
        a tree file
        """
    )
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=description,
    )
    parser.add_argument("tree", type=str, help=SUPPRESS)
    # Hand the parsed namespace straight to the service object.
    TotalTreeLength(parser.parse_args(argv)).run()
|
|
2394
|
+
|
|
2395
|
+
@staticmethod
def treeness(argv):
    """Parse ``argv`` for the treeness command and run ``Treeness``.

    The parser accepts a single positional argument, ``tree``. The
    auto-generated usage line and per-argument help are suppressed, so the
    description text below carries the user-facing documentation.

    Args:
        argv: list of command-line tokens handed to ``parse_args``.
    """
    description = textwrap.dedent(
        f"""\
        {help_header}

        Calculate treeness statistic for a phylogeny.

        Higher treeness values are thought to be desirable because they
        represent a higher signal-to-noise ratio.

        Treeness describes the proportion of the tree distance found on
        internal branches. Treeness can be used as a measure of the
        signal-to-noise ratio in a phylogeny.

        Calculate treeness (also referred to as stemminess) following
        Lanyon, The Auk (1988), doi: 10.1093/auk/105.3.565 and
        Phillips and Penny, Molecular Phylogenetics and Evolution
        (2003), doi: 10.1016/S1055-7903(03)00057-5.

        Aliases:
        treeness, tness
        Command line interfaces:
        pk_treeness, pk_tness

        Usage:
        phykit treeness <tree>

        Options
        =====================================================
        <tree> first argument after
        function name should be
        a tree file
        """
    )
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=description,
    )
    parser.add_argument("tree", type=str, help=SUPPRESS)
    # Hand the parsed namespace straight to the service object.
    Treeness(parser.parse_args(argv)).run()
|
|
2438
|
+
|
|
2439
|
+
## Alignment and tree functions
|
|
2440
|
+
@staticmethod
def saturation(argv):
    """Parse ``argv`` for the saturation command and run ``Saturation``.

    ``-a/--alignment`` and ``-t/--tree`` are required string options;
    ``-v/--verbose`` and ``-e/--exclude_gaps`` are boolean flags that
    default to False. The auto-generated usage line and per-argument help
    are suppressed, so the description text is the user-facing
    documentation.

    Args:
        argv: list of command-line tokens handed to ``parse_args``.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Calculate saturation for a given tree and alignment.

            Saturation is defined as sequences in multiple sequence
            alignments that have undergone numerous substitutions such
            that the distances between taxa are underestimated.

            Data with no saturation will have a value of 1. The closer
            the value is to 1, the less saturated the data.

            This function outputs two values (as of v1.19.9). The first
            value is the saturation value and the second column is the absolute
            value of saturation minus 1. Thus, lower values in the second column
            are indicative of values closer to one and, thus, less saturation.

            Saturation is calculated following Philippe et al., PLoS
            Biology (2011), doi: 10.1371/journal.pbio.1000602.

            Aliases:
            saturation, sat
            Command line interfaces:
            pk_saturation, pk_sat

            Usage:
            phykit saturation -a <alignment> -t <tree> [-e/--exclude_gaps] [-v/--verbose]

            Options
            =====================================================
            -a/--alignment an alignment file

            -t/--tree a tree file

            -e/--exclude_gaps if a site has a gap, ignore it

            -v/--verbose print out patristic distances
            and uncorrected distances used
            to determine saturation
            """
        ),
    )
    # The usage line above now lists -e/--exclude_gaps, which the parser
    # has always accepted but the usage text previously left out.
    parser.add_argument(
        "-a", "--alignment", type=str, required=True, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
    )
    parser.add_argument(
        "-e", "--exclude_gaps", action="store_true", required=False, help=SUPPRESS
    )
    args = parser.parse_args(argv)
    Saturation(args).run()
|
|
2503
|
+
|
|
2504
|
+
@staticmethod
def treeness_over_rcv(argv):
    """Parse ``argv`` for the treeness_over_rcv command and run ``TreenessOverRCV``.

    ``-a/--alignment`` and ``-t/--tree`` are both required string options.
    The auto-generated usage line and per-argument help are suppressed, so
    the description text is the user-facing documentation.

    Args:
        argv: list of command-line tokens handed to ``parse_args``.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Calculate treeness/RCV for a given alignment and tree.

            Higher treeness/RCV values are thought to be desirable because
            they harbor a high signal-to-noise ratio and are least susceptible
            to composition bias.

            PhyKIT reports three tab delimited values:
            col1: treeness/RCV
            col2: treeness
            col3: RCV

            Calculate treeness/RCV following Phillips and Penny, Molecular
            Phylogenetics and Evolution (2003), doi: 10.1016/S1055-7903(03)00057-5.

            Aliases:
            treeness_over_rcv, toverr, tor
            Command line interfaces:
            pk_treeness_over_rcv, pk_toverr, pk_tor

            Usage:
            phykit treeness_over_rcv -a/--alignment <alignment> -t/--tree <tree>

            Options
            =====================================================
            -a/--alignment an alignment file

            -t/--tree a tree file
            """
        ),
    )
    parser.add_argument(
        "-a", "--alignment", type=str, required=True, help=SUPPRESS, metavar=""
    )
    parser.add_argument(
        "-t", "--tree", type=str, required=True, help=SUPPRESS, metavar=""
    )
    args = parser.parse_args(argv)
    TreenessOverRCV(args).run()
|
|
2552
|
+
|
|
2553
|
+
### Helper commands
|
|
2554
|
+
@staticmethod
def create_concatenation_matrix(argv):
    """Parse ``argv`` for create_concatenation_matrix and run the service.

    ``-a/--alignment_list`` and ``-p/--prefix`` are string options. Both
    are marked required, matching the usage line (which shows them without
    brackets) and the sibling alignment/tree parsers, so a missing option
    is reported by argparse instead of reaching
    ``CreateConcatenationMatrix`` as ``None``.

    Args:
        argv: list of command-line tokens handed to ``parse_args``.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Create a concatenated alignment file. This function is
            used to help in the construction of multi-locus data
            matrices.

            PhyKIT will output three files:
            1) A fasta file with '.fa' appended to the prefix specified
            with the -p/--prefix parameter.
            2) A partition file ready for input into RAxML or IQ-tree.
            3) An occupancy file that summarizes the taxon occupancy
            per sequence.
            - column 1: alignment name
            - column 2: # of taxa present
            - column 3: # of taxa missing
            - column 4: fraction of occupancy
            - column 5: names of missing taxa (; separated)

            Aliases:
            create_concatenation_matrix, create_concat, cc
            Command line interfaces:
            pk_create_concatenation_matrix, pk_create_concat, pk_cc

            Usage:
            phykit create_concatenation_matrix -a <file> -p <string>

            Options
            =====================================================
            -a/--alignment_list alignment list file. File
            should contain a single
            column list of alignment
            sequence files to concatenate
            into a single matrix. Provide
            path to files relative to
            working directory or provide
            absolute path.

            -p/--prefix prefix of output files
            """
        ),
    )
    # "--alignment" still works as an unambiguous abbreviation of
    # "--alignment_list" under argparse's default prefix matching.
    parser.add_argument(
        "-a", "--alignment_list", type=str, required=True, help=SUPPRESS
    )
    parser.add_argument("-p", "--prefix", type=str, required=True, help=SUPPRESS)
    args = parser.parse_args(argv)
    CreateConcatenationMatrix(args).run()
|
|
2607
|
+
|
|
2608
|
+
@staticmethod
def thread_dna(argv):
    """Parse ``argv`` for the thread_dna command and run ``DNAThreader``.

    ``-p/--protein`` and ``-n/--nucleotide`` are string options, marked
    required to match the usage line (which shows them without brackets).
    ``-c/--clipkit_log_file`` is an optional string. ``-s/--stop`` takes an
    optional value converted by ``str2bool`` and defaults to True; with
    ``nargs="?"`` and no ``const``, a bare ``-s`` stores argparse's default
    ``const`` of None.

    Args:
        argv: list of command-line tokens handed to ``parse_args``.
    """
    parser = ArgumentParser(
        add_help=True,
        usage=SUPPRESS,
        formatter_class=RawDescriptionHelpFormatter,
        description=textwrap.dedent(
            f"""\
            {help_header}

            Thread DNA sequence onto a protein alignment to create a
            codon-based alignment.

            This function requires input alignments are in fasta format.
            Codon alignments are then printed to stdout. Note, paired
            sequences are assumed to have the same name between the
            protein and nucleotide file. The order does not matter.

            To thread nucleotide sequences over a trimmed amino acid
            alignment, provide PhyKIT with a log file specifying which
            sites have been trimmed and which have been kept. The log
            file must be formatted the same as the log files outputted
            by the alignment trimming toolkit ClipKIT (see -l in ClipKIT
            documentation.) Details about ClipKIT can be seen here:
            https://github.com/JLSteenwyk/ClipKIT.

            If using a ClipKIT log file, the untrimmed protein alignment
            should be provided in the -p/--protein argument.

            Aliases:
            thread_dna, pal2nal, p2n
            Command line interfaces:
            pk_thread_dna, pk_pal2nal, pk_p2n

            Usage:
            phykit thread_dna -p <file> -n <file> [-c/--clipkit_log_file
            <clipkit outputted log file> -s]

            Options
            =====================================================
            -p/--protein protein alignment file

            -n/--nucleotide nucleotide sequence file

            -c/--clipkit_log_file clipkit outputted log file

            -s/--stop boolean for whether or not
            stop codons should be kept.
            If used, stop codons will
            be removed.
            """
        ),
    )
    parser.add_argument("-p", "--protein", type=str, required=True, help=SUPPRESS)
    parser.add_argument(
        "-n", "--nucleotide", type=str, required=True, help=SUPPRESS
    )
    parser.add_argument(
        "-c",
        "--clipkit_log_file",
        type=str,
        required=False,
        help=SUPPRESS,
    )
    parser.add_argument(
        "-s", "--stop", type=str2bool, nargs="?", default=True, help=SUPPRESS
    )
    args = parser.parse_args(argv)
    DNAThreader(args).run()
|
|
2675
|
+
|
|
2676
|
+
|
|
2677
|
+
def main(argv=None):
    """Instantiate ``Phykit``.

    ``argv`` is accepted but not forwarded; the ``Phykit`` constructor is
    called without arguments.
    """
    Phykit()
|
|
2679
|
+
|
|
2680
|
+
|
|
2681
|
+
# Alignment-based functions
|
|
2682
|
+
def alignment_length(argv=None):
    """Run ``Phykit.alignment_length`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.alignment_length(sys.argv[1:] if argv is None else argv)
|
|
2684
|
+
|
|
2685
|
+
|
|
2686
|
+
def alignment_length_no_gaps(argv=None):
    """Run ``Phykit.alignment_length_no_gaps`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.alignment_length_no_gaps(sys.argv[1:] if argv is None else argv)
|
|
2688
|
+
|
|
2689
|
+
|
|
2690
|
+
def column_score(argv=None):
    """Run ``Phykit.column_score`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.column_score(sys.argv[1:] if argv is None else argv)
|
|
2692
|
+
|
|
2693
|
+
|
|
2694
|
+
def compositional_bias_per_site(argv=None):
    """Run ``Phykit.compositional_bias_per_site`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.compositional_bias_per_site(sys.argv[1:] if argv is None else argv)
|
|
2696
|
+
|
|
2697
|
+
|
|
2698
|
+
def evolutionary_rate_per_site(argv=None):
    """Run ``Phykit.evolutionary_rate_per_site`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.evolutionary_rate_per_site(sys.argv[1:] if argv is None else argv)
|
|
2700
|
+
|
|
2701
|
+
|
|
2702
|
+
def faidx(argv=None):
    """Run ``Phykit.faidx`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.faidx(sys.argv[1:] if argv is None else argv)
|
|
2704
|
+
|
|
2705
|
+
|
|
2706
|
+
def gc_content(argv=None):
    """Run ``Phykit.gc_content`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.gc_content(sys.argv[1:] if argv is None else argv)
|
|
2708
|
+
|
|
2709
|
+
|
|
2710
|
+
def pairwise_identity(argv=None):
    """Run ``Phykit.pairwise_identity`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.pairwise_identity(sys.argv[1:] if argv is None else argv)
|
|
2712
|
+
|
|
2713
|
+
|
|
2714
|
+
def parsimony_informative_sites(argv=None):
    """Run ``Phykit.parsimony_informative_sites`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.parsimony_informative_sites(sys.argv[1:] if argv is None else argv)
|
|
2716
|
+
|
|
2717
|
+
|
|
2718
|
+
def rcv(argv=None):
    """Run ``Phykit.rcv`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.rcv(sys.argv[1:] if argv is None else argv)
|
|
2720
|
+
|
|
2721
|
+
|
|
2722
|
+
def rcvt(argv=None):
    """Run ``Phykit.rcvt`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.rcvt(sys.argv[1:] if argv is None else argv)
|
|
2724
|
+
|
|
2725
|
+
|
|
2726
|
+
def rename_fasta_entries(argv=None):
    """Run ``Phykit.rename_fasta_entries`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.rename_fasta_entries(sys.argv[1:] if argv is None else argv)
|
|
2728
|
+
|
|
2729
|
+
|
|
2730
|
+
def sum_of_pairs_score(argv=None):
    """Run ``Phykit.sum_of_pairs_score`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.sum_of_pairs_score(sys.argv[1:] if argv is None else argv)
|
|
2732
|
+
|
|
2733
|
+
|
|
2734
|
+
def variable_sites(argv=None):
    """Run ``Phykit.variable_sites`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.variable_sites(sys.argv[1:] if argv is None else argv)
|
|
2736
|
+
|
|
2737
|
+
|
|
2738
|
+
# Tree-based functions
|
|
2739
|
+
def bipartition_support_stats(argv=None):
    """Run ``Phykit.bipartition_support_stats`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.bipartition_support_stats(sys.argv[1:] if argv is None else argv)
|
|
2741
|
+
|
|
2742
|
+
|
|
2743
|
+
def branch_length_multiplier(argv=None):
    """Run ``Phykit.branch_length_multiplier`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.branch_length_multiplier(sys.argv[1:] if argv is None else argv)
|
|
2745
|
+
|
|
2746
|
+
|
|
2747
|
+
def collapse_branches(argv=None):
    """Run ``Phykit.collapse_branches`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.collapse_branches(sys.argv[1:] if argv is None else argv)
|
|
2749
|
+
|
|
2750
|
+
|
|
2751
|
+
def covarying_evolutionary_rates(argv=None):
    """Run ``Phykit.covarying_evolutionary_rates`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.covarying_evolutionary_rates(sys.argv[1:] if argv is None else argv)
|
|
2753
|
+
|
|
2754
|
+
|
|
2755
|
+
def dvmc(argv=None):
    """Run ``Phykit.dvmc`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.dvmc(sys.argv[1:] if argv is None else argv)
|
|
2757
|
+
|
|
2758
|
+
|
|
2759
|
+
def evolutionary_rate(argv=None):
    """Run ``Phykit.evolutionary_rate`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.evolutionary_rate(sys.argv[1:] if argv is None else argv)
|
|
2761
|
+
|
|
2762
|
+
|
|
2763
|
+
def hidden_paralogy_check(argv=None):
    """Run ``Phykit.hidden_paralogy_check`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.hidden_paralogy_check(sys.argv[1:] if argv is None else argv)
|
|
2765
|
+
|
|
2766
|
+
|
|
2767
|
+
def internal_branch_stats(argv=None):
    """Run ``Phykit.internal_branch_stats`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.internal_branch_stats(sys.argv[1:] if argv is None else argv)
|
|
2769
|
+
|
|
2770
|
+
|
|
2771
|
+
def internode_labeler(argv=None):
    """Run ``Phykit.internode_labeler`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.internode_labeler(sys.argv[1:] if argv is None else argv)
|
|
2773
|
+
|
|
2774
|
+
|
|
2775
|
+
def last_common_ancestor_subtree(argv=None):
    """Run ``Phykit.last_common_ancestor_subtree`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.last_common_ancestor_subtree(sys.argv[1:] if argv is None else argv)
|
|
2777
|
+
|
|
2778
|
+
|
|
2779
|
+
def lb_score(argv=None):
    """Run ``Phykit.lb_score`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.lb_score(sys.argv[1:] if argv is None else argv)
|
|
2781
|
+
|
|
2782
|
+
|
|
2783
|
+
def monophyly_check(argv=None):
    """Run ``Phykit.monophyly_check`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.monophyly_check(sys.argv[1:] if argv is None else argv)
|
|
2785
|
+
|
|
2786
|
+
|
|
2787
|
+
def nearest_neighbor_interchange(argv=None):
    """Run ``Phykit.nearest_neighbor_interchange`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.nearest_neighbor_interchange(sys.argv[1:] if argv is None else argv)
|
|
2789
|
+
|
|
2790
|
+
|
|
2791
|
+
def patristic_distances(argv=None):
    """Run ``Phykit.patristic_distances`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.patristic_distances(sys.argv[1:] if argv is None else argv)
|
|
2793
|
+
|
|
2794
|
+
|
|
2795
|
+
def polytomy_test(argv=None):
    """Run ``Phykit.polytomy_test`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.polytomy_test(sys.argv[1:] if argv is None else argv)
|
|
2797
|
+
|
|
2798
|
+
|
|
2799
|
+
def print_tree(argv=None):
    """Run ``Phykit.print_tree`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.print_tree(sys.argv[1:] if argv is None else argv)
|
|
2801
|
+
|
|
2802
|
+
|
|
2803
|
+
def prune_tree(argv=None):
    """Run ``Phykit.prune_tree`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.prune_tree(sys.argv[1:] if argv is None else argv)
|
|
2805
|
+
|
|
2806
|
+
|
|
2807
|
+
def rename_tree_tips(argv=None):
    """Run ``Phykit.rename_tree_tips`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.rename_tree_tips(sys.argv[1:] if argv is None else argv)
|
|
2809
|
+
|
|
2810
|
+
|
|
2811
|
+
def rf_distance(argv=None):
    """Run ``Phykit.rf_distance`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.rf_distance(sys.argv[1:] if argv is None else argv)
|
|
2813
|
+
|
|
2814
|
+
|
|
2815
|
+
def root_tree(argv=None):
    """Run ``Phykit.root_tree`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.root_tree(sys.argv[1:] if argv is None else argv)
|
|
2817
|
+
|
|
2818
|
+
|
|
2819
|
+
def spurious_sequence(argv=None):
    """Run ``Phykit.spurious_sequence`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.spurious_sequence(sys.argv[1:] if argv is None else argv)
|
|
2821
|
+
|
|
2822
|
+
|
|
2823
|
+
def terminal_branch_stats(argv=None):
    """Run ``Phykit.terminal_branch_stats`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.terminal_branch_stats(sys.argv[1:] if argv is None else argv)
|
|
2825
|
+
|
|
2826
|
+
|
|
2827
|
+
def tip_labels(argv=None):
    """Run ``Phykit.tip_labels`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.tip_labels(sys.argv[1:] if argv is None else argv)
|
|
2829
|
+
|
|
2830
|
+
|
|
2831
|
+
def tip_to_tip_distance(argv=None):
    """Run ``Phykit.tip_to_tip_distance`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.tip_to_tip_distance(sys.argv[1:] if argv is None else argv)
|
|
2833
|
+
|
|
2834
|
+
|
|
2835
|
+
def tip_to_tip_node_distance(argv=None):
    """Run ``Phykit.tip_to_tip_node_distance`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.tip_to_tip_node_distance(sys.argv[1:] if argv is None else argv)
|
|
2837
|
+
|
|
2838
|
+
|
|
2839
|
+
def total_tree_length(argv=None):
    """Run ``Phykit.total_tree_length`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.total_tree_length(sys.argv[1:] if argv is None else argv)
|
|
2841
|
+
|
|
2842
|
+
|
|
2843
|
+
def treeness(argv=None):
    """Run ``Phykit.treeness`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.treeness(sys.argv[1:] if argv is None else argv)
|
|
2845
|
+
|
|
2846
|
+
|
|
2847
|
+
# Alignment- and tree-based functions
|
|
2848
|
+
def saturation(argv=None):
    """Run ``Phykit.saturation`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.saturation(sys.argv[1:] if argv is None else argv)
|
|
2850
|
+
|
|
2851
|
+
|
|
2852
|
+
def treeness_over_rcv(argv=None):
    """Run ``Phykit.treeness_over_rcv`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.treeness_over_rcv(sys.argv[1:] if argv is None else argv)
|
|
2854
|
+
|
|
2855
|
+
|
|
2856
|
+
# Helper functions
|
|
2857
|
+
def create_concatenation_matrix(argv=None):
    """Run ``Phykit.create_concatenation_matrix`` on ``argv`` or ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.create_concatenation_matrix(sys.argv[1:] if argv is None else argv)
|
|
2859
|
+
|
|
2860
|
+
|
|
2861
|
+
def thread_dna(argv=None):
    """Run ``Phykit.thread_dna`` on ``argv``, defaulting to ``sys.argv[1:]``."""
    # Fall back to the process arguments only when no explicit list is given.
    Phykit.thread_dna(sys.argv[1:] if argv is None else argv)
|