phykit 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. phykit/__init__.py +0 -0
  2. phykit/__main__.py +6 -0
  3. phykit/helpers/__init__.py +0 -0
  4. phykit/helpers/boolean_argument_parsing.py +12 -0
  5. phykit/helpers/caching.py +201 -0
  6. phykit/helpers/files.py +125 -0
  7. phykit/helpers/parallel.py +305 -0
  8. phykit/helpers/stats_summary.py +64 -0
  9. phykit/helpers/streaming.py +152 -0
  10. phykit/phykit.py +2862 -0
  11. phykit/services/__init__.py +0 -0
  12. phykit/services/alignment/__init__.py +17 -0
  13. phykit/services/alignment/alignment_length.py +16 -0
  14. phykit/services/alignment/alignment_length_no_gaps.py +69 -0
  15. phykit/services/alignment/alignment_recoding.py +89 -0
  16. phykit/services/alignment/base.py +103 -0
  17. phykit/services/alignment/column_score.py +66 -0
  18. phykit/services/alignment/compositional_bias_per_site.py +98 -0
  19. phykit/services/alignment/create_concatenation_matrix.py +254 -0
  20. phykit/services/alignment/dna_threader.py +145 -0
  21. phykit/services/alignment/evolutionary_rate_per_site.py +85 -0
  22. phykit/services/alignment/faidx.py +21 -0
  23. phykit/services/alignment/gc_content.py +94 -0
  24. phykit/services/alignment/pairwise_identity.py +159 -0
  25. phykit/services/alignment/parsimony_informative_sites.py +81 -0
  26. phykit/services/alignment/rcv.py +14 -0
  27. phykit/services/alignment/rcvt.py +47 -0
  28. phykit/services/alignment/rename_fasta_entries.py +53 -0
  29. phykit/services/alignment/sum_of_pairs_score.py +157 -0
  30. phykit/services/alignment/variable_sites.py +54 -0
  31. phykit/services/base.py +9 -0
  32. phykit/services/tree/__init__.py +29 -0
  33. phykit/services/tree/base.py +178 -0
  34. phykit/services/tree/bipartition_support_stats.py +48 -0
  35. phykit/services/tree/branch_length_multiplier.py +37 -0
  36. phykit/services/tree/collapse_branches.py +27 -0
  37. phykit/services/tree/covarying_evolutionary_rates.py +272 -0
  38. phykit/services/tree/dvmc.py +37 -0
  39. phykit/services/tree/evolutionary_rate.py +17 -0
  40. phykit/services/tree/hidden_paralogy_check.py +128 -0
  41. phykit/services/tree/internal_branch_stats.py +77 -0
  42. phykit/services/tree/internode_labeler.py +33 -0
  43. phykit/services/tree/last_common_ancestor_subtree.py +35 -0
  44. phykit/services/tree/lb_score.py +196 -0
  45. phykit/services/tree/monophyly_check.py +106 -0
  46. phykit/services/tree/nearest_neighbor_interchange.py +140 -0
  47. phykit/services/tree/patristic_distances.py +113 -0
  48. phykit/services/tree/polytomy_test.py +546 -0
  49. phykit/services/tree/print_tree.py +28 -0
  50. phykit/services/tree/prune_tree.py +40 -0
  51. phykit/services/tree/rename_tree_tips.py +64 -0
  52. phykit/services/tree/rf_distance.py +136 -0
  53. phykit/services/tree/root_tree.py +35 -0
  54. phykit/services/tree/saturation.py +209 -0
  55. phykit/services/tree/spurious_sequence.py +75 -0
  56. phykit/services/tree/terminal_branch_stats.py +87 -0
  57. phykit/services/tree/tip_labels.py +18 -0
  58. phykit/services/tree/tip_to_tip_distance.py +41 -0
  59. phykit/services/tree/tip_to_tip_node_distance.py +41 -0
  60. phykit/services/tree/total_tree_length.py +25 -0
  61. phykit/services/tree/treeness.py +16 -0
  62. phykit/services/tree/treeness_over_rcv.py +40 -0
  63. phykit/version.py +1 -0
  64. phykit-2.1.0.dist-info/METADATA +150 -0
  65. phykit-2.1.0.dist-info/RECORD +69 -0
  66. phykit-2.1.0.dist-info/WHEEL +5 -0
  67. phykit-2.1.0.dist-info/entry_points.txt +121 -0
  68. phykit-2.1.0.dist-info/licenses/LICENSE.md +7 -0
  69. phykit-2.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,136 @@
1
+ from typing import Dict, List, Set, Tuple
2
+ from functools import lru_cache
3
+ from concurrent.futures import ProcessPoolExecutor
4
+ import pickle
5
+
6
+ from Bio.Phylo import Newick
7
+
8
+ from .base import Tree
9
+
10
+
11
class RobinsonFouldsDistance(Tree):
    """Report the Robinson-Foulds (RF) distance between two trees.

    Both trees are pruned to the tips they share and rooted on the same tip
    before their clades are compared. ``run`` prints the plain RF distance
    and the distance normalised by ``2 * (n - 3)`` (``n`` = number of tips
    in the first tree after pruning), separated by a tab.
    """

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self):
        """Read both trees, reduce them to shared tips and print the distances."""
        tree_zero = self.read_tree_file()
        tree_one = self.read_tree1_file()

        # get shared tree tip names - sets make the intersection/difference cheap
        tree_zero_tips = set(self.get_tip_names_from_tree(tree_zero))
        tree_one_tips = set(self.get_tip_names_from_tree(tree_one))
        shared_tree_tips = tree_zero_tips & tree_one_tips

        # prune each tree down to the common set of tips
        tree_zero_tips_to_prune = list(tree_zero_tips - shared_tree_tips)
        tree_one_tips_to_prune = list(tree_one_tips - shared_tree_tips)

        if tree_zero_tips_to_prune:
            tree_zero = self.prune_tree_using_taxa_list(
                tree_zero, tree_zero_tips_to_prune
            )
        if tree_one_tips_to_prune:
            tree_one = self.prune_tree_using_taxa_list(
                tree_one, tree_one_tips_to_prune
            )

        # Root both trees on the same tip so their clades are comparable
        tip_for_rooting = tree_zero.get_terminals()[0].name
        tree_zero.root_with_outgroup(tip_for_rooting)
        tree_one.root_with_outgroup(tip_for_rooting)

        plain_rf, normalized_rf = self.calculate_robinson_foulds_distance(
            tree_zero, tree_one
        )

        print(f"{plain_rf}\t{round(normalized_rf, 4)}")

    def process_args(self, args) -> Dict[str, str]:
        """Map the parsed CLI arguments onto the base-class keyword arguments."""
        return dict(
            tree_file_path=args.tree_zero,
            tree1_file_path=args.tree_one,
        )

    @staticmethod
    def _normalize_rf(plain_rf: int, tip_count: int) -> float:
        """Return ``plain_rf / (2 * (tip_count - 3))``, guarding tiny trees.

        With three or fewer tips the denominator is zero or negative, so the
        normalised value is undefined; 0.0 is returned instead of raising
        ZeroDivisionError or producing a negative distance.
        """
        max_rf = 2 * (tip_count - 3)
        if max_rf <= 0:
            return 0.0
        return plain_rf / max_rf

    def calculate_robinson_foulds_distance(self, tree_zero, tree_one):
        """Return ``(plain_rf, normalized_rf)`` for two trees on the same tips.

        The comparison is run in both directions (clades of ``tree_zero``
        looked up in ``tree_one`` and vice versa) and the mismatches summed.
        """
        plain_rf = 0
        plain_rf = self.compare_trees_optimized(plain_rf, tree_zero, tree_one)
        plain_rf = self.compare_trees_optimized(plain_rf, tree_one, tree_zero)

        tip_count = tree_zero.count_terminals()
        normalized_rf = self._normalize_rf(plain_rf, tip_count)

        return plain_rf, normalized_rf

    def compare_trees_optimized(
        self,
        plain_rf: int,
        tree_zero: Newick.Tree,
        tree_one: Newick.Tree
    ) -> int:
        """Add to ``plain_rf`` the clades of ``tree_zero`` absent from ``tree_one``.

        For every non-root internal clade of ``tree_zero``, the common
        ancestor of its tips is looked up in ``tree_one``; if that ancestor
        spans a different tip set the counter is incremented. The updated
        counter is returned.
        """
        # Tip sets keyed by id(clade); the clades stay alive inside their
        # trees for the duration of this call, so the ids are stable.
        tip_names_cache = {}

        def get_cached_tips(clade):
            clade_id = id(clade)
            if clade_id not in tip_names_cache:
                tip_names_cache[clade_id] = frozenset(
                    self.get_tip_names_from_tree(clade)
                )
            return tip_names_cache[clade_id]

        # [1:] skips the first non-terminal returned, i.e. the root clade
        for clade_zero in tree_zero.get_nonterminals()[1:]:
            tip_names_zero = get_cached_tips(clade_zero)
            # common ancestor of the same tips in the other tree
            clade_one = tree_one.common_ancestor(list(tip_names_zero))
            tip_names_one = get_cached_tips(clade_one)
            if tip_names_zero != tip_names_one:
                plain_rf += 1

        return plain_rf

    @staticmethod
    def _calculate_rf_batch(tree_pairs_pickle):
        """Calculate RF distance for a batch of tree pairs in parallel.

        ``tree_pairs_pickle`` is the pickled list of ``(tree_zero, tree_one)``
        pairs produced by ``calculate_multiple_rf_distances``; it must only
        ever come from this process, never from untrusted input.

        NOTE(review): this path counts the symmetric difference of
        ``get_all_bipartitions`` (not defined in this class - presumably
        provided by the ``Tree`` base; confirm), whereas the sequential path
        uses ``compare_trees_optimized``. Verify both give the same counts.
        """
        tree_pairs = pickle.loads(tree_pairs_pickle)
        results = []

        for tree_zero, tree_one in tree_pairs:
            # Build an instance without running __init__ (no CLI args here)
            rf_calc = RobinsonFouldsDistance.__new__(RobinsonFouldsDistance)
            rf_calc.__dict__.update({'tree_format': 'newick'})

            bipartitions_zero = rf_calc.get_all_bipartitions(tree_zero)
            bipartitions_one = rf_calc.get_all_bipartitions(tree_one)

            # Symmetric difference = bipartitions present in only one tree
            plain_rf = len(bipartitions_zero ^ bipartitions_one)
            tip_count = tree_zero.count_terminals()
            normalized_rf = RobinsonFouldsDistance._normalize_rf(
                plain_rf, tip_count
            )

            results.append((plain_rf, normalized_rf))

        return results

    def calculate_multiple_rf_distances(self, tree_pairs: List[Tuple]) -> List[Tuple[int, float]]:
        """Calculate RF distances for multiple tree pairs in parallel.

        Fewer than five pairs are processed sequentially; otherwise the pairs
        are split into batches and handed to at most four worker processes.
        Results are returned in the same order as ``tree_pairs``.
        """
        if len(tree_pairs) < 5:
            # Sequential for small datasets
            return [
                self.calculate_robinson_foulds_distance(tree_zero, tree_one)
                for tree_zero, tree_one in tree_pairs
            ]

        # Parallel processing for larger datasets
        batch_size = max(2, len(tree_pairs) // 4)
        batches = [
            tree_pairs[i:i + batch_size]
            for i in range(0, len(tree_pairs), batch_size)
        ]

        with ProcessPoolExecutor(max_workers=min(4, len(batches))) as executor:
            futures = [
                executor.submit(self._calculate_rf_batch, pickle.dumps(batch))
                for batch in batches
            ]

            # Collect in submission order so results line up with the input
            all_results = []
            for future in futures:
                all_results.extend(future.result())

        return all_results
@@ -0,0 +1,35 @@
1
+ import copy
2
+ from Bio import Phylo
3
+
4
+ from .base import Tree
5
+
6
+ from ...helpers.files import read_single_column_file_to_list
7
+
8
+
9
class RootTree(Tree):
    """Root a tree on the outgroup taxa listed in a file and write it out."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self):
        """Root a copy of the input tree and write it to the output path."""
        # Work on a deep copy to avoid modifying the cached tree
        rooted = copy.deepcopy(self.read_tree_file())

        outgroup_taxa = read_single_column_file_to_list(
            self.outgroup_taxa_file_path
        )

        Phylo.BaseTree.Tree.root_with_outgroup(rooted, outgroup_taxa)

        self.write_tree_file(rooted, self.output_file_path)

    def process_args(self, args):
        """Translate CLI arguments into base-class keyword arguments.

        When no output path is given, the input tree path with a
        ``.rooted`` suffix is used.
        """
        source_path = args.tree
        destination_path = args.output or f"{source_path}.rooted"

        return {
            "tree_file_path": source_path,
            "outgroup_taxa_file_path": args.root,
            "output_file_path": destination_path,
        }
@@ -0,0 +1,209 @@
1
+ from enum import Enum
2
+ import itertools
3
+ import sys
4
+ from typing import Dict, List, Tuple
5
+ import multiprocessing as mp
6
+ from functools import partial
7
+
8
+ from Bio import Align
9
+ from Bio.Phylo import Newick
10
+ import numpy as np
11
+ from sklearn.linear_model import LinearRegression
12
+
13
+ from .base import Tree
14
+ from ...helpers.files import (
15
+ get_alignment_and_format as get_alignment_and_format_helper
16
+ )
17
+
18
+
19
class FileFormat(Enum):
    """Alignment file formats, keyed by short name.

    Each member's value is the format string it stands for.
    """

    fasta = "fasta"
    clustal = "clustal"
    maf = "maf"
    mauve = "mauve"
    # PHYLIP and its sequential / relaxed variants
    phylip = "phylip"
    phylip_seq = "phylip-sequential"
    phylip_rel = "phylip-relaxed"
    stockholm = "stockholm"
28
+
29
+
30
class Saturation(Tree):
    """Estimate saturation from patristic vs. uncorrected pairwise distances.

    For every pair of tips the patristic distance (from the tree) and the
    uncorrected distance (1 - identity, from the alignment) are collected and
    a zero-intercept linear regression of uncorrected on patristic distance
    is fitted. The slope and ``|1 - slope|`` are printed, or the per-pair
    values in verbose mode.
    """

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self) -> None:
        """Compute all pairwise distances, fit the regression and print results."""
        alignment, _, _ = get_alignment_and_format_helper(
            self.alignment_file_path
        )

        tree = self.read_tree_file()

        tips = self.get_tip_names_from_tree(tree)
        combos = list(itertools.combinations(tips, 2))

        (
            patristic_distances,
            uncorrected_distances,
        ) = self.loop_through_combos_and_calculate_pds_and_pis(
            combos, alignment, tree, self.exclude_gaps
        )

        slope = self._fit_zero_intercept_slope(
            patristic_distances, uncorrected_distances
        )

        self.print_res(
            self.verbose, combos, uncorrected_distances, patristic_distances, slope
        )

    def process_args(self, args) -> Dict[str, str]:
        """Map the parsed CLI arguments onto the base-class keyword arguments."""
        return dict(
            tree_file_path=args.tree,
            alignment_file_path=args.alignment,
            exclude_gaps=args.exclude_gaps,
            verbose=args.verbose,
        )

    @staticmethod
    def _fit_zero_intercept_slope(patristic_distances, uncorrected_distances):
        """Return the slope of uncorrected ~ patristic with the intercept at zero.

        Fitting the y-intercept to zero follows Jeffroy et al.
        See fig 2 https://www.cell.com/trends/genetics/fulltext/S0168-9525(06)00051-5

        Pairs whose distance is not finite (e.g. the NaN produced when two
        sequences share no gap-free column) are left out of the fit, because
        the regression cannot be fitted on NaN input. If no usable pair
        remains, NaN is returned instead of raising.
        """
        pds = np.array(patristic_distances, dtype=float)
        uds = np.array(uncorrected_distances, dtype=float)
        usable = np.isfinite(pds) & np.isfinite(uds)

        if not np.any(usable):
            return float('nan')

        model = LinearRegression(fit_intercept=False)
        model.fit(pds[usable].reshape(-1, 1), uds[usable])
        return model.coef_[0]

    @staticmethod
    def _uncorrected_distance(seq_arrays, gap_mask, exclude_gaps, combo):
        """Return 1 - identity for the two sequences named in ``combo``.

        With ``exclude_gaps`` only columns where neither sequence has a gap
        character (per the pre-computed ``gap_mask``) are compared; if no such
        column exists the result is NaN.
        """
        seq1_arr = seq_arrays[combo[0]]
        seq2_arr = seq_arrays[combo[1]]

        if not exclude_gaps:
            identities = np.sum(seq1_arr == seq2_arr)
            return 1 - (identities / len(seq1_arr))

        valid_positions = ~(gap_mask[combo[0]] | gap_mask[combo[1]])
        if not np.any(valid_positions):
            return float('nan')

        identities = np.sum(
            seq1_arr[valid_positions] == seq2_arr[valid_positions]
        )
        adjusted_len = np.sum(valid_positions)
        return 1 - (identities / adjusted_len)

    def _process_combo_batch(self, tree, seq_arrays, gap_mask, exclude_gaps, combo_batch):
        """Process a batch of combinations in parallel.

        Returns a list of ``(patristic_distance, uncorrected_distance)``
        tuples, one per pair in ``combo_batch`` and in the same order.
        """
        return [
            (
                tree.distance(combo[0], combo[1]),
                self._uncorrected_distance(
                    seq_arrays, gap_mask, exclude_gaps, combo
                ),
            )
            for combo in combo_batch
        ]

    def loop_through_combos_and_calculate_pds_and_pis(
        self,
        combos: List[Tuple[str, str]],
        alignment: Align.MultipleSeqAlignment,
        tree: Newick.Tree,
        exclude_gaps: bool,
    ) -> Tuple[
        List[float],
        List[float]
    ]:
        """
        loop through all taxon combinations and determine
        their patristic distance and pairwise identity

        Returns two lists aligned with ``combos``: the patristic distances
        and the uncorrected distances. Fewer than 50 pairs are processed in
        this process; larger inputs are chunked across a process pool.
        """
        gap_chars = self.get_gap_chars()

        # Convert sequences to upper-cased numpy arrays for vectorized
        # comparison; gap masks are only needed when gaps are excluded
        seq_arrays = {}
        gap_mask = {}
        for record in alignment:
            seq_arr = np.array([c.upper() for c in str(record.seq)], dtype='U1')
            seq_arrays[record.name] = seq_arr
            if exclude_gaps:
                gap_mask[record.name] = np.isin(seq_arr, list(gap_chars))

        if len(combos) < 50:
            # For small datasets, process sequentially
            pairs = self._process_combo_batch(
                tree, seq_arrays, gap_mask, exclude_gaps, combos
            )
        else:
            # Use multiprocessing for larger datasets
            num_workers = min(mp.cpu_count(), 8)
            chunk_size = max(1, len(combos) // (num_workers * 4))
            combo_chunks = [
                combos[i:i + chunk_size]
                for i in range(0, len(combos), chunk_size)
            ]

            process_func = partial(
                self._process_combo_batch,
                tree,
                seq_arrays,
                gap_mask,
                exclude_gaps
            )

            with mp.Pool(processes=num_workers) as pool:
                chunk_results = pool.map(process_func, combo_chunks)

            # pool.map preserves chunk order, so flattening keeps pair order
            pairs = [pair for chunk in chunk_results for pair in chunk]

        patristic_distances = [pd for pd, _ in pairs]
        uncorrected_distances = [ud for _, ud in pairs]

        return patristic_distances, uncorrected_distances

    def print_res(
        self,
        verbose: bool,
        combos: List[str],
        uncorrected_distances: List[float],
        patristic_distances: List[float],
        slope: float,
    ) -> None:
        """
        print results to stdout

        Verbose mode prints one tab-separated line per pair (both tip names,
        uncorrected distance, patristic distance); otherwise the slope and
        ``|1 - slope|`` are printed. A closed output pipe is ignored.
        """
        try:
            if verbose:
                for cbo, dist, pd in zip(
                    combos, uncorrected_distances, patristic_distances
                ):
                    print(
                        f"{cbo[0]}\t{cbo[1]}\t{round(dist,4)}\t{round(pd, 4)}"
                    )
            else:
                print(f"{round(slope, 4)}\t{abs(round(1-slope, 4))}")
        except BrokenPipeError:
            pass
@@ -0,0 +1,75 @@
1
+ import statistics as stat
2
+ from typing import Dict, List, Tuple
3
+
4
+ from Bio.Phylo import Newick
5
+
6
+ from .base import Tree
7
+
8
+
9
class SpuriousSequence(Tree):
    """Flag tips whose terminal branch is unusually long.

    A tip is reported when its terminal branch length is at least
    ``factor`` times the median terminal branch length.
    """

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self) -> None:
        """Print one line per flagged tip, or ``None`` when nothing is flagged.

        Each line holds the tip name, its branch length, the threshold and
        the median, tab-separated and rounded to four decimals.
        """
        tree = self.read_tree_file()
        name_and_branch_len, threshold, median = \
            self.identify_spurious_sequence(
                tree, self.factor
            )

        flagged = [
            (name, length)
            for name, length in name_and_branch_len.items()
            if length >= threshold
        ]

        # One handler around all output: once the pipe is closed there is
        # no point in attempting further prints.
        try:
            if not flagged:
                print("None")
            for name, length in flagged:
                print(
                    f"{name}\t{round(length, 4)}\t{round(threshold, 4)}\t{round(median, 4)}"
                )
        except BrokenPipeError:
            pass

    def process_args(self, args) -> Dict[str, str]:
        """Map CLI arguments to keyword arguments; ``factor`` falls back to 20."""
        return dict(
            tree_file_path=args.tree,
            factor=args.factor or 20
        )

    def identify_spurious_sequence(
        self,
        tree: Newick.Tree,
        factor: float,
    ) -> Tuple[
        Dict[str, float],
        float,
        float
    ]:
        """Return ``(name -> branch length, threshold, median)`` for ``tree``.

        ``threshold`` is the median terminal branch length multiplied by
        ``factor``. If no terminal has a branch length the median is
        undefined, so a message is printed and the process exits with
        status 2.
        """
        branch_lengths, name_and_branch_len = \
            self.get_branch_lengths_and_their_names(tree)

        if not branch_lengths:
            # Local import: this module does not import sys at file level.
            import sys

            print(
                "Identifying spurious sequences requires a phylogeny with branch lengths."
            )
            sys.exit(2)

        median = stat.median(branch_lengths)

        threshold = median * factor

        return name_and_branch_len, threshold, median

    def get_branch_lengths_and_their_names(
        self,
        tree: Newick.Tree,
    ) -> Tuple[
        List[float],
        Dict[str, float],
    ]:
        """Collect terminal branch lengths as a list and as a name -> length dict.

        Only terminal branches are considered (internal branches are not used
        for spurious sequence detection); terminals without a branch length
        are skipped.
        """
        name_and_branch_len = {
            terminal.name: terminal.branch_length
            for terminal in tree.get_terminals()
            if terminal.branch_length is not None
        }
        # Built from the terminals rather than the dict so that tips sharing
        # a name still each contribute to the median.
        branch_lengths = [
            terminal.branch_length
            for terminal in tree.get_terminals()
            if terminal.branch_length is not None
        ]

        return branch_lengths, name_and_branch_len
@@ -0,0 +1,87 @@
1
+ import sys
2
+ from typing import Dict, List, Tuple, Union
3
+
4
+ from Bio.Phylo import Newick
5
+
6
+ from .base import Tree
7
+
8
+ from ...helpers.stats_summary import (
9
+ calculate_summary_statistics_from_arr,
10
+ print_summary_statistics,
11
+ )
12
+
13
+
14
class TerminalBranchStats(Tree):
    """Summarise the terminal branch lengths of a tree."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self):
        """Print per-tip branch lengths (verbose) or their summary statistics."""
        tree = self.read_tree_file()
        _, stats, lengths_and_names = \
            self.calculate_terminal_branch_stats(tree)

        if self.verbose:
            try:
                for length, name in lengths_and_names:
                    print(round(length, 4), name)
            except BrokenPipeError:
                pass
        else:
            print_summary_statistics(stats)

    def process_args(self, args) -> Dict[str, str]:
        """Map the parsed CLI arguments onto the base-class keyword arguments."""
        return dict(tree_file_path=args.tree, verbose=args.verbose)

    def get_terminal_branch_lengths(
        self,
        tree: Newick.Tree,
    ) -> Tuple[
        List[float],
        List[List[Union[float, str]]],
    ]:
        """
        loop through tree and get all terminal branch lengths

        Returns a pair: the list of branch lengths, and a parallel list of
        ``[branch_length, tip_name]`` entries. Terminals without a branch
        length are skipped in both.
        """
        lengths_and_names = [
            [terminal_branch.branch_length, terminal_branch.name]
            for terminal_branch in tree.get_terminals()
            if terminal_branch.branch_length is not None
        ]
        terminal_branch_lengths = [entry[0] for entry in lengths_and_names]

        return terminal_branch_lengths, lengths_and_names

    def check_tree_has_branch_lengths(
        self,
        terminal_branch_lengths: List[float],
    ) -> None:
        """
        if tree has no branch lengths, exit

        Prints an explanatory message and exits with status 2 when
        ``terminal_branch_lengths`` is empty.
        """
        if len(terminal_branch_lengths) == 0:
            print(
                "Calculating terminal branch statistics requires a phylogeny with branch lengths."
            )
            sys.exit(2)

    def calculate_terminal_branch_stats(
        self,
        tree: Newick.Tree,
    ) -> Tuple[
        List[float],
        Dict[str, float],
        List[List[Union[float, str]]],
    ]:
        """Return ``(branch lengths, summary statistics, [length, name] entries)``.

        Exits (via ``check_tree_has_branch_lengths``) when the tree carries
        no terminal branch lengths.
        """
        terminal_branch_lengths, lengths_and_names = \
            self.get_terminal_branch_lengths(tree)

        self.check_tree_has_branch_lengths(terminal_branch_lengths)

        stats = calculate_summary_statistics_from_arr(terminal_branch_lengths)

        return terminal_branch_lengths, stats, lengths_and_names
@@ -0,0 +1,18 @@
1
+ from typing import Dict
2
+
3
+ from .base import Tree
4
+
5
+
6
class TipLabels(Tree):
    """Print the name of every tip in a tree, one per line."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self) -> None:
        """Write all tip names to stdout as a single newline-joined block."""
        terminals = self.read_tree_file().get_terminals()
        labels = "\n".join(terminal.name for terminal in terminals)
        try:
            print(labels)
        except BrokenPipeError:
            # Downstream reader closed the pipe; nothing more to do
            pass

    def process_args(self, args) -> Dict[str, str]:
        """Map the parsed CLI arguments onto the base-class keyword arguments."""
        return {"tree_file_path": args.tree}
@@ -0,0 +1,41 @@
1
+ import sys
2
+ from typing import Dict
3
+
4
+ from Bio.Phylo.BaseTree import TreeMixin
5
+ from Bio.Phylo import Newick
6
+
7
+ from .base import Tree
8
+
9
+
10
class TipToTipDistance(Tree):
    """Print the branch-length distance between two tips of a tree."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self):
        """Validate both tips, then print their distance rounded to 4 decimals."""
        tree_zero = self.read_tree_file()

        self.check_leaves(tree_zero, self.tip_1, self.tip_2)

        distance = TreeMixin.distance(tree_zero, self.tip_1, self.tip_2)
        print(round(distance, 4))

    def check_leaves(
        self,
        tree_zero: Newick.Tree,
        tip_1: str,
        tip_2: str,
    ) -> None:
        """Exit with status 2 if either tip cannot be found on the tree.

        ``tip_1`` is checked first; the first missing tip is reported.
        """
        for tip in (tip_1, tip_2):
            if not TreeMixin.find_any(tree_zero, tip):
                print(tip, "not on tree\nExiting...")
                sys.exit(2)

    def process_args(self, args) -> Dict[str, str]:
        """Map the parsed CLI arguments onto the base-class keyword arguments."""
        return {
            "tree_file_path": args.tree_zero,
            "tip_1": args.tip_1,
            "tip_2": args.tip_2,
        }
@@ -0,0 +1,41 @@
1
+ import sys
2
+ from typing import Dict
3
+
4
+ from Bio.Phylo.BaseTree import TreeMixin
5
+ from Bio.Phylo import Newick
6
+
7
+ from .base import Tree
8
+
9
+
10
class TipToTipNodeDistance(Tree):
    """Print the length of the node path traced between two tips."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self) -> None:
        """Validate both tips, then print ``len`` of the trace between them."""
        tree_zero = self.read_tree_file()

        self.check_leaves(tree_zero, self.tip_1, self.tip_2)

        path = TreeMixin.trace(tree_zero, self.tip_1, self.tip_2)
        print(len(path))

    def check_leaves(
        self,
        tree_zero: Newick.Tree,
        tip_1: str,
        tip_2: str,
    ) -> None:
        """Exit with status 2 if either tip cannot be found on the tree.

        ``tip_1`` is checked first; the first missing tip is reported.
        """
        for tip in (tip_1, tip_2):
            if not TreeMixin.find_any(tree_zero, tip):
                print(tip, "not on tree\nExiting...")
                sys.exit(2)

    def process_args(self, args) -> Dict[str, str]:
        """Map the parsed CLI arguments onto the base-class keyword arguments."""
        return {
            "tree_file_path": args.tree_zero,
            "tip_1": args.tip_1,
            "tip_2": args.tip_2,
        }
@@ -0,0 +1,25 @@
1
+ from typing import Dict, Union
2
+
3
+ from Bio.Phylo import Newick
4
+ from .base import Tree
5
+
6
+
7
class TotalTreeLength(Tree):
    """Print the sum of all branch lengths in a tree."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self) -> None:
        """Print the total tree length rounded to four decimals."""
        total = self.calculate_total_tree_length(self.read_tree_file())
        print(round(total, 4))

    def process_args(self, args) -> Dict[str, str]:
        """Map the parsed CLI arguments onto the base-class keyword arguments."""
        return {"tree_file_path": args.tree}

    def calculate_total_tree_length(
        self,
        tree: Newick.Tree
    ) -> Union[int, float]:
        """Return ``tree.total_branch_length()`` when it is an int or float.

        Any other result type yields ``None``.
        """
        total_len = tree.total_branch_length()

        if not isinstance(total_len, (int, float)):
            return None
        return total_len
@@ -0,0 +1,16 @@
1
+ from typing import Dict
2
+
3
+ from .base import Tree
4
+
5
+
6
class Treeness(Tree):
    """Print the treeness value of a tree."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self) -> None:
        """Compute treeness via ``calculate_treeness`` and print it to 4 decimals."""
        value = self.calculate_treeness(self.read_tree_file())
        print(round(value, 4))

    def process_args(self, args) -> Dict[str, str]:
        """Map the parsed CLI arguments onto the base-class keyword arguments."""
        return {"tree_file_path": args.tree}