phykit 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. phykit/__init__.py +0 -0
  2. phykit/__main__.py +6 -0
  3. phykit/helpers/__init__.py +0 -0
  4. phykit/helpers/boolean_argument_parsing.py +12 -0
  5. phykit/helpers/caching.py +201 -0
  6. phykit/helpers/files.py +125 -0
  7. phykit/helpers/parallel.py +305 -0
  8. phykit/helpers/stats_summary.py +64 -0
  9. phykit/helpers/streaming.py +152 -0
  10. phykit/phykit.py +2862 -0
  11. phykit/services/__init__.py +0 -0
  12. phykit/services/alignment/__init__.py +17 -0
  13. phykit/services/alignment/alignment_length.py +16 -0
  14. phykit/services/alignment/alignment_length_no_gaps.py +69 -0
  15. phykit/services/alignment/alignment_recoding.py +89 -0
  16. phykit/services/alignment/base.py +103 -0
  17. phykit/services/alignment/column_score.py +66 -0
  18. phykit/services/alignment/compositional_bias_per_site.py +98 -0
  19. phykit/services/alignment/create_concatenation_matrix.py +254 -0
  20. phykit/services/alignment/dna_threader.py +145 -0
  21. phykit/services/alignment/evolutionary_rate_per_site.py +85 -0
  22. phykit/services/alignment/faidx.py +21 -0
  23. phykit/services/alignment/gc_content.py +94 -0
  24. phykit/services/alignment/pairwise_identity.py +159 -0
  25. phykit/services/alignment/parsimony_informative_sites.py +81 -0
  26. phykit/services/alignment/rcv.py +14 -0
  27. phykit/services/alignment/rcvt.py +47 -0
  28. phykit/services/alignment/rename_fasta_entries.py +53 -0
  29. phykit/services/alignment/sum_of_pairs_score.py +157 -0
  30. phykit/services/alignment/variable_sites.py +54 -0
  31. phykit/services/base.py +9 -0
  32. phykit/services/tree/__init__.py +29 -0
  33. phykit/services/tree/base.py +178 -0
  34. phykit/services/tree/bipartition_support_stats.py +48 -0
  35. phykit/services/tree/branch_length_multiplier.py +37 -0
  36. phykit/services/tree/collapse_branches.py +27 -0
  37. phykit/services/tree/covarying_evolutionary_rates.py +272 -0
  38. phykit/services/tree/dvmc.py +37 -0
  39. phykit/services/tree/evolutionary_rate.py +17 -0
  40. phykit/services/tree/hidden_paralogy_check.py +128 -0
  41. phykit/services/tree/internal_branch_stats.py +77 -0
  42. phykit/services/tree/internode_labeler.py +33 -0
  43. phykit/services/tree/last_common_ancestor_subtree.py +35 -0
  44. phykit/services/tree/lb_score.py +196 -0
  45. phykit/services/tree/monophyly_check.py +106 -0
  46. phykit/services/tree/nearest_neighbor_interchange.py +140 -0
  47. phykit/services/tree/patristic_distances.py +113 -0
  48. phykit/services/tree/polytomy_test.py +546 -0
  49. phykit/services/tree/print_tree.py +28 -0
  50. phykit/services/tree/prune_tree.py +40 -0
  51. phykit/services/tree/rename_tree_tips.py +64 -0
  52. phykit/services/tree/rf_distance.py +136 -0
  53. phykit/services/tree/root_tree.py +35 -0
  54. phykit/services/tree/saturation.py +209 -0
  55. phykit/services/tree/spurious_sequence.py +75 -0
  56. phykit/services/tree/terminal_branch_stats.py +87 -0
  57. phykit/services/tree/tip_labels.py +18 -0
  58. phykit/services/tree/tip_to_tip_distance.py +41 -0
  59. phykit/services/tree/tip_to_tip_node_distance.py +41 -0
  60. phykit/services/tree/total_tree_length.py +25 -0
  61. phykit/services/tree/treeness.py +16 -0
  62. phykit/services/tree/treeness_over_rcv.py +40 -0
  63. phykit/version.py +1 -0
  64. phykit-2.1.0.dist-info/METADATA +150 -0
  65. phykit-2.1.0.dist-info/RECORD +69 -0
  66. phykit-2.1.0.dist-info/WHEEL +5 -0
  67. phykit-2.1.0.dist-info/entry_points.txt +121 -0
  68. phykit-2.1.0.dist-info/licenses/LICENSE.md +7 -0
  69. phykit-2.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,128 @@
1
+ import sys
2
+ from typing import Dict, List, Union
3
+ from functools import lru_cache
4
+ import multiprocessing as mp
5
+ from functools import partial
6
+
7
+ from Bio import Phylo
8
+
9
+ from .base import Tree
10
+
11
+
12
class HiddenParalogyCheck(Tree):
    """Check whether each clade listed in a clade file is monophyletic.

    Every line of the clade file is a whitespace-separated list of tip
    names. For each line one status string is printed:
    ``monophyletic``, ``not_monophyletic``,
    ``insufficient_taxon_representation`` or ``processing_error``.

    NOTE(review): ``self.tree_file_path`` and ``self.clade`` are assumed to
    be set by ``Tree.__init__`` from the keyword arguments built in
    ``process_args`` -- confirm against the base class.
    """

    # Below this many clades the work is done in-process; spawning a pool
    # is not worth the overhead.
    _PARALLEL_THRESHOLD = 10
    # Upper bound on worker processes regardless of CPU count.
    _MAX_WORKERS = 8

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    @staticmethod
    def _process_clade_batch(clade_batch, tree_file_path, master_tree_tips):
        """Evaluate monophyly for every clade in ``clade_batch``.

        This is a static method so it can be shipped to worker processes,
        and it is also used directly for the sequential path so that both
        paths classify clades (and handle failures) identically.

        Args:
            clade_batch: iterable of clades, each an iterable of tip names.
            tree_file_path: path of the Newick tree to evaluate against.
            master_tree_tips: set of all tip names present in the tree.

        Returns:
            One result list per clade, in input order. The first element
            is the status string; for (non-)monophyletic results a second
            element lists the tips that differ between the requested clade
            and the tips under its common ancestor.
        """
        batch_results = []

        for clade in clade_batch:
            clade_of_interest = set(clade).intersection(master_tree_tips)

            if len(clade_of_interest) <= 1:
                batch_results.append(["insufficient_taxon_representation"])
                continue

            # Rooting mutates the tree, so each evaluated clade gets a
            # freshly parsed copy. Parsing is deferred until after the
            # representation check so rejected clades cost nothing.
            tree = Phylo.read(tree_file_path, "newick")
            diff_tips = master_tree_tips - clade_of_interest

            try:
                tree.root_with_outgroup(list(diff_tips))
                subtree = tree.common_ancestor(clade_of_interest)
                common_ancestor_tips = {
                    tip.name for tip in subtree.get_terminals()
                }
            except (ValueError, AttributeError):
                # Rooting / ancestor lookup failed for this clade.
                batch_results.append(["processing_error"])
                continue

            diff_tips_between_clade_and_curr_tree = \
                clade_of_interest.symmetric_difference(common_ancestor_tips)

            status = (
                "not_monophyletic"
                if diff_tips_between_clade_and_curr_tree
                else "monophyletic"
            )
            batch_results.append(
                [status, list(diff_tips_between_clade_and_curr_tree)]
            )

        return batch_results

    def run(self) -> None:
        """Read the tree and clade file, classify each clade, print results."""
        # The master tree is only read to learn the full set of tip names.
        master_tree = self.read_tree_file()
        master_tree_tips = frozenset(self.get_tip_names_from_tree(master_tree))

        clades = self.read_clades_file(self.clade)

        if len(clades) < self._PARALLEL_THRESHOLD:
            # Small inputs: same worker routine, run in this process.
            res_arr = self._process_clade_batch(
                clades, self.tree_file_path, master_tree_tips
            )
        else:
            num_workers = min(mp.cpu_count(), self._MAX_WORKERS)
            batch_size = max(1, len(clades) // num_workers)

            clade_batches = [
                clades[i:i + batch_size]
                for i in range(0, len(clades), batch_size)
            ]

            process_func = partial(
                self._process_clade_batch,
                tree_file_path=self.tree_file_path,
                master_tree_tips=master_tree_tips,
            )

            # pool.map returns batch results in submission order, so the
            # flattened list lines up with the clade file.
            with mp.Pool(processes=num_workers) as pool:
                batch_results = pool.map(process_func, clade_batches)

            res_arr = []
            for batch_result in batch_results:
                res_arr.extend(batch_result)

        self.print_results(res_arr)

    def process_args(self, args) -> Dict[str, str]:
        """Map parsed CLI arguments onto the keyword arguments for ``Tree``."""
        return dict(
            tree_file_path=args.tree,
            clade=args.clade,
        )

    def read_clades_file(self, clades: str) -> List[List[str]]:
        """Read the clade file; each line becomes a list of tip names.

        Prints a message and exits with status 2 if the file is missing.
        """
        try:
            with open(clades, 'r') as file:
                return [line.split() for line in file]
        except FileNotFoundError:
            print("Clade file not found. Please check the path.")
            sys.exit(2)

    def print_results(self, res_arr: List[List[Union[List, str]]]) -> None:
        """Print the status string (first element) of every result."""
        for res in res_arr:
            print(res[0])
@@ -0,0 +1,77 @@
1
+ import sys
2
+ from typing import (
3
+ Dict,
4
+ List,
5
+ Tuple,
6
+ )
7
+
8
+ from Bio.Phylo import Newick
9
+
10
+ from .base import Tree
11
+
12
+ from ...helpers.stats_summary import calculate_summary_statistics_from_arr, print_summary_statistics
13
+
14
+
15
class InternalBranchStats(Tree):
    """Summarise the branch lengths of a tree's internal (non-terminal) nodes.

    In verbose mode every internal branch is printed with the tips below
    it; otherwise summary statistics over the lengths are printed.
    """

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self):
        """Read the tree and print either per-branch rows or summary stats."""
        phylogeny = self.read_tree_file()
        stats, lengths_and_names = \
            self.calculate_internal_branch_stats(phylogeny)

        if not self.verbose:
            print_summary_statistics(stats)
            return

        try:
            for branch_length, tip_names in lengths_and_names:
                print(round(branch_length, 4), ";".join(tip_names))
        except BrokenPipeError:
            # Output was piped to a consumer that closed early.
            pass

    def process_args(self, args) -> Dict[str, str]:
        """Map parsed CLI arguments onto the keyword arguments for ``Tree``."""
        return {"tree_file_path": args.tree, "verbose": args.verbose}

    def get_internal_branch_lengths(
        self,
        tree: Newick.Tree
    ) -> Tuple[
        List[float],
        List[Tuple[float, List[str]]]
    ]:
        """Collect the lengths of all internal branches that have one.

        Returns:
            A pair ``(lengths, lengths_and_names)`` where ``lengths`` is
            the list of branch lengths and ``lengths_and_names`` pairs each
            length with the names of the terminals under that node.
            Nodes whose ``branch_length`` is ``None`` are skipped.
        """
        lengths_and_names = [
            (node.branch_length, [leaf.name for leaf in node.get_terminals()])
            for node in tree.get_nonterminals()
            if node.branch_length is not None
        ]
        internal_branch_lengths = [
            branch_length for branch_length, _ in lengths_and_names
        ]

        return internal_branch_lengths, lengths_and_names

    def calculate_internal_branch_stats(
        self,
        tree: Newick.Tree
    ) -> Tuple[
        Dict[str, float],
        List[Tuple[float, List[str]]],
    ]:
        """Return summary statistics plus the per-branch length/name pairs.

        Prints a message and exits with status 2 when the tree has no
        internal branch with a length.
        """
        internal_branch_lengths, lengths_and_names = \
            self.get_internal_branch_lengths(tree)

        if len(internal_branch_lengths) == 0:
            print("Calculating internal branch statistics requires a phylogeny with branch lengths.")
            sys.exit(2)

        return (
            calculate_summary_statistics_from_arr(internal_branch_lengths),
            lengths_and_names,
        )
@@ -0,0 +1,33 @@
1
+ from typing import Dict
2
+ import copy
3
+
4
+ from Bio.Phylo import Newick
5
+
6
+ from .base import Tree
7
+
8
+
9
class InternodeLabeler(Tree):
    """Write a copy of the tree whose internal nodes carry numeric labels."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self):
        """Read the tree, label a copy of it, and write the copy out."""
        original = self.read_tree_file()
        # Work on a deep copy so the tree returned by read_tree_file
        # is left untouched.
        labelled = copy.deepcopy(original)
        self.add_labels_to_tree(labelled)
        self.write_tree_file(labelled, self.output_file_path)

    def process_args(self, args) -> Dict[str, str]:
        """Map parsed CLI arguments onto the keyword arguments for ``Tree``.

        When no output path is given, one is derived from the tree path.
        """
        output_file_path = args.output
        if not output_file_path:
            output_file_path = f"{args.tree}.internode_labels.tre"

        return {
            "tree_file_path": args.tree,
            "output_file_path": output_file_path,
        }

    def add_labels_to_tree(
        self,
        tree: Newick.Tree
    ) -> None:
        """Set ``confidence`` of each non-terminal node to 1, 2, 3, ...

        Labels follow the order in which ``get_nonterminals`` yields nodes.
        The tree is modified in place.
        """
        label = 0
        for node in tree.get_nonterminals():
            label += 1
            node.confidence = label
@@ -0,0 +1,35 @@
1
+ import sys
2
+ import copy
3
+ from typing import Dict
4
+
5
+ from .base import Tree
6
+
7
+ from ...helpers.files import read_single_column_file_to_list
8
+
9
+
10
class LastCommonAncestorSubtree(Tree):
    """Extract the subtree rooted at the common ancestor of a list of taxa."""

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self):
        """Read tree and taxa list, then write the common-ancestor subtree.

        Prints a message and exits with status 2 if the taxa list file
        cannot be found.
        """
        source_tree = self.read_tree_file()
        # Operate on a deep copy so the tree returned by read_tree_file
        # is never modified.
        working_tree = copy.deepcopy(source_tree)

        try:
            taxa = read_single_column_file_to_list(self.list_of_taxa)
        except FileNotFoundError:
            print("Taxa list file is not found. Please check pathing.")
            sys.exit(2)

        self.write_tree_file(
            working_tree.common_ancestor(taxa),
            self.output_file_path,
        )

    def process_args(self, args) -> Dict[str, str]:
        """Map parsed CLI arguments onto the keyword arguments for ``Tree``.

        When no output path is given, one is derived from the tree path.
        """
        tree_file_path = args.tree

        output_file_path = args.output
        if not output_file_path:
            output_file_path = f"{tree_file_path}.subtree.tre"

        return {
            "tree_file_path": tree_file_path,
            "output_file_path": output_file_path,
            "list_of_taxa": args.list_of_taxa,
        }
@@ -0,0 +1,196 @@
1
+ import sys
2
+ import itertools
3
+ from typing import Dict, List, Tuple
4
+ from concurrent.futures import ProcessPoolExecutor, as_completed
5
+ import multiprocessing as mp
6
+ import numpy as np
7
+ import pickle
8
+ from functools import lru_cache
9
+ try:
10
+ from tqdm import tqdm
11
+ HAS_TQDM = True
12
+ except ImportError:
13
+ HAS_TQDM = False
14
+
15
+ from Bio.Phylo import Newick
16
+
17
+ from .base import Tree
18
+
19
+ from ...helpers.stats_summary import (
20
+ calculate_summary_statistics_from_arr,
21
+ print_summary_statistics,
22
+ )
23
+
24
+
25
class LBScore(Tree):
    """Compute an LB score for every tip of a tree.

    For each tip the score is ``((PDi / avg_dist) - 1) * 100`` where
    ``PDi`` is the tip's mean distance to the other tips and ``avg_dist``
    is the mean distance over all tip pairs. In verbose mode one
    ``tip<TAB>score`` row is printed per tip; otherwise summary statistics
    of the scores are printed.
    """

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self) -> None:
        """Read the tree, compute the scores and print them."""
        tree = self.read_tree_file()
        tips, LBis = self.calculate_lb_score(tree)
        if self.verbose:
            try:
                for tip, LBi in zip(tips, LBis):
                    print(f"{tip}\t{round(LBi, 4)}")
            except BrokenPipeError:
                # Output was piped to a consumer that closed early.
                pass
        else:
            stats = calculate_summary_statistics_from_arr(LBis)
            print_summary_statistics(stats)

    def process_args(self, args) -> Dict[str, str]:
        """Map parsed CLI arguments onto the keyword arguments for ``Tree``."""
        return dict(tree_file_path=args.tree, verbose=args.verbose)

    @staticmethod
    def _calculate_distances_batch(tree_pickle, tip_pairs):
        """Return the tree distance for each ``(tip1, tip2)`` in ``tip_pairs``.

        ``tree_pickle`` is the pickled tree produced by this class in the
        parent process; it is only ever unpickled from that trusted source.
        """
        tree = pickle.loads(tree_pickle)
        return [tree.distance(tip1, tip2) for tip1, tip2 in tip_pairs]

    def calculate_average_distance_between_tips(
        self,
        tips: List[str],
        tree: Newick.Tree,
    ) -> float:
        """Return the mean distance over all unordered pairs of tips.

        Returns 0 when fewer than two tips are given. Fewer than 100 pairs
        are processed in-process; larger inputs are split into batches and
        distributed over a process pool (at most 8 workers).
        """
        num_tips = len(tips)
        if num_tips < 2:
            return 0

        all_pairs = list(itertools.combinations(tips, 2))
        num_combos = len(all_pairs)

        if num_combos < 100:
            total_dist = sum(
                tree.distance(tip1, tip2)
                for tip1, tip2 in all_pairs
            )
        else:
            # Pickle once and hand the same bytes to every task.
            tree_pickle = pickle.dumps(tree)
            batch_size = max(50, num_combos // mp.cpu_count())

            with ProcessPoolExecutor(max_workers=min(mp.cpu_count(), 8)) as executor:
                futures = [
                    executor.submit(
                        self._calculate_distances_batch,
                        tree_pickle,
                        all_pairs[i:i + batch_size],
                    )
                    for i in range(0, num_combos, batch_size)
                ]

                # Show a progress bar only when tqdm is installed and the
                # workload is large.
                if HAS_TQDM and num_combos > 1000:
                    futures_iter = tqdm(as_completed(futures), total=len(futures), desc="Computing distances")
                else:
                    futures_iter = as_completed(futures)

                total_dist = 0
                for future in futures_iter:
                    total_dist += sum(future.result())

        return total_dist / num_combos if num_combos else 0

    @staticmethod
    def _calculate_tip_distances_batch(tree_pickle, tips_data):
        """Return the mean distance of each tip to its paired tip list.

        ``tips_data`` is a sequence of ``(tip, other_tips)`` pairs; the
        result has one mean per pair, in the same order (0 for an empty
        ``other_tips``).
        """
        tree = pickle.loads(tree_pickle)
        results = []

        for tip, other_tips in tips_data:
            distances = [tree.distance(tip, other_tip) for other_tip in other_tips]
            avg_dist = sum(distances) / len(distances) if distances else 0
            results.append(avg_dist)

        return results

    @staticmethod
    def _tips_minus_i(all_tips, tip):
        """Return the comparison tips used for ``tip``.

        NOTE: ``set(tip)`` is a set of the *characters* of the tip name,
        not a set containing the name itself, so in general ``tip`` is NOT
        removed and contributes a self-distance to its own average. The
        source states this quirk is kept on purpose for compatibility with
        earlier output; do not "fix" it without changing that decision.
        """
        return list(all_tips - set(tip))

    def calculate_average_distance_of_taxon_to_other_taxa(
        self,
        tips: List[str],
        tree: Newick.Tree,
    ) -> List[float]:
        """Return, for every tip, its mean distance to the comparison tips.

        The result is ordered like ``tips``. Up to 50 tips are processed
        in-process; larger inputs are batched over a process pool (at most
        8 workers) and reassembled in the original order.
        """
        all_tips = set(tips)

        if len(tips) <= 50:
            avg_PDis = []
            for tip in tips:
                PDi = [
                    tree.distance(tip, tip_minus)
                    for tip_minus in self._tips_minus_i(all_tips, tip)
                ]
                avg_PDis.append(sum(PDi) / len(PDi) if PDi else 0)

            return avg_PDis

        tips_data = [(tip, self._tips_minus_i(all_tips, tip)) for tip in tips]

        batch_size = max(10, len(tips) // mp.cpu_count())
        tree_pickle = pickle.dumps(tree)

        with ProcessPoolExecutor(max_workers=min(mp.cpu_count(), 8)) as executor:
            # Remember each batch's start offset so results can be put
            # back in input order even though they complete out of order.
            future_to_index = {}
            for i in range(0, len(tips_data), batch_size):
                batch = tips_data[i:i + batch_size]
                future = executor.submit(self._calculate_tip_distances_batch, tree_pickle, batch)
                future_to_index[future] = i

            results_dict = {}
            for future in as_completed(future_to_index):
                results_dict[future_to_index[future]] = future.result()

        avg_PDis = []
        for i in sorted(results_dict):
            avg_PDis.extend(results_dict[i])

        return avg_PDis

    def calculate_lb_score_per_taxa(
        self,
        avg_PDis: List[float],
        avg_dist: float
    ) -> List[float]:
        """Convert per-tip mean distances into LB scores.

        Args:
            avg_PDis: mean distance of each tip to its comparison tips.
            avg_dist: mean distance over all tip pairs.

        Returns:
            ``((PDi / avg_dist) - 1) * 100`` for each entry of ``avg_PDis``.

        Exits with status 2 when ``avg_dist`` is 0 (the tree carries no
        usable branch lengths).
        """
        if avg_dist == 0:
            try:
                print("Invalid tree. Tree should contain branch lengths")
            except BrokenPipeError:
                pass
            # Exit unconditionally: a closed stdout must not turn the
            # error into a silent empty result for the caller.
            sys.exit(2)

        PDis_array = np.array(avg_PDis)
        LBis = ((PDis_array / avg_dist) - 1) * 100

        return LBis.tolist()

    def calculate_lb_score(
        self,
        tree: Newick.Tree
    ) -> Tuple[List[str], List[float]]:
        """Return the tip names of ``tree`` and their LB scores, aligned."""
        tips = self.get_tip_names_from_tree(tree)

        avg_dist = self.calculate_average_distance_between_tips(tips, tree)

        avg_PDis = \
            self.calculate_average_distance_of_taxon_to_other_taxa(tips, tree)

        LBis = self.calculate_lb_score_per_taxa(avg_PDis, avg_dist)

        return tips, LBis
@@ -0,0 +1,106 @@
1
+ import sys
2
+ from typing import Dict, List, Union
3
+
4
+ from Bio.Phylo import Newick
5
+
6
+ from .base import Tree
7
+
8
+ from ...helpers.stats_summary import calculate_summary_statistics_from_arr
9
+ from ...helpers.files import read_single_column_file_to_list
10
+
11
+
12
class MonophylyCheck(Tree):
    """Check whether a list of taxa forms a monophyletic group in a tree.

    Prints one tab-separated row: the status, then mean / maximum /
    minimum / standard deviation of the confidence values found in the
    clade, and (when not monophyletic) the offending tips joined by ``;``.
    """

    def __init__(self, args) -> None:
        super().__init__(**self.process_args(args))

    def run(self) -> None:
        """Read the inputs, evaluate monophyly and print the result row.

        Exits with status 2, after printing
        ``insufficient_taxon_representation``, when fewer than two of the
        requested taxa are present in the tree.
        """
        tree = self.read_tree_file()
        taxa = read_single_column_file_to_list(self.list_of_taxa)

        res_arr = []

        tree_tips = frozenset(self.get_tip_names_from_tree(tree))
        taxa_set = frozenset(taxa)
        taxa_of_interest = taxa_set.intersection(tree_tips)

        if len(taxa_of_interest) <= 1:
            res_arr.append(["insufficient_taxon_representation"])
            # Report the status before exiting; otherwise the user gets a
            # bare non-zero exit with no explanation.
            self.print_results(res_arr)
            sys.exit(2)

        # shared_tips (inherited) takes lists rather than sets.
        taxa_of_interest_list = list(taxa_of_interest)
        shared_tree_tips = self.shared_tips(taxa_of_interest_list, list(tree_tips))

        # Root on everything outside the group, then take the group's
        # common ancestor; any extra tip under it breaks monophyly.
        diff_tips = list(tree_tips - frozenset(shared_tree_tips))
        tree.root_with_outgroup(diff_tips)
        tree = tree.common_ancestor(shared_tree_tips)

        common_ancestor_tips = frozenset(self.get_tip_names_from_tree(tree))
        diff_tips_between_clade_and_curr_tree = list(
            taxa_of_interest.symmetric_difference(common_ancestor_tips)
        )

        stats = self.get_bootstrap_statistics(tree)

        res_arr = self.populate_res_arr(
            diff_tips_between_clade_and_curr_tree, stats, res_arr
        )

        self.print_results(res_arr)

    def process_args(self, args) -> Dict[str, str]:
        """Map parsed CLI arguments onto the keyword arguments for ``Tree``."""
        return dict(
            tree_file_path=args.tree,
            list_of_taxa=args.list_of_taxa,
        )

    def get_bootstrap_statistics(
        self,
        clade: Newick.Clade
    ) -> Dict[str, Union[int, float]]:
        """Summarise the confidence values of the clade's internal nodes.

        Non-terminal nodes whose ``confidence`` is ``None`` are ignored.
        """
        bs_vals = [
            node.confidence for node in clade.get_nonterminals()
            if node.confidence is not None
        ]

        return calculate_summary_statistics_from_arr(bs_vals)

    def populate_res_arr(
        self,
        diff_tips_between_clade_and_curr_tree: List[str],
        stats: Dict[str, float],
        res_arr: List,
    ) -> List[List[Union[str, int, float]]]:
        """Append one result row to ``res_arr`` and return it.

        The row is ``[status, mean, maximum, minimum, standard_deviation,
        differing_tips]`` where status is ``monophyletic`` when
        ``differing_tips`` is empty and ``not_monophyletic`` otherwise.
        ``res_arr`` is modified in place.
        """
        status = (
            "not_monophyletic"
            if diff_tips_between_clade_and_curr_tree
            else "monophyletic"
        )
        res_arr.append([
            status,
            stats["mean"],
            stats["maximum"],
            stats["minimum"],
            stats["standard_deviation"],
            diff_tips_between_clade_and_curr_tree,
        ])

        return res_arr

    def print_results(self, res_arr: List[List[Union[str, int, float]]]) -> None:
        """Print each result row as a tab-separated line.

        Full rows print the status and the four statistics rounded to four
        decimals, followed by the sorted differing tips when there are
        any. Short rows (status only) print just the status.
        """
        for res in res_arr:
            try:
                if res[5]:
                    res[5].sort()
                    print(
                        f"{res[0]}\t{round(res[1], 4)}\t{round(res[2], 4)}\t{round(res[3], 4)}\t{round(res[4], 4)}\t{';'.join(res[5])}"
                    )
                else:
                    print(
                        f"{res[0]}\t{round(res[1], 4)}\t{round(res[2], 4)}\t{round(res[3], 4)}\t{round(res[4], 4)}"
                    )
            except IndexError:
                # Status-only row such as insufficient_taxon_representation.
                print(f"{res[0]}")