phykit 2.1.4__tar.gz → 2.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phykit-2.1.4 → phykit-2.1.8}/PKG-INFO +4 -5
- {phykit-2.1.4 → phykit-2.1.8}/README.md +2 -2
- {phykit-2.1.4 → phykit-2.1.8}/phykit/__main__.py +0 -1
- phykit-2.1.8/phykit/cli_registry.py +105 -0
- phykit-2.1.8/phykit/errors.py +12 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/helpers/caching.py +7 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/helpers/files.py +23 -11
- phykit-2.1.8/phykit/helpers/json_output.py +26 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/helpers/parallel.py +1 -2
- {phykit-2.1.4 → phykit-2.1.8}/phykit/helpers/streaming.py +4 -5
- {phykit-2.1.4 → phykit-2.1.8}/phykit/phykit.py +867 -569
- phykit-2.1.8/phykit/service_factories.py +82 -0
- phykit-2.1.8/phykit/services/alignment/alignment_entropy.py +117 -0
- phykit-2.1.8/phykit/services/alignment/alignment_length.py +25 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/alignment_length_no_gaps.py +18 -3
- phykit-2.1.8/phykit/services/alignment/alignment_outlier_taxa.py +353 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/alignment_recoding.py +19 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/base.py +3 -9
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/column_score.py +16 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/composition_per_taxon.py +23 -2
- phykit-2.1.8/phykit/services/alignment/compositional_bias_per_site.py +214 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/create_concatenation_matrix.py +143 -5
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/dna_threader.py +17 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/evolutionary_rate_per_site.py +61 -6
- phykit-2.1.8/phykit/services/alignment/faidx.py +42 -0
- phykit-2.1.8/phykit/services/alignment/gc_content.py +114 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/mask_alignment.py +20 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/occupancy_per_taxon.py +21 -2
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/pairwise_identity.py +107 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/parsimony_informative_sites.py +24 -6
- phykit-2.1.8/phykit/services/alignment/plot_alignment_qc.py +210 -0
- phykit-2.1.8/phykit/services/alignment/rcv.py +24 -0
- phykit-2.1.8/phykit/services/alignment/rcvt.py +106 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/rename_fasta_entries.py +29 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/sum_of_pairs_score.py +16 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/variable_sites.py +22 -5
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/base.py +1 -1
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/__init__.py +1 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/base.py +23 -31
- phykit-2.1.8/phykit/services/tree/bipartition_support_stats.py +152 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/branch_length_multiplier.py +24 -4
- phykit-2.1.8/phykit/services/tree/collapse_branches.py +47 -0
- phykit-2.1.8/phykit/services/tree/consensus_tree.py +141 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/covarying_evolutionary_rates.py +145 -58
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/dvmc.py +8 -4
- phykit-2.1.8/phykit/services/tree/evolutionary_rate.py +24 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/hidden_paralogy_check.py +23 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/internal_branch_stats.py +26 -2
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/internode_labeler.py +22 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/last_common_ancestor_subtree.py +20 -1
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/lb_score.py +27 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/monophyly_check.py +19 -1
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/nearest_neighbor_interchange.py +19 -4
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/patristic_distances.py +30 -2
- phykit-2.1.8/phykit/services/tree/phylogenetic_signal.py +331 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/polytomy_test.py +37 -10
- phykit-2.1.8/phykit/services/tree/print_tree.py +47 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/prune_tree.py +23 -1
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/rename_tree_tips.py +24 -4
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/rf_distance.py +18 -3
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/root_tree.py +19 -1
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/saturation.py +92 -16
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/spurious_sequence.py +25 -7
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/terminal_branch_stats.py +26 -2
- phykit-2.1.8/phykit/services/tree/tip_labels.py +26 -0
- phykit-2.1.8/phykit/services/tree/tip_to_tip_distance.py +164 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/tip_to_tip_node_distance.py +20 -2
- phykit-2.1.8/phykit/services/tree/total_tree_length.py +32 -0
- phykit-2.1.8/phykit/services/tree/treeness.py +23 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/tree/treeness_over_rcv.py +18 -15
- phykit-2.1.8/phykit/version.py +1 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit.egg-info/PKG-INFO +4 -5
- {phykit-2.1.4 → phykit-2.1.8}/phykit.egg-info/SOURCES.txt +8 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit.egg-info/entry_points.txt +12 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit.egg-info/requires.txt +0 -1
- {phykit-2.1.4 → phykit-2.1.8}/setup.py +13 -3
- phykit-2.1.4/phykit/services/alignment/alignment_entropy.py +0 -52
- phykit-2.1.4/phykit/services/alignment/alignment_length.py +0 -16
- phykit-2.1.4/phykit/services/alignment/compositional_bias_per_site.py +0 -98
- phykit-2.1.4/phykit/services/alignment/faidx.py +0 -21
- phykit-2.1.4/phykit/services/alignment/gc_content.py +0 -94
- phykit-2.1.4/phykit/services/alignment/rcv.py +0 -14
- phykit-2.1.4/phykit/services/alignment/rcvt.py +0 -65
- phykit-2.1.4/phykit/services/tree/bipartition_support_stats.py +0 -48
- phykit-2.1.4/phykit/services/tree/collapse_branches.py +0 -27
- phykit-2.1.4/phykit/services/tree/evolutionary_rate.py +0 -17
- phykit-2.1.4/phykit/services/tree/print_tree.py +0 -28
- phykit-2.1.4/phykit/services/tree/tip_labels.py +0 -18
- phykit-2.1.4/phykit/services/tree/tip_to_tip_distance.py +0 -41
- phykit-2.1.4/phykit/services/tree/total_tree_length.py +0 -25
- phykit-2.1.4/phykit/services/tree/treeness.py +0 -16
- phykit-2.1.4/phykit/version.py +0 -1
- {phykit-2.1.4 → phykit-2.1.8}/LICENSE.md +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/__init__.py +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/helpers/__init__.py +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/helpers/boolean_argument_parsing.py +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/helpers/stats_summary.py +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/__init__.py +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit/services/alignment/__init__.py +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit.egg-info/dependency_links.txt +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/phykit.egg-info/top_level.txt +0 -0
- {phykit-2.1.4 → phykit-2.1.8}/setup.cfg +0 -0
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: phykit
|
|
3
|
-
Version: 2.1.4
|
|
3
|
+
Version: 2.1.8
|
|
4
4
|
Home-page: https://github.com/jlsteenwyk/phykit
|
|
5
5
|
Author: Jacob L. Steenwyk
|
|
6
6
|
Author-email: jlsteenwyk@gmail.com
|
|
7
7
|
Classifier: Operating System :: OS Independent
|
|
8
8
|
Classifier: Intended Audience :: Science/Research
|
|
9
9
|
Classifier: Programming Language :: Python
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
12
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
12
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
13
|
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Requires-Python: >=3.11
|
|
16
15
|
Description-Content-Type: text/markdown
|
|
17
16
|
License-File: LICENSE.md
|
|
18
17
|
|
|
@@ -34,8 +33,8 @@ License-File: LICENSE.md
|
|
|
34
33
|
<a href="https://codecov.io/gh/jlsteenwyk/phykit" alt="Coverage">
|
|
35
34
|
<img src="https://codecov.io/gh/jlsteenwyk/phykit/branch/master/graph/badge.svg?token=0J49I6441V">
|
|
36
35
|
</a>
|
|
37
|
-
<a href="https://github.com/
|
|
38
|
-
<img src="https://img.shields.io/github/contributors/
|
|
36
|
+
<a href="https://github.com/JLSteenwyk/PhyKIT/graphs/contributors" alt="Contributors">
|
|
37
|
+
<img src="https://img.shields.io/github/contributors/JLSteenwyk/PhyKIT">
|
|
39
38
|
</a>
|
|
40
39
|
<a href="https://bsky.app/profile/jlsteenwyk.bsky.social" target="_blank" rel="noopener noreferrer">
|
|
41
40
|
<img src="https://img.shields.io/badge/Bluesky-0285FF?logo=bluesky&logoColor=fff">
|
|
@@ -16,8 +16,8 @@
|
|
|
16
16
|
<a href="https://codecov.io/gh/jlsteenwyk/phykit" alt="Coverage">
|
|
17
17
|
<img src="https://codecov.io/gh/jlsteenwyk/phykit/branch/master/graph/badge.svg?token=0J49I6441V">
|
|
18
18
|
</a>
|
|
19
|
-
<a href="https://github.com/
|
|
20
|
-
<img src="https://img.shields.io/github/contributors/
|
|
19
|
+
<a href="https://github.com/JLSteenwyk/PhyKIT/graphs/contributors" alt="Contributors">
|
|
20
|
+
<img src="https://img.shields.io/github/contributors/JLSteenwyk/PhyKIT">
|
|
21
21
|
</a>
|
|
22
22
|
<a href="https://bsky.app/profile/jlsteenwyk.bsky.social" target="_blank" rel="noopener noreferrer">
|
|
23
23
|
<img src="https://img.shields.io/badge/Bluesky-0285FF?logo=bluesky&logoColor=fff">
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Declarative CLI alias registry."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Map user-facing aliases (and a few long-form synonyms) to Phykit handler names.
|
|
7
|
+
# Each handler name with the CLI spellings that dispatch to it. Declaration
# order is significant: flattening below preserves it, so the resulting
# alias map iterates in exactly this order.
_HANDLER_ALIASES: Dict[str, tuple] = {
    # version
    "version": ("version", "v"),
    # Alignment handlers
    "alignment_length": ("aln_len", "al"),
    "alignment_length_no_gaps": ("aln_len_no_gaps", "alng"),
    "alignment_entropy": ("aln_entropy", "entropy"),
    "alignment_recoding": ("aln_recoding", "recode"),
    "alignment_outlier_taxa": ("outlier_taxa", "aot"),
    "column_score": ("cs",),
    "compositional_bias_per_site": ("comp_bias_per_site", "cbps"),
    "evolutionary_rate_per_site": ("evo_rate_per_site", "erps"),
    "faidx": ("get_entry", "ge"),
    "gc_content": ("gc",),
    "mask_alignment": ("mask_aln", "mask"),
    "plot_alignment_qc": ("plot_qc", "paqc"),
    "occupancy_per_taxon": ("occupancy_taxon", "occ_tax"),
    "pairwise_identity": ("pairwise_id", "pi"),
    "composition_per_taxon": ("comp_taxon", "comp_tax"),
    "parsimony_informative_sites": ("pis",),
    "rcv": ("rel_comp_var", "relative_composition_variability"),
    "rcvt": ("relative_composition_variability_taxon", "rel_comp_var_taxon"),
    "rename_fasta_entries": ("rename_fasta",),
    "sum_of_pairs_score": ("sum_of_pairs_score", "sops", "sop"),
    "variable_sites": ("vs",),
    # Tree handlers
    "bipartition_support_stats": ("bss",),
    "branch_length_multiplier": ("blm",),
    "collapse_branches": ("collapse", "cb"),
    "covarying_evolutionary_rates": ("cover",),
    "consensus_tree": ("consensus", "ctree"),
    "dvmc": ("degree_of_violation_of_a_molecular_clock",),
    "evolutionary_rate": ("evo_rate",),
    "hidden_paralogy_check": ("clan_check",),
    "internal_branch_stats": ("ibs",),
    "internode_labeler": ("il",),
    "last_common_ancestor_subtree": ("lca_subtree",),
    "lb_score": ("long_branch_score", "lbs"),
    "monophyly_check": ("is_monophyletic",),
    "nearest_neighbor_interchange": ("nni",),
    "patristic_distances": ("pd",),
    "phylogenetic_signal": ("phylo_signal", "ps"),
    "polytomy_test": ("polyt_test", "ptt", "polyt"),
    "print_tree": ("print", "pt"),
    "prune_tree": ("prune",),
    "rename_tree_tips": ("rename_tree", "rename_tips"),
    "rf_distance": ("robinson_foulds_distance", "rf_dist", "rf"),
    "root_tree": ("root", "rt"),
    "spurious_sequence": ("spurious_seq", "ss"),
    "terminal_branch_stats": ("tbs",),
    "tip_labels": ("labels", "tree_labels", "tl"),
    "tip_to_tip_distance": ("t2t_dist", "t2t"),
    "tip_to_tip_node_distance": ("t2t_node_dist", "t2t_nd"),
    "total_tree_length": ("tree_len",),
    "treeness": ("tness",),
    # Alignment- and tree-based handlers
    "saturation": ("sat",),
    "treeness_over_rcv": ("toverr", "tor"),
    # Helper handlers
    "create_concatenation_matrix": ("create_concat", "cc"),
    "thread_dna": ("pal2nal", "p2n"),
}

# Flattened alias -> handler lookup, built from the table above.
ALIAS_TO_HANDLER: Dict[str, str] = {
    alias: handler
    for handler, aliases in _HANDLER_ALIASES.items()
    for alias in aliases
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Shared PhyKIT exception types."""
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, List
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class PhykitUserError(SystemExit):
    """User-facing error that should be rendered by the CLI with a specific exit code.

    Derives from ``SystemExit``, so ``code`` is stored as ``self.code`` by the
    base class.

    Args:
        messages: A single message line, or an iterable of message lines.
            A bare ``str`` is kept as one line instead of being split into
            individual characters.
        code: Exit code passed to ``SystemExit``.
    """

    def __init__(self, messages: Iterable[str], code: int = 2):
        # A str is itself an iterable of characters; wrap it so that
        # PhykitUserError("msg") yields ["msg"], not ["m", "s", "g"].
        if isinstance(messages, str):
            messages = [messages]
        self.messages: List[str] = list(messages)
        super().__init__(code)
|
|
12
|
+
|
|
@@ -57,7 +57,7 @@ class ResultCache:
|
|
|
57
57
|
try:
|
|
58
58
|
with open(cache_file, 'rb') as f:
|
|
59
59
|
return pickle.load(f)
|
|
60
|
-
except:
|
|
60
|
+
except Exception:
|
|
61
61
|
# Cache corrupted, remove it
|
|
62
62
|
os.remove(cache_file)
|
|
63
63
|
|
|
@@ -70,7 +70,7 @@ class ResultCache:
|
|
|
70
70
|
try:
|
|
71
71
|
with open(cache_file, 'wb') as f:
|
|
72
72
|
pickle.dump(value, f)
|
|
73
|
-
except:
|
|
73
|
+
except Exception:
|
|
74
74
|
# Caching failed, continue without caching
|
|
75
75
|
pass
|
|
76
76
|
|
|
@@ -162,6 +162,8 @@ class AlignmentCache:
|
|
|
162
162
|
Cache alignment column.
|
|
163
163
|
"""
|
|
164
164
|
self._column_cache[f"{alignment_hash}_{column_idx}"] = column
|
|
165
|
+
# Prevent stale values from the lru_cache layer.
|
|
166
|
+
self.get_column.cache_clear()
|
|
165
167
|
|
|
166
168
|
@lru_cache(maxsize=128)
|
|
167
169
|
def get_stats(self, alignment_hash: str, stat_type: str) -> Any:
|
|
@@ -175,6 +177,8 @@ class AlignmentCache:
|
|
|
175
177
|
Cache alignment statistics.
|
|
176
178
|
"""
|
|
177
179
|
self._stats_cache[f"{alignment_hash}_{stat_type}"] = stats
|
|
180
|
+
# Prevent stale values from the lru_cache layer.
|
|
181
|
+
self.get_stats.cache_clear()
|
|
178
182
|
|
|
179
183
|
def clear(self) -> None:
|
|
180
184
|
"""
|
|
@@ -198,4 +202,4 @@ def get_result_cache() -> ResultCache:
|
|
|
198
202
|
|
|
199
203
|
def get_alignment_cache() -> AlignmentCache:
|
|
200
204
|
"""Get global alignment cache instance."""
|
|
201
|
-
return _alignment_cache
|
|
205
|
+
return _alignment_cache
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
import sys
|
|
3
2
|
from typing import Tuple, Optional
|
|
4
3
|
from functools import lru_cache
|
|
5
4
|
import hashlib
|
|
@@ -7,6 +6,7 @@ import os
|
|
|
7
6
|
|
|
8
7
|
from Bio import AlignIO
|
|
9
8
|
from Bio.Align import MultipleSeqAlignment
|
|
9
|
+
from ..errors import PhykitUserError
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class FileFormat(Enum):
|
|
@@ -30,7 +30,7 @@ def _get_file_hash(file_path: str) -> str:
|
|
|
30
30
|
|
|
31
31
|
def _detect_format_by_content(file_path: str) -> Optional[str]:
|
|
32
32
|
"""Attempt to detect file format by examining file content."""
|
|
33
|
-
with open(file_path
|
|
33
|
+
with open(file_path) as f:
|
|
34
34
|
first_line = f.readline().strip()
|
|
35
35
|
|
|
36
36
|
# Quick format detection based on first line
|
|
@@ -60,9 +60,13 @@ def get_alignment_and_format(
|
|
|
60
60
|
) -> Tuple[MultipleSeqAlignment, str, bool]:
|
|
61
61
|
# Check if file exists first
|
|
62
62
|
if not os.path.exists(alignment_file_path):
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
63
|
+
raise PhykitUserError(
|
|
64
|
+
[
|
|
65
|
+
f"{alignment_file_path} corresponds to no such file.",
|
|
66
|
+
"Please check file name and pathing",
|
|
67
|
+
],
|
|
68
|
+
code=2,
|
|
69
|
+
)
|
|
66
70
|
|
|
67
71
|
# Try to detect format by content first
|
|
68
72
|
detected_format = _detect_format_by_content(alignment_file_path)
|
|
@@ -95,9 +99,13 @@ def get_alignment_and_format(
|
|
|
95
99
|
continue
|
|
96
100
|
|
|
97
101
|
# If we get here, no format worked
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
102
|
+
raise PhykitUserError(
|
|
103
|
+
[
|
|
104
|
+
f"Could not determine format for {alignment_file_path}",
|
|
105
|
+
"Please ensure the file is in a supported format",
|
|
106
|
+
],
|
|
107
|
+
code=2,
|
|
108
|
+
)
|
|
101
109
|
|
|
102
110
|
|
|
103
111
|
def is_protein_alignment(alignment: MultipleSeqAlignment) -> bool:
|
|
@@ -120,6 +128,10 @@ def read_single_column_file_to_list(single_col_file_path: str) -> list:
|
|
|
120
128
|
with open(single_col_file_path) as f:
|
|
121
129
|
return [line.rstrip("\n").strip() for line in f]
|
|
122
130
|
except FileNotFoundError:
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
131
|
+
raise PhykitUserError(
|
|
132
|
+
[
|
|
133
|
+
f"{single_col_file_path} corresponds to no such file or directory.",
|
|
134
|
+
"Please check file name and pathing",
|
|
135
|
+
],
|
|
136
|
+
code=2,
|
|
137
|
+
)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def to_builtin_json_types(value):
    """Recursively convert NumPy values inside ``value`` to builtin Python types.

    Dicts, lists and tuples are walked recursively (tuples come back as
    lists). NumPy booleans, integers and floats become ``bool``, ``int`` and
    ``float``; arrays become (nested) lists. Anything else is returned
    unchanged.

    Args:
        value: Arbitrary payload, possibly containing NumPy scalars/arrays.

    Returns:
        The same structure expressed with builtin containers and scalars.
    """
    if isinstance(value, dict):
        # NumPy scalar keys (e.g. np.int64) are rejected by json.dumps, so
        # keys are unwrapped as well as values.
        return {
            (key.item() if isinstance(key, np.generic) else key):
                to_builtin_json_types(sub_value)
            for key, sub_value in value.items()
        }
    if isinstance(value, (list, tuple)):
        return [to_builtin_json_types(sub_value) for sub_value in value]
    if isinstance(value, np.bool_):
        # np.bool_ is neither a Python bool nor an np.integer.
        return bool(value)
    if isinstance(value, np.integer):
        return int(value)
    if isinstance(value, np.floating):
        return float(value)
    if isinstance(value, np.ndarray):
        # tolist() yields a scalar for 0-d arrays and a list otherwise;
        # recursing handles both, plus any leftovers in object arrays.
        return to_builtin_json_types(value.tolist())
    return value
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def print_json(payload, sort_keys=True):
    """Print ``payload`` as a single JSON document on standard output.

    The payload is first passed through ``to_builtin_json_types`` and then
    serialized with ``json.dumps``. A ``BrokenPipeError`` raised while
    printing is ignored.

    Args:
        payload: Object to serialize.
        sort_keys: Forwarded to ``json.dumps``.
    """
    serialized = json.dumps(to_builtin_json_types(payload), sort_keys=sort_keys)
    try:
        print(serialized)
    except BrokenPipeError:
        pass
|
|
@@ -3,7 +3,6 @@ Parallel processing utilities for batch operations
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import multiprocessing as mp
|
|
6
|
-
from functools import partial
|
|
7
6
|
from typing import List, Any, Callable, Optional, Tuple
|
|
8
7
|
import numpy as np
|
|
9
8
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
|
@@ -302,4 +301,4 @@ class NumpyParallel:
|
|
|
302
301
|
if symmetric:
|
|
303
302
|
result_matrix[j, i] = value
|
|
304
303
|
|
|
305
|
-
return result_matrix
|
|
304
|
+
return result_matrix
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Streaming utilities for memory-efficient processing of large files
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from typing import Iterator,
|
|
5
|
+
from typing import Iterator, Optional
|
|
6
6
|
import mmap
|
|
7
7
|
import os
|
|
8
8
|
from Bio import SeqIO
|
|
@@ -31,9 +31,8 @@ class StreamingFastaReader:
|
|
|
31
31
|
"""
|
|
32
32
|
Stream sequences one at a time.
|
|
33
33
|
"""
|
|
34
|
-
with open(self.file_path
|
|
35
|
-
|
|
36
|
-
yield record
|
|
34
|
+
with open(self.file_path) as handle:
|
|
35
|
+
yield from SeqIO.parse(handle, "fasta")
|
|
37
36
|
|
|
38
37
|
def stream_chunks(self) -> Iterator[list]:
|
|
39
38
|
"""
|
|
@@ -149,4 +148,4 @@ class MemoryEfficientAlignmentProcessor:
|
|
|
149
148
|
batch_result = processing_func(batch)
|
|
150
149
|
results.append(batch_result)
|
|
151
150
|
|
|
152
|
-
return results
|
|
151
|
+
return results
|