dayhoff-tools 1.2.5__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/__init__.py +0 -0
- dayhoff_tools/chemistry/standardizer.py +0 -0
- dayhoff_tools/chemistry/utils.py +0 -0
- dayhoff_tools/cli/__init__.py +0 -0
- dayhoff_tools/cli/cloud_commands.py +0 -0
- dayhoff_tools/cli/main.py +0 -0
- dayhoff_tools/cli/swarm_commands.py +0 -0
- dayhoff_tools/cli/utility_commands.py +0 -0
- dayhoff_tools/deployment/base.py +0 -0
- dayhoff_tools/deployment/deploy_aws.py +0 -0
- dayhoff_tools/deployment/deploy_gcp.py +0 -0
- dayhoff_tools/deployment/deploy_utils.py +0 -0
- dayhoff_tools/deployment/job_runner.py +0 -0
- dayhoff_tools/deployment/processors.py +0 -0
- dayhoff_tools/deployment/swarm.py +0 -0
- dayhoff_tools/embedders.py +1 -1
- dayhoff_tools/fasta.py +13 -5
- dayhoff_tools/file_ops.py +0 -0
- dayhoff_tools/h5.py +0 -0
- dayhoff_tools/intake/gcp.py +0 -0
- dayhoff_tools/intake/gtdb.py +0 -0
- dayhoff_tools/intake/kegg.py +0 -0
- dayhoff_tools/intake/mmseqs.py +0 -0
- dayhoff_tools/intake/structure.py +0 -0
- dayhoff_tools/intake/uniprot.py +0 -0
- dayhoff_tools/logs.py +0 -0
- dayhoff_tools/sqlite.py +0 -0
- dayhoff_tools/warehouse.py +0 -0
- {dayhoff_tools-1.2.5.dist-info → dayhoff_tools-1.3.1.dist-info}/METADATA +6 -4
- {dayhoff_tools-1.2.5.dist-info → dayhoff_tools-1.3.1.dist-info}/RECORD +6 -6
- {dayhoff_tools-1.2.5.dist-info → dayhoff_tools-1.3.1.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.2.5.dist-info → dayhoff_tools-1.3.1.dist-info}/entry_points.txt +0 -0
dayhoff_tools/__init__.py
CHANGED
File without changes
|
File without changes
|
dayhoff_tools/chemistry/utils.py
CHANGED
File without changes
|
dayhoff_tools/cli/__init__.py
CHANGED
File without changes
|
File without changes
|
dayhoff_tools/cli/main.py
CHANGED
File without changes
|
File without changes
|
File without changes
|
dayhoff_tools/deployment/base.py
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
dayhoff_tools/embedders.py
CHANGED
@@ -683,7 +683,7 @@ class Embedder(Processor):
|
|
683
683
|
sequence_ids, sequences, sequence_lengths = zip(*batch)
|
684
684
|
|
685
685
|
# Prepare sequences for tokenization
|
686
|
-
tokenizer_input = self.prepare_tokenizer_input(sequences)
|
686
|
+
tokenizer_input = self.prepare_tokenizer_input(list(sequences))
|
687
687
|
|
688
688
|
# Tokenize sequences
|
689
689
|
encoded_input = self.tokenizer.batch_encode_plus(
|
dayhoff_tools/fasta.py
CHANGED
@@ -25,7 +25,7 @@ def _clean_noncanonical_fasta(
|
|
25
25
|
) -> Optional[dict[str, str]]:
|
26
26
|
"""
|
27
27
|
Read in a FASTA file containing multiple sequences, replace non-canonical amino acids,
|
28
|
-
remove empty sequences, and either write the sequences to a new FASTA file or return them as a dictionary.
|
28
|
+
remove stop codons, remove empty sequences, and either write the sequences to a new FASTA file or return them as a dictionary.
|
29
29
|
|
30
30
|
Args:
|
31
31
|
input_path (str): Path to the input FASTA file.
|
@@ -48,7 +48,11 @@ def _clean_noncanonical_fasta(
|
|
48
48
|
for line in fasta_file:
|
49
49
|
if line.startswith(">"):
|
50
50
|
if seq_id and seq_lines:
|
51
|
-
seq =
|
51
|
+
seq = (
|
52
|
+
"".join(seq_lines)
|
53
|
+
.translate(str.maketrans("OJUZB", "XLCED"))
|
54
|
+
.replace("*", "")
|
55
|
+
)
|
52
56
|
if seq.strip(): # Only process non-empty sequences
|
53
57
|
sequences[seq_id] = seq
|
54
58
|
if output_path:
|
@@ -61,7 +65,11 @@ def _clean_noncanonical_fasta(
|
|
61
65
|
|
62
66
|
# Process the last sequence
|
63
67
|
if seq_id and seq_lines:
|
64
|
-
seq =
|
68
|
+
seq = (
|
69
|
+
"".join(seq_lines)
|
70
|
+
.translate(str.maketrans("OJUZB", "XLCED"))
|
71
|
+
.replace("*", "")
|
72
|
+
)
|
65
73
|
if seq.strip(): # Only process non-empty sequences
|
66
74
|
sequences[seq_id] = seq
|
67
75
|
if output_path:
|
@@ -92,7 +100,7 @@ def clean_noncanonical_fasta(
|
|
92
100
|
):
|
93
101
|
"""
|
94
102
|
Read in a FASTA file containing multiple sequences and write the sequences to a new FASTA file.
|
95
|
-
Replace non-canonical amino acids along the way.
|
103
|
+
Replace non-canonical amino acids and remove stop codons along the way.
|
96
104
|
|
97
105
|
Args:
|
98
106
|
input_path (str): Path to the input FASTA file.
|
@@ -112,7 +120,7 @@ def clean_noncanonical_fasta_to_dict(
|
|
112
120
|
) -> dict[str, str]:
|
113
121
|
"""
|
114
122
|
Read in a FASTA file containing multiple sequences and return the sequences as a dictionary.
|
115
|
-
Replace non-canonical amino acids along the way.
|
123
|
+
Replace non-canonical amino acids and remove stop codons along the way.
|
116
124
|
|
117
125
|
Args:
|
118
126
|
input_path (str): Path to the input FASTA file.
|
dayhoff_tools/file_ops.py
CHANGED
File without changes
|
dayhoff_tools/h5.py
CHANGED
File without changes
|
dayhoff_tools/intake/gcp.py
CHANGED
File without changes
|
dayhoff_tools/intake/gtdb.py
CHANGED
File without changes
|
dayhoff_tools/intake/kegg.py
CHANGED
File without changes
|
dayhoff_tools/intake/mmseqs.py
CHANGED
File without changes
|
File without changes
|
dayhoff_tools/intake/uniprot.py
CHANGED
File without changes
|
dayhoff_tools/logs.py
CHANGED
File without changes
|
dayhoff_tools/sqlite.py
CHANGED
File without changes
|
dayhoff_tools/warehouse.py
CHANGED
File without changes
|
{dayhoff_tools-1.2.5.dist-info → dayhoff_tools-1.3.1.dist-info}/METADATA
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: dayhoff-tools
|
3
|
-
Version: 1.2.5
|
3
|
+
Version: 1.3.1
|
4
4
|
Summary: Common tools for all the repos at Dayhoff Labs
|
5
5
|
Author: Daniel Martin-Alarcon
|
6
6
|
Author-email: dma@dayhofflabs.com
|
7
|
-
Requires-Python: >=3.
|
7
|
+
Requires-Python: >=3.10,<4.0
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
9
11
|
Classifier: Programming Language :: Python :: 3.12
|
10
12
|
Classifier: Programming Language :: Python :: 3.13
|
11
13
|
Provides-Extra: embedders
|
@@ -24,13 +26,13 @@ Requires-Dist: pandas (>=2.2.3) ; extra == "embedders"
|
|
24
26
|
Requires-Dist: pandas (>=2.2.3) ; extra == "full"
|
25
27
|
Requires-Dist: pyyaml (>=6.0)
|
26
28
|
Requires-Dist: questionary (>=2.0.1)
|
27
|
-
Requires-Dist: rdkit (>=
|
29
|
+
Requires-Dist: rdkit-pypi (>=2022.9.5) ; extra == "full"
|
28
30
|
Requires-Dist: requests (>=2.31.0)
|
29
31
|
Requires-Dist: sentencepiece (>=0.2.0) ; extra == "embedders"
|
30
32
|
Requires-Dist: sentencepiece (>=0.2.0) ; extra == "full"
|
31
33
|
Requires-Dist: sqlalchemy (>=2.0.40,<3.0.0) ; extra == "full"
|
32
34
|
Requires-Dist: toml (>=0.10)
|
33
|
-
Requires-Dist: torch (>=2.
|
35
|
+
Requires-Dist: torch (>=2.4.0) ; extra == "embedders"
|
34
36
|
Requires-Dist: tqdm (>=4.67.1) ; extra == "embedders"
|
35
37
|
Requires-Dist: tqdm (>=4.67.1) ; extra == "full"
|
36
38
|
Requires-Dist: transformers (==4.36.2) ; extra == "full"
|
{dayhoff_tools-1.2.5.dist-info → dayhoff_tools-1.3.1.dist-info}/RECORD
CHANGED
@@ -13,8 +13,8 @@ dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqq
|
|
13
13
|
dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
|
14
14
|
dayhoff_tools/deployment/processors.py,sha256=LM0CQbr4XCb3AtLbrcuDQm4tYPXsoNqgVJ4WQYDjzJc,12406
|
15
15
|
dayhoff_tools/deployment/swarm.py,sha256=YJfvVOcAS8cYcIj2fiN4qwC2leh0I9w5A4px8ZWSF6g,22833
|
16
|
-
dayhoff_tools/embedders.py,sha256=
|
17
|
-
dayhoff_tools/fasta.py,sha256=
|
16
|
+
dayhoff_tools/embedders.py,sha256=1THnmio4FYkBswy_xkIiwT-ZOEMn6ZLbTAa-Uz0-kyE,36615
|
17
|
+
dayhoff_tools/fasta.py,sha256=USdemH4c_dNhWXOTAhldvlDi8eLHogsy0YSrOnODB5I,50773
|
18
18
|
dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
|
19
19
|
dayhoff_tools/h5.py,sha256=j1nxxaiHsMidVX_XwB33P1Pz9d7K8ZKiDZwJWQUUQSY,21158
|
20
20
|
dayhoff_tools/intake/gcp.py,sha256=uCeEskhbEwJIYpN6ne6siT1dbpTizCjjel-hRe0kReE,3030
|
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
|
|
26
26
|
dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
|
27
27
|
dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
|
28
28
|
dayhoff_tools/warehouse.py,sha256=8YbnQ--usrEgDQGfvpV4MrMji55A0rq2hZaOgFGh6ag,15896
|
29
|
-
dayhoff_tools-1.
|
30
|
-
dayhoff_tools-1.
|
31
|
-
dayhoff_tools-1.
|
32
|
-
dayhoff_tools-1.2.5.dist-info/RECORD,,
|
29
|
+
dayhoff_tools-1.3.1.dist-info/METADATA,sha256=AyP_2vo_5tVylBVzP-EMkI3tPPxJIV-VpBdQxRQFIZU,2842
|
30
|
+
dayhoff_tools-1.3.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
31
|
+
dayhoff_tools-1.3.1.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
|
32
|
+
dayhoff_tools-1.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|