dayhoff-tools 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/__init__.py +0 -0
- dayhoff_tools/chemistry/standardizer.py +297 -0
- dayhoff_tools/chemistry/utils.py +63 -0
- dayhoff_tools/cli/__init__.py +0 -0
- dayhoff_tools/cli/main.py +90 -0
- dayhoff_tools/cli/swarm_commands.py +156 -0
- dayhoff_tools/cli/utility_commands.py +244 -0
- dayhoff_tools/deployment/base.py +434 -0
- dayhoff_tools/deployment/deploy_aws.py +458 -0
- dayhoff_tools/deployment/deploy_gcp.py +176 -0
- dayhoff_tools/deployment/deploy_utils.py +781 -0
- dayhoff_tools/deployment/job_runner.py +153 -0
- dayhoff_tools/deployment/processors.py +125 -0
- dayhoff_tools/deployment/swarm.py +591 -0
- dayhoff_tools/embedders.py +893 -0
- dayhoff_tools/fasta.py +1082 -0
- dayhoff_tools/file_ops.py +261 -0
- dayhoff_tools/gcp.py +85 -0
- dayhoff_tools/h5.py +542 -0
- dayhoff_tools/kegg.py +37 -0
- dayhoff_tools/logs.py +27 -0
- dayhoff_tools/mmseqs.py +164 -0
- dayhoff_tools/sqlite.py +516 -0
- dayhoff_tools/structure.py +751 -0
- dayhoff_tools/uniprot.py +434 -0
- dayhoff_tools/warehouse.py +418 -0
- dayhoff_tools-1.0.0.dist-info/METADATA +122 -0
- dayhoff_tools-1.0.0.dist-info/RECORD +30 -0
- dayhoff_tools-1.0.0.dist-info/WHEEL +4 -0
- dayhoff_tools-1.0.0.dist-info/entry_points.txt +3 -0
dayhoff_tools/warehouse.py
@@ -0,0 +1,418 @@

```python
import os
import subprocess
from datetime import datetime
from io import StringIO
from pathlib import Path
from zoneinfo import ZoneInfo

from ruamel.yaml import YAML
```
```python
def human_readable_size(size_bytes):
    """Convert size in bytes to a human-readable format"""
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if size_bytes < 1024.0:
            return f"{size_bytes:.2f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.2f} PB"
```
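A quick illustration of the unit walk — these calls are not part of the package, just a sketch of the behavior above:

```python
# Illustrative only: one division per factor of 1024, two decimal places.
assert human_readable_size(0) == "0.00 B"
assert human_readable_size(1536) == "1.50 KB"        # 1536 / 1024
assert human_readable_size(5 * 1024**3) == "5.00 GB"
```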
```python
def format_yaml_with_meta_spacing(yaml_str: str) -> str:
    """
    Format YAML content with blank lines between top-level sections and meta subsections.
    Avoids adding duplicate blank lines.
    """
    lines = yaml_str.split("\n")
    formatted_lines = []
    in_meta = False
    meta_depth = 0
    last_line_blank = True  # Start true to avoid adding a blank line at the beginning

    for i, line in enumerate(lines):
        stripped_line = line.strip()
        if stripped_line == "meta:":
            in_meta = True
            meta_depth = 0
            if not last_line_blank:
                formatted_lines.append("")  # Add a blank line before 'meta:' if needed
            formatted_lines.append(line)
            if i + 1 < len(lines) and lines[i + 1].strip():  # Next line is not blank
                formatted_lines.append("")  # Add a blank line after 'meta:' only if needed
            last_line_blank = True
        elif in_meta:
            if stripped_line and not line.startswith(" "):
                in_meta = False
                if not last_line_blank:
                    formatted_lines.append("")  # Add a blank line before leaving 'meta' if needed
                formatted_lines.append(line)
                last_line_blank = False
            else:
                current_depth = len(line) - len(line.lstrip())
                if current_depth == 2 and meta_depth >= 2 and not last_line_blank:
                    formatted_lines.append("")  # Blank line before a new top-level category in meta
                formatted_lines.append(line)
                meta_depth = current_depth
                last_line_blank = not stripped_line
        else:
            if stripped_line and not line.startswith(" ") and not last_line_blank:
                formatted_lines.append("")  # Add a blank line before top-level keys if needed
            formatted_lines.append(line)
            last_line_blank = not stripped_line

    return "\n".join(formatted_lines).rstrip() + "\n"
```
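An illustration of the spacing this produces, traced by hand against the logic above (the .dvc content is a made-up minimal example, not a file from this package):

```python
# Illustrative only: how the formatter spaces out a minimal .dvc file.
raw = (
    "outs:\n"
    "- md5: abc123\n"
    "  path: data.fasta\n"
    "meta:\n"
    "  size: 1.00 KB\n"
    "  author: someone\n"
)
print(format_yaml_with_meta_spacing(raw))
# outs:
#
# - md5: abc123
#   path: data.fasta
#
# meta:
#
#   size: 1.00 KB
#
#   author: someone
```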
```python
def update_dvc_files(directory):
    """Traverse directory and update .dvc files with human-readable size, preserving existing formatting"""
    yaml = YAML()
    yaml.preserve_quotes = True
    yaml.indent(mapping=2, sequence=4, offset=2)

    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".dvc"):
                file_path = Path(root) / file
                with open(file_path, "r") as f:
                    dvc_content = yaml.load(f)

                if "outs" in dvc_content and dvc_content["outs"]:
                    size_bytes = dvc_content["outs"][0].get("size", 0)
                    human_size = human_readable_size(size_bytes)

                    if "meta" not in dvc_content:
                        dvc_content["meta"] = {}

                    # Build a new mapping with the fresh 'size' first; drop any
                    # stale 'size' from the existing meta so it doesn't
                    # overwrite the freshly computed value on re-runs.
                    new_meta = {"size": human_size}
                    new_meta.update(
                        {k: v for k, v in dvc_content["meta"].items() if k != "size"}
                    )
                    dvc_content["meta"] = new_meta

                    # Convert the updated content to a string and format it
                    string_stream = StringIO()
                    yaml.dump(dvc_content, string_stream)
                    formatted_content = format_yaml_with_meta_spacing(
                        string_stream.getvalue()
                    )

                    with open(file_path, "w") as f:
                        f.write(formatted_content)

                    print(f"Updated {file_path}")
```
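A minimal usage sketch (the directory is hypothetical):

```python
# Hypothetical usage — walks the tree and rewrites each .dvc file in place.
update_dvc_files("data/")
# Each tracked file gains (or refreshes) a meta section beginning with:
#   meta:
#     size: 1.50 KB
```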
```python
def import_from_warehouse(
    warehouse_path: str,
    output_folder: str = "same_as_warehouse",
    branch: str = "main",
    logger=None,
) -> str:
    """Import a file from warehouse, or update it if it exists already.

    Args:
        warehouse_path (str): The relative path to a .dvc file in the
                warehouse submodule of the current repo,
                eg 'warehouse/data/toy/2seqs.fasta.dvc'.
        output_folder (str): A folder where the file will be imported,
                eg 'data/raw/'. Defaults to the same folder as the
                original location in warehouse.
        branch (str): The branch of warehouse to import from.

    Returns: The path to the imported/updated file.
    """
    assert warehouse_path.startswith(
        "warehouse"
    ), "expected the relative path to start with 'warehouse'"
    assert warehouse_path.endswith(
        ".dvc"
    ), "expected the relative path to end with '.dvc'"

    if branch != "main":
        if logger:
            logger.warning("You should usually import data from main.")
        else:
            print("WARNING: You should usually import data from main.\n")

    # Remove any trailing slash
    if output_folder.endswith("/"):
        output_folder = output_folder[:-1]

    # The core path is the same within warehouse and in the
    # local data folder where the file will be imported by default
    core_path = warehouse_path[len("warehouse/") : -len(".dvc")]
    filename = core_path.split("/")[-1]

    command = [
        "dvc",
        "import",
        "https://github.com/dayhofflabs/warehouse",
        core_path,
    ]

    if output_folder == "same_as_warehouse":
        final_path = core_path
        final_folder = "/".join(final_path.split("/")[:-1])
    else:
        final_folder = output_folder
        final_path = final_folder + "/" + filename

    os.makedirs(final_folder, exist_ok=True)
    command += ["--out", final_path, "--rev", branch]

    if os.path.exists(final_path):
        # Update the existing file. This re-writes it if it doesn't match
        # origin, and also updates the .dvc file.
        if logger:
            logger.info("File already exists. Will `dvc update` instead of `dvc import`.")
        else:
            print("File already exists. Will `dvc update` instead of `dvc import`.")
        subprocess.run(
            ["dvc", "update", final_path + ".dvc", "--rev", branch], check=True
        )
    else:
        if logger:
            logger.info(f"Importing from warehouse: {final_path}")
        else:
            print(f"Importing from warehouse: {final_path}")
        subprocess.run(command, check=True)

    # Copy the meta section from warehouse_path to final_path.dvc
    yaml = YAML()
    yaml.preserve_quotes = True
    yaml.indent(mapping=2, sequence=4, offset=2)

    # Read the original warehouse .dvc file
    with open(warehouse_path, "r") as f:
        warehouse_content = yaml.load(f)

    # Read the newly created/updated .dvc file
    final_dvc_path = final_path + ".dvc"
    with open(final_dvc_path, "r") as f:
        final_dvc_content = yaml.load(f)

    # Copy the meta section if it exists in the warehouse file
    if "meta" in warehouse_content:
        final_dvc_content["meta"] = warehouse_content["meta"]

    # Convert the updated content to a string and format it
    string_stream = StringIO()
    yaml.dump(final_dvc_content, string_stream)
    formatted_content = format_yaml_with_meta_spacing(string_stream.getvalue())

    # Write the formatted content back to the file
    with open(final_dvc_path, "w") as f:
        f.write(formatted_content)

    if logger:
        logger.info(f"Updated {final_dvc_path} with meta section from {warehouse_path}")
    else:
        print(f"Updated {final_dvc_path} with meta section from {warehouse_path}")

    return final_path
```
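A hypothetical call, showing roughly what gets executed under the hood (the file paths are examples, not data shipped with this package):

```python
# Hypothetical usage — example paths only.
local_path = import_from_warehouse(
    "warehouse/data/toy/2seqs.fasta.dvc",
    output_folder="data/raw/",
)
# First run shells out to roughly:
#   dvc import https://github.com/dayhofflabs/warehouse data/toy/2seqs.fasta \
#       --out data/raw/2seqs.fasta --rev main
# If data/raw/2seqs.fasta already exists, it runs instead:
#   dvc update data/raw/2seqs.fasta.dvc --rev main
# Either way, the meta section of warehouse/data/toy/2seqs.fasta.dvc is then
# copied into data/raw/2seqs.fasta.dvc.
```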
```python
def add_to_warehouse(
    warehouse_path: str,
    ancestor_dvc_paths: list[str],
) -> str:
    """Upload a file to warehouse, using `dvc add`, and edit its .dvc file
    to add information about ancestors.

    Args:
        warehouse_path (str): The relative path (in warehouse) where the new
                data file should go.
        ancestor_dvc_paths (list[str]): A list of all the paths to the frozen
                .dvc files that were produced when importing the
                ancestors of this file.

    Returns: The path to the new .dvc file.

    Raises:
        ValueError: If the function is executed outside of the repo's root directory.
        ValueError: If an ancestor .dvc file is not frozen.
    """
    print(f"Uploading to Warehouse: {warehouse_path}")
    assert warehouse_path.startswith(
        "warehouse/"
    ), "expected the relative path to start with 'warehouse/'"
    warehouse_path = warehouse_path.replace("warehouse/", "")

    # Process each ancestor .dvc file
    ancestors = []
    yaml_loader = YAML()
    yaml_loader.preserve_quotes = True
    yaml_loader.indent(mapping=2, sequence=4, offset=2)
    for path in ancestor_dvc_paths:
        assert path.endswith(".dvc"), "ERROR: Not a .dvc file"
        with open(path, "r") as file:
            ancestor_content = yaml_loader.load(file)

        # Check that the .dvc file is frozen
        if (
            "frozen" not in ancestor_content
            or ancestor_content["frozen"] is not True
        ):
            raise ValueError(
                f"Error: Not a frozen .dvc file generated by 'dvc import': {path}"
            )

        ancestor_info = {
            "name": os.path.basename(ancestor_content["outs"][0]["path"]),
            "file_md5_hash": ancestor_content["outs"][0]["md5"],
            "repo_url": ancestor_content["deps"][0]["repo"]["url"],
            "repo_path": ancestor_content["deps"][0]["path"],
            "commit_hash": ancestor_content["deps"][0]["repo"]["rev_lock"],
        }

        # Add the optional "git_branch" field if available
        if "rev" in ancestor_content["deps"][0]["repo"]:
            ancestor_info["git_branch"] = ancestor_content["deps"][0]["repo"]["rev"]

        ancestors.append(ancestor_info)

    # Change the working directory to the warehouse folder
    os.chdir("warehouse")

    # Add and push the data file
    subprocess.run(["dvc", "add", warehouse_path], check=True)

    # Read the generated .dvc file
    dvc_file_path = f"{warehouse_path}.dvc"
    with open(dvc_file_path, "r") as file:
        dvc_content = yaml_loader.load(file)

    # Add the ancestors' information
    dvc_content["ancestors"] = ancestors

    # Get the human-readable size
    size_bytes = dvc_content["outs"][0]["size"]
    human_size = human_readable_size(size_bytes)

    # Write this, plus more metadata, back to the .dvc file
    today = datetime.now(ZoneInfo("UTC")).strftime("%Y-%m-%d")

    # Use ruamel.yaml's ScalarString for block-style literal formatting
    from ruamel.yaml.scalarstring import LiteralScalarString

    description = LiteralScalarString("MISSING_METADATA\nMISSING_METADATA")

    yaml_content = {
        "outs": dvc_content["outs"],
        "meta": {
            "size": human_size,
            "date_created": today,
            "author": "MISSING_METADATA",
            "description": description,
            "transformation_source_code": [
                "MISSING_METADATA",
            ],
            "ancestors": dvc_content["ancestors"],
        },
    }

    # Convert the updated content to a string and format it
    string_stream = StringIO()
    yaml_loader.dump(yaml_content, string_stream)
    formatted_content = format_yaml_with_meta_spacing(string_stream.getvalue())

    # Write the formatted content back to the file
    with open(dvc_file_path, "w") as file:
        file.write(formatted_content)

    # Point the user to the updated .dvc file
    print(f"\033[92m\n\nMade .dvc file: {dvc_file_path}\033[0m")
    print("\033[92mRemember to manually fill out the missing metadata fields.\n\033[0m")

    subprocess.run(["dvc", "push"], check=True)
    os.chdir("..")

    return "warehouse/" + dvc_file_path
```
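A sketch of the intended provenance flow, under the assumption that the ancestor was first imported with `import_from_warehouse` (the file names are hypothetical):

```python
# Hypothetical usage — file names are examples only.
ancestor = import_from_warehouse("warehouse/data/toy/2seqs.fasta.dvc")
# ... transform data/toy/2seqs.fasta into warehouse/data/derived/filtered.fasta ...
new_dvc = add_to_warehouse(
    "warehouse/data/derived/filtered.fasta",
    ancestor_dvc_paths=[ancestor + ".dvc"],
)
# new_dvc == "warehouse/data/derived/filtered.fasta.dvc"; its meta.ancestors list
# records each ancestor's name, md5, repo URL, repo path, and commit hash.
```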
```python
def get_from_warehouse(
    warehouse_path: str,
    output_folder: str = "same_as_warehouse",
    branch: str = "main",
    logger=None,
) -> str:
    """`dvc get` a file from warehouse.

    Args:
        warehouse_path (str): The relative path to a .dvc file in the
                warehouse submodule of the current repo,
                eg 'warehouse/data/toy/2seqs.fasta.dvc'.
        output_folder (str): A folder where the file will be downloaded,
                eg 'data/raw/'. Defaults to the same folder as the
                original location in warehouse.
        branch (str): The branch of warehouse to get from.

    Returns: The path to the downloaded file.

    Raises:
        ValueError: If the function is executed outside of the repo's root directory.
    """
    assert warehouse_path.startswith(
        "warehouse"
    ), "expected the relative path to start with 'warehouse'"
    assert warehouse_path.endswith(
        ".dvc"
    ), "expected the relative path to end with '.dvc'"

    if branch != "main":
        if logger:
            logger.warning("You should usually import data from main.")
        else:
            print("WARNING: You should usually import data from main.\n")

    # Remove any trailing slash
    if output_folder.endswith("/"):
        output_folder = output_folder[:-1]

    # The core path is the same within warehouse and in the
    # local data folder where the file will be downloaded by default
    core_path = warehouse_path[len("warehouse/") : -len(".dvc")]
    filename = core_path.split("/")[-1]

    command = [
        "dvc",
        "get",
        "https://github.com/dayhofflabs/warehouse",
        core_path,
    ]

    if output_folder == "same_as_warehouse":
        final_path = core_path
        final_folder = "/".join(final_path.split("/")[:-1])
    else:
        final_folder = output_folder
        final_path = final_folder + "/" + filename

    os.makedirs(final_folder, exist_ok=True)
    command += ["--out", final_path, "--rev", branch]

    if os.path.exists(final_path):
        # The file already exists; leave it untouched.
        if logger:
            logger.info("File already exists. Will exit without changing.")
        else:
            print("File already exists. Will exit without changing.")
    else:
        if logger:
            logger.info(f"Getting from warehouse: {final_path}")
        else:
            print(f"Getting from warehouse: {final_path}")
        subprocess.run(command, check=True)

    return final_path
```
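For contrast with `import_from_warehouse` above, a hypothetical call: `dvc get` downloads without creating a .dvc tracking file, so an existing file is left untouched rather than updated.

```python
# Hypothetical usage — example paths only.
path = get_from_warehouse(
    "warehouse/data/toy/2seqs.fasta.dvc",
    output_folder="scratch/",
)
# Shells out to roughly:
#   dvc get https://github.com/dayhofflabs/warehouse data/toy/2seqs.fasta \
#       --out scratch/2seqs.fasta --rev main
# No scratch/2seqs.fasta.dvc is written, so the download is untracked.
```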
dayhoff_tools-1.0.0.dist-info/METADATA
@@ -0,0 +1,122 @@

```text
Metadata-Version: 2.3
Name: dayhoff-tools
Version: 1.0.0
Summary: Common tools for all the repos at Dayhoff Labs
Author: Daniel Martin-Alarcon
Author-email: dma@dayhofflabs.com
Requires-Python: >=3.10,<4.0
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Provides-Extra: all
Provides-Extra: core
Provides-Extra: dev
Requires-Dist: biopython (>=1.84) ; extra == "all"
Requires-Dist: biopython (>=1.84) ; extra == "core"
Requires-Dist: black (>=25.1.0) ; extra == "all"
Requires-Dist: black (>=25.1.0) ; extra == "dev"
Requires-Dist: boto3 (>=1.36.8) ; extra == "all"
Requires-Dist: boto3 (>=1.36.8) ; extra == "core"
Requires-Dist: colorlog (>=6.8.2) ; extra == "all"
Requires-Dist: colorlog (>=6.8.2) ; extra == "dev"
Requires-Dist: docker (>=7.1.0) ; extra == "all"
Requires-Dist: docker (>=7.1.0) ; extra == "core"
Requires-Dist: dvc (>=3.48.2) ; extra == "all"
Requires-Dist: dvc (>=3.48.2) ; extra == "dev"
Requires-Dist: dvc-gs (>=3.0.1) ; extra == "all"
Requires-Dist: dvc-gs (>=3.0.1) ; extra == "dev"
Requires-Dist: fair-esm (>=2.0.0) ; extra == "all"
Requires-Dist: fair-esm (>=2.0.0) ; extra == "core"
Requires-Dist: firebase-admin (>=6.5.0)
Requires-Dist: flake8 (>=7.0.0) ; extra == "all"
Requires-Dist: flake8 (>=7.0.0) ; extra == "dev"
Requires-Dist: h5py (>=3.11.0) ; extra == "all"
Requires-Dist: h5py (>=3.11.0) ; extra == "core"
Requires-Dist: isort (>=5.13.2) ; extra == "all"
Requires-Dist: isort (>=5.13.2) ; extra == "dev"
Requires-Dist: numpy (<2.0.0) ; extra == "all"
Requires-Dist: numpy (<2.0.0) ; extra == "dev"
Requires-Dist: pandas (>=2.2.3) ; extra == "dev"
Requires-Dist: pylance (>=0.10.2) ; extra == "all"
Requires-Dist: pylance (>=0.10.2) ; extra == "dev"
Requires-Dist: pylint (>=3.1.0) ; extra == "all"
Requires-Dist: pylint (>=3.1.0) ; extra == "dev"
Requires-Dist: pytest (>=8.0.2) ; extra == "all"
Requires-Dist: pytest (>=8.0.2) ; extra == "dev"
Requires-Dist: pytest-cov (>=4.1.0) ; extra == "all"
Requires-Dist: pytest-cov (>=4.1.0) ; extra == "dev"
Requires-Dist: pytest-mock (>=3.12.0) ; extra == "all"
Requires-Dist: pytest-mock (>=3.12.0) ; extra == "dev"
Requires-Dist: pyyaml (>=6.0)
Requires-Dist: questionary (>=2.0.1) ; extra == "all"
Requires-Dist: questionary (>=2.0.1) ; extra == "core"
Requires-Dist: rdkit-pypi (>=2022.9.5) ; extra == "all"
Requires-Dist: rdkit-pypi (>=2022.9.5) ; extra == "dev"
Requires-Dist: requests (>=2.31.0)
Requires-Dist: torch (>=1.10.0) ; extra == "all"
Requires-Dist: torch (>=1.10.0) ; extra == "dev"
Requires-Dist: torchvision (>=0.11.0) ; extra == "all"
Requires-Dist: torchvision (>=0.11.0) ; extra == "dev"
Requires-Dist: transformers (>=4.20.0) ; extra == "all"
Requires-Dist: transformers (>=4.36.2) ; extra == "dev"
Requires-Dist: typer (>=0.9.0)
Description-Content-Type: text/markdown
```
# dayhoff-tools

A set of small, sharp tools for everyone at Dayhoff.

## Hosting and Auth

This repo uses Poetry to build and publish a package to GCP Artifact Registry, at `https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/`. This depends on a Poetry plugin that's now in the standard chassis setup (`keyrings.google-artifactregistry-auth`), and also on the active service account having read access to Artifact Registry. That much is set up for the standard dev container service account, but may not be available to other intended users.
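For orientation, a manual publish to that registry might look like the sketch below. This is an assumption based on standard Poetry usage, not a command sequence from this repo; `dh wheel` (described below) is the supported path.

```sh
# Sketch only — assumes the publish target has been registered with Poetry.
poetry config repositories.pypirate \
    https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/
poetry publish --build --repository pypirate
```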
## CLI commands

Unlike the repos that depend on dayhoff-tools, this repo requires you to install the package explicitly before using the CLI:

```sh
poetry install
```
## Publish a new version

1. Update the version number in `pyproject.toml`
2. Run `dh wheel`
3. In other repos, run `poetry update dayhoff-tools`

If you want to overwrite an existing wheel, you'll have to delete it manually from the `dist` folder and also from the [Artifact Registry repo](https://console.cloud.google.com/artifacts/python/enzyme-discovery/us-central1/pypirate/dayhoff-tools). Steps 1 and 3 map onto plain Poetry commands, as sketched below.
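A sketch of those Poetry equivalents; `dh wheel` is this package's own CLI wrapper, and its exact behavior is defined by the CLI modules above rather than by these commands:

```sh
# Sketch only — plain-Poetry equivalents of steps 1 and 3.
poetry version patch          # step 1: bump the version in pyproject.toml
dh wheel                      # step 2: build and publish the wheel
poetry update dayhoff-tools   # step 3: run this in each consuming repo
```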
## Install in other repos

Installing this library is tricky because we need GCS authentication and also a couple of plugins to install this with either Pip or Poetry. These have been incorporated into `chassis`, but it's worth noting here what the various parts are. All this info came from this [Medium post](https://medium.com/google-cloud/python-packages-via-gcps-artifact-registry-ce1714f8e7c1).

1. Get a Service Account with read access to Artifact Registry (such as `github-actions`, which I made for this purpose).
2. Export the SA key file, copy it to your repo, and make it available through this envvar: `export GOOGLE_APPLICATION_CREDENTIALS=github_actions_key.json`
### ... with Pip

1. `pip install keyring`
2. `pip install keyrings.google-artifactregistry-auth`
3. `pip install --upgrade dayhoff-tools --index-url https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/simple/`
### ... with Poetry

1. Add this plugin: `poetry self add keyrings.google-artifactregistry-auth`
2. Add these sections to `pyproject.toml`. Note that dayhoff-tools is in a separate group, `pypirate`, that installs separately from the others.

```toml
[tool.poetry.group.pypirate.dependencies]
dayhoff-tools = {version = "*", source = "pypirate"}

[[tool.poetry.source]]
name = "pypirate"
url = "https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/simple/"
priority = "supplemental"
```

3. When building a dev container, or in other circumstances when you can't easily authenticate as above, run `poetry install --without pypirate`.
4. Otherwise, just run `poetry install`.
5. To ensure you have the latest version, run `poetry update dayhoff-tools`.
dayhoff_tools-1.0.0.dist-info/RECORD
@@ -0,0 +1,30 @@

```text
dayhoff_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
dayhoff_tools/chemistry/standardizer.py,sha256=uMn7VwHnx02nc404eO6fRuS4rsl4dvSPf2ElfZDXEpY,11188
dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P-_M,2257
dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
dayhoff_tools/cli/main.py,sha256=pIVwkeewZcTCAl6lM7EOXjggWDBzTR_JF5Dtwndvvfw,2978
dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
dayhoff_tools/cli/utility_commands.py,sha256=2J69bfOvQV9E8wWynYhUXZ89BfhdxbGm1OXxXkZgVC0,8760
dayhoff_tools/deployment/base.py,sha256=u-AjbtHnFLoLt33dhYXHIpV-6jcieMEHHGBGN_U9Hm0,15626
dayhoff_tools/deployment/deploy_aws.py,sha256=O0gQxHioSU_sNU8T8MD4wSOPvWc--V8eRRZzlRu035I,16446
dayhoff_tools/deployment/deploy_gcp.py,sha256=DxBM4sUzwPK9RWLP9bSfr38n1HHl-TVrp4TsbdN8pUA,5795
dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqqDkpaIOaAn_4,26027
dayhoff_tools/deployment/job_runner.py,sha256=4tmdplpvqSE9bVxRWHo2U5kwkYrYod0Uwzpg2Q7qG5o,4850
dayhoff_tools/deployment/processors.py,sha256=D0oI0az0mWBh7aNZbLUipcQePTVquxuSBUMMCReRPfU,4656
dayhoff_tools/deployment/swarm.py,sha256=MGcS2_x4RNFtnVjWlU_SwNfhICz8NlGYr9cYBK4ZKDA,21688
dayhoff_tools/embedders.py,sha256=CRgcb2z7KeeFrRQawyUZuJ4Yi0-J5jSr0hwuRhjG_FI,36513
dayhoff_tools/fasta.py,sha256=c8QXZ93AxvPMnIjLY8iENei7YGMMAC4npvxv-EJC2-o,40593
dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
dayhoff_tools/gcp.py,sha256=uCeEskhbEwJIYpN6ne6siT1dbpTizCjjel-hRe0kReE,3030
dayhoff_tools/h5.py,sha256=j1nxxaiHsMidVX_XwB33P1Pz9d7K8ZKiDZwJWQUUQSY,21158
dayhoff_tools/kegg.py,sha256=SaVbumB4leNTSevamT29yIqHurejw1wmcCC32D5Qyco,965
dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
dayhoff_tools/mmseqs.py,sha256=uEYzRsthJAlUeRYNCfFtJFE73SbuhfUIS1ygYFkhmtw,6435
dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
dayhoff_tools/structure.py,sha256=ufN3gAodQxhnt7psK1VTQeu9rKERmo_PhoxIbB4QKMw,27660
dayhoff_tools/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJqE4,16456
dayhoff_tools/warehouse.py,sha256=TqV8nex1AluNaL4JuXH5zuu9P7qmE89lSo6f_oViy6U,14965
dayhoff_tools-1.0.0.dist-info/METADATA,sha256=PTETz-zfValRWDwYlgilMcmfrK7lHaAbT6iXLTCinKs,5773
dayhoff_tools-1.0.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
dayhoff_tools-1.0.0.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
dayhoff_tools-1.0.0.dist-info/RECORD,,
```