dayhoff-tools 1.1.41__py3-none-any.whl → 1.1.43__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- dayhoff_tools/deployment/deploy_aws.py +10 -0
- dayhoff_tools/deployment/processors.py +0 -109
- dayhoff_tools/deployment/swarm.py +47 -12
- {dayhoff_tools-1.1.41.dist-info → dayhoff_tools-1.1.43.dist-info}/METADATA +1 -1
- {dayhoff_tools-1.1.41.dist-info → dayhoff_tools-1.1.43.dist-info}/RECORD +7 -7
- {dayhoff_tools-1.1.41.dist-info → dayhoff_tools-1.1.43.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.1.41.dist-info → dayhoff_tools-1.1.43.dist-info}/entry_points.txt +0 -0
dayhoff_tools/deployment/deploy_aws.py

```diff
@@ -374,6 +374,11 @@ def create_or_update_job_definition(
         "timeout": {"attemptDurationSeconds": aws_config.get("timeout_seconds", 86400)},
     }

+    # Add tags if specified in config
+    if "tags" in aws_config:
+        job_definition_args["tags"] = aws_config["tags"]
+        print(f"Adding tags to job definition: {aws_config['tags']}")
+
     # Register new revision using the session client
     response = batch.register_job_definition(**job_definition_args)

@@ -472,6 +477,11 @@ def submit_aws_batch_job(
         print(f"Setting retry attempts to {retry_attempts}")
         job_submit_args["retryStrategy"] = {"attempts": retry_attempts}

+    # Add tags if specified in config
+    if "tags" in aws_config:
+        job_submit_args["tags"] = aws_config["tags"]
+        print(f"Adding tags to batch job: {aws_config['tags']}")
+
     # Submit the job using the session client
     response = batch.submit_job(**job_submit_args)

```
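Both new branches simply forward a `tags` mapping from the deployment config to AWS Batch. Below is a minimal sketch of how that might look from the caller's side, assuming `aws_config` is the parsed config dictionary; every key and value other than `tags` handling is illustrative and not taken from the package:

```python
import boto3

# Hypothetical config snippet; only the "tags" handling mirrors the diff above.
aws_config = {
    "timeout_seconds": 86400,
    "tags": {"project": "example-project", "owner": "example-team"},
}

batch = boto3.client("batch")

job_definition_args = {
    "jobDefinitionName": "example-job-definition",  # illustrative name
    "type": "container",
    "timeout": {"attemptDurationSeconds": aws_config.get("timeout_seconds", 86400)},
}

# Same pattern as the diff: forward tags verbatim when present.
if "tags" in aws_config:
    job_definition_args["tags"] = aws_config["tags"]

# Both register_job_definition and submit_job accept a `tags` parameter
# (a string-to-string mapping), so the same dictionary can be reused for both.
# The real call also needs container properties, credentials, etc., so it is
# left commented out here:
# response = batch.register_job_definition(**job_definition_args)
```

Presumably the motivation is that tags on job definitions and jobs show up in the AWS console and can feed cost-allocation reporting.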
dayhoff_tools/deployment/processors.py

```diff
@@ -1,9 +1,5 @@
-import csv
 import json
 import logging
-import os
-import shlex
-import shutil
 import subprocess
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -295,108 +291,3 @@ class InterProScanProcessor(Processor):
         cleaned_input_file_path.unlink()

         return str(chunk_output_dir)
-
-
-class BoltzPredictor(Processor):
-    """Processor for running Boltz docking predictions.
-
-    This class wraps the Boltz docking tool to predict protein structures
-    from sequence data.
-    """
-
-    def __init__(self, num_workers: int, boltz_options: str | None = None):
-        """Initialize the BoltzPredictor.
-
-        Args:
-            num_workers: Number of worker threads to use as a default.
-                         This can be overridden if --num_workers is present
-                         in boltz_options.
-            boltz_options: A string containing additional command-line options
-                           to pass to the Boltz predictor. Options should be
-                           space-separated (e.g., "--option1 value1 --option2").
-        """
-        self.num_workers = num_workers
-        self.boltz_options = boltz_options
-
-    def run(self, input_file: str) -> str:
-        """Run Boltz prediction on the input file.
-
-        Constructs the command using the input file, default number of workers,
-        and any additional options provided via `boltz_options`. If `--num_workers`
-        is specified in `boltz_options`, it overrides the default `num_workers`.
-
-        Args:
-            input_file: Path to the input file containing sequences
-
-        Returns:
-            Path to the output directory created by Boltz
-
-        Raises:
-            subprocess.CalledProcessError: If Boltz prediction fails
-        """
-        # Determine expected output directory name
-        input_base = os.path.splitext(os.path.basename(input_file))[0]
-        expected_output_dir = f"boltz_results_{input_base}"
-        logger.info(f"Expected output directory: {expected_output_dir}")
-
-        # Start building the command
-        cmd = ["boltz", "predict", input_file]
-
-        # Parse additional options if provided
-        additional_args = []
-        num_workers_in_opts = False
-        if self.boltz_options:
-            try:
-                parsed_opts = shlex.split(self.boltz_options)
-                additional_args.extend(parsed_opts)
-                if "--num_workers" in parsed_opts:
-                    num_workers_in_opts = True
-                    logger.info(
-                        f"Using --num_workers from BOLTZ_OPTIONS: {self.boltz_options}"
-                    )
-            except ValueError as e:
-                logger.error(f"Error parsing BOLTZ_OPTIONS '{self.boltz_options}': {e}")
-                # Decide if we should raise an error or proceed without options
-                # For now, proceed without the additional options
-                additional_args = []  # Clear potentially partially parsed args
-
-        # Add num_workers if not specified in options
-        if not num_workers_in_opts:
-            logger.info(f"Using default num_workers: {self.num_workers}")
-            cmd.extend(["--num_workers", str(self.num_workers)])
-
-        # Add the parsed additional arguments
-        cmd.extend(additional_args)
-
-        # Log the final command
-        # Use shlex.join for safer command logging, especially if paths/args have spaces
-        try:
-            safe_cmd_str = shlex.join(cmd)
-            logger.info(f"Running command: {safe_cmd_str}")
-        except AttributeError:  # shlex.join is Python 3.8+
-            logger.info(f"Running command: {' '.join(cmd)}")
-
-        # Stream output in real-time
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-        )
-
-        stdout = process.stdout
-        if stdout:
-            for line in iter(stdout.readline, ""):
-                logger.info(f"BOLTZ: {line.rstrip()}")
-
-        # Wait for process to complete
-        return_code = process.wait()
-        if return_code != 0:
-            logger.error(f"Boltz prediction failed with exit code {return_code}")
-            raise subprocess.CalledProcessError(return_code, cmd)
-
-        logger.info(
-            f"Boltz prediction completed successfully. Output in {expected_output_dir}"
-        )
-        return expected_output_dir
```
dayhoff_tools/deployment/swarm.py

```diff
@@ -128,23 +128,58 @@ def publish_cards(
     names: List[str],
     firestore_collection: str,
 ):
-    """Publish cards to Firebase
-
+    """Publish cards to Firebase using batch writes for optimal performance.
+
+    Expects a list of filenames (not full paths), which will each be published
+    as a new document in the collection. Uses Firestore batch writes to minimize
+    network round-trips and improve performance.
+
+    Args:
+        names: List of packet filenames to publish as cards
+        firestore_collection: Name of the Firestore collection to write to
+    """
+    if not names:
+        print("No cards to upload.")
+        return

     initialize_firebase()
-
+    db = firestore.client()
+    collection = db.collection(firestore_collection)
+
+    # Firestore batch limit is 500 operations
+    BATCH_SIZE = 500
+    total_cards = len(names)
+    cards_processed = 0
+
+    # Process names in batches of up to 500
+    for i in range(0, total_cards, BATCH_SIZE):
+        batch = db.batch()
+        batch_names = names[i : i + BATCH_SIZE]
+
+        # Add all operations for this batch
+        for name in batch_names:
+            doc_ref = collection.document()  # Auto-generate document ID
+            batch.set(
+                doc_ref,
+                {
+                    "status": "available",
+                    "packet_filename": name,
+                    "created": datetime.now(ZoneInfo("America/Los_Angeles")),
+                },
+            )

-
-
-
-
-
-
-            }
+        # Commit the entire batch atomically
+        batch.commit()
+        cards_processed += len(batch_names)
+
+        print(
+            f"Batch {i // BATCH_SIZE + 1}: Created {len(batch_names)} cards "
+            f"({cards_processed}/{total_cards} total)"
         )
-        print(f"Creating card {name}")

-    print(
+    print(
+        f"Successfully uploaded {total_cards} cards in {(total_cards + BATCH_SIZE - 1) // BATCH_SIZE} batch(es)."
+    )


 @transactional
```
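For context on the pattern above: a Firestore `WriteBatch` is committed in a single request, but the new code's own comment notes a 500-operation cap per batch, which is why `publish_cards` now chunks `names` before committing. A minimal standalone sketch of the same chunked batch-write idea, assuming `firebase_admin` has already been initialized elsewhere; the function and argument names here are illustrative, not part of the package:

```python
from firebase_admin import firestore


def write_docs_in_batches(docs: list[dict], collection_name: str, batch_size: int = 500) -> None:
    """Write documents in chunks of at most `batch_size` operations per commit."""
    db = firestore.client()
    collection = db.collection(collection_name)

    for start in range(0, len(docs), batch_size):
        batch = db.batch()
        for data in docs[start : start + batch_size]:
            # collection.document() with no ID auto-generates one, as in publish_cards.
            batch.set(collection.document(), data)
        batch.commit()  # one round-trip per chunk instead of one per document
```

Called with, say, 1,200 documents, this issues three commits (500 + 500 + 200) rather than 1,200 individual writes.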
{dayhoff_tools-1.1.41.dist-info → dayhoff_tools-1.1.43.dist-info}/RECORD

```diff
@@ -7,12 +7,12 @@ dayhoff_tools/cli/main.py,sha256=47EGb28ALaYFc7oAUGlY1D66AIDmc4RZiXxN-gPVrpQ,451
 dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
 dayhoff_tools/cli/utility_commands.py,sha256=ER4VrJt4hu904MwrcltUXjwBWT4uFrP-aPXjdXyT3F8,24685
 dayhoff_tools/deployment/base.py,sha256=8tXwsPYvRo-zV-aNhHw1c7Rji-KWg8S5xoCCznFnVVI,17412
-dayhoff_tools/deployment/deploy_aws.py,sha256=
+dayhoff_tools/deployment/deploy_aws.py,sha256=GvZpE2YIFA5Dl9rkAljFjtUypmPDNbWgw8NicHYTP24,18265
 dayhoff_tools/deployment/deploy_gcp.py,sha256=xgaOVsUDmP6wSEMYNkm1yRNcVskfdz80qJtCulkBIAM,8860
 dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqqDkpaIOaAn_4,26027
 dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
-dayhoff_tools/deployment/processors.py,sha256=
-dayhoff_tools/deployment/swarm.py,sha256=
+dayhoff_tools/deployment/processors.py,sha256=LM0CQbr4XCb3AtLbrcuDQm4tYPXsoNqgVJ4WQYDjzJc,12406
+dayhoff_tools/deployment/swarm.py,sha256=YJfvVOcAS8cYcIj2fiN4qwC2leh0I9w5A4px8ZWSF6g,22833
 dayhoff_tools/embedders.py,sha256=fRkyWjHo8OmbNUBY_FwrgfvyiLqpmrpI57UAb1Szn1Y,36609
 dayhoff_tools/fasta.py,sha256=_kA2Cpiy7JAGbBqLrjElkzbcUD_p-nO2d5Aj1LVmOvc,50509
 dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
 dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
 dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
 dayhoff_tools/warehouse.py,sha256=TqV8nex1AluNaL4JuXH5zuu9P7qmE89lSo6f_oViy6U,14965
-dayhoff_tools-1.1.
-dayhoff_tools-1.1.
-dayhoff_tools-1.1.
-dayhoff_tools-1.1.
+dayhoff_tools-1.1.43.dist-info/METADATA,sha256=S3WFgeHSXhXJUg6E8nHcSBmbGktzd9B-2A7LIdX1c9k,2843
+dayhoff_tools-1.1.43.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+dayhoff_tools-1.1.43.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
+dayhoff_tools-1.1.43.dist-info/RECORD,,
```
|
File without changes
|
File without changes
|