dayhoff-tools 1.1.41__py3-none-any.whl → 1.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dayhoff_tools/deployment/deploy_aws.py
@@ -374,6 +374,11 @@ def create_or_update_job_definition(
         "timeout": {"attemptDurationSeconds": aws_config.get("timeout_seconds", 86400)},
     }
 
+    # Add tags if specified in config
+    if "tags" in aws_config:
+        job_definition_args["tags"] = aws_config["tags"]
+        print(f"Adding tags to job definition: {aws_config['tags']}")
+
     # Register new revision using the session client
     response = batch.register_job_definition(**job_definition_args)
 
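Both tag hunks in deploy_aws.py read the new "tags" key from the same aws_config mapping that already supplies timeout_seconds. A minimal sketch of a config fragment that would exercise the new branch; every key other than timeout_seconds and tags, and all tag values, are illustrative assumptions rather than anything this diff confirms:

    # Hypothetical aws_config fragment (shape inferred from the diff above)
    aws_config = {
        "timeout_seconds": 86400,  # already consumed by the job definition
        "tags": {                  # new: forwarded verbatim to AWS Batch
            "project": "dayhoff",
            "team": "deployment",
        },
    }

The same guard-and-forward block is added to submit_aws_batch_job in the next hunk of the same file.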
@@ -472,6 +477,11 @@ def submit_aws_batch_job(
         print(f"Setting retry attempts to {retry_attempts}")
         job_submit_args["retryStrategy"] = {"attempts": retry_attempts}
 
+    # Add tags if specified in config
+    if "tags" in aws_config:
+        job_submit_args["tags"] = aws_config["tags"]
+        print(f"Adding tags to batch job: {aws_config['tags']}")
+
     # Submit the job using the session client
     response = batch.submit_job(**job_submit_args)
 
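On the boto3 side, both register_job_definition and submit_job accept an optional tags parameter (a mapping of string keys to string values), so the value from aws_config can be passed through unchanged. A minimal sketch of the resulting submit call, with placeholder names everywhere except the tags parameter itself:

    import boto3

    batch = boto3.session.Session().client("batch")

    # jobName/jobQueue/jobDefinition are placeholders; the tags mapping
    # is attached to the Batch job itself and can surface in AWS
    # cost-allocation reports once the tag keys are activated.
    response = batch.submit_job(
        jobName="example-job",
        jobQueue="example-queue",
        jobDefinition="example-def",
        tags={"project": "dayhoff"},
    )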
dayhoff_tools/deployment/processors.py
@@ -1,9 +1,5 @@
-import csv
 import json
 import logging
-import os
-import shlex
-import shutil
 import subprocess
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -295,108 +291,3 @@ class InterProScanProcessor(Processor):
         cleaned_input_file_path.unlink()
 
         return str(chunk_output_dir)
-
-
-class BoltzPredictor(Processor):
-    """Processor for running Boltz docking predictions.
-
-    This class wraps the Boltz docking tool to predict protein structures
-    from sequence data.
-    """
-
-    def __init__(self, num_workers: int, boltz_options: str | None = None):
-        """Initialize the BoltzPredictor.
-
-        Args:
-            num_workers: Number of worker threads to use as a default.
-                This can be overridden if --num_workers is present
-                in boltz_options.
-            boltz_options: A string containing additional command-line options
-                to pass to the Boltz predictor. Options should be
-                space-separated (e.g., "--option1 value1 --option2").
-        """
-        self.num_workers = num_workers
-        self.boltz_options = boltz_options
-
-    def run(self, input_file: str) -> str:
-        """Run Boltz prediction on the input file.
-
-        Constructs the command using the input file, default number of workers,
-        and any additional options provided via `boltz_options`. If `--num_workers`
-        is specified in `boltz_options`, it overrides the default `num_workers`.
-
-        Args:
-            input_file: Path to the input file containing sequences
-
-        Returns:
-            Path to the output directory created by Boltz
-
-        Raises:
-            subprocess.CalledProcessError: If Boltz prediction fails
-        """
-        # Determine expected output directory name
-        input_base = os.path.splitext(os.path.basename(input_file))[0]
-        expected_output_dir = f"boltz_results_{input_base}"
-        logger.info(f"Expected output directory: {expected_output_dir}")
-
-        # Start building the command
-        cmd = ["boltz", "predict", input_file]
-
-        # Parse additional options if provided
-        additional_args = []
-        num_workers_in_opts = False
-        if self.boltz_options:
-            try:
-                parsed_opts = shlex.split(self.boltz_options)
-                additional_args.extend(parsed_opts)
-                if "--num_workers" in parsed_opts:
-                    num_workers_in_opts = True
-                    logger.info(
-                        f"Using --num_workers from BOLTZ_OPTIONS: {self.boltz_options}"
-                    )
-            except ValueError as e:
-                logger.error(f"Error parsing BOLTZ_OPTIONS '{self.boltz_options}': {e}")
-                # Decide if we should raise an error or proceed without options
-                # For now, proceed without the additional options
-                additional_args = []  # Clear potentially partially parsed args
-
-        # Add num_workers if not specified in options
-        if not num_workers_in_opts:
-            logger.info(f"Using default num_workers: {self.num_workers}")
-            cmd.extend(["--num_workers", str(self.num_workers)])
-
-        # Add the parsed additional arguments
-        cmd.extend(additional_args)
-
-        # Log the final command
-        # Use shlex.join for safer command logging, especially if paths/args have spaces
-        try:
-            safe_cmd_str = shlex.join(cmd)
-            logger.info(f"Running command: {safe_cmd_str}")
-        except AttributeError:  # shlex.join is Python 3.8+
-            logger.info(f"Running command: {' '.join(cmd)}")
-
-        # Stream output in real-time
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-        )
-
-        stdout = process.stdout
-        if stdout:
-            for line in iter(stdout.readline, ""):
-                logger.info(f"BOLTZ: {line.rstrip()}")
-
-        # Wait for process to complete
-        return_code = process.wait()
-        if return_code != 0:
-            logger.error(f"Boltz prediction failed with exit code {return_code}")
-            raise subprocess.CalledProcessError(return_code, cmd)
-
-        logger.info(
-            f"Boltz prediction completed successfully. Output in {expected_output_dir}"
-        )
-        return expected_output_dir
dayhoff_tools/deployment/swarm.py
@@ -128,23 +128,58 @@ def publish_cards(
     names: List[str],
     firestore_collection: str,
 ):
-    """Publish cards to Firebase. Expects a list of filenames (not full paths),
-    which will each be published as a new document in the collection."""
+    """Publish cards to Firebase using batch writes for optimal performance.
+
+    Expects a list of filenames (not full paths), which will each be published
+    as a new document in the collection. Uses Firestore batch writes to minimize
+    network round-trips and improve performance.
+
+    Args:
+        names: List of packet filenames to publish as cards
+        firestore_collection: Name of the Firestore collection to write to
+    """
+    if not names:
+        print("No cards to upload.")
+        return
 
     initialize_firebase()
-    collection = firestore.client().collection(firestore_collection)
+    db = firestore.client()
+    collection = db.collection(firestore_collection)
+
+    # Firestore batch limit is 500 operations
+    BATCH_SIZE = 500
+    total_cards = len(names)
+    cards_processed = 0
+
+    # Process names in batches of up to 500
+    for i in range(0, total_cards, BATCH_SIZE):
+        batch = db.batch()
+        batch_names = names[i : i + BATCH_SIZE]
+
+        # Add all operations for this batch
+        for name in batch_names:
+            doc_ref = collection.document()  # Auto-generate document ID
+            batch.set(
+                doc_ref,
+                {
+                    "status": "available",
+                    "packet_filename": name,
+                    "created": datetime.now(ZoneInfo("America/Los_Angeles")),
+                },
+            )
 
-    for name in names:
-        collection.document().set(
-            {
-                "status": "available",
-                "packet_filename": name,
-                "created": datetime.now(ZoneInfo("America/Los_Angeles")),
-            }
+        # Commit the entire batch atomically
+        batch.commit()
+        cards_processed += len(batch_names)
+
+        print(
+            f"Batch {i // BATCH_SIZE + 1}: Created {len(batch_names)} cards "
+            f"({cards_processed}/{total_cards} total)"
         )
-        print(f"Creating card {name}")
 
-    print(f"Uploaded {len(names)} cards.")
+    print(
+        f"Successfully uploaded {total_cards} cards in {(total_cards + BATCH_SIZE - 1) // BATCH_SIZE} batch(es)."
+    )
 
 
 @transactional
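The rewritten publish_cards is a direct use of the Firestore WriteBatch API: set() calls are queued locally and batch.commit() sends them in one round-trip, with the input chunked to respect the 500-operations-per-batch limit the code cites. A standalone sketch of the same pattern, assuming firebase_admin.initialize_app() has already run and using a placeholder collection name and filenames:

    from firebase_admin import firestore

    # Assumes firebase_admin.initialize_app() was called earlier.
    db = firestore.client()
    batch = db.batch()
    for name in ["a.packet", "b.packet"]:  # placeholder filenames
        doc_ref = db.collection("cards").document()  # auto-generated ID
        batch.set(doc_ref, {"packet_filename": name, "status": "available"})
    batch.commit()  # one network round-trip; this batch commits atomically

Note that each commit is atomic only within its own batch; in an upload split across several 500-write batches, a mid-run failure leaves earlier batches committed.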
dayhoff_tools-1.1.43.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: dayhoff-tools
-Version: 1.1.41
+Version: 1.1.43
 Summary: Common tools for all the repos at Dayhoff Labs
 Author: Daniel Martin-Alarcon
 Author-email: dma@dayhofflabs.com
dayhoff_tools-1.1.43.dist-info/RECORD
@@ -7,12 +7,12 @@ dayhoff_tools/cli/main.py,sha256=47EGb28ALaYFc7oAUGlY1D66AIDmc4RZiXxN-gPVrpQ,451
 dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
 dayhoff_tools/cli/utility_commands.py,sha256=ER4VrJt4hu904MwrcltUXjwBWT4uFrP-aPXjdXyT3F8,24685
 dayhoff_tools/deployment/base.py,sha256=8tXwsPYvRo-zV-aNhHw1c7Rji-KWg8S5xoCCznFnVVI,17412
-dayhoff_tools/deployment/deploy_aws.py,sha256=jQyQ0fbm2793jEHFO84lr5tNqiOpdBg6U0S5zCVJr1M,17884
+dayhoff_tools/deployment/deploy_aws.py,sha256=GvZpE2YIFA5Dl9rkAljFjtUypmPDNbWgw8NicHYTP24,18265
 dayhoff_tools/deployment/deploy_gcp.py,sha256=xgaOVsUDmP6wSEMYNkm1yRNcVskfdz80qJtCulkBIAM,8860
 dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqqDkpaIOaAn_4,26027
 dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
-dayhoff_tools/deployment/processors.py,sha256=f4L52ekx_zYirl8C4WfavxtOioyD-c34TdTJVDoLpWs,16572
-dayhoff_tools/deployment/swarm.py,sha256=MGcS2_x4RNFtnVjWlU_SwNfhICz8NlGYr9cYBK4ZKDA,21688
+dayhoff_tools/deployment/processors.py,sha256=LM0CQbr4XCb3AtLbrcuDQm4tYPXsoNqgVJ4WQYDjzJc,12406
+dayhoff_tools/deployment/swarm.py,sha256=YJfvVOcAS8cYcIj2fiN4qwC2leh0I9w5A4px8ZWSF6g,22833
 dayhoff_tools/embedders.py,sha256=fRkyWjHo8OmbNUBY_FwrgfvyiLqpmrpI57UAb1Szn1Y,36609
 dayhoff_tools/fasta.py,sha256=_kA2Cpiy7JAGbBqLrjElkzbcUD_p-nO2d5Aj1LVmOvc,50509
 dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
 dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
 dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
 dayhoff_tools/warehouse.py,sha256=TqV8nex1AluNaL4JuXH5zuu9P7qmE89lSo6f_oViy6U,14965
-dayhoff_tools-1.1.41.dist-info/METADATA,sha256=HgiBVffpoYUtLMGV4uAKXJCyiPVmo39ytRHQ41b6-hg,2843
-dayhoff_tools-1.1.41.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-dayhoff_tools-1.1.41.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
-dayhoff_tools-1.1.41.dist-info/RECORD,,
+dayhoff_tools-1.1.43.dist-info/METADATA,sha256=S3WFgeHSXhXJUg6E8nHcSBmbGktzd9B-2A7LIdX1c9k,2843
+dayhoff_tools-1.1.43.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+dayhoff_tools-1.1.43.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
+dayhoff_tools-1.1.43.dist-info/RECORD,,