dayhoff-tools 1.1.41__py3-none-any.whl → 1.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dayhoff_tools/deployment/deploy_aws.py
@@ -374,6 +374,11 @@ def create_or_update_job_definition(
         "timeout": {"attemptDurationSeconds": aws_config.get("timeout_seconds", 86400)},
     }
 
+    # Add tags if specified in config
+    if "tags" in aws_config:
+        job_definition_args["tags"] = aws_config["tags"]
+        print(f"Adding tags to job definition: {aws_config['tags']}")
+
     # Register new revision using the session client
     response = batch.register_job_definition(**job_definition_args)
 
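Both tag hunks in deploy_aws.py read the new "tags" key from the same aws_config mapping that already supplies timeout_seconds. A minimal sketch of a config fragment that would exercise the new branch; every key other than timeout_seconds and tags, and all tag values, are illustrative assumptions rather than anything this diff confirms:

    # Hypothetical aws_config fragment (shape inferred from the diff above)
    aws_config = {
        "timeout_seconds": 86400,  # already consumed by the job definition
        "tags": {                  # new: forwarded verbatim to AWS Batch
            "project": "dayhoff",
            "team": "deployment",
        },
    }

The same guard-and-forward block is added to submit_aws_batch_job in the next hunk of the same file.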
@@ -472,6 +477,11 @@ def submit_aws_batch_job(
         print(f"Setting retry attempts to {retry_attempts}")
         job_submit_args["retryStrategy"] = {"attempts": retry_attempts}
 
+    # Add tags if specified in config
+    if "tags" in aws_config:
+        job_submit_args["tags"] = aws_config["tags"]
+        print(f"Adding tags to batch job: {aws_config['tags']}")
+
     # Submit the job using the session client
     response = batch.submit_job(**job_submit_args)
 
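On the boto3 side, both register_job_definition and submit_job accept an optional tags parameter (a mapping of string keys to string values), so the value from aws_config can be passed through unchanged. A minimal sketch of the resulting submit call, with placeholder names everywhere except the tags parameter itself:

    import boto3

    batch = boto3.session.Session().client("batch")

    # jobName/jobQueue/jobDefinition are placeholders; the tags mapping
    # is attached to the Batch job itself and can surface in AWS
    # cost-allocation reports once the tag keys are activated.
    response = batch.submit_job(
        jobName="example-job",
        jobQueue="example-queue",
        jobDefinition="example-def",
        tags={"project": "dayhoff"},
    )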
dayhoff_tools/deployment/processors.py
@@ -1,9 +1,5 @@
-import csv
 import json
 import logging
-import os
-import shlex
-import shutil
 import subprocess
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -295,108 +291,3 @@ class InterProScanProcessor(Processor):
         cleaned_input_file_path.unlink()
 
         return str(chunk_output_dir)
-
-
-class BoltzPredictor(Processor):
-    """Processor for running Boltz docking predictions.
-
-    This class wraps the Boltz docking tool to predict protein structures
-    from sequence data.
-    """
-
-    def __init__(self, num_workers: int, boltz_options: str | None = None):
-        """Initialize the BoltzPredictor.
-
-        Args:
-            num_workers: Number of worker threads to use as a default.
-                This can be overridden if --num_workers is present
-                in boltz_options.
-            boltz_options: A string containing additional command-line options
-                to pass to the Boltz predictor. Options should be
-                space-separated (e.g., "--option1 value1 --option2").
-        """
-        self.num_workers = num_workers
-        self.boltz_options = boltz_options
-
-    def run(self, input_file: str) -> str:
-        """Run Boltz prediction on the input file.
-
-        Constructs the command using the input file, default number of workers,
-        and any additional options provided via `boltz_options`. If `--num_workers`
-        is specified in `boltz_options`, it overrides the default `num_workers`.
-
-        Args:
-            input_file: Path to the input file containing sequences
-
-        Returns:
-            Path to the output directory created by Boltz
-
-        Raises:
-            subprocess.CalledProcessError: If Boltz prediction fails
-        """
-        # Determine expected output directory name
-        input_base = os.path.splitext(os.path.basename(input_file))[0]
-        expected_output_dir = f"boltz_results_{input_base}"
-        logger.info(f"Expected output directory: {expected_output_dir}")
-
-        # Start building the command
-        cmd = ["boltz", "predict", input_file]
-
-        # Parse additional options if provided
-        additional_args = []
-        num_workers_in_opts = False
-        if self.boltz_options:
-            try:
-                parsed_opts = shlex.split(self.boltz_options)
-                additional_args.extend(parsed_opts)
-                if "--num_workers" in parsed_opts:
-                    num_workers_in_opts = True
-                    logger.info(
-                        f"Using --num_workers from BOLTZ_OPTIONS: {self.boltz_options}"
-                    )
-            except ValueError as e:
-                logger.error(f"Error parsing BOLTZ_OPTIONS '{self.boltz_options}': {e}")
-                # Decide if we should raise an error or proceed without options
-                # For now, proceed without the additional options
-                additional_args = []  # Clear potentially partially parsed args
-
-        # Add num_workers if not specified in options
-        if not num_workers_in_opts:
-            logger.info(f"Using default num_workers: {self.num_workers}")
-            cmd.extend(["--num_workers", str(self.num_workers)])
-
-        # Add the parsed additional arguments
-        cmd.extend(additional_args)
-
-        # Log the final command
-        # Use shlex.join for safer command logging, especially if paths/args have spaces
-        try:
-            safe_cmd_str = shlex.join(cmd)
-            logger.info(f"Running command: {safe_cmd_str}")
-        except AttributeError:  # shlex.join is Python 3.8+
-            logger.info(f"Running command: {' '.join(cmd)}")
-
-        # Stream output in real-time
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1,
-        )
-
-        stdout = process.stdout
-        if stdout:
-            for line in iter(stdout.readline, ""):
-                logger.info(f"BOLTZ: {line.rstrip()}")
-
-        # Wait for process to complete
-        return_code = process.wait()
-        if return_code != 0:
-            logger.error(f"Boltz prediction failed with exit code {return_code}")
-            raise subprocess.CalledProcessError(return_code, cmd)
-
-        logger.info(
-            f"Boltz prediction completed successfully. Output in {expected_output_dir}"
-        )
-        return expected_output_dir
dayhoff_tools/deployment/swarm.py
@@ -128,23 +128,58 @@ def publish_cards(
     names: List[str],
     firestore_collection: str,
 ):
-    """Publish cards to Firebase. Expects a list of filenames (not full paths),
-    which will each be published as a new document in the collection."""
+    """Publish cards to Firebase using batch writes for optimal performance.
+
+    Expects a list of filenames (not full paths), which will each be published
+    as a new document in the collection. Uses Firestore batch writes to minimize
+    network round-trips and improve performance.
+
+    Args:
+        names: List of packet filenames to publish as cards
+        firestore_collection: Name of the Firestore collection to write to
+    """
+    if not names:
+        print("No cards to upload.")
+        return
 
     initialize_firebase()
-    collection = firestore.client().collection(firestore_collection)
+    db = firestore.client()
+    collection = db.collection(firestore_collection)
+
+    # Firestore batch limit is 500 operations
+    BATCH_SIZE = 500
+    total_cards = len(names)
+    cards_processed = 0
+
+    # Process names in batches of up to 500
+    for i in range(0, total_cards, BATCH_SIZE):
+        batch = db.batch()
+        batch_names = names[i : i + BATCH_SIZE]
+
+        # Add all operations for this batch
+        for name in batch_names:
+            doc_ref = collection.document()  # Auto-generate document ID
+            batch.set(
+                doc_ref,
+                {
+                    "status": "available",
+                    "packet_filename": name,
+                    "created": datetime.now(ZoneInfo("America/Los_Angeles")),
+                },
+            )
 
-    for name in names:
-        collection.document().set(
-            {
-                "status": "available",
-                "packet_filename": name,
-                "created": datetime.now(ZoneInfo("America/Los_Angeles")),
-            }
+        # Commit the entire batch atomically
+        batch.commit()
+        cards_processed += len(batch_names)
+
+        print(
+            f"Batch {i // BATCH_SIZE + 1}: Created {len(batch_names)} cards "
+            f"({cards_processed}/{total_cards} total)"
         )
-        print(f"Creating card {name}")
 
-    print(f"Uploaded {len(names)} cards.")
+    print(
+        f"Successfully uploaded {total_cards} cards in {(total_cards + BATCH_SIZE - 1) // BATCH_SIZE} batch(es)."
+    )
 
 
 @transactional
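The rewritten publish_cards is a direct use of the Firestore WriteBatch API: set() calls are queued locally and batch.commit() sends them in one round-trip, with the input chunked to respect the 500-operations-per-batch limit the code cites. A standalone sketch of the same pattern, assuming firebase_admin.initialize_app() has already run and using a placeholder collection name and filenames:

    from firebase_admin import firestore

    # Assumes firebase_admin.initialize_app() was called earlier.
    db = firestore.client()
    batch = db.batch()
    for name in ["a.packet", "b.packet"]:  # placeholder filenames
        doc_ref = db.collection("cards").document()  # auto-generated ID
        batch.set(doc_ref, {"packet_filename": name, "status": "available"})
    batch.commit()  # one network round-trip; this batch commits atomically

Note that each commit is atomic only within its own batch; in an upload split across several 500-write batches, a mid-run failure leaves earlier batches committed.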
dayhoff_tools-1.1.43.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: dayhoff-tools
-Version: 1.1.41
+Version: 1.1.43
 Summary: Common tools for all the repos at Dayhoff Labs
 Author: Daniel Martin-Alarcon
 Author-email: dma@dayhofflabs.com
dayhoff_tools-1.1.43.dist-info/RECORD
@@ -7,12 +7,12 @@ dayhoff_tools/cli/main.py,sha256=47EGb28ALaYFc7oAUGlY1D66AIDmc4RZiXxN-gPVrpQ,451
 dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
 dayhoff_tools/cli/utility_commands.py,sha256=ER4VrJt4hu904MwrcltUXjwBWT4uFrP-aPXjdXyT3F8,24685
 dayhoff_tools/deployment/base.py,sha256=8tXwsPYvRo-zV-aNhHw1c7Rji-KWg8S5xoCCznFnVVI,17412
-dayhoff_tools/deployment/deploy_aws.py,sha256=jQyQ0fbm2793jEHFO84lr5tNqiOpdBg6U0S5zCVJr1M,17884
+dayhoff_tools/deployment/deploy_aws.py,sha256=GvZpE2YIFA5Dl9rkAljFjtUypmPDNbWgw8NicHYTP24,18265
 dayhoff_tools/deployment/deploy_gcp.py,sha256=xgaOVsUDmP6wSEMYNkm1yRNcVskfdz80qJtCulkBIAM,8860
 dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqqDkpaIOaAn_4,26027
 dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
-dayhoff_tools/deployment/processors.py,sha256=f4L52ekx_zYirl8C4WfavxtOioyD-c34TdTJVDoLpWs,16572
-dayhoff_tools/deployment/swarm.py,sha256=MGcS2_x4RNFtnVjWlU_SwNfhICz8NlGYr9cYBK4ZKDA,21688
+dayhoff_tools/deployment/processors.py,sha256=LM0CQbr4XCb3AtLbrcuDQm4tYPXsoNqgVJ4WQYDjzJc,12406
+dayhoff_tools/deployment/swarm.py,sha256=YJfvVOcAS8cYcIj2fiN4qwC2leh0I9w5A4px8ZWSF6g,22833
 dayhoff_tools/embedders.py,sha256=fRkyWjHo8OmbNUBY_FwrgfvyiLqpmrpI57UAb1Szn1Y,36609
 dayhoff_tools/fasta.py,sha256=_kA2Cpiy7JAGbBqLrjElkzbcUD_p-nO2d5Aj1LVmOvc,50509
 dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
 dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
 dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
 dayhoff_tools/warehouse.py,sha256=TqV8nex1AluNaL4JuXH5zuu9P7qmE89lSo6f_oViy6U,14965
-dayhoff_tools-1.1.41.dist-info/METADATA,sha256=HgiBVffpoYUtLMGV4uAKXJCyiPVmo39ytRHQ41b6-hg,2843
-dayhoff_tools-1.1.41.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-dayhoff_tools-1.1.41.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
-dayhoff_tools-1.1.41.dist-info/RECORD,,
+dayhoff_tools-1.1.43.dist-info/METADATA,sha256=S3WFgeHSXhXJUg6E8nHcSBmbGktzd9B-2A7LIdX1c9k,2843
+dayhoff_tools-1.1.43.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+dayhoff_tools-1.1.43.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
+dayhoff_tools-1.1.43.dist-info/RECORD,,