dayhoff-tools 1.1.10__py3-none-any.whl → 1.13.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/__init__.py +10 -0
- dayhoff_tools/cli/cloud_commands.py +179 -43
- dayhoff_tools/cli/engine1/__init__.py +323 -0
- dayhoff_tools/cli/engine1/engine_core.py +703 -0
- dayhoff_tools/cli/engine1/engine_lifecycle.py +136 -0
- dayhoff_tools/cli/engine1/engine_maintenance.py +431 -0
- dayhoff_tools/cli/engine1/engine_management.py +505 -0
- dayhoff_tools/cli/engine1/shared.py +501 -0
- dayhoff_tools/cli/engine1/studio_commands.py +825 -0
- dayhoff_tools/cli/engines_studios/__init__.py +6 -0
- dayhoff_tools/cli/engines_studios/api_client.py +351 -0
- dayhoff_tools/cli/engines_studios/auth.py +144 -0
- dayhoff_tools/cli/engines_studios/engine-studio-cli.md +1230 -0
- dayhoff_tools/cli/engines_studios/engine_commands.py +1151 -0
- dayhoff_tools/cli/engines_studios/progress.py +260 -0
- dayhoff_tools/cli/engines_studios/simulators/cli-simulators.md +151 -0
- dayhoff_tools/cli/engines_studios/simulators/demo.sh +75 -0
- dayhoff_tools/cli/engines_studios/simulators/engine_list_simulator.py +319 -0
- dayhoff_tools/cli/engines_studios/simulators/engine_status_simulator.py +369 -0
- dayhoff_tools/cli/engines_studios/simulators/idle_status_simulator.py +476 -0
- dayhoff_tools/cli/engines_studios/simulators/simulator_utils.py +180 -0
- dayhoff_tools/cli/engines_studios/simulators/studio_list_simulator.py +374 -0
- dayhoff_tools/cli/engines_studios/simulators/studio_status_simulator.py +164 -0
- dayhoff_tools/cli/engines_studios/studio_commands.py +755 -0
- dayhoff_tools/cli/main.py +106 -7
- dayhoff_tools/cli/utility_commands.py +896 -179
- dayhoff_tools/deployment/base.py +70 -6
- dayhoff_tools/deployment/deploy_aws.py +165 -25
- dayhoff_tools/deployment/deploy_gcp.py +78 -5
- dayhoff_tools/deployment/deploy_utils.py +20 -7
- dayhoff_tools/deployment/job_runner.py +9 -4
- dayhoff_tools/deployment/processors.py +230 -418
- dayhoff_tools/deployment/swarm.py +47 -12
- dayhoff_tools/embedders.py +28 -26
- dayhoff_tools/fasta.py +181 -64
- dayhoff_tools/warehouse.py +268 -1
- {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/METADATA +20 -5
- dayhoff_tools-1.13.12.dist-info/RECORD +54 -0
- {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/WHEEL +1 -1
- dayhoff_tools-1.1.10.dist-info/RECORD +0 -32
- {dayhoff_tools-1.1.10.dist-info → dayhoff_tools-1.13.12.dist-info}/entry_points.txt +0 -0
@@ -128,23 +128,58 @@ def publish_cards(
     names: List[str],
     firestore_collection: str,
 ):
-    """Publish cards to Firebase
-
+    """Publish cards to Firebase using batch writes for optimal performance.
+
+    Expects a list of filenames (not full paths), which will each be published
+    as a new document in the collection. Uses Firestore batch writes to minimize
+    network round-trips and improve performance.
+
+    Args:
+        names: List of packet filenames to publish as cards
+        firestore_collection: Name of the Firestore collection to write to
+    """
+    if not names:
+        print("No cards to upload.")
+        return

     initialize_firebase()
-
+    db = firestore.client()
+    collection = db.collection(firestore_collection)
+
+    # Firestore batch limit is 500 operations
+    BATCH_SIZE = 500
+    total_cards = len(names)
+    cards_processed = 0
+
+    # Process names in batches of up to 500
+    for i in range(0, total_cards, BATCH_SIZE):
+        batch = db.batch()
+        batch_names = names[i : i + BATCH_SIZE]
+
+        # Add all operations for this batch
+        for name in batch_names:
+            doc_ref = collection.document()  # Auto-generate document ID
+            batch.set(
+                doc_ref,
+                {
+                    "status": "available",
+                    "packet_filename": name,
+                    "created": datetime.now(ZoneInfo("America/Los_Angeles")),
+                },
+            )

-
-
-
-
-
-
-            }
+        # Commit the entire batch atomically
+        batch.commit()
+        cards_processed += len(batch_names)
+
+        print(
+            f"Batch {i // BATCH_SIZE + 1}: Created {len(batch_names)} cards "
+            f"({cards_processed}/{total_cards} total)"
         )
-        print(f"Creating card {name}")

-        print(
+    print(
+        f"Successfully uploaded {total_cards} cards in {(total_cards + BATCH_SIZE - 1) // BATCH_SIZE} batch(es)."
+    )


 @transactional
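The batching logic above is the heart of this change: Firestore allows at most 500 operations per committed batch, so the new `publish_cards` chunks `names` into groups of up to 500 and commits each group once. A minimal standalone sketch of that chunking arithmetic follows; `commit_batch` is a hypothetical stand-in for building and committing a Firestore batch, not part of dayhoff_tools.

```python
from typing import Callable, List

BATCH_SIZE = 500  # Firestore's per-batch operation limit


def publish_in_batches(names: List[str], commit_batch: Callable[[List[str]], None]) -> int:
    """Send `names` in groups of at most BATCH_SIZE, committing each group once."""
    for i in range(0, len(names), BATCH_SIZE):
        # Each slice becomes one batch of set() operations in the real code.
        commit_batch(names[i : i + BATCH_SIZE])
    # Ceiling division: the same formula the new code uses to report the batch count.
    return (len(names) + BATCH_SIZE - 1) // BATCH_SIZE


if __name__ == "__main__":
    chunks: List[List[str]] = []
    n = publish_in_batches([f"packet_{i}.json" for i in range(1234)], chunks.append)
    print(n, [len(c) for c in chunks])  # 3 [500, 500, 234]
```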
dayhoff_tools/embedders.py CHANGED

@@ -1,7 +1,6 @@
 import logging
 import os
 import time
-from abc import ABC, abstractmethod
 from typing import Dict, List, Literal, Optional, Tuple, cast

 import h5py
@@ -443,35 +442,38 @@ class Embedder(Processor):
         for seq_id, seq in small_seqs_sorted:
             seq_len = len(seq)

-            if
-
-
-
-
-            )
-
-
-
+            # Check if adding this sequence would exceed the limit
+            if current_batch and current_size + seq_len > self.batch_residue_limit:
+                # Process current batch before adding the new sequence
+                small_batch_count += 1
+                logger.info(
+                    f"Processing small batch {small_batch_count}/{total_small_batches} with {len(current_batch)} sequences"
+                )
+                batch_results = self.embed_batch(current_batch)
+                results.update(batch_results)
+                self.cleanup_memory()

-
-
-
-
-
-
-
-
-
-
+                # Update progress
+                processed_sequences += len(current_batch)
+                elapsed_time = time.time() - start_time
+                remaining_sequences = total_sequences - processed_sequences
+                avg_time_per_seq = (
+                    elapsed_time / processed_sequences
+                    if processed_sequences > 0
+                    else 0
+                )
+                estimated_time_left = avg_time_per_seq * remaining_sequences

-
-
-
-
-
+                logger.info(
+                    f"Progress: {processed_sequences}/{total_sequences} sequences ({processed_sequences/total_sequences*100:.1f}%) | "
+                    f"Elapsed: {elapsed_time/60:.1f} min | "
+                    f"Est. remaining: {estimated_time_left/60:.1f} min"
+                )
+                # Start new batch
                 current_batch = []
                 current_size = 0

+            # Add the current sequence to the batch
             current_batch.append((seq_id, seq, seq_len))
             current_size += seq_len

@@ -681,7 +683,7 @@ class Embedder(Processor):
         sequence_ids, sequences, sequence_lengths = zip(*batch)

         # Prepare sequences for tokenization
-        tokenizer_input = self.prepare_tokenizer_input(sequences)
+        tokenizer_input = self.prepare_tokenizer_input(list(sequences))

         # Tokenize sequences
         encoded_input = self.tokenizer.batch_encode_plus(
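The rewritten loop in `Embedder` packs sorted sequences into a batch until adding the next one would push the total residue count past `batch_residue_limit`, flushes that batch, then continues. A self-contained sketch of the same greedy packing, with illustrative names rather than the class's real attributes:

```python
from typing import List, Tuple


def pack_by_residue_limit(
    seqs: List[Tuple[str, str]], residue_limit: int
) -> List[List[Tuple[str, str, int]]]:
    """Group (id, sequence) pairs so each group's total length stays within
    residue_limit, flushing the current group before an overflow would occur."""
    batches: List[List[Tuple[str, str, int]]] = []
    current: List[Tuple[str, str, int]] = []
    current_size = 0
    for seq_id, seq in seqs:
        seq_len = len(seq)
        # Same check as the diff: flush only if the batch is non-empty and would overflow.
        if current and current_size + seq_len > residue_limit:
            batches.append(current)
            current, current_size = [], 0
        current.append((seq_id, seq, seq_len))
        current_size += seq_len
    if current:
        batches.append(current)
    return batches


if __name__ == "__main__":
    demo = [("a", "M" * 300), ("b", "M" * 500), ("c", "M" * 400), ("d", "M" * 100)]
    print([[sid for sid, _, _ in b] for b in pack_by_residue_limit(demo, 1000)])
    # [['a', 'b'], ['c', 'd']]
```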
dayhoff_tools/fasta.py CHANGED

@@ -13,8 +13,6 @@ from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
 import requests
 from Bio import SeqIO
 from Bio.SeqRecord import SeqRecord
-from tqdm import tqdm
-from tqdm.notebook import tqdm as tqdm_notebook

 logger = logging.getLogger(__name__)

@@ -27,7 +25,7 @@ def _clean_noncanonical_fasta(
 ) -> Optional[dict[str, str]]:
     """
     Read in a FASTA file containing multiple sequences, replace non-canonical amino acids,
-    remove empty sequences, and either write the sequences to a new FASTA file or return them as a dictionary.
+    remove stop codons, remove empty sequences, and either write the sequences to a new FASTA file or return them as a dictionary.

     Args:
         input_path (str): Path to the input FASTA file.
@@ -50,7 +48,11 @@ def _clean_noncanonical_fasta(
     for line in fasta_file:
         if line.startswith(">"):
             if seq_id and seq_lines:
-                seq =
+                seq = (
+                    "".join(seq_lines)
+                    .translate(str.maketrans("OJUZB", "XLCED"))
+                    .replace("*", "")
+                )
                 if seq.strip():  # Only process non-empty sequences
                     sequences[seq_id] = seq
                     if output_path:
@@ -63,7 +65,11 @@ def _clean_noncanonical_fasta(

     # Process the last sequence
     if seq_id and seq_lines:
-        seq =
+        seq = (
+            "".join(seq_lines)
+            .translate(str.maketrans("OJUZB", "XLCED"))
+            .replace("*", "")
+        )
         if seq.strip():  # Only process non-empty sequences
             sequences[seq_id] = seq
             if output_path:
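Both hunks above add the same cleaning expression: translate the non-canonical residues O, J, U, Z, B to X, L, C, E, D and strip `*` stop codons before keeping a sequence. Illustrated in isolation:

```python
# The same translate-then-replace cleanup the diff adds to _clean_noncanonical_fasta.
CLEANUP_TABLE = str.maketrans("OJUZB", "XLCED")


def clean_sequence(raw: str) -> str:
    return raw.translate(CLEANUP_TABLE).replace("*", "")


if __name__ == "__main__":
    print(clean_sequence("MKTAYIAKQRUZB*"))  # MKTAYIAKQRCED
```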
@@ -94,7 +100,7 @@ def clean_noncanonical_fasta(
 ):
     """
     Read in a FASTA file containing multiple sequences and write the sequences to a new FASTA file.
-    Replace non-canonical amino acids along the way.
+    Replace non-canonical amino acids and remove stop codons along the way.

     Args:
         input_path (str): Path to the input FASTA file.
@@ -114,7 +120,7 @@ def clean_noncanonical_fasta_to_dict(
 ) -> dict[str, str]:
     """
     Read in a FASTA file containing multiple sequences and return the sequences as a dictionary.
-    Replace non-canonical amino acids along the way.
+    Replace non-canonical amino acids and remove stop codons along the way.

     Args:
         input_path (str): Path to the input FASTA file.
@@ -140,6 +146,8 @@ def combine_fasta_files(input_path: Union[str, List[str]], output_path: str) ->
     Raises:
         FileExistsError: If the output file already exists.
     """
+    from tqdm import tqdm
+
     _check_output_file(output_path)

     if isinstance(input_path, str):
@@ -290,6 +298,11 @@ def split_fasta(
     Returns:
         int: The number of output files created.
     """
+    from typing import TYPE_CHECKING, Optional
+
+    if TYPE_CHECKING:
+        from tqdm import tqdm
+
     # Ensure the target folder exists
     os.makedirs(target_folder, exist_ok=True)

@@ -299,7 +312,7 @@ def split_fasta(
     files_created = 0
     current_output_file_sequence_count = 0
     current_output_file_bytes_written = 0
-    pbar: tqdm
+    pbar: Optional["tqdm"] = None
     output_file = None  # Will be opened when we encounter the first header line
     output_file_path = ""

@@ -314,6 +327,8 @@ def split_fasta(
     # Open the large FASTA file for reading
     with open(fasta_file, "r", buffering=1024 * 1024) as fasta:
         if show_progress:
+            from tqdm import tqdm
+
             total_size = os.path.getsize(fasta_file)
             pbar = tqdm(
                 total=total_size,
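These hunks share one theme: heavy dependencies such as `tqdm` move from module level into the functions that use them, with `TYPE_CHECKING` guarding names needed only in annotations, so importing the module stays cheap. A generic sketch of that pattern (function and variable names here are illustrative, not the library's API):

```python
from typing import TYPE_CHECKING, List, Optional

if TYPE_CHECKING:
    # Seen only by type checkers; nothing is imported at runtime.
    from tqdm import tqdm


def iterate_with_progress(items: List[str], show_progress: bool = True) -> None:
    pbar: Optional["tqdm"] = None
    if show_progress:
        # Deferred import: the dependency loads only when the feature is used.
        from tqdm import tqdm

        pbar = tqdm(total=len(items), desc="Processing")
    for _ in items:
        if pbar is not None:
            pbar.update(1)
    if pbar is not None:
        pbar.close()
```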
@@ -441,6 +456,9 @@ def subtract_fasta_files(file1: str, file2: str, output_file: str):
     Raises:
         FileExistsError: If the output file already exists.
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
     _check_output_file(output_file)

     # Load sequences from file1 with a progress bar
@@ -497,6 +515,8 @@ def simplify_fasta_ids(
     Raises:
         FileExistsError: If the output file already exists.
     """
+    from Bio import SeqIO
+
     _check_output_file(output_fasta)

     count = 0
@@ -575,6 +595,9 @@ def extract_ids_from_fasta(fasta_file: str) -> Set[str]:
     Raises:
         ValueError: If there's an issue reading or parsing the input file.
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
     sequence_ids: Set[str] = set()
     try:
         estimated_records = estimate_sequences(fasta_file)
@@ -604,29 +627,48 @@ def process_chunk(
 ) -> Tuple[List[str], Set[str]]:
     output_sequences = []
     written_ids = set()
-    current_id = ""
-    current_seq = []
-
-
-
-
-
-
-
-
-
-
-            written_ids.add(current_id)
-            current_id = line[1:]
-            current_seq = []
-        elif current_id:
-            current_seq.append(line)
-
-    # Process the last sequence in the chunk
-    if current_id and current_seq and id_matches(current_id) != exclude:
-        output_sequences.append(f">{current_id}\n{''.join(current_seq)}\n")
-        written_ids.add(current_id)
+    current_id: str = ""
+    current_seq: List[str] = []
+
+    # Get a unique worker ID, could be process ID
+    worker_id = os.getpid()
+    logger.debug(
+        f"SUBSET_FASTA_PROCESS_CHUNK: Worker {worker_id} processing a chunk. Target IDs count: {len(target_ids_lower)}, Exclude: {exclude}"
+    )
+    try:
+
+        def id_matches(seq_id: str) -> bool:
+            return any(part.lower() in target_ids_lower for part in seq_id.split("|"))

+        for line in chunk:
+            line = line.strip()
+            if line.startswith(">"):
+                if current_id and current_seq:
+                    if id_matches(current_id) != exclude:
+                        output_sequences.append(
+                            f">{current_id}\n{''.join(current_seq)}\n"
+                        )
+                        written_ids.add(current_id)
+                current_id = line[1:]
+                current_seq = []
+            elif current_id:
+                current_seq.append(line)
+
+        # Process the last sequence in the chunk
+        if current_id and current_seq and id_matches(current_id) != exclude:
+            output_sequences.append(f">{current_id}\n{''.join(current_seq)}\n")
+            written_ids.add(current_id)
+
+    except Exception as e:
+        logger.error(
+            f"SUBSET_FASTA_PROCESS_CHUNK: Worker {worker_id} encountered error: {e}",
+            exc_info=True,
+        )
+        # Re-raising the exception so the main process's pool error handling can catch it
+        raise
+    logger.debug(
+        f"SUBSET_FASTA_PROCESS_CHUNK: Worker {worker_id} finished chunk. Output sequences: {len(output_sequences)}, Written IDs: {len(written_ids)}"
+    )
     return output_sequences, written_ids

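The new `id_matches` helper inside `process_chunk` compares case-insensitively against every pipe-delimited component of a header ID, so both bare accessions and UniProt-style `db|ACCESSION|NAME` headers can match. For example:

```python
def id_matches(seq_id: str, target_ids_lower: set) -> bool:
    # True if any '|'-separated part of the header ID is in the lowercased target set.
    return any(part.lower() in target_ids_lower for part in seq_id.split("|"))


if __name__ == "__main__":
    targets = {"p12345", "q99999"}
    print(id_matches("sp|P12345|THRB_HUMAN", targets))  # True
    print(id_matches("P67890", targets))                # False
```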
@@ -655,54 +697,104 @@ def subset_fasta(
     Raises:
         FileExistsError: If the output file already exists.
     """
+    logger.info(
+        f"SUBSET_FASTA: Starting for input '{fasta_file}', output '{output_path}'. Target IDs: {len(target_ids)}, Exclude: {exclude}"
+    )
     _check_output_file(output_path)

     target_ids_lower = {id.lower() for id in target_ids}
     total_size = os.path.getsize(fasta_file)
-    chunk_size = max(
-        1, total_size // (multiprocessing.cpu_count() * 2)
-    )  # Adjust chunk size based on CPU count

-
+    # Determine a reasonable number of processes
+    num_processes = multiprocessing.cpu_count()
+    # Adjust chunk size based on number of processes to balance load vs memory
+    # Aim for at least a few chunks per process if possible, but not too many small chunks.
+    # This is a heuristic and might need tuning.
+    # Let's make chunks reasonably large, e.g., 10-50MB, or ensure at least num_processes chunks.
+    # If total_size is very small, chunk_size could become 0 if not handled.
+    desired_chunk_size_mb = 32
+    chunk_size = max(1, desired_chunk_size_mb * 1024 * 1024)
+    num_chunks = max(1, math.ceil(total_size / chunk_size))
+
+    def chunk_reader(
+        file_obj, cs: int
+    ) -> Iterator[List[str]]:  # Explicitly Iterator[List[str]]
         chunk = []
         chunk_bytes = 0
         for line in file_obj:
             chunk.append(line)
             chunk_bytes += len(line)
-            if chunk_bytes >=
+            if chunk_bytes >= cs and line.startswith(">"):
                 yield chunk
                 chunk = [line]
                 chunk_bytes = len(line)
         if chunk:
             yield chunk

-
-    mode = "rt" if fasta_file.endswith(".gz") else "r"
+    mode = "rt"  # text mode for both gzip and regular open

-
-
-
-
+    all_written_ids: Set[str] = set()
+    try:
+        with open(fasta_file, mode) as input_file:
+            logger.info(
+                f"SUBSET_FASTA: Using up to {num_processes} worker processes for {num_chunks} potential chunks."
             )
-
-
-
-
-                desc="Processing FASTA",
+
+            with multiprocessing.Pool(processes=num_processes) as pool:
+                logger.info(
+                    f"SUBSET_FASTA: Multiprocessing pool created (intended processes: {num_processes})."
                 )
-            )

-
-
-
-                output_file.writelines(output_sequences)
-                all_written_ids.update(written_ids)
+                process_func = partial(
+                    process_chunk, target_ids_lower=target_ids_lower, exclude=exclude
+                )

-
+                # Using imap_unordered can sometimes be better for memory with many results,
+                # as results are processed as they complete.
+                # However, for aggregation later, order doesn't strictly matter for building the final set/list of strings.
+                # tqdm will work with imap and imap_unordered.
+
+                # Calculate total for tqdm more robustly
+                actual_num_chunks_for_tqdm = num_chunks  # Use the calculated num_chunks
+
+                try:
+                    from tqdm import tqdm
+
+                    results_buffer = []
+                    for result_tuple in tqdm(
+                        pool.imap(process_func, chunk_reader(input_file, chunk_size)),
+                        total=actual_num_chunks_for_tqdm,  # Use calculated number of chunks
+                        desc="Processing FASTA (subset_fasta)",
+                    ):
+                        results_buffer.append(result_tuple)
+                    logger.debug("SUBSET_FASTA: pool.imap completed.")
+                except Exception as e_pool:
+                    logger.error(
+                        f"SUBSET_FASTA: Error during multiprocessing pool.imap: {e_pool}",
+                        exc_info=True,
+                    )
+                    raise
+
+                logger.debug(
+                    f"SUBSET_FASTA: Aggregating results from {len(results_buffer)} processed chunks."
+                )
+                with open(output_path, "w") as output_file:
+                    for output_sequences, written_ids_chunk in results_buffer:
+                        output_file.writelines(output_sequences)
+                        all_written_ids.update(written_ids_chunk)
+    except Exception as e_main:
+        logger.error(
+            f"SUBSET_FASTA: Error in main processing logic: {e_main}", exc_info=True
+        )
+        raise
+
+    logger.info(
+        f"SUBSET_FASTA: Wrote {len(all_written_ids)} sequences to {output_path}. Finished."
+    )
     return all_written_ids if return_written_ids else None


-def load_fasta_as_dict(fasta_file: str) -> Dict[str, SeqRecord]:
+def load_fasta_as_dict(fasta_file: str) -> Dict[str, "SeqRecord"]:
     """
     Load a FASTA file into a dictionary with record IDs as keys.
     Keep only the first instance of each identifier.
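The new `chunk_reader` in `subset_fasta` only cuts a chunk once the byte budget is reached and the current line is a `>` header; that boundary header ends the finished chunk and also starts the next one, and `process_chunk` ignores a trailing header with no sequence lines, so every record is still emitted exactly once. A self-contained sketch of that boundary rule, reading from an in-memory buffer:

```python
import io
from typing import Iterator, List


def chunk_reader(file_obj, chunk_bytes_target: int) -> Iterator[List[str]]:
    """Yield lists of lines, cutting only at '>' header lines once the byte
    target is exceeded; the boundary header is repeated at the next chunk's start."""
    chunk: List[str] = []
    chunk_bytes = 0
    for line in file_obj:
        chunk.append(line)
        chunk_bytes += len(line)
        if chunk_bytes >= chunk_bytes_target and line.startswith(">"):
            yield chunk
            chunk = [line]
            chunk_bytes = len(line)
    if chunk:
        yield chunk


if __name__ == "__main__":
    fasta = ">a\nMKT\n>b\nGGG\n>c\nAAA\n"
    for i, chunk in enumerate(chunk_reader(io.StringIO(fasta), 8)):
        print(i, chunk)
```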
@@ -713,6 +805,10 @@ def load_fasta_as_dict(fasta_file: str) -> Dict[str, SeqRecord]:
     Returns:
         Dict[str, SeqRecord]: A dictionary with record IDs as keys and SeqRecord objects as values.
     """
+    from Bio import SeqIO
+    from Bio.SeqRecord import SeqRecord
+    from tqdm import tqdm
+
     record_dict: Dict[str, SeqRecord] = {}
     estimated_sequences = estimate_sequences(fasta_file)

@@ -748,6 +844,9 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> No
     Example:
         fasta_to_sqlite("proteins.fasta", "proteins.db")
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
     _check_output_file(db_file)

     if not os.path.exists(fasta_file):
@@ -779,7 +878,7 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> No
     batch = []

     for protein_id, sequence in tqdm(
-        _protein_generator(fasta_file),
+        _protein_generator(Path(fasta_file)),  # Pass as Path object
         total=estimated_records,
         desc="Processing proteins",
     ):
@@ -804,22 +903,29 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> No
     print(f"Conversion completed. SQLite database saved to {db_file}")


-def _protein_generator(
+def _protein_generator(
+    fasta_path: Path,
+) -> Iterator[tuple[str, str]]:  # fasta_path is Path
     """
     Generate protein data from a FASTA file.
-
     Args:
         fasta_path (Path): Path to the FASTA file.
-
     Yields:
         tuple[str, str]: A tuple containing protein_id and sequence.
     """
-
-
-
-
-
-
+    from Bio import SeqIO
+
+    # Ensure we use 'rt' for text mode reading, especially if gzipped
+    open_func = gzip.open if str(fasta_path).endswith(".gz") else open
+    mode = "rt"
+
+    with open_func(fasta_path, mode) as handle:
+        for record in SeqIO.parse(handle, "fasta"):
+            protein_id = record.id.split()[
+                0
+            ]  # Assumes the first part of the id is the protein_id
+            sequence = str(record.seq)
+            yield protein_id, sequence


 def check_fasta_duplicates(fasta_path: str) -> tuple[set[str], set[str]]:
@@ -839,6 +945,9 @@ def check_fasta_duplicates(fasta_path: str) -> tuple[set[str], set[str]]:
         FileNotFoundError: If the input file doesn't exist
         ValueError: If the FASTA file is malformed
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
     if not os.path.exists(fasta_path):
         raise FileNotFoundError(f"FASTA file not found: {fasta_path}")

@@ -915,6 +1024,12 @@ def clean_fasta_duplicates(
         FileExistsError: If the output file already exists
         FileNotFoundError: If the input file doesn't exist
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
+    if not os.path.exists(input_path):
+        raise FileNotFoundError(f"Input FASTA file not found: {input_path}")
+
     _check_output_file(output_path)

     # First pass: collect sequence hashes for each ID
@@ -1003,6 +1118,8 @@ def fetch_uniprot_fasta(
     Returns:
         tuple: (success_count, failed_count, output_filepath, failed_accessions)
     """
+    from tqdm.notebook import tqdm as tqdm_notebook
+
     # Convert set to list for batch processing
     accession_list = list(accession_set)
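Finally, the reworked `_protein_generator` dispatches between `gzip.open` and plain `open` based on the filename suffix and always reads in text mode ("rt"). The same dispatch, sketched on its own with illustrative function names:

```python
import gzip
from pathlib import Path
from typing import Iterator


def open_text(path: Path):
    # gzip.open and the builtin open both accept mode "rt" for text reading.
    opener = gzip.open if str(path).endswith(".gz") else open
    return opener(path, "rt")


def iter_fasta_ids(path: Path) -> Iterator[str]:
    # Yield the first whitespace-delimited token of each header line.
    with open_text(path) as handle:
        for line in handle:
            if line.startswith(">"):
                yield line[1:].split()[0]
```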