dayhoff-tools 1.1.36__py3-none-any.whl → 1.1.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/deployment/deploy_gcp.py +1 -0
- dayhoff_tools/fasta.py +41 -4
- {dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.38.dist-info}/METADATA +1 -1
- {dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.38.dist-info}/RECORD +6 -6
- {dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.38.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.38.dist-info}/entry_points.txt +0 -0
dayhoff_tools/deployment/deploy_gcp.py
CHANGED
@@ -61,6 +61,7 @@ def create_batch_job_config(config: dict, image_uri: str) -> dict:
         "allocation_policy",  # Goes into batch_config.allocationPolicy
         "logs_policy",  # Goes into batch_config.logsPolicy
         "batch_job",  # Contains detailed task and resource specs
+        "image_uri",
         # Keys like job_name, region, registry_uri, repository are used by other functions
         # or for other purposes, not directly for constructing the core batch_config JSON here.
     }
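The single addition extends the set of top-level keys that `create_batch_job_config` deliberately leaves out of the Batch request body; `image_uri` already reaches the function as its own argument, so this presumably keeps a copy living in `config` from leaking into the request JSON. A minimal sketch of the exclusion-set pattern, assuming a plain dict-comprehension filter (the helper below is illustrative, not the package's actual code):

```python
# Top-level config keys consumed by other deployment steps; they must not
# be copied verbatim into the Batch request body.
EXCLUDED_KEYS = {
    "allocation_policy",
    "logs_policy",
    "batch_job",
    "image_uri",
}

def build_batch_config(config: dict) -> dict:
    """Keep only the keys destined for the core batch_config JSON."""
    return {k: v for k, v in config.items() if k not in EXCLUDED_KEYS}
```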
dayhoff_tools/fasta.py
CHANGED
@@ -13,8 +13,6 @@ from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
 import requests
 from Bio import SeqIO
 from Bio.SeqRecord import SeqRecord
-from tqdm import tqdm
-from tqdm.notebook import tqdm as tqdm_notebook
 
 logger = logging.getLogger(__name__)
 
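Every remaining hunk in this file applies the same deferred-import pattern that starts here: heavyweight dependencies move from module scope into the bodies of the functions that use them, so `import dayhoff_tools.fasta` itself stays fast. A self-contained sketch of the pattern (the function name is illustrative):

```python
import logging

logger = logging.getLogger(__name__)  # stdlib only at module scope

def count_records(fasta_file: str) -> int:
    # Imported on first call instead of at module import; later calls hit
    # the sys.modules cache, so the cost is paid once per process.
    from Bio import SeqIO

    return sum(1 for _ in SeqIO.parse(fasta_file, "fasta"))
```

The trade-off is that a missing dependency now surfaces at call time rather than at import time.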
@@ -140,6 +138,8 @@ def combine_fasta_files(input_path: Union[str, List[str]], output_path: str) ->
     Raises:
         FileExistsError: If the output file already exists.
     """
+    from tqdm import tqdm
+
     _check_output_file(output_path)
 
     if isinstance(input_path, str):
@@ -290,6 +290,11 @@ def split_fasta(
     Returns:
         int: The number of output files created.
     """
+    from typing import TYPE_CHECKING, Optional
+
+    if TYPE_CHECKING:
+        from tqdm import tqdm
+
     # Ensure the target folder exists
     os.makedirs(target_folder, exist_ok=True)
 
@@ -299,7 +304,7 @@ def split_fasta(
     files_created = 0
     current_output_file_sequence_count = 0
     current_output_file_bytes_written = 0
-    pbar: tqdm
+    pbar: Optional["tqdm"] = None
     output_file = None  # Will be opened when we encounter the first header line
     output_file_path = ""
 
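With `tqdm` gone from module scope, the old bare `pbar: tqdm` annotation would leave type checkers with an undefined name, and `pbar` never received a value on the no-progress path. The fix combines three pieces: a `TYPE_CHECKING`-guarded import (seen by checkers, skipped at runtime), a quoted annotation the interpreter treats as a plain string, and a `None` default. A sketch of the combined pattern, using an illustrative `process` function:

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Visible to type checkers only; never executed at runtime.
    from tqdm import tqdm

def process(total: int, show_progress: bool = True) -> None:
    pbar: Optional["tqdm"] = None  # quoted: no runtime name lookup
    if show_progress:
        from tqdm import tqdm  # the real import, only when needed

        pbar = tqdm(total=total)
    for _ in range(total):
        if pbar is not None:
            pbar.update(1)
    if pbar is not None:
        pbar.close()
```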
@@ -314,6 +319,8 @@
     # Open the large FASTA file for reading
     with open(fasta_file, "r", buffering=1024 * 1024) as fasta:
         if show_progress:
+            from tqdm import tqdm
+
             total_size = os.path.getsize(fasta_file)
             pbar = tqdm(
                 total=total_size,
@@ -441,6 +448,9 @@ def subtract_fasta_files(file1: str, file2: str, output_file: str):
     Raises:
         FileExistsError: If the output file already exists.
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
     _check_output_file(output_file)
 
     # Load sequences from file1 with a progress bar
@@ -497,6 +507,8 @@ def simplify_fasta_ids(
     Raises:
         FileExistsError: If the output file already exists.
     """
+    from Bio import SeqIO
+
     _check_output_file(output_fasta)
 
     count = 0
@@ -575,6 +587,9 @@ def extract_ids_from_fasta(fasta_file: str) -> Set[str]:
     Raises:
         ValueError: If there's an issue reading or parsing the input file.
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
     sequence_ids: Set[str] = set()
     try:
         estimated_records = estimate_sequences(fasta_file)
@@ -735,6 +750,8 @@ def subset_fasta(
     actual_num_chunks_for_tqdm = num_chunks  # Use the calculated num_chunks
 
     try:
+        from tqdm import tqdm
+
         results_buffer = []
         for result_tuple in tqdm(
             pool.imap(process_func, chunk_reader(input_file, chunk_size)),
@@ -769,7 +786,7 @@ def subset_fasta(
     return all_written_ids if return_written_ids else None
 
 
-def load_fasta_as_dict(fasta_file: str) -> Dict[str, SeqRecord]:
+def load_fasta_as_dict(fasta_file: str) -> Dict[str, "SeqRecord"]:
     """
     Load a FASTA file into a dictionary with record IDs as keys.
     Keep only the first instance of each identifier.
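Return annotations, unlike local-variable annotations, are evaluated when the `def` statement runs, so quoting `"SeqRecord"` turns it into a forward reference that type checkers resolve while the interpreter sees only a string. That keeps the signature valid even if the module-level `SeqRecord` import is later removed like the `tqdm` ones were. A sketch under that assumption (the function name is illustrative):

```python
from typing import Dict

def load_records(path: str) -> Dict[str, "SeqRecord"]:
    # Forward reference above; the names below exist only inside the call.
    from Bio import SeqIO
    from Bio.SeqRecord import SeqRecord

    records: Dict[str, SeqRecord] = {}
    for record in SeqIO.parse(path, "fasta"):
        records.setdefault(record.id, record)  # keep the first occurrence
    return records
```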
@@ -780,6 +797,10 @@ def load_fasta_as_dict(fasta_file: str) -> Dict[str, SeqRecord]:
     Returns:
         Dict[str, SeqRecord]: A dictionary with record IDs as keys and SeqRecord objects as values.
     """
+    from Bio import SeqIO
+    from Bio.SeqRecord import SeqRecord
+    from tqdm import tqdm
+
     record_dict: Dict[str, SeqRecord] = {}
     estimated_sequences = estimate_sequences(fasta_file)
 
@@ -815,6 +836,9 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> None:
     Example:
        fasta_to_sqlite("proteins.fasta", "proteins.db")
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
     _check_output_file(db_file)
 
     if not os.path.exists(fasta_file):
@@ -881,6 +905,8 @@ def _protein_generator(
     Yields:
         tuple[str, str]: A tuple containing protein_id and sequence.
     """
+    from Bio import SeqIO
+
     # Ensure we use 'rt' for text mode reading, especially if gzipped
     open_func = gzip.open if str(fasta_path).endswith(".gz") else open
     mode = "rt"
@@ -911,6 +937,9 @@ def check_fasta_duplicates(fasta_path: str) -> tuple[set[str], set[str]]:
         FileNotFoundError: If the input file doesn't exist
         ValueError: If the FASTA file is malformed
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
     if not os.path.exists(fasta_path):
         raise FileNotFoundError(f"FASTA file not found: {fasta_path}")
 
@@ -987,6 +1016,12 @@ def clean_fasta_duplicates(
         FileExistsError: If the output file already exists
         FileNotFoundError: If the input file doesn't exist
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+
+    if not os.path.exists(input_path):
+        raise FileNotFoundError(f"Input FASTA file not found: {input_path}")
+
     _check_output_file(output_path)
 
     # First pass: collect sequence hashes for each ID
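Beyond the deferred imports, this hunk adds fail-fast input validation: the input path is checked before `_check_output_file`, so a caller with a bad input path gets a specific `FileNotFoundError` instead of a later, more confusing parse error. A condensed sketch of that ordering (the function name and the inline stand-in for `_check_output_file` are illustrative):

```python
import os

def clean_duplicates(input_path: str, output_path: str) -> None:
    # Validate the input first: a missing input should be reported even
    # when the output path also happens to be problematic.
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Input FASTA file not found: {input_path}")
    if os.path.exists(output_path):  # stand-in for _check_output_file
        raise FileExistsError(f"File already exists: {output_path}")
    ...
```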
@@ -1075,6 +1110,8 @@ def fetch_uniprot_fasta(
     Returns:
         tuple: (success_count, failed_count, output_filepath, failed_accessions)
     """
+    from tqdm.notebook import tqdm as tqdm_notebook
+
     # Convert set to list for batch processing
     accession_list = list(accession_set)
 
{dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.38.dist-info}/RECORD
CHANGED
@@ -8,13 +8,13 @@ dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2z
 dayhoff_tools/cli/utility_commands.py,sha256=ER4VrJt4hu904MwrcltUXjwBWT4uFrP-aPXjdXyT3F8,24685
 dayhoff_tools/deployment/base.py,sha256=8tXwsPYvRo-zV-aNhHw1c7Rji-KWg8S5xoCCznFnVVI,17412
 dayhoff_tools/deployment/deploy_aws.py,sha256=jQyQ0fbm2793jEHFO84lr5tNqiOpdBg6U0S5zCVJr1M,17884
-dayhoff_tools/deployment/deploy_gcp.py,sha256=
+dayhoff_tools/deployment/deploy_gcp.py,sha256=xgaOVsUDmP6wSEMYNkm1yRNcVskfdz80qJtCulkBIAM,8860
 dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqqDkpaIOaAn_4,26027
 dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
 dayhoff_tools/deployment/processors.py,sha256=A7zvF47TfCkuLTCvaqZmk1M9ZgZcv6CAoXZCV6rEXuE,34599
 dayhoff_tools/deployment/swarm.py,sha256=MGcS2_x4RNFtnVjWlU_SwNfhICz8NlGYr9cYBK4ZKDA,21688
 dayhoff_tools/embedders.py,sha256=svP_ksm3FdyVZ8i8R9R5uoGu2qI_hVQ_eztG0drXkN8,36477
-dayhoff_tools/fasta.py,sha256=
+dayhoff_tools/fasta.py,sha256=_kA2Cpiy7JAGbBqLrjElkzbcUD_p-nO2d5Aj1LVmOvc,50509
 dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
 dayhoff_tools/h5.py,sha256=j1nxxaiHsMidVX_XwB33P1Pz9d7K8ZKiDZwJWQUUQSY,21158
 dayhoff_tools/intake/gcp.py,sha256=uCeEskhbEwJIYpN6ne6siT1dbpTizCjjel-hRe0kReE,3030
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
 dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
 dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
 dayhoff_tools/warehouse.py,sha256=TqV8nex1AluNaL4JuXH5zuu9P7qmE89lSo6f_oViy6U,14965
-dayhoff_tools-1.1.
-dayhoff_tools-1.1.
-dayhoff_tools-1.1.
-dayhoff_tools-1.1.
+dayhoff_tools-1.1.38.dist-info/METADATA,sha256=nDSK0SHTOMdieTxWDLScNArXB4g5TLAocONnt4xD89k,2843
+dayhoff_tools-1.1.38.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+dayhoff_tools-1.1.38.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
+dayhoff_tools-1.1.38.dist-info/RECORD,,
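Both hunks are routine wheel-metadata churn: the two edited modules get new digests and sizes, and the dist-info paths move from 1.1.36 to 1.1.38 (the removed 1.1.36 entries are truncated in this view). Per the wheel spec, each RECORD line has the form `path,sha256=<digest>,<size>`, where the digest is SHA-256 in unpadded urlsafe base64, and the RECORD file lists itself with empty hash and size fields. A sketch of how such an entry is computed (the helper name is illustrative):

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Produces "path,sha256=<unpadded urlsafe-b64 digest>,<size in bytes>".
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"
```

Running this over fasta.py as shipped in the 1.1.38 wheel should reproduce the `_kA2Cpiy…,50509` entry above.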
{dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.38.dist-info}/WHEEL
File without changes
{dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.38.dist-info}/entry_points.txt
File without changes