PyPI - dayhoff-tools - Versions diffs - 1.1.36__py3-none-any.whl → 1.1.37__py3-none-any.whl - Mend

dayhoff-tools 1.1.36__py3-none-any.whl → 1.1.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

dayhoff_tools/embedders.py CHANGED Viewed

@@ -7,14 +7,11 @@ import h5py
 import numpy as np
 import pandas as pd
 import torch
-import torch.utils.data
 from dayhoff_tools.deployment.processors import Processor
 from dayhoff_tools.fasta import (
     clean_noncanonical_fasta,
     clean_noncanonical_fasta_to_dict,
 )
-from esm import FastaBatchedDataset, pretrained
-from transformers import T5EncoderModel, T5Tokenizer
 logger = logging.getLogger(__name__)
@@ -52,6 +49,8 @@ class ESMEmbedder(Processor):
     def _load_model(self):
         """Download pre-trained model and load onto device"""
+        from esm import pretrained
         self.model, self.alphabet = pretrained.load_model_and_alphabet(self.model_name)
         self.model.eval()
         if torch.cuda.is_available():
@@ -62,6 +61,8 @@ class ESMEmbedder(Processor):
     def _load_dataset(self, fasta_file: str) -> None:
         """Load FASTA file into batched dataset and dataloader"""
+        from esm import FastaBatchedDataset
         if not fasta_file.endswith(".fasta"):
             raise ValueError("Input file must have .fasta extension.")
@@ -763,6 +764,8 @@ class ProstT5Embedder(Embedder):
             The model automatically selects half precision (float16) when running on GPU
             and full precision (float32) when running on CPU.
         """
+        from transformers import T5EncoderModel, T5Tokenizer
         tokenizer = T5Tokenizer.from_pretrained(
             "Rostlab/ProstT5", do_lower_case=False, legacy=True
         )
@@ -851,6 +854,8 @@ class T5Embedder(Embedder):
             The model automatically handles memory management and batch processing
             based on sequence sizes and available resources.
         """
+        from transformers import T5EncoderModel, T5Tokenizer
         tokenizer = T5Tokenizer.from_pretrained(
             "Rostlab/prot_t5_xl_half_uniref50-enc", do_lower_case=False
         )

dayhoff_tools/fasta.py CHANGED Viewed

@@ -11,10 +11,6 @@ from pathlib import Path
 from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
 import requests
-from Bio import SeqIO
-from Bio.SeqRecord import SeqRecord
-from tqdm import tqdm
-from tqdm.notebook import tqdm as tqdm_notebook
 logger = logging.getLogger(__name__)
@@ -441,6 +437,8 @@ def subtract_fasta_files(file1: str, file2: str, output_file: str):
     Raises:
         FileExistsError: If the output file already exists.
     """
+    from Bio import SeqIO
     _check_output_file(output_file)
     # Load sequences from file1 with a progress bar
@@ -497,6 +495,8 @@ def simplify_fasta_ids(
     Raises:
         FileExistsError: If the output file already exists.
     """
+    from Bio import SeqIO
     _check_output_file(output_fasta)
     count = 0
@@ -575,6 +575,9 @@ def extract_ids_from_fasta(fasta_file: str) -> Set[str]:
     Raises:
         ValueError: If there's an issue reading or parsing the input file.
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
     sequence_ids: Set[str] = set()
     try:
         estimated_records = estimate_sequences(fasta_file)
@@ -769,7 +772,7 @@ def subset_fasta(
     return all_written_ids if return_written_ids else None
-def load_fasta_as_dict(fasta_file: str) -> Dict[str, SeqRecord]:
+def load_fasta_as_dict(fasta_file: str) -> Dict[str, "SeqRecord"]:
     """
     Load a FASTA file into a dictionary with record IDs as keys.
     Keep only the first instance of each identifier.
@@ -780,6 +783,10 @@ def load_fasta_as_dict(fasta_file: str) -> Dict[str, SeqRecord]:
     Returns:
         Dict[str, SeqRecord]: A dictionary with record IDs as keys and SeqRecord objects as values.
     """
+    from Bio import SeqIO
+    from Bio.SeqRecord import SeqRecord
+    from tqdm import tqdm
     record_dict: Dict[str, SeqRecord] = {}
     estimated_sequences = estimate_sequences(fasta_file)
@@ -815,6 +822,9 @@ def fasta_to_sqlite(fasta_file: str, db_file: str, batch_size: int = 1000) -> No
     Example:
         fasta_to_sqlite("proteins.fasta", "proteins.db")
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
     _check_output_file(db_file)
     if not os.path.exists(fasta_file):
@@ -881,6 +891,8 @@ def _protein_generator(
     Yields:
         tuple[str, str]: A tuple containing protein_id and sequence.
     """
+    from Bio import SeqIO
     # Ensure we use 'rt' for text mode reading, especially if gzipped
     open_func = gzip.open if str(fasta_path).endswith(".gz") else open
     mode = "rt"
@@ -911,6 +923,9 @@ def check_fasta_duplicates(fasta_path: str) -> tuple[set[str], set[str]]:
         FileNotFoundError: If the input file doesn't exist
         ValueError: If the FASTA file is malformed
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
     if not os.path.exists(fasta_path):
         raise FileNotFoundError(f"FASTA file not found: {fasta_path}")
@@ -987,6 +1002,12 @@ def clean_fasta_duplicates(
         FileExistsError: If the output file already exists
         FileNotFoundError: If the input file doesn't exist
     """
+    from Bio import SeqIO
+    from tqdm import tqdm
+    if not os.path.exists(input_path):
+        raise FileNotFoundError(f"Input FASTA file not found: {input_path}")
     _check_output_file(output_path)
     # First pass: collect sequence hashes for each ID
@@ -1075,6 +1096,8 @@ def fetch_uniprot_fasta(
     Returns:
         tuple: (success_count, failed_count, output_filepath, failed_accessions)
     """
+    from tqdm.notebook import tqdm as tqdm_notebook
     # Convert set to list for batch processing
     accession_list = list(accession_set)

{dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.37.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: dayhoff-tools
-Version: 1.1.36
+Version: 1.1.37
 Summary: Common tools for all the repos at Dayhoff Labs
 Author: Daniel Martin-Alarcon
 Author-email: dma@dayhofflabs.com

{dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.37.dist-info}/RECORD RENAMED Viewed

@@ -13,8 +13,8 @@ dayhoff_tools/deployment/deploy_utils.py,sha256=StFwbqnr2_FWiKVg3xnJF4kagTHzndqq
 dayhoff_tools/deployment/job_runner.py,sha256=hljvFpH2Bw96uYyUup5Ths72PZRL_X27KxlYzBMgguo,5086
 dayhoff_tools/deployment/processors.py,sha256=A7zvF47TfCkuLTCvaqZmk1M9ZgZcv6CAoXZCV6rEXuE,34599
 dayhoff_tools/deployment/swarm.py,sha256=MGcS2_x4RNFtnVjWlU_SwNfhICz8NlGYr9cYBK4ZKDA,21688
-dayhoff_tools/embedders.py,sha256=svP_ksm3FdyVZ8i8R9R5uoGu2qI_hVQ_eztG0drXkN8,36477
-dayhoff_tools/fasta.py,sha256=Ls6AG84IgG8COgAefqB3KS6iMbnixP_Up5EwUur-VUs,49780
+dayhoff_tools/embedders.py,sha256=yO13jwVSqd-gk-PUhBE_7lnlRUIHn2tirxZ_mFAVAgM,36557
+dayhoff_tools/fasta.py,sha256=UARjl3w4O6hlZSgzuZZkG-Mi89TLxvxUDL-WEt0-5OU,50210
 dayhoff_tools/file_ops.py,sha256=JlGowvr-CUJFidV-4g_JmhUTN9bsYuaxtqKmnKomm-Q,8506
 dayhoff_tools/h5.py,sha256=j1nxxaiHsMidVX_XwB33P1Pz9d7K8ZKiDZwJWQUUQSY,21158
 dayhoff_tools/intake/gcp.py,sha256=uCeEskhbEwJIYpN6ne6siT1dbpTizCjjel-hRe0kReE,3030
@@ -26,7 +26,7 @@ dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJq
 dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
 dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
 dayhoff_tools/warehouse.py,sha256=TqV8nex1AluNaL4JuXH5zuu9P7qmE89lSo6f_oViy6U,14965
-dayhoff_tools-1.1.36.dist-info/METADATA,sha256=71FGAv8K1KGE-Q7MN3W2bfZYDHCYUb7GGnsLkwPLQPw,2843
-dayhoff_tools-1.1.36.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-dayhoff_tools-1.1.36.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
-dayhoff_tools-1.1.36.dist-info/RECORD,,
+dayhoff_tools-1.1.37.dist-info/METADATA,sha256=wN6qyLCZ5vAQaZdS-8KNuzFnlHIWSz1moDJSugGyVjY,2843
+dayhoff_tools-1.1.37.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+dayhoff_tools-1.1.37.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
+dayhoff_tools-1.1.37.dist-info/RECORD,,

{dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.37.dist-info}/WHEEL RENAMED Viewed

File without changes

{dayhoff_tools-1.1.36.dist-info → dayhoff_tools-1.1.37.dist-info}/entry_points.txt RENAMED Viewed

File without changes

dayhoff-tools 1.1.36__py3-none-any.whl → 1.1.37__py3-none-any.whl

dayhoff-tools 1.1.36__py3-none-any.whl → 1.1.37__py3-none-any.whl