dayhoff-tools 1.2.5__tar.gz → 1.3.1__tar.gz

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (31)
  1. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/PKG-INFO +6 -4
  2. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/README.md +0 -0
  3. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/__init__.py +0 -0
  4. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/chemistry/standardizer.py +0 -0
  5. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/chemistry/utils.py +0 -0
  6. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/cli/__init__.py +0 -0
  7. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/cli/cloud_commands.py +0 -0
  8. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/cli/main.py +0 -0
  9. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/cli/swarm_commands.py +0 -0
  10. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/cli/utility_commands.py +0 -0
  11. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/deployment/base.py +0 -0
  12. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/deployment/deploy_aws.py +0 -0
  13. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/deployment/deploy_gcp.py +0 -0
  14. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/deployment/deploy_utils.py +0 -0
  15. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/deployment/job_runner.py +0 -0
  16. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/deployment/processors.py +0 -0
  17. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/deployment/swarm.py +0 -0
  18. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/embedders.py +1 -1
  19. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/fasta.py +13 -5
  20. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/file_ops.py +0 -0
  21. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/h5.py +0 -0
  22. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/intake/gcp.py +0 -0
  23. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/intake/gtdb.py +0 -0
  24. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/intake/kegg.py +0 -0
  25. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/intake/mmseqs.py +0 -0
  26. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/intake/structure.py +0 -0
  27. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/intake/uniprot.py +0 -0
  28. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/logs.py +0 -0
  29. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/sqlite.py +0 -0
  30. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/dayhoff_tools/warehouse.py +0 -0
  31. {dayhoff_tools-1.2.5 → dayhoff_tools-1.3.1}/pyproject.toml +6 -6
@@ -1,11 +1,13 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dayhoff-tools
3
- Version: 1.2.5
3
+ Version: 1.3.1
4
4
  Summary: Common tools for all the repos at Dayhoff Labs
5
5
  Author: Daniel Martin-Alarcon
6
6
  Author-email: dma@dayhofflabs.com
7
- Requires-Python: >=3.12,<4.0
7
+ Requires-Python: >=3.10,<4.0
8
8
  Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
9
11
  Classifier: Programming Language :: Python :: 3.12
10
12
  Classifier: Programming Language :: Python :: 3.13
11
13
  Provides-Extra: embedders
@@ -24,13 +26,13 @@ Requires-Dist: pandas (>=2.2.3) ; extra == "embedders"
24
26
  Requires-Dist: pandas (>=2.2.3) ; extra == "full"
25
27
  Requires-Dist: pyyaml (>=6.0)
26
28
  Requires-Dist: questionary (>=2.0.1)
27
- Requires-Dist: rdkit (>=2025.3.2) ; extra == "full"
29
+ Requires-Dist: rdkit-pypi (>=2022.9.5) ; extra == "full"
28
30
  Requires-Dist: requests (>=2.31.0)
29
31
  Requires-Dist: sentencepiece (>=0.2.0) ; extra == "embedders"
30
32
  Requires-Dist: sentencepiece (>=0.2.0) ; extra == "full"
31
33
  Requires-Dist: sqlalchemy (>=2.0.40,<3.0.0) ; extra == "full"
32
34
  Requires-Dist: toml (>=0.10)
33
- Requires-Dist: torch (>=2.5.0) ; extra == "embedders"
35
+ Requires-Dist: torch (>=2.4.0) ; extra == "embedders"
34
36
  Requires-Dist: tqdm (>=4.67.1) ; extra == "embedders"
35
37
  Requires-Dist: tqdm (>=4.67.1) ; extra == "full"
36
38
  Requires-Dist: transformers (==4.36.2) ; extra == "full"
File without changes
@@ -683,7 +683,7 @@ class Embedder(Processor):
683
683
  sequence_ids, sequences, sequence_lengths = zip(*batch)
684
684
 
685
685
  # Prepare sequences for tokenization
686
- tokenizer_input = self.prepare_tokenizer_input(sequences)
686
+ tokenizer_input = self.prepare_tokenizer_input(list(sequences))
687
687
 
688
688
  # Tokenize sequences
689
689
  encoded_input = self.tokenizer.batch_encode_plus(
@@ -25,7 +25,7 @@ def _clean_noncanonical_fasta(
25
25
  ) -> Optional[dict[str, str]]:
26
26
  """
27
27
  Read in a FASTA file containing multiple sequences, replace non-canonical amino acids,
28
- remove empty sequences, and either write the sequences to a new FASTA file or return them as a dictionary.
28
+ remove stop codons, remove empty sequences, and either write the sequences to a new FASTA file or return them as a dictionary.
29
29
 
30
30
  Args:
31
31
  input_path (str): Path to the input FASTA file.
@@ -48,7 +48,11 @@ def _clean_noncanonical_fasta(
48
48
  for line in fasta_file:
49
49
  if line.startswith(">"):
50
50
  if seq_id and seq_lines:
51
- seq = "".join(seq_lines).translate(str.maketrans("OJUZB", "XLCED"))
51
+ seq = (
52
+ "".join(seq_lines)
53
+ .translate(str.maketrans("OJUZB", "XLCED"))
54
+ .replace("*", "")
55
+ )
52
56
  if seq.strip(): # Only process non-empty sequences
53
57
  sequences[seq_id] = seq
54
58
  if output_path:
@@ -61,7 +65,11 @@ def _clean_noncanonical_fasta(
61
65
 
62
66
  # Process the last sequence
63
67
  if seq_id and seq_lines:
64
- seq = "".join(seq_lines).translate(str.maketrans("OJUZB", "XLCED"))
68
+ seq = (
69
+ "".join(seq_lines)
70
+ .translate(str.maketrans("OJUZB", "XLCED"))
71
+ .replace("*", "")
72
+ )
65
73
  if seq.strip(): # Only process non-empty sequences
66
74
  sequences[seq_id] = seq
67
75
  if output_path:
@@ -92,7 +100,7 @@ def clean_noncanonical_fasta(
92
100
  ):
93
101
  """
94
102
  Read in a FASTA file containing multiple sequences and write the sequences to a new FASTA file.
95
- Replace non-canonical amino acids along the way.
103
+ Replace non-canonical amino acids and remove stop codons along the way.
96
104
 
97
105
  Args:
98
106
  input_path (str): Path to the input FASTA file.
@@ -112,7 +120,7 @@ def clean_noncanonical_fasta_to_dict(
112
120
  ) -> dict[str, str]:
113
121
  """
114
122
  Read in a FASTA file containing multiple sequences and return the sequences as a dictionary.
115
- Replace non-canonical amino acids along the way.
123
+ Replace non-canonical amino acids and remove stop codons along the way.
116
124
 
117
125
  Args:
118
126
  input_path (str): Path to the input FASTA file.
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
5
5
 
6
6
  [project]
7
7
  name = "dayhoff-tools"
8
- version = "1.2.5"
8
+ version = "1.3.1"
9
9
  description = "Common tools for all the repos at Dayhoff Labs"
10
10
  authors = [
11
11
  {name = "Daniel Martin-Alarcon", email = "dma@dayhofflabs.com"}
@@ -21,7 +21,7 @@ dependencies = [
21
21
  "questionary>=2.0.1",
22
22
  "tzdata>=2025.2",
23
23
  ]
24
- requires-python = ">=3.12,<4.0"
24
+ requires-python = ">=3.10,<4.0"
25
25
 
26
26
  [project.optional-dependencies]
27
27
  full = [
@@ -31,7 +31,7 @@ full = [
31
31
  "fair-esm>=2.0.0",
32
32
  "h5py>=3.11.0",
33
33
  "pandas>=2.2.3",
34
- "rdkit>=2025.3.2",
34
+ "rdkit-pypi>=2022.9.5",
35
35
  "sqlalchemy (>=2.0.40,<3.0.0)",
36
36
  "transformers==4.36.2",
37
37
  "sentencepiece>=0.2.0",
@@ -44,7 +44,7 @@ embedders = [
44
44
  "numpy>=1.26.4",
45
45
  "pandas>=2.2.3",
46
46
  "sentencepiece>=0.2.0",
47
- "torch>=2.5.0",
47
+ "torch>=2.4.0",
48
48
  "tqdm>=4.67.1",
49
49
  "transformers>=4.36.2",
50
50
  ]
@@ -79,8 +79,8 @@ dev-dependencies = [
79
79
  "pytest>=8.0.2,<9",
80
80
  "pytest-cov>=4.1.0,<5",
81
81
  "pytest-mock>=3.12.0,<4",
82
- "torch>=2.5.0",
83
- "torchvision>=0.20.0",
82
+ "torch==2.4.0+cu121",
83
+ "torchvision==0.19.0+cu121",
84
84
  ]
85
85
 
86
86
  [project.scripts]