dayhoff-tools 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. dayhoff_tools-1.0.0/PKG-INFO +122 -0
  2. dayhoff_tools-1.0.0/README.md +55 -0
  3. dayhoff_tools-1.0.0/dayhoff_tools/__init__.py +0 -0
  4. dayhoff_tools-1.0.0/dayhoff_tools/chemistry/standardizer.py +297 -0
  5. dayhoff_tools-1.0.0/dayhoff_tools/chemistry/utils.py +63 -0
  6. dayhoff_tools-1.0.0/dayhoff_tools/cli/__init__.py +0 -0
  7. dayhoff_tools-1.0.0/dayhoff_tools/cli/main.py +90 -0
  8. dayhoff_tools-1.0.0/dayhoff_tools/cli/swarm_commands.py +156 -0
  9. dayhoff_tools-1.0.0/dayhoff_tools/cli/utility_commands.py +244 -0
  10. dayhoff_tools-1.0.0/dayhoff_tools/deployment/base.py +434 -0
  11. dayhoff_tools-1.0.0/dayhoff_tools/deployment/deploy_aws.py +458 -0
  12. dayhoff_tools-1.0.0/dayhoff_tools/deployment/deploy_gcp.py +176 -0
  13. dayhoff_tools-1.0.0/dayhoff_tools/deployment/deploy_utils.py +781 -0
  14. dayhoff_tools-1.0.0/dayhoff_tools/deployment/job_runner.py +153 -0
  15. dayhoff_tools-1.0.0/dayhoff_tools/deployment/processors.py +125 -0
  16. dayhoff_tools-1.0.0/dayhoff_tools/deployment/swarm.py +591 -0
  17. dayhoff_tools-1.0.0/dayhoff_tools/embedders.py +893 -0
  18. dayhoff_tools-1.0.0/dayhoff_tools/fasta.py +1082 -0
  19. dayhoff_tools-1.0.0/dayhoff_tools/file_ops.py +261 -0
  20. dayhoff_tools-1.0.0/dayhoff_tools/gcp.py +85 -0
  21. dayhoff_tools-1.0.0/dayhoff_tools/h5.py +542 -0
  22. dayhoff_tools-1.0.0/dayhoff_tools/kegg.py +37 -0
  23. dayhoff_tools-1.0.0/dayhoff_tools/logs.py +27 -0
  24. dayhoff_tools-1.0.0/dayhoff_tools/mmseqs.py +164 -0
  25. dayhoff_tools-1.0.0/dayhoff_tools/sqlite.py +516 -0
  26. dayhoff_tools-1.0.0/dayhoff_tools/structure.py +751 -0
  27. dayhoff_tools-1.0.0/dayhoff_tools/uniprot.py +434 -0
  28. dayhoff_tools-1.0.0/dayhoff_tools/warehouse.py +418 -0
  29. dayhoff_tools-1.0.0/pyproject.toml +86 -0
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.3
2
+ Name: dayhoff-tools
3
+ Version: 1.0.0
4
+ Summary: Common tools for all the repos at Dayhoff Labs
5
+ Author: Daniel Martin-Alarcon
6
+ Author-email: dma@dayhofflabs.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Provides-Extra: all
14
+ Provides-Extra: core
15
+ Provides-Extra: dev
16
+ Requires-Dist: biopython (>=1.84) ; extra == "all"
17
+ Requires-Dist: biopython (>=1.84) ; extra == "core"
18
+ Requires-Dist: black (>=25.1.0) ; extra == "all"
19
+ Requires-Dist: black (>=25.1.0) ; extra == "dev"
20
+ Requires-Dist: boto3 (>=1.36.8) ; extra == "all"
21
+ Requires-Dist: boto3 (>=1.36.8) ; extra == "core"
22
+ Requires-Dist: colorlog (>=6.8.2) ; extra == "all"
23
+ Requires-Dist: colorlog (>=6.8.2) ; extra == "dev"
24
+ Requires-Dist: docker (>=7.1.0) ; extra == "all"
25
+ Requires-Dist: docker (>=7.1.0) ; extra == "core"
26
+ Requires-Dist: dvc (>=3.48.2) ; extra == "all"
27
+ Requires-Dist: dvc (>=3.48.2) ; extra == "dev"
28
+ Requires-Dist: dvc-gs (>=3.0.1) ; extra == "all"
29
+ Requires-Dist: dvc-gs (>=3.0.1) ; extra == "dev"
30
+ Requires-Dist: fair-esm (>=2.0.0) ; extra == "all"
31
+ Requires-Dist: fair-esm (>=2.0.0) ; extra == "core"
32
+ Requires-Dist: firebase-admin (>=6.5.0)
33
+ Requires-Dist: flake8 (>=7.0.0) ; extra == "all"
34
+ Requires-Dist: flake8 (>=7.0.0) ; extra == "dev"
35
+ Requires-Dist: h5py (>=3.11.0) ; extra == "all"
36
+ Requires-Dist: h5py (>=3.11.0) ; extra == "core"
37
+ Requires-Dist: isort (>=5.13.2) ; extra == "all"
38
+ Requires-Dist: isort (>=5.13.2) ; extra == "dev"
39
+ Requires-Dist: numpy (<2.0.0) ; extra == "all"
40
+ Requires-Dist: numpy (<2.0.0) ; extra == "dev"
41
+ Requires-Dist: pandas (>=2.2.3) ; extra == "dev"
42
+ Requires-Dist: pylance (>=0.10.2) ; extra == "all"
43
+ Requires-Dist: pylance (>=0.10.2) ; extra == "dev"
44
+ Requires-Dist: pylint (>=3.1.0) ; extra == "all"
45
+ Requires-Dist: pylint (>=3.1.0) ; extra == "dev"
46
+ Requires-Dist: pytest (>=8.0.2) ; extra == "all"
47
+ Requires-Dist: pytest (>=8.0.2) ; extra == "dev"
48
+ Requires-Dist: pytest-cov (>=4.1.0) ; extra == "all"
49
+ Requires-Dist: pytest-cov (>=4.1.0) ; extra == "dev"
50
+ Requires-Dist: pytest-mock (>=3.12.0) ; extra == "all"
51
+ Requires-Dist: pytest-mock (>=3.12.0) ; extra == "dev"
52
+ Requires-Dist: pyyaml (>=6.0)
53
+ Requires-Dist: questionary (>=2.0.1) ; extra == "all"
54
+ Requires-Dist: questionary (>=2.0.1) ; extra == "core"
55
+ Requires-Dist: rdkit-pypi (>=2022.9.5) ; extra == "all"
56
+ Requires-Dist: rdkit-pypi (>=2022.9.5) ; extra == "dev"
57
+ Requires-Dist: requests (>=2.31.0)
58
+ Requires-Dist: torch (>=1.10.0) ; extra == "all"
59
+ Requires-Dist: torch (>=1.10.0) ; extra == "dev"
60
+ Requires-Dist: torchvision (>=0.11.0) ; extra == "all"
61
+ Requires-Dist: torchvision (>=0.11.0) ; extra == "dev"
62
+ Requires-Dist: transformers (>=4.20.0) ; extra == "all"
63
+ Requires-Dist: transformers (>=4.36.2) ; extra == "dev"
64
+ Requires-Dist: typer (>=0.9.0)
65
+ Description-Content-Type: text/markdown
66
+
67
+ # dayhoff-tools
68
+
69
+ A set of small, sharp tools for everyone at Dayhoff.
70
+
71
+ ## Hosting and Auth
72
+
73
+ This repo uses Poetry to build and publish a package to GCP Artifact Registry, at `https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/`. This depends on a Poetry plugin that's now in the standard chassis setup (`keyrings.google-artifactregistry-auth`), and also on the active service account having read access to Artifact Registry. That much is set up for the standard dev container service account, but may not be available to other intended users.
74
+
75
+ ## CLI commands
76
+
77
+ Unlike all the repos that use dayhoff-tools, here you have to install the package explicitly before using the CLI:
78
+
79
+ ```sh
80
+ poetry install
81
+ ```
82
+
83
+ ## Publish a new version
84
+
85
+ 1. Update version number in `pyproject.toml`
86
+ 2. Run `dh wheel`
87
+ 3. In other repos, run `poetry update dayhoff-tools`
88
+
89
+ If you want to overwrite an existing wheel, you'll have to manually delete it from the `dist` folder and also the [Artifact Registry repo](https://console.cloud.google.com/artifacts/python/enzyme-discovery/us-central1/pypirate/dayhoff-tools).
90
+
91
+ ## Install in other repos
92
+
93
+ Installing this library is tricky because we need GCS authentication and also a couple of plugins to install this with either Pip or Poetry. These have been incorporated into `chassis`, but it's worth noting here what the various parts are. All this info came from this [Medium post](https://medium.com/google-cloud/python-packages-via-gcps-artifact-registry-ce1714f8e7c1).
94
+
95
+ 1. Get a Service Account with read access to Artifact Registry (such as `github-actions`, which I made for this purpose).
96
+ 2. Export the SA key file, copy it to your repo, and make it available through this envvar: `export GOOGLE_APPLICATION_CREDENTIALS=github_actions_key.json`
97
+
98
+ ### ... with Pip
99
+
100
+ 1. `pip install keyring`
101
+ 2. `pip install keyrings.google-artifactregistry-auth`
102
+ 3. `pip install --upgrade dayhoff-tools --index-url https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/simple/`
103
+
104
+ ### ... with Poetry
105
+
106
+ 1. Add this plugin: `poetry self add keyrings.google-artifactregistry-auth`
107
+ 2. Add these sections to `pyproject.toml`. Note that dayhoff-tools is in a separate group `pypirate` that installs separately from the others.
108
+
109
+ ```toml
110
+ [tool.poetry.group.pypirate.dependencies]
111
+ dayhoff-tools = {version = "*", source = "pypirate"}
112
+
113
+ [[tool.poetry.source]]
114
+ name = "pypirate"
115
+ url = "https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/simple/"
116
+ priority = "supplemental"
117
+ ```
118
+
119
+ 3. When building a dev container, or in other circumstances when you can't easily authenticate as above, run `poetry install --without pypirate`.
120
+ 4. Otherwise, just `poetry install`.
121
+ 5. To ensure you have the latest version, run `poetry update dayhoff-tools`.
122
+
@@ -0,0 +1,55 @@
1
+ # dayhoff-tools
2
+
3
+ A set of small, sharp tools for everyone at Dayhoff.
4
+
5
+ ## Hosting and Auth
6
+
7
+ This repo uses Poetry to build and publish a package to GCP Artifact Registry, at `https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/`. This depends on a Poetry plugin that's now in the standard chassis setup (`keyrings.google-artifactregistry-auth`), and also on the active service account having read access to Artifact Registry. That much is set up for the standard dev container service account, but may not be available to other intended users.
8
+
9
+ ## CLI commands
10
+
11
+ Unlike all the repos that use dayhoff-tools, here you have to install the package explicitly before using the CLI:
12
+
13
+ ```sh
14
+ poetry install
15
+ ```
16
+
17
+ ## Publish a new version
18
+
19
+ 1. Update version number in `pyproject.toml`
20
+ 2. Run `dh wheel`
21
+ 3. In other repos, run `poetry update dayhoff-tools`
22
+
23
+ If you want to overwrite an existing wheel, you'll have to manually delete it from the `dist` folder and also the [Artifact Registry repo](https://console.cloud.google.com/artifacts/python/enzyme-discovery/us-central1/pypirate/dayhoff-tools).
24
+
25
+ ## Install in other repos
26
+
27
+ Installing this library is tricky because we need GCP authentication and also a couple of plugins to install this with either Pip or Poetry. These have been incorporated into `chassis`, but it's worth noting here what the various parts are. All this info came from this [Medium post](https://medium.com/google-cloud/python-packages-via-gcps-artifact-registry-ce1714f8e7c1).
28
+
29
+ 1. Get a Service Account with read access to Artifact Registry (such as `github-actions`, which I made for this purpose).
30
+ 2. Export the SA key file, copy it to your repo, and make it available through this envvar: `export GOOGLE_APPLICATION_CREDENTIALS=github_actions_key.json`
31
+
32
+ ### ... with Pip
33
+
34
+ 1. `pip install keyring`
35
+ 2. `pip install keyrings.google-artifactregistry-auth`
36
+ 3. `pip install --upgrade dayhoff-tools --index-url https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/simple/`
37
+
38
+ ### ... with Poetry
39
+
40
+ 1. Add this plugin: `poetry self add keyrings.google-artifactregistry-auth`
41
+ 2. Add these sections to `pyproject.toml`. Note that dayhoff-tools is in a separate group `pypirate` that installs separately from the others.
42
+
43
+ ```toml
44
+ [tool.poetry.group.pypirate.dependencies]
45
+ dayhoff-tools = {version = "*", source = "pypirate"}
46
+
47
+ [[tool.poetry.source]]
48
+ name = "pypirate"
49
+ url = "https://us-central1-python.pkg.dev/enzyme-discovery/pypirate/simple/"
50
+ priority = "supplemental"
51
+ ```
52
+
53
+ 3. When building a dev container, or in other circumstances when you can't easily authenticate as above, run `poetry install --without pypirate`.
54
+ 4. Otherwise, just `poetry install`.
55
+ 5. To ensure you have the latest version, run `poetry update dayhoff-tools`.
File without changes
@@ -0,0 +1,297 @@
1
+ """Normalization classes for molecules and reactions."""
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ from rdkit import Chem, rdBase
6
+ from rdkit.Chem import AllChem
7
+ from rdkit.Chem.MolStandardize import rdMolStandardize # type: ignore
8
+
9
+ rdBase.DisableLog("rdApp.error")
10
+ rdBase.DisableLog("rdApp.warning")
11
+
12
+
13
def is_smiles_aromatic(smiles: str) -> bool:
    """Report whether a SMILES string contains at least one aromatic atom.

    The string is parsed with sanitization switched off, so the answer
    reflects the aromatic flags RDKit assigns while parsing the input as
    written.

    Args:
        smiles (str): SMILES string to inspect

    Returns:
        bool: True if any parsed atom is flagged aromatic, otherwise False

    Raises:
        ValueError: If RDKit cannot parse the string
    """
    mol = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
    if mol is None:
        raise ValueError("invalid SMILES string")
    for atom in mol.GetAtoms():
        if atom.GetIsAromatic():
            return True
    return False
27
+
28
+
29
class BaseStandardizer(ABC):
    """Common interface for standardizers of molecules and reactions.

    Subclasses supply ``standardize_molecule``; the default
    ``standardize_reaction`` applies it to every reactant and product.
    """

    @abstractmethod
    def standardize_molecule(self, smiles: str) -> str:
        """Return the standardized form of a single molecule.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string
        """

    def standardize_reaction(self, smiles: str) -> str:
        """Standardize every component of a reaction SMILES/SMARTS string.

        Each reactant and product is written back to SMILES, passed through
        ``standardize_molecule`` and re-parsed without sanitization. If the
        result splits into several disconnected fragments, each fragment is
        added to the rebuilt reaction as its own template.

        Args:
            smiles (str): Input SMILES/SMARTS string

        Returns:
            str: Output SMILES/SMARTS string
        """

        def standardized_templates(components):
            # Yield the template(s) that replace each original component,
            # one component at a time and in the original order.
            for component in components:
                raw = Chem.MolToSmiles(component, canonical=False)  # type: ignore
                cleaned = self.standardize_molecule(raw)
                mol = Chem.MolFromSmiles(cleaned, sanitize=False)  # type: ignore
                frags = Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=False)  # type: ignore
                if len(frags) == 1:
                    yield mol
                else:
                    yield from frags

        parsed = AllChem.ReactionFromSmarts(smiles, useSmiles=True)  # type: ignore
        rebuilt = AllChem.ChemicalReaction()  # type: ignore
        for template in standardized_templates(parsed.GetReactants()):
            rebuilt.AddReactantTemplate(template)
        for template in standardized_templates(parsed.GetProducts()):
            rebuilt.AddProductTemplate(template)
        return AllChem.ReactionToSmiles(rebuilt)  # type: ignore
77
+
78
+
79
class HypervalentStandardizer(BaseStandardizer):
    """Standardizer for converting double to single bonds in hypervalent
    compounds, implemented by running only RDKit's cleanup sanitize step."""

    def standardize_molecule(self, smiles: str) -> str:
        """Apply the cleanup-only sanitization to one molecule.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES cannot be parsed, or if the sanitize
                call reports a failed operation
        """
        mol = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if mol is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")
        # NOTE(review): SanitizeMol appears to raise on failure unless
        # catchErrors=True is passed, so this check may never trigger — confirm.
        if Chem.SanitizeMol(mol, sanitizeOps=Chem.SANITIZE_CLEANUP) > 0:  # type: ignore
            raise ValueError(f"Sanitization failed for SMILES input '{smiles}'")
        return Chem.MolToSmiles(mol)  # type: ignore
99
+
100
+
101
class RemoveHsStandardizer(BaseStandardizer):
    """Standardizer that strips explicit hydrogens from molecules."""

    def standardize_molecule(self, smiles: str) -> str:
        """Remove explicit hydrogens and return canonical SMILES.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES cannot be parsed, or if the sanitize
                call reports a failed operation
        """
        parsed = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if parsed is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")
        stripped = Chem.RemoveHs(parsed, sanitize=False)  # type: ignore
        # Only the radical-finding sanitize step is run on the stripped molecule.
        # NOTE(review): SanitizeMol appears to raise on failure unless
        # catchErrors=True is passed, so this check may never trigger — confirm.
        if Chem.SanitizeMol(stripped, sanitizeOps=Chem.SANITIZE_FINDRADICALS) > 0:  # type: ignore
            raise ValueError(f"Sanitization failed for SMILES input '{smiles}'")
        return Chem.MolToSmiles(stripped, canonical=True)  # type: ignore
121
+
122
+
123
class KekulizeStandardizer(BaseStandardizer):
    """Standardizer that rewrites aromatic compounds in Kekule form."""

    def standardize_molecule(self, smiles: str) -> str:
        """Kekulize one molecule and return canonical SMILES.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES cannot be parsed
        """
        mol = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if mol is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")
        # The molecule was parsed unsanitized, so refresh its property cache
        # (non-strict) before kekulizing.
        mol.UpdatePropertyCache(strict=False)
        # clearAromaticFlags=True drops the aromatic flags from the result.
        Chem.Kekulize(mol, clearAromaticFlags=True)  # type: ignore
        return Chem.MolToSmiles(mol, canonical=True)  # type: ignore
141
+
142
+
143
class UnchargeStandardizer(BaseStandardizer):
    """Standardizer for removing charges from molecules by protonation/deprotonation."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One RDKit uncharger is created up front and reused for every molecule.
        self._uncharger = rdMolStandardize.Uncharger()

    def standardize_molecule(self, smiles: str) -> str:
        """Neutralize one molecule with the RDKit uncharger.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES cannot be parsed
        """
        parsed = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if parsed is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")
        return Chem.MolToSmiles(self._uncharger.uncharge(parsed))  # type: ignore

    def standardize_reaction(self, smiles: str) -> str:
        """Neutralize a reaction and rebalance its charge with protons.

        Explicit ``[H+]`` components are dropped from both sides, every
        remaining component is uncharged, and then enough ``[H+]`` templates
        are added to the side with the lower total formal charge to make the
        two totals equal.

        Args:
            smiles (str): Input SMILES/SMARTS string

        Returns:
            str: Output SMILES/SMARTS string
        """

        def uncharged(components):
            # Yield the uncharged replacement of every non-proton component,
            # lazily and in the original order.
            for component in components:
                raw = Chem.MolToSmiles(component, canonical=False)  # type: ignore
                if raw == "[H+]":
                    continue
                neutral = self.standardize_molecule(raw)
                yield Chem.MolFromSmiles(neutral, sanitize=False)  # type: ignore

        parsed = AllChem.ReactionFromSmarts(smiles, useSmiles=True)  # type: ignore
        rebuilt = AllChem.ChemicalReaction()  # type: ignore

        reactant_charge = 0
        for template in uncharged(parsed.GetReactants()):
            reactant_charge += Chem.GetFormalCharge(template)  # type: ignore
            rebuilt.AddReactantTemplate(template)

        product_charge = 0
        for template in uncharged(parsed.GetProducts()):
            product_charge += Chem.GetFormalCharge(template)  # type: ignore
            rebuilt.AddProductTemplate(template)

        # A positive imbalance means the reactant side carries more charge,
        # so protons go on the product side; a negative one is the reverse.
        imbalance = reactant_charge - product_charge
        if imbalance != 0:
            proton = Chem.MolFromSmiles("[H+]", sanitize=False)  # type: ignore
            if imbalance > 0:
                add_proton = rebuilt.AddProductTemplate
            else:
                add_proton = rebuilt.AddReactantTemplate
            for _ in range(abs(imbalance)):
                add_proton(proton)
        return AllChem.ReactionToSmiles(rebuilt)  # type: ignore
205
+
206
+
207
class MetalStandardizer(BaseStandardizer):
    """Standardizer for disconnecting bonds between metals and N, O, F atoms."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One RDKit metal disconnector is created up front and reused.
        self._disconnector = rdMolStandardize.MetalDisconnector()

    def standardize_molecule(self, smiles: str) -> str:
        """Sanitize one molecule and disconnect its metal bonds.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES cannot be parsed
        """
        mol = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if mol is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")

        # Collect the sanitize steps to leave out: the properties step always,
        # and aromaticity perception too when the input has no aromatic atoms.
        skipped = Chem.SANITIZE_PROPERTIES  # type: ignore
        if not is_smiles_aromatic(smiles):
            skipped = skipped | Chem.SANITIZE_SETAROMATICITY  # type: ignore
        Chem.SanitizeMol(mol, sanitizeOps=Chem.SANITIZE_ALL ^ skipped)  # type: ignore
        return Chem.MolToSmiles(self._disconnector.Disconnect(mol))  # type: ignore
233
+
234
+
235
class Standardizer(BaseStandardizer):
    """Aggregate standardizer that chains the enabled individual standardizers."""

    def __init__(
        self,
        *,
        standardize_hypervalent: bool = True,
        standardize_remove_hs: bool = True,
        standardize_kekulize: bool = False,
        standardize_uncharge: bool = False,
        standardize_metals: bool = True,
    ):
        """Build the pipeline of enabled standardizers.

        Args:
            standardize_hypervalent (bool): Convert double to single bonds in
                hypervalent compounds
            standardize_remove_hs (bool): Remove explicit hydrogen atoms
            standardize_kekulize (bool): Kekulize aromatic compounds
            standardize_uncharge (bool): Remove charges from molecules by
                protonation/deprotonation
            standardize_metals (bool): Disconnect bonds between metals and
                N, O, F atoms
        """
        # Listed in execution order; only enabled stages are instantiated.
        stages = (
            (standardize_hypervalent, HypervalentStandardizer),
            (standardize_remove_hs, RemoveHsStandardizer),
            (standardize_kekulize, KekulizeStandardizer),
            (standardize_uncharge, UnchargeStandardizer),
            (standardize_metals, MetalStandardizer),
        )
        self._standardizers = [stage() for enabled, stage in stages if enabled]

    def standardize_molecule(self, smiles: str) -> str:
        """Run a molecule through every enabled standardizer in order.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string
        """
        current = smiles
        for stage in self._standardizers:
            current = stage.standardize_molecule(current)
        return current

    def standardize_reaction(self, smiles: str) -> str:
        """Run a reaction through every enabled standardizer in order.

        Args:
            smiles (str): Input SMILES/SMARTS string

        Returns:
            str: Output SMILES/SMARTS string
        """
        current = smiles
        for stage in self._standardizers:
            current = stage.standardize_reaction(current)
        return current
@@ -0,0 +1,63 @@
1
+ """Chemistry utils for refinery."""
2
+
3
+ from typing import Optional
4
+
5
+ from rdkit import Chem, rdBase
6
+
7
+ __all__ = ["generate_inchikey"]
8
+
9
+ rdBase.DisableLog("rdApp.warning")
10
+ rdBase.DisableLog("rdApp.error")
11
+
12
+
13
def generate_inchikey(
    s: str, rgroup_smiles: Optional[str] = None, ignore_direction: bool = False
) -> str:
    """Generate an InChIKey from a SMILES or reaction SMILES string.

    Failures are reported to the caller as ValueError.

    Args:
        s (str): SMILES or reaction SMILES
        rgroup_smiles (Optional[str]): Replacement SMILES string for R groups (*).
            If None, R groups raise a ValueError. Must not itself contain "*".
        ignore_direction (bool, optional): Ignore direction in reaction SMILES,
            by always placing the lexicographically smaller side key first.
            Has no effect on SMILES. Defaults to False.

    Returns:
        str: InChIKey of the molecule. For reaction SMILES, the keys of the two
            sides joined as "reactants>>products" (or in sorted order when
            ignore_direction is True).

    Raises:
        ValueError: If the string (or one side of a reaction) is empty, cannot
            be parsed, contains R groups with no usable replacement, or yields
            no InChIKey.
    """
    if ">>" in s:
        # Reaction SMILES: key each side separately, then join them.
        reactants, products = s.split(">>", maxsplit=1)
        reactants_inchikey = generate_inchikey(reactants, rgroup_smiles=rgroup_smiles)
        products_inchikey = generate_inchikey(products, rgroup_smiles=rgroup_smiles)
        if ignore_direction and reactants_inchikey > products_inchikey:
            return products_inchikey + ">>" + reactants_inchikey
        return reactants_inchikey + ">>" + products_inchikey

    if "*" in s:
        if rgroup_smiles is None:
            raise ValueError(
                f"Found R (*) groups in SMILES string {s}. Set rgroup_smiles to replace."
            )
        if "*" in rgroup_smiles:
            # Substituting would leave a "*" behind and recurse without end.
            raise ValueError("rgroup_smiles must not contain R (*) groups")
        # An empty replacement can leave "()" branches behind; drop them.
        replaced_smiles = s.replace("*", rgroup_smiles).replace("()", "")
        return generate_inchikey(replaced_smiles)

    if s == "":
        raise ValueError("Empty SMILES string")

    try:
        rdmol = Chem.MolFromSmiles(s, sanitize=True)  # type: ignore
    except Exception as err:
        # Keep the single ValueError contract but preserve the parser's cause.
        raise ValueError(f"Invalid SMILES string {s}") from err
    if rdmol is None:
        raise ValueError(f"Invalid SMILES string {s}")

    inchikey = Chem.MolToInchiKey(rdmol)
    if inchikey == "":
        raise ValueError("Could not generate INChI key")
    return inchikey
File without changes
@@ -0,0 +1,90 @@
1
+ """Entry file for the CLI, which aggregates and aliases all commands."""
2
+
3
+ import typer
4
+ from dayhoff_tools.cli.utility_commands import (
5
+ add_to_warehouse_typer,
6
+ build_and_upload_wheel,
7
+ delete_local_branch,
8
+ get_ancestry,
9
+ import_from_warehouse_typer,
10
+ rebuild_devcontainer_file,
11
+ test_github_actions_locally,
12
+ )
13
+
14
app = typer.Typer()

# Utility commands: each CLI alias is registered in the order listed here.
for _alias, _handler in (
    ("clean", delete_local_branch),
    ("gha", test_github_actions_locally),
    ("rebuild", rebuild_devcontainer_file),
    ("wadd", add_to_warehouse_typer),
    ("wancestry", get_ancestry),
    ("wheel", build_and_upload_wheel),
    ("wimport", import_from_warehouse_typer),
):
    app.command(_alias)(_handler)
# Do not leave the loop variables behind as module attributes.
del _alias, _handler
24
+
25
+
26
+ # Use lazy loading for slow-loading swarm commands
27
@app.command("reset")
def reset_wrapper(
    firestore_collection: str = typer.Option(prompt=True),
    old_status: str = typer.Option(default="failed", prompt=True),
    new_status: str = typer.Option(default="available", prompt=True),
    delete_old: bool = typer.Option(default=True, prompt=True),
):
    """Find all the documents in the database with a given status, and
    make a new document with the same name and a new status."""
    # Imported inside the command so the swarm module is only loaded when
    # this command actually runs.
    from dayhoff_tools.cli.swarm_commands import reset_failed_cards as do_reset

    do_reset(firestore_collection, old_status, new_status, delete_old)
39
+
40
+
41
@app.command("zombie")
def zombie_wrapper(
    firestore_collection: str = typer.Option(prompt=True),
    delete_old: bool = typer.Option(default=True, prompt=True),
    minutes_threshold: int = typer.Option(default=60, prompt=True),
):
    """Find all the documents in the database with status "assigned", and "last_updated"
    older than a specified threshold, and make a new "available" document for them."""
    # Imported inside the command so the swarm module is only loaded when
    # this command actually runs.
    from dayhoff_tools.cli.swarm_commands import reset_zombie_cards as do_reset

    do_reset(firestore_collection, delete_old, minutes_threshold)
52
+
53
+
54
@app.command("status")
def status_wrapper(
    firestore_collection: str = typer.Argument(),
):
    """Count the various statuses of items in a given collection."""
    # Imported inside the command so the swarm module is only loaded when
    # this command actually runs.
    from dayhoff_tools.cli.swarm_commands import (
        get_firestore_collection_status as report_status,
    )

    report_status(firestore_collection)
62
+
63
+
64
+ # Deployment commands - use lazy loading but preserve argument passing
65
@app.command("deploy")
def deploy_command(
    mode: str = typer.Argument(help="Deployment mode. Options: local, shell, batch"),
    config_path: str = typer.Argument(help="Path to the YAML configuration file"),
):
    """Unified deployment command."""
    # Imported inside the command so the deployment module is only loaded
    # when this command actually runs; both arguments are forwarded as given.
    from dayhoff_tools.deployment.base import deploy as run_deploy

    run_deploy(mode, config_path)
74
+
75
+
76
@app.command("job")
def run_job_command(
    mode: str = typer.Argument(
        default="setup_and_execute",
        help="Mode to run in: setup (setup only), execute (execute only), or setup_and_execute (both)",
    )
):
    """Run a job."""
    # Imported inside the command so the job runner is only loaded when
    # this command actually runs.
    from dayhoff_tools.deployment.job_runner import run_job as execute_job

    execute_job(mode)
87
+
88
+
89
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()