dayhoff-tools 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dayhoff_tools/__init__.py +0 -0
- dayhoff_tools/chemistry/standardizer.py +297 -0
- dayhoff_tools/chemistry/utils.py +63 -0
- dayhoff_tools/cli/__init__.py +0 -0
- dayhoff_tools/cli/main.py +90 -0
- dayhoff_tools/cli/swarm_commands.py +156 -0
- dayhoff_tools/cli/utility_commands.py +244 -0
- dayhoff_tools/deployment/base.py +434 -0
- dayhoff_tools/deployment/deploy_aws.py +458 -0
- dayhoff_tools/deployment/deploy_gcp.py +176 -0
- dayhoff_tools/deployment/deploy_utils.py +781 -0
- dayhoff_tools/deployment/job_runner.py +153 -0
- dayhoff_tools/deployment/processors.py +125 -0
- dayhoff_tools/deployment/swarm.py +591 -0
- dayhoff_tools/embedders.py +893 -0
- dayhoff_tools/fasta.py +1082 -0
- dayhoff_tools/file_ops.py +261 -0
- dayhoff_tools/gcp.py +85 -0
- dayhoff_tools/h5.py +542 -0
- dayhoff_tools/kegg.py +37 -0
- dayhoff_tools/logs.py +27 -0
- dayhoff_tools/mmseqs.py +164 -0
- dayhoff_tools/sqlite.py +516 -0
- dayhoff_tools/structure.py +751 -0
- dayhoff_tools/uniprot.py +434 -0
- dayhoff_tools/warehouse.py +418 -0
- dayhoff_tools-1.0.0.dist-info/METADATA +122 -0
- dayhoff_tools-1.0.0.dist-info/RECORD +30 -0
- dayhoff_tools-1.0.0.dist-info/WHEEL +4 -0
- dayhoff_tools-1.0.0.dist-info/entry_points.txt +3 -0
File without changes
|
@@ -0,0 +1,297 @@
|
|
1
|
+
"""Normalization classes for molecules and reactions."""
|
2
|
+
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
|
5
|
+
from rdkit import Chem, rdBase
|
6
|
+
from rdkit.Chem import AllChem
|
7
|
+
from rdkit.Chem.MolStandardize import rdMolStandardize # type: ignore
|
8
|
+
|
9
|
+
rdBase.DisableLog("rdApp.error")
|
10
|
+
rdBase.DisableLog("rdApp.warning")
|
11
|
+
|
12
|
+
|
13
|
+
def is_smiles_aromatic(smiles: str) -> bool:
    """Report whether any atom parsed from a SMILES string is flagged aromatic.

    The string is parsed with sanitization disabled and each atom's aromatic
    flag is inspected in turn.

    Args:
        smiles (str): Input SMILES string

    Returns:
        bool: True if at least one atom reports itself as aromatic

    Raises:
        ValueError: If the SMILES string cannot be parsed
    """
    parsed = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
    if parsed is None:
        raise ValueError("invalid SMILES string")

    # Stop at the first aromatic atom; an atom-less molecule yields False.
    for atom in parsed.GetAtoms():
        if atom.GetIsAromatic():
            return True
    return False
|
27
|
+
|
28
|
+
|
29
|
+
class BaseStandardizer(ABC):
    """Common interface for standardizers of molecules and reactions.

    Subclasses must implement ``standardize_molecule``. The default
    ``standardize_reaction`` applies it to every reactant and product of a
    reaction and reassembles the result.
    """

    @abstractmethod
    def standardize_molecule(self, smiles: str) -> str:
        """Standardize a single molecule given as a SMILES string.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string
        """

    def standardize_reaction(self, smiles: str) -> str:
        """Standardize every molecule of a reaction SMILES/SMARTS string.

        Each reactant and product is written to SMILES, passed through
        ``standardize_molecule`` and parsed again without sanitization. When
        the standardized molecule consists of several fragments, each fragment
        is added to the rebuilt reaction as its own template.

        Args:
            smiles (str): Input SMILES/SMARTS string

        Returns:
            str: Output SMILES/SMARTS string
        """

        def standardized_templates(rdmols):
            # Lazily yield the template(s) that replace each input molecule.
            for rdmol in rdmols:
                raw_smiles = Chem.MolToSmiles(rdmol, canonical=False)  # type: ignore
                clean_smiles = self.standardize_molecule(raw_smiles)
                clean_mol = Chem.MolFromSmiles(clean_smiles, sanitize=False)  # type: ignore
                fragments = Chem.GetMolFrags(  # type: ignore
                    clean_mol, asMols=True, sanitizeFrags=False
                )
                if len(fragments) == 1:
                    # Single fragment: keep the parsed molecule itself.
                    yield clean_mol
                else:
                    yield from fragments

        source = AllChem.ReactionFromSmarts(smiles, useSmiles=True)  # type: ignore
        rebuilt = AllChem.ChemicalReaction()  # type: ignore

        for template in standardized_templates(source.GetReactants()):
            rebuilt.AddReactantTemplate(template)
        for template in standardized_templates(source.GetProducts()):
            rebuilt.AddProductTemplate(template)

        return AllChem.ReactionToSmiles(rebuilt)  # type: ignore
|
77
|
+
|
78
|
+
|
79
|
+
class HypervalentStandardizer(BaseStandardizer):
    """Standardizer for converting double to single bonds in hypervalent
    compounds."""

    def standardize_molecule(self, smiles: str) -> str:
        """Standardize a molecule by running RDKit's cleanup sanitization step.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES string cannot be parsed or the cleanup
                step reports a failure
        """
        parsed = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if parsed is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")

        # Only the cleanup operation is requested; a positive return value
        # is treated as a failed sanitization.
        failed_op = Chem.SanitizeMol(  # type: ignore
            parsed, sanitizeOps=Chem.SANITIZE_CLEANUP  # type: ignore
        )
        if failed_op > 0:
            raise ValueError(f"Sanitization failed for SMILES input '{smiles}'")

        return Chem.MolToSmiles(parsed)  # type: ignore
|
99
|
+
|
100
|
+
|
101
|
+
class RemoveHsStandardizer(BaseStandardizer):
    """Standardizer for removing explicit hydrogens from molecules."""

    def standardize_molecule(self, smiles: str) -> str:
        """Strip explicit hydrogens from a molecule and return canonical SMILES.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES string cannot be parsed or the
                follow-up sanitization step reports a failure
        """
        parsed = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if parsed is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")

        stripped = Chem.RemoveHs(parsed, sanitize=False)  # type: ignore

        # Only the radical-finding operation is requested after the removal.
        failed_op = Chem.SanitizeMol(  # type: ignore
            stripped, sanitizeOps=Chem.SANITIZE_FINDRADICALS  # type: ignore
        )
        if failed_op > 0:
            raise ValueError(f"Sanitization failed for SMILES input '{smiles}'")

        return Chem.MolToSmiles(stripped, canonical=True)  # type: ignore
|
121
|
+
|
122
|
+
|
123
|
+
class KekulizeStandardizer(BaseStandardizer):
    """Standardizer for kekulizing aromatic compounds."""

    def standardize_molecule(self, smiles: str) -> str:
        """Kekulize a molecule and return its canonical SMILES.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES string cannot be parsed
        """
        parsed = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if parsed is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")

        # The molecule was parsed unsanitized, so refresh its property cache
        # (non-strict) before kekulizing and clearing the aromatic flags.
        parsed.UpdatePropertyCache(strict=False)
        Chem.Kekulize(parsed, clearAromaticFlags=True)  # type: ignore

        return Chem.MolToSmiles(parsed, canonical=True)  # type: ignore
|
141
|
+
|
142
|
+
|
143
|
+
class UnchargeStandardizer(BaseStandardizer):
    """Standardizer for removing charges from molecules by protonation/deprotonation."""

    def __init__(self, *args, **kwargs):
        """Create the standardizer together with a reusable RDKit Uncharger."""
        super().__init__(*args, **kwargs)
        self._uncharger = rdMolStandardize.Uncharger()

    def standardize_molecule(self, smiles: str) -> str:
        """Neutralize a molecule with the RDKit Uncharger.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES string cannot be parsed
        """
        parsed = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if parsed is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")
        return Chem.MolToSmiles(self._uncharger.uncharge(parsed))  # type: ignore

    def standardize_reaction(self, smiles: str) -> str:
        """Neutralize every molecule of a reaction and rebalance it with protons.

        Explicit ``[H+]`` reactants and products are dropped, the remaining
        molecules are uncharged, and ``[H+]`` templates are then added to
        whichever side has the lower total formal charge until both sides
        carry the same charge.

        Args:
            smiles (str): Input SMILES/SMARTS string

        Returns:
            str: Output SMILES/SMARTS string
        """
        source = AllChem.ReactionFromSmarts(smiles, useSmiles=True)  # type: ignore
        rebuilt = AllChem.ChemicalReaction()  # type: ignore

        def transfer(rdmols, add_template):
            # Uncharge and copy each non-proton molecule via ``add_template``;
            # return the summed formal charge of what was copied.
            side_charge = 0
            for rdmol in rdmols:
                raw_smiles = Chem.MolToSmiles(rdmol, canonical=False)  # type: ignore
                if raw_smiles == "[H+]":
                    continue
                neutral_smiles = self.standardize_molecule(raw_smiles)
                neutral_mol = Chem.MolFromSmiles(neutral_smiles, sanitize=False)  # type: ignore
                side_charge += Chem.GetFormalCharge(neutral_mol)  # type: ignore
                add_template(neutral_mol)
            return side_charge

        # Remove all explicit protons from the reaction
        reactant_total_charge = transfer(
            source.GetReactants(), rebuilt.AddReactantTemplate
        )
        product_total_charge = transfer(
            source.GetProducts(), rebuilt.AddProductTemplate
        )

        # Rebalance reaction with protons
        charge_gap = reactant_total_charge - product_total_charge
        if charge_gap != 0:
            proton = Chem.MolFromSmiles("[H+]", sanitize=False)  # type: ignore
            if charge_gap > 0:
                add_proton = rebuilt.AddProductTemplate
            else:
                add_proton = rebuilt.AddReactantTemplate
            for _ in range(abs(charge_gap)):
                add_proton(proton)

        return AllChem.ReactionToSmiles(rebuilt)  # type: ignore
|
205
|
+
|
206
|
+
|
207
|
+
class MetalStandardizer(BaseStandardizer):
    """Standardizer for disconnecting bonds between metals and N, O, F atoms."""

    def __init__(self, *args, **kwargs):
        """Create the standardizer together with a reusable RDKit MetalDisconnector."""
        super().__init__(*args, **kwargs)
        self._disconnector = rdMolStandardize.MetalDisconnector()

    def standardize_molecule(self, smiles: str) -> str:
        """Partially sanitize a molecule and run the metal disconnector on it.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string

        Raises:
            ValueError: If the SMILES string cannot be parsed
        """
        parsed = Chem.MolFromSmiles(smiles, sanitize=False)  # type: ignore
        if parsed is None:
            raise ValueError(f"Invalid SMILES input '{smiles}'")

        # Start from all sanitize operations minus the properties step.
        sanitize_ops = Chem.SANITIZE_ALL ^ Chem.SANITIZE_PROPERTIES  # type: ignore
        if not is_smiles_aromatic(smiles):
            # The input has no aromatic atoms, so the aromaticity step is
            # switched off as well.
            sanitize_ops = sanitize_ops ^ Chem.SANITIZE_SETAROMATICITY  # type: ignore
        Chem.SanitizeMol(parsed, sanitizeOps=sanitize_ops)  # type: ignore

        disconnected = self._disconnector.Disconnect(parsed)
        return Chem.MolToSmiles(disconnected)  # type: ignore
|
233
|
+
|
234
|
+
|
235
|
+
class Standardizer(BaseStandardizer):
    """Aggregate standardizer that chains the individual standardizers."""

    def __init__(
        self,
        *,
        standardize_hypervalent: bool = True,
        standardize_remove_hs: bool = True,
        standardize_kekulize: bool = False,
        standardize_uncharge: bool = False,
        standardize_metals: bool = True,
    ):
        """Build the pipeline from the enabled standardization steps.

        Steps always run in the order hypervalent, remove-Hs, kekulize,
        uncharge, metals; each flag only switches its own step on or off.

        Args:
            standardize_hypervalent (bool): Convert double to single bonds in
                hypervalent compounds
            standardize_remove_hs (bool): Remove explicit hydrogen atoms
            standardize_kekulize (bool): Kekulize aromatic compounds
            standardize_uncharge (bool): Remove charges from molecules by
                protonation/deprotonation
            standardize_metals (bool): Disconnect bonds between metals and
                N, O, F atoms
        """
        stages = (
            (standardize_hypervalent, HypervalentStandardizer),
            (standardize_remove_hs, RemoveHsStandardizer),
            (standardize_kekulize, KekulizeStandardizer),
            (standardize_uncharge, UnchargeStandardizer),
            (standardize_metals, MetalStandardizer),
        )
        self._standardizers = [factory() for enabled, factory in stages if enabled]

    def standardize_molecule(self, smiles: str) -> str:
        """Run a molecule through every enabled standardizer in order.

        Args:
            smiles (str): Input SMILES string

        Returns:
            str: Output SMILES string
        """
        current = smiles
        for stage in self._standardizers:
            current = stage.standardize_molecule(current)
        return current

    def standardize_reaction(self, smiles: str) -> str:
        """Run a reaction through every enabled standardizer in order.

        Args:
            smiles (str): Input SMILES/SMARTS string

        Returns:
            str: Output SMILES/SMARTS string
        """
        current = smiles
        for stage in self._standardizers:
            current = stage.standardize_reaction(current)
        return current
|
@@ -0,0 +1,63 @@
|
|
1
|
+
"""Chemistry utils for refinery."""
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from rdkit import Chem, rdBase
|
6
|
+
|
7
|
+
__all__ = ["generate_inchikey"]
|
8
|
+
|
9
|
+
rdBase.DisableLog("rdApp.warning")
|
10
|
+
rdBase.DisableLog("rdApp.error")
|
11
|
+
|
12
|
+
|
13
|
+
def generate_inchikey(
    s: str, rgroup_smiles: Optional[str] = None, ignore_direction: bool = False
) -> str:
    """Generate an InChI key from a SMILES or reaction SMILES string.

    Passes exceptions to the caller.

    Args:
        s (str): SMILES or reaction SMILES
        rgroup_smiles (Optional[str]): Replacement SMILES string for R groups (*).
            If None, R groups raise a ValueError.
        ignore_direction (bool, optional): Ignore direction in reaction SMILES
            by putting the lexicographically smaller side key first.
            Has no effect on SMILES. Defaults to False.

    Returns:
        str: InChI key of the molecule, or for a reaction the two side keys
            joined as ``reactants>>products`` (sides ordered by key when
            ``ignore_direction`` is set)

    Raises:
        ValueError: If the string is empty, contains R groups without a
            replacement, cannot be parsed, or yields an empty InChI key
    """
    if ">>" in s:
        # Reaction: key each side separately, then join the two keys.
        reactants, products = s.split(">>", maxsplit=1)
        reactants_inchikey = generate_inchikey(reactants, rgroup_smiles=rgroup_smiles)
        products_inchikey = generate_inchikey(products, rgroup_smiles=rgroup_smiles)
        if ignore_direction and reactants_inchikey > products_inchikey:
            return products_inchikey + ">>" + reactants_inchikey
        return reactants_inchikey + ">>" + products_inchikey

    if "*" in s:
        if rgroup_smiles is None:
            raise ValueError(
                f"Found R (*) groups in SMILES string {s}. Set rgroup_smiles to replace."
            )
        replaced_smiles = s.replace("*", rgroup_smiles)
        # An empty replacement can leave empty branches behind; drop them.
        replaced_smiles = replaced_smiles.replace("()", "")
        return generate_inchikey(replaced_smiles)

    if s == "":
        raise ValueError("Empty SMILES string")

    try:
        rdmol = Chem.MolFromSmiles(s, sanitize=True)  # type: ignore
    except Exception as err:
        # Same error type and message as a failed parse, but keep the
        # underlying cause attached for debugging.
        raise ValueError(f"Invalid SMILES string {s}") from err
    if rdmol is None:
        raise ValueError(f"Invalid SMILES string {s}")

    inchikey = Chem.MolToInchiKey(rdmol)
    if inchikey == "":
        raise ValueError("Could not generate INChI key")
    return inchikey
|
File without changes
|
@@ -0,0 +1,90 @@
|
|
1
|
+
"""Entry file for the CLI, which aggregates and aliases all commands."""
|
2
|
+
|
3
|
+
import typer
|
4
|
+
from dayhoff_tools.cli.utility_commands import (
|
5
|
+
add_to_warehouse_typer,
|
6
|
+
build_and_upload_wheel,
|
7
|
+
delete_local_branch,
|
8
|
+
get_ancestry,
|
9
|
+
import_from_warehouse_typer,
|
10
|
+
rebuild_devcontainer_file,
|
11
|
+
test_github_actions_locally,
|
12
|
+
)
|
13
|
+
|
14
|
+
app = typer.Typer()

# Utility commands: (CLI alias, handler) pairs, registered in listed order.
_UTILITY_COMMANDS = (
    ("clean", delete_local_branch),
    ("gha", test_github_actions_locally),
    ("rebuild", rebuild_devcontainer_file),
    ("wadd", add_to_warehouse_typer),
    ("wancestry", get_ancestry),
    ("wheel", build_and_upload_wheel),
    ("wimport", import_from_warehouse_typer),
)
for _alias, _handler in _UTILITY_COMMANDS:
    app.command(_alias)(_handler)
|
24
|
+
|
25
|
+
|
26
|
+
# Use lazy loading for slow-loading swarm commands
|
27
|
+
@app.command("reset")
def reset_wrapper(
    firestore_collection: str = typer.Option(prompt=True),
    old_status: str = typer.Option(default="failed", prompt=True),
    new_status: str = typer.Option(default="available", prompt=True),
    delete_old: bool = typer.Option(default=True, prompt=True),
):
    """Find all the documents in the database with a given status, and
    make a new document with the same name and a new status."""
    # Import lazily so the swarm module is only loaded when this command runs.
    from dayhoff_tools.cli import swarm_commands

    swarm_commands.reset_failed_cards(
        firestore_collection,
        old_status,
        new_status,
        delete_old,
    )
|
39
|
+
|
40
|
+
|
41
|
+
@app.command("zombie")
def zombie_wrapper(
    firestore_collection: str = typer.Option(prompt=True),
    delete_old: bool = typer.Option(default=True, prompt=True),
    minutes_threshold: int = typer.Option(default=60, prompt=True),
):
    """Find all the documents in the database with status "assigned", and "last_update"
    older than a specified threshold, and make a new "available" document for them."""
    # The help text above names the field "last_update" because that is the
    # key the implementation reads from each document.
    # Import lazily so the swarm module is only loaded when this command runs.
    from dayhoff_tools.cli.swarm_commands import reset_zombie_cards

    reset_zombie_cards(firestore_collection, delete_old, minutes_threshold)
|
52
|
+
|
53
|
+
|
54
|
+
@app.command("status")
def status_wrapper(
    firestore_collection: str = typer.Argument(),
):
    """Count the various statuses of items in a given collection."""
    # Import lazily so the swarm module is only loaded when this command runs.
    from dayhoff_tools.cli import swarm_commands

    swarm_commands.get_firestore_collection_status(firestore_collection)
|
62
|
+
|
63
|
+
|
64
|
+
# Deployment commands - use lazy loading but preserve argument passing
|
65
|
+
@app.command("deploy")
def deploy_command(
    mode: str = typer.Argument(help="Deployment mode. Options: local, shell, batch"),
    config_path: str = typer.Argument(help="Path to the YAML configuration file"),
):
    """Unified deployment command."""
    # Import lazily so the deployment module is only loaded when this command runs.
    from dayhoff_tools.deployment import base as deployment_base

    deployment_base.deploy(mode, config_path)
|
74
|
+
|
75
|
+
|
76
|
+
@app.command("job")
def run_job_command(
    mode: str = typer.Argument(
        default="setup_and_execute",
        help="Mode to run in: setup (setup only), execute (execute only), or setup_and_execute (both)",
    )
):
    """Run a job."""
    # Import lazily so the job runner is only loaded when this command runs.
    from dayhoff_tools.deployment import job_runner

    job_runner.run_job(mode)
|
87
|
+
|
88
|
+
|
89
|
+
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|
@@ -0,0 +1,156 @@
|
|
1
|
+
"""CLI commands specific for this repo."""
|
2
|
+
|
3
|
+
from datetime import datetime, timedelta
|
4
|
+
from zoneinfo import ZoneInfo
|
5
|
+
|
6
|
+
import typer
|
7
|
+
|
8
|
+
|
9
|
+
def get_firestore_collection_status(
    firestore_collection: str = typer.Argument(),
) -> None:
    """Print how many documents in a Firestore collection have each status.

    Documents with no data, or without a "status" value, are left out of the
    tally. One line per distinct status is echoed, in first-seen order.

    Args:
        firestore_collection (str): Name of the Firestore collection to scan
    """
    print(f"Checking collection: {firestore_collection}")

    # Heavy libraries are imported here rather than at module import time.
    from dayhoff_tools.deployment.swarm import initialize_firebase
    from firebase_admin import firestore

    initialize_firebase()
    client = firestore.client()
    snapshots = client.collection(firestore_collection).stream()

    # Tally documents per status value instead of issuing one query per status.
    tally = {}
    for snapshot in snapshots:
        payload = snapshot.to_dict()
        if not payload:
            continue
        status = payload.get("status", None)
        if status is None:
            continue
        tally[status] = tally.get(status, 0) + 1

    for status in tally:
        typer.echo(f"status == {status}: {tally[status]}")
|
33
|
+
|
34
|
+
|
35
|
+
def reset_failed_cards(
    firestore_collection: str = typer.Option(prompt=True),
    old_status: str = typer.Option(default="failed", prompt=True),
    new_status: str = typer.Option(default="available", prompt=True),
    delete_old: bool = typer.Option(default=True, prompt=True),
):
    """Find all the documents in the database with a given status, and
    make a new document with the same name and a new status.

    The matching documents are fetched once, before the confirmation prompt,
    so the number shown to the user is exactly the number processed and
    documents created by this run are never picked up again.

    Args:
        firestore_collection (str): Name of the Firestore collection
        old_status (str): Status value of the documents to reset
        new_status (str): Status value given to the newly created documents
        delete_old (bool): Delete each original document after its
            replacement has been created
    """
    # Import heavy libraries inside the function
    from dayhoff_tools.deployment.swarm import initialize_firebase
    from firebase_admin import firestore
    from google.cloud.firestore_v1.base_query import FieldFilter

    initialize_firebase()
    collection = firestore.client().collection(firestore_collection)

    # Materialize the query result once. Iterating a live stream while adding
    # documents could re-match the query when new_status == old_status, and a
    # second query after the prompt could disagree with the confirmed count.
    matching_cards = list(
        collection.where(filter=FieldFilter("status", "==", old_status)).stream()
    )
    change_count = len(matching_cards)

    # Ask for confirmation before proceeding; an empty answer means yes.
    confirmation = input(
        f"This operation will change {change_count} documents. Do you want to continue? (Y/n): "
    )
    if confirmation.strip().lower() not in ("", "y", "yes"):
        print("Operation cancelled.")
        return

    reset_count = 0
    for card in matching_cards:
        # Make a fresh new one
        packet_filename = card.to_dict()["packet_filename"]  # type: ignore
        collection.document().set(
            {
                "status": new_status,
                "packet_filename": packet_filename,
                "created": datetime.now(ZoneInfo("America/Los_Angeles")),
            }
        )
        reset_count += 1
        if delete_old:
            card.reference.delete()

    print(f"Done: {reset_count} new '{new_status}' cards were created.")
    if delete_old:
        print(f"Done: {reset_count} '{old_status}' cards were deleted.")
|
88
|
+
|
89
|
+
|
90
|
+
def reset_zombie_cards(
    firestore_collection: str = typer.Option(prompt=True),
    delete_old: bool = typer.Option(default=True, prompt=True),
    minutes_threshold: int = typer.Option(default=20, prompt=True),
):
    """Find all the documents in the database with status "assigned", and "last_update"
    older than a specified threshold, and make a new "available" document for them.

    This implementation avoids requiring a composite index by filtering on the client side.

    Args:
        firestore_collection (str): Name of the Firestore collection
        delete_old (bool): Delete each zombie document after its replacement
            has been created
        minutes_threshold (int): Age in minutes of "last_update" beyond which
            an "assigned" document counts as a zombie
    """
    # Import heavy libraries inside the function
    from dayhoff_tools.deployment.swarm import initialize_firebase
    from firebase_admin import firestore
    from google.cloud.firestore_v1.base_query import FieldFilter

    initialize_firebase()
    collection = firestore.client().collection(firestore_collection)
    current_time = datetime.now(ZoneInfo("America/Los_Angeles"))
    threshold_time = current_time - timedelta(minutes=minutes_threshold)

    # First, get all documents with status "assigned"
    assigned_cards = collection.where(
        filter=FieldFilter("status", "==", "assigned")
    ).stream()

    # Filter client-side for those with last_update older than threshold.
    # Each card is kept together with its data so it is converted only once.
    zombie_cards = []
    for card in assigned_cards:
        card_data = card.to_dict() or {}  # tolerate documents without data
        # Note: field is "last_update", not "last_updated"
        last_update = card_data.get("last_update")
        if last_update and last_update < threshold_time:
            zombie_cards.append((card, card_data))

    change_count = len(zombie_cards)
    if change_count == 0:
        print("No zombie cards found. Nothing to do.")
        return

    # Ask for confirmation before proceeding; an empty answer means yes.
    confirmation = input(
        f"This operation will reset {change_count} zombie documents. Do you want to continue? (Y/n): "
    )
    if confirmation.strip().lower() not in ("", "y", "yes"):
        print("Operation cancelled.")
        return

    reset_count = 0
    for card, card_data in zombie_cards:
        # Make a fresh new one
        packet_filename = card_data["packet_filename"]
        collection.document().set(
            {
                "status": "available",
                "packet_filename": packet_filename,
                "created": current_time,
            }
        )
        reset_count += 1
        if delete_old:
            card.reference.delete()

    print(f"Done: {reset_count} new 'available' cards were created.")
    if delete_old:
        print(f"Done: {reset_count} 'assigned' zombie cards were deleted.")
|