rowan-python 2.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1012 @@
1
+ import asyncio
2
+ import copy
3
+ import logging
4
+ import time
5
+ from typing import Iterable, Literal, TypeAlias, TypedDict
6
+
7
+ import numpy as np
8
+ import stjames
9
+ from rdkit import Chem
10
+ from rdkit.Chem import AllChem
11
+
12
+ import rowan
13
+ from rowan.utils import ATOMIC_NUMBER_TO_ATOMIC_SYMBOL, get_api_key
14
+
15
# Either flavour of RDKit molecule is accepted throughout this module.
RdkitMol: TypeAlias = Chem.rdchem.Mol | Chem.rdchem.RWMol

# Mode names accepted by the corresponding workflows.
pKaMode = Literal["reckless", "rapid", "careful"]
TautomerMode = Literal["reckless", "rapid", "careful"]
ConformerMode = Literal["reckless", "rapid"]

# Methods this module is willing to submit: the xTB methods followed by the
# NNP methods exported by stjames.
FAST_METHODS: list[stjames.Method] = list(stjames.method.XTB_METHODS) + list(
    stjames.method.NNP_METHODS
)
23
+
24
+
25
class ConversionError(ValueError):
    """Error type for failed conversions; a specialised ``ValueError``."""
27
+
28
+
29
class NoConformersError(Exception):
    """Raised when a molecule has no conformers to work with."""
31
+
32
+
33
class MethodTooSlowError(Exception):
    """Raised when the requested method is not one of ``FAST_METHODS``."""
35
+
36
+
37
class ChargesResult(TypedDict):
    """Charges computed for one conformer of a molecule."""

    # position of the conformer in the order it was submitted
    conformer_index: int
    # one charge per atom
    charges: list[float]


# One entry per conformer.
ChargesResults = list[ChargesResult]
43
+
44
+
45
class ConformerResult(TypedDict):
    """Result of a conformer search."""

    # RDKit Molecule carrying the generated conformers
    molecule: RdkitMol
    # conformer energies in Hartree (empty when energies were not requested)
    energies: list[float]
48
+
49
+
50
class PKaResult(TypedDict):
    """A single predicted pKa value and the atom it belongs to."""

    # atomic symbol of the site
    element: str
    # atom index as reported by the workflow
    index: int
    # predicted pKa
    pKa: float
54
+
55
+
56
class PKaResults(TypedDict):
    """All pKa values predicted for one molecule, split by site type."""

    # pKa values of acidic sites
    acidic_pkas: list[PKaResult]
    # pKa values of basic sites
    basic_pkas: list[PKaResult]
59
+
60
+
61
class TautomerResult(TypedDict):
    """One tautomer returned by the tautomer workflow."""

    # RDKit Molecule of the tautomer
    molecule: RdkitMol
    # predicted energy relative to the other tautomers
    predicted_relative_energy: float
    # weight of the tautomer
    weight: float


# One entry per tautomer.
TautomerResults = list[TautomerResult]
68
+
69
+
70
class ConformerEnergyResult(TypedDict):
    """Energy computed for one conformer of a molecule."""

    # position of the conformer in the order it was submitted
    conformer_index: int
    # energy in Hartree
    energy: float
73
+
74
+
75
class OptimizeResult(TypedDict):
    """Result of optimizing the conformers of a molecule."""

    # RDKit Molecule returned by the optimization helper
    molecule: RdkitMol
    # per-conformer energies in Hartree (empty when energies were not requested)
    energies: list[float]
78
+
79
+
80
def apply_nest_asyncio() -> None:
    """
    Patch asyncio with ``nest_asyncio`` when an event loop is already running.

    Does nothing when no loop is running or when ``nest_asyncio`` cannot be
    imported.
    """
    loop_is_running = True
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises RuntimeError when there is no running loop
        loop_is_running = False

    if loop_is_running:
        try:
            import nest_asyncio  # type: ignore [import-untyped]

            nest_asyncio.apply()
        except ImportError:
            # nest_asyncio is optional: carry on without it
            pass


# Patch at import time so the asyncio.run() calls below can work inside an
# already-running loop.
apply_nest_asyncio()
95
+
96
+
97
def _get_rdkit_mol_from_uuid(calculation_uuid: str) -> RdkitMol:
    """
    Build an RDKit Molecule from the last molecule of a calculation.

    :param calculation_uuid: UUID of the calculation to fetch
    :return: RDKit Molecule parsed from the molecule's XYZ representation
    """
    molecule_dicts = rowan.retrieve_calculation_molecules(calculation_uuid)
    final_molecule = stjames.Molecule(**molecule_dicts[-1])
    xyz_block = final_molecule.to_xyz()
    # NOTE(review): an XYZ block normally carries no bond information -- confirm
    # callers do not rely on bonds being present in the returned molecule.
    return Chem.MolFromXYZBlock(xyz_block)
101
+
102
+
103
def _embed_rdkit_mol(rdkm: RdkitMol) -> RdkitMol:
    """
    Sanitize a molecule, add hydrogens, and generate a 3D conformer for it.

    Embedding is attempted first with the default settings and, if that fails,
    once more from random starting coordinates. The embedded structure is then
    relaxed with MMFF on a best-effort basis.

    :param rdkm: RDKit Molecule (sanitized in place)
    :raises ValueError: if the molecule cannot be sanitized or embedded
    :return: new RDKit Molecule with explicit hydrogens and an embedded conformer
    """
    try:
        AllChem.SanitizeMol(rdkm)  # type: ignore [attr-defined]
    except Exception as e:
        raise ValueError("Molecule could not be generated -- invalid chemistry!") from e

    rdkm = AllChem.AddHs(rdkm)  # type: ignore [attr-defined]

    # Check the status codes explicitly rather than with `assert`: assertions are
    # removed under `python -O`, which would skip the embedding call altogether.
    if AllChem.EmbedMolecule(rdkm, maxAttempts=200) < 0:  # type: ignore [attr-defined]
        # Second chance: start from random coordinates.
        retry_status = AllChem.EmbedMolecule(  # type: ignore [attr-defined]
            rdkm, maxAttempts=200, useRandomCoords=True
        )
        if retry_status < 0:
            raise ValueError("Cannot embed molecule!")

    # Best-effort force-field cleanup; a failing status is deliberately ignored
    # and the embedded geometry is returned as-is.
    AllChem.MMFFOptimizeMolecule(rdkm, maxIters=200)  # type: ignore [attr-defined]

    return rdkm
122
+
123
+
124
def _rdkit_to_stjames(rdkm: RdkitMol, cid: int = 0) -> stjames.Molecule:
    """
    Convert one conformer of an RDKit Molecule into a stjames Molecule.

    :param rdkm: RDKit Molecule
    :param cid: ID of the conformer to convert
    :return: stjames Molecule
    """
    converted = stjames.Molecule.from_rdkit(rdkm, cid=cid)
    return converted
126
+
127
+
128
def run_pka(
    mol: RdkitMol,
    mode: pKaMode = "rapid",
    timeout: int = 600,
    name: str = "pKa API Workflow",
    pka_range: tuple[int, int] = (2, 12),
    deprotonate_elements: list[int] | None = None,
    protonate_elements: list[int] | None = None,
    folder_uuid: str | None = None,
) -> PKaResults:
    """
    Run a pKa workflow for a single Molecule and wait for the result.

    :param mol: RDKit Molecule
    :param mode: pKa calculation mode. See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
        for options.
    :param timeout: time in seconds before the Workflow times out
    :param name: name for the job
    :param pka_range: range of pKa values to calculate
    :param deprotonate_elements: atomic numbers to deprotonate (defaults to 7, 8, 16)
    :param protonate_elements: atomic numbers to protonate (defaults to 7)
    :param folder_uuid: folder UUID
    :return: dictionary with the ``acidic_pkas`` and ``basic_pkas`` lists
    """
    # A missing or empty selection falls back to the default elements.
    if not protonate_elements:
        protonate_elements = [7]
    if not deprotonate_elements:
        deprotonate_elements = [7, 8, 16]

    workflow = _single_pka(
        mol=mol,
        mode=mode,
        timeout=timeout,
        name=name,
        pka_range=pka_range,
        deprotonate_elements=deprotonate_elements,
        protonate_elements=protonate_elements,
        folder_uuid=folder_uuid,
    )
    return asyncio.run(workflow)
167
+
168
+
169
def batch_pka(
    mols: Iterable[RdkitMol],
    mode: pKaMode = "rapid",
    timeout: int = 600,
    name: str = "pKa API Workflow",
    pka_range: tuple[int, int] = (2, 12),
    deprotonate_elements: list[int] | None = None,
    protonate_elements: list[int] | None = None,
    folder_uuid: str | None = None,
) -> list[PKaResults]:
    """
    Run a pKa workflow for every Molecule of a batch and wait for all results.

    :param mols: RDKit Molecules
    :param mode: pKa calculation mode. See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
        for options.
    :param timeout: time in seconds before each Workflow times out
    :param name: name for the jobs
    :param pka_range: range of pKa values to calculate
    :param deprotonate_elements: atomic numbers to deprotonate (defaults to 7, 8, 16)
    :param protonate_elements: atomic numbers to protonate (defaults to 7)
    :param folder_uuid: folder UUID
    :return: one pKa result dictionary per Molecule, in input order
    """
    # A missing or empty selection falls back to the default elements.
    if not protonate_elements:
        protonate_elements = [7]
    if not deprotonate_elements:
        deprotonate_elements = [7, 8, 16]

    async def _gather_all():
        pending = []
        for mol in mols:
            pending.append(
                _single_pka(
                    mol=mol,
                    mode=mode,
                    timeout=timeout,
                    name=name,
                    pka_range=pka_range,
                    deprotonate_elements=deprotonate_elements,
                    protonate_elements=protonate_elements,
                    folder_uuid=folder_uuid,
                )
            )
        return await asyncio.gather(*pending)

    return asyncio.run(_gather_all())
212
+
213
+
214
async def _single_pka(
    mol: RdkitMol,
    mode: pKaMode = "rapid",
    timeout: int = 600,
    name: str = "pKa API Workflow",
    pka_range: tuple[int, int] = (2, 12),
    deprotonate_elements: list[int] | None = None,
    protonate_elements: list[int] | None = None,
    folder_uuid: str | None = None,
) -> PKaResults:
    """
    Submit a pKa workflow for a Molecule, poll until it finishes, and collect the pKa values.

    :param mol: RDKit Molecule
    :param mode: pKa calculation mode. See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
        for options.
    :param timeout: time in seconds before the Workflow times out
    :param name: name for the job
    :param pka_range: range of pKa values to calculate
    :param deprotonate_elements: atomic numbers to deprotonate (defaults to 7, 8, 16)
    :param protonate_elements: atomic numbers to protonate (defaults to 7)
    :param folder_uuid: folder UUID
    :raises TimeoutError: if the workflow has not finished after ``timeout`` seconds
    :raises Exception: if the finished workflow has no data
    :return: dictionary with ``acidic_pkas`` (from the conjugate bases) and
        ``basic_pkas`` (from the conjugate acids); pKa values are rounded to 2 decimals
    """
    get_api_key()
    protonate_elements = protonate_elements or [7]
    deprotonate_elements = deprotonate_elements or [7, 8, 16]

    post = rowan.submit_workflow(
        name=name,
        workflow_type="pka",
        initial_molecule=_rdkit_to_stjames(mol),
        workflow_data={
            "pka_range": pka_range,
            "deprotonate_elements": deprotonate_elements,
            "deprotonate_atoms": [],
            "protonate_elements": protonate_elements,
            "protonate_atoms": [],
            "mode": mode,
        },
        folder_uuid=folder_uuid,
    )

    # Poll every 5 seconds until the workflow finishes or the timeout elapses.
    start = time.time()
    while not post.is_finished():
        await asyncio.sleep(5)
        if time.time() - start > timeout:
            raise TimeoutError("Workflow timed out")

    data = rowan.retrieve_workflow(post.uuid).data

    if not data:
        raise Exception("Could not retrieve workflow data")

    def _to_results(microstates: list) -> list[PKaResult]:
        """Convert workflow microstates into PKaResult dictionaries."""
        results: list[PKaResult] = []
        for microstate in microstates:
            # atom_index is treated as 1-based when looking up the atom
            atom = data.get("initial_molecule", {})["atoms"][microstate["atom_index"] - 1]
            results.append(
                {
                    "element": ATOMIC_NUMBER_TO_ATOMIC_SYMBOL[str(atom["atomic_number"])],
                    "index": microstate["atom_index"],
                    "pKa": round(microstate["pka"], 2),
                }
            )
        return results

    # Acidic sites lose a proton (conjugate bases); basic sites gain one
    # (conjugate acids). Both lists were previously read from "conjugate_bases".
    return {
        "acidic_pkas": _to_results(data.get("conjugate_bases", [])),
        "basic_pkas": _to_results(data.get("conjugate_acids", [])),
    }
296
+
297
+
298
def run_tautomers(
    mol: RdkitMol,
    mode: TautomerMode = "reckless",
    timeout: int = 600,
    name: str = "Tautomers API Workflow",
    folder_uuid: str | None = None,
) -> TautomerResults:
    """
    Run a tautomer workflow for a single Molecule and wait for the result.

    :param mol: RDKit Molecule
    :param mode: tautomer mode. See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
        for options.
    :param timeout: time in seconds before the Workflow times out
    :param name: name for the job
    :param folder_uuid: folder UUID
    :return: list of dictionaries with the RDKit Molecule, relative energy, and weight
        of each tautomer
    """
    workflow = _single_tautomers(
        mol=mol,
        mode=mode,
        timeout=timeout,
        name=name,
        folder_uuid=folder_uuid,
    )
    return asyncio.run(workflow)
317
+
318
+
319
def batch_tautomers(
    mols: Iterable[RdkitMol],
    mode: TautomerMode = "reckless",
    timeout: int = 600,
    name: str = "Tautomers API Workflow",
    folder_uuid: str | None = None,
) -> list[TautomerResults]:
    """
    Run a tautomer workflow for every Molecule of a batch and wait for all results.

    :param mols: RDKit Molecules
    :param mode: tautomer mode. See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
        for options.
    :param timeout: time in seconds before each Workflow times out
    :param name: name for the jobs
    :param folder_uuid: folder UUID
    :return: one list of tautomer dictionaries per Molecule, in input order
    """

    async def _gather_all():
        pending = []
        for mol in mols:
            pending.append(
                _single_tautomers(
                    mol=mol,
                    mode=mode,
                    timeout=timeout,
                    name=name,
                    folder_uuid=folder_uuid,
                )
            )
        return await asyncio.gather(*pending)

    return asyncio.run(_gather_all())
343
+
344
+
345
async def _single_tautomers(
    mol: RdkitMol,
    mode: TautomerMode = "reckless",
    timeout: int = 600,
    name: str = "Tautomers API Workflow",
    folder_uuid: str | None = None,
) -> TautomerResults:
    """
    Submit a tautomer workflow for a Molecule, poll until it finishes, and collect the tautomers.

    :param mol: RDKit Molecule
    :param mode: tautomer mode
    :param timeout: time in seconds before the Workflow times out
    :param name: name for the job
    :param folder_uuid: folder UUID
    :raises TimeoutError: if the workflow has not finished after ``timeout`` seconds
    :raises Exception: if the finished workflow has no data
    :return: list of dictionaries with the RDKit Molecule, relative energy (rounded
        to 2 decimals), and weight (rounded to 5 decimals) of each tautomer
    """
    get_api_key()

    post = rowan.submit_workflow(
        name=name,
        workflow_type="tautomers",
        initial_molecule=_rdkit_to_stjames(mol),
        workflow_data={"mode": mode},
        folder_uuid=folder_uuid,
    )

    # Poll every 5 seconds until the workflow finishes or the timeout elapses.
    started_at = time.time()
    while True:
        if post.is_finished():
            break
        await asyncio.sleep(5)
        if time.time() - started_at > timeout:
            raise TimeoutError("Workflow timed out")

    data = rowan.retrieve_workflow(post.uuid).data
    if not data:
        raise Exception("Could not retrieve workflow data")

    tautomers: TautomerResults = []
    for entry in data.get("tautomers", []):
        # the first structure of each tautomer supplies the geometry
        molecule = _get_rdkit_mol_from_uuid(entry["structures"][0]["uuid"])
        relative_energy = round(entry["predicted_relative_energy"], 2)
        weight = round(entry["weight"], 5)
        tautomers.append(
            {
                "molecule": molecule,
                "predicted_relative_energy": relative_energy,
                "weight": weight,
            }
        )
    return tautomers
391
+
392
+
393
def run_energy(
    mol: RdkitMol,
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    timeout: int = 600,
    name: str = "Energy API Workflow",
    folder_uuid: str | None = None,
) -> list[ConformerEnergyResult]:
    """
    Compute the energy of every conformer of a Molecule and wait for the results.

    :param mol: RDKit Molecule
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the calculation in.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param timeout: time in seconds before the Workflow times out
    :param name: name for the job
    :param folder_uuid: folder UUID
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :return: list of dictionaries with the conformer index and the energy in Hartree
    """
    workflow = _single_energy(
        mol=mol,
        method=method,
        engine=engine,
        mode=mode,
        timeout=timeout,
        name=name,
        folder_uuid=folder_uuid,
    )
    return asyncio.run(workflow)
418
+
419
+
420
def batch_energy(
    mols: Iterable[RdkitMol],
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    timeout: int = 600,
    name: str = "Energy API Workflow",
    folder_uuid: str | None = None,
) -> list[list[ConformerEnergyResult]]:
    """
    Compute conformer energies for every Molecule of a batch and wait for all results.

    :param mols: RDKit Molecules
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the calculation in.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param timeout: time in seconds before each Workflow times out
    :param name: name for the jobs
    :param folder_uuid: folder UUID
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :return: one list of conformer-energy dictionaries per Molecule, in input order
    """

    async def _gather_all():
        pending = []
        for mol in mols:
            pending.append(
                _single_energy(
                    mol=mol,
                    method=method,
                    engine=engine,
                    mode=mode,
                    timeout=timeout,
                    name=name,
                    folder_uuid=folder_uuid,
                )
            )
        return await asyncio.gather(*pending)

    return asyncio.run(_gather_all())
452
+
453
+
454
async def _single_energy(
    mol: RdkitMol,
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    timeout: int = 600,
    name: str = "Energy API Workflow",
    folder_uuid: str | None = None,
) -> list[ConformerEnergyResult]:
    """
    Submit one energy calculation per conformer, poll until all finish, and collect the energies.

    A Molecule without conformers is embedded first.

    :param mol: RDKit Molecule
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the calculation in
    :param timeout: time in seconds before the Workflows time out
    :param name: name for the jobs
    :param folder_uuid: folder UUID
    :raises NoConformersError: if the Molecule still has no conformers after embedding
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :raises TimeoutError: if the workflows have not all finished after ``timeout`` seconds
    :return: list of dictionaries with the energy in Hartree and the conformer index
        (position of the conformer in submission order); conformers whose workflow
        returned no data are omitted
    """
    get_api_key()
    method = stjames.Method(method)

    if mol.GetNumConformers() == 0:
        mol = _embed_rdkit_mol(mol)
    if mol.GetNumConformers() == 0:
        raise NoConformersError("This molecule has no conformers")

    if method not in FAST_METHODS:
        raise MethodTooSlowError(
            "This method is too slow; try running this through our web interface."
        )

    # One workflow per conformer, kept in conformer order.
    workflow_uuids = []
    for conformer in mol.GetConformers():
        cid = conformer.GetId()
        stjames_mol = _rdkit_to_stjames(mol, cid)
        post = rowan.submit_workflow(
            name=name,
            workflow_type="basic_calculation",
            initial_molecule=stjames_mol,
            workflow_data={
                "settings": {
                    "method": method.value,
                    "corrections": [],
                    "tasks": ["energy"],
                    "mode": mode,
                    "opt_settings": {"constraints": []},
                },
                "engine": engine,
            },
            folder_uuid=folder_uuid,
        )

        workflow_uuids.append(post.uuid)

    # Poll every 5 seconds until every workflow finishes or the timeout elapses.
    start = time.time()
    while not all(rowan.retrieve_workflow(uuid).is_finished() for uuid in workflow_uuids):
        await asyncio.sleep(5)
        if time.time() - start > timeout:
            raise TimeoutError("Workflow timed out")

    results = [rowan.retrieve_workflow(uuid).data for uuid in workflow_uuids]

    # Enumerate *before* filtering so that conformer_index keeps pointing at the
    # conformer that was submitted even when some workflow returned no data.
    return [
        {
            "conformer_index": index,
            "energy": rowan.retrieve_calculation_molecules(data["calculation_uuid"])[-1]["energy"],
        }
        for index, data in enumerate(results)
        if data is not None
    ]
528
+
529
+
530
def run_optimize(
    mol: RdkitMol,
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    return_energies: bool = False,
    timeout: int = 600,
    name: str = "Optimize API Workflow",
    folder_uuid: str | None = None,
) -> OptimizeResult:
    """
    Optimize every conformer of a Molecule and wait for the result.

    :param mol: RDKit Molecule
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the optimization.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the optimization in.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param return_energies: whether to return energies in Hartree too
    :param timeout: time in seconds before the Workflow times out
    :param name: name for the job
    :param folder_uuid: folder UUID
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :return: dictionary with the Molecule and the optional list of energies per conformer
    """
    workflow = _single_optimize(
        mol=mol,
        method=method,
        engine=engine,
        mode=mode,
        return_energies=return_energies,
        timeout=timeout,
        name=name,
        folder_uuid=folder_uuid,
    )
    return asyncio.run(workflow)
559
+
560
+
561
def batch_optimize(
    mols: Iterable[RdkitMol],
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    return_energies: bool = False,
    timeout: int = 600,
    name: str = "Optimize API Workflow",
    folder_uuid: str | None = None,
) -> list[OptimizeResult]:
    """
    Optimize the conformers of every Molecule of a batch and wait for all results.

    :param mols: RDKit Molecules
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the optimization.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the optimization in.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param return_energies: whether to return energies in Hartree too
    :param timeout: time in seconds before each Workflow times out
    :param name: name for the jobs
    :param folder_uuid: folder UUID
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :return: one dictionary (Molecule and optional energies) per input Molecule, in input order
    """

    async def _gather_all():
        pending = []
        for mol in mols:
            pending.append(
                _single_optimize(
                    mol=mol,
                    method=method,
                    engine=engine,
                    mode=mode,
                    return_energies=return_energies,
                    timeout=timeout,
                    name=name,
                    folder_uuid=folder_uuid,
                )
            )
        return await asyncio.gather(*pending)

    return asyncio.run(_gather_all())
596
+
597
+
598
async def _single_optimize(
    mol: RdkitMol,
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    return_energies: bool = False,
    timeout: int = 600,
    name: str = "Optimize API Workflow",
    folder_uuid: str | None = None,
) -> OptimizeResult:
    """
    Submit one optimization per conformer, poll until all finish, and return the optimized Molecule.

    A Molecule without conformers is embedded first. The input Molecule is left
    untouched; the optimized coordinates are written into a deep copy.

    :param mol: RDKit Molecule
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the optimization.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the optimization in.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param return_energies: whether to return energies in Hartree too
    :param timeout: time in seconds before the Workflows time out
    :param name: name for the jobs
    :param folder_uuid: folder UUID
    :raises NoConformersError: if the Molecule still has no conformers after embedding
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :raises TimeoutError: if the workflows have not all finished after ``timeout`` seconds
    :return: dictionary with the optimized Molecule and the optional list of energies
        per conformer
    """
    get_api_key()
    method = stjames.Method(method)

    if mol.GetNumConformers() == 0:
        mol = _embed_rdkit_mol(mol)
    if mol.GetNumConformers() == 0:
        raise NoConformersError("This molecule has no conformers")

    if method not in FAST_METHODS:
        raise MethodTooSlowError(
            "This method is too slow; try running this through our web interface."
        )

    optimized_mol = copy.deepcopy(mol)

    # One workflow per conformer, kept in conformer order.
    workflow_uuids = []
    for conformer in mol.GetConformers():
        cid = conformer.GetId()
        stjames_mol = _rdkit_to_stjames(mol, cid)

        post = rowan.submit_workflow(
            name=name,
            workflow_type="basic_calculation",
            initial_molecule=stjames_mol,
            workflow_data={
                "settings": {
                    "method": method.value,
                    "corrections": [],
                    "tasks": ["optimize"],
                    "mode": mode,
                    "opt_settings": {"constraints": []},
                },
                "engine": engine,
            },
            folder_uuid=folder_uuid,
        )

        workflow_uuids.append(post.uuid)

    # Poll every 5 seconds until every workflow finishes or the timeout elapses.
    start = time.time()
    while not all(rowan.retrieve_workflow(uuid).is_finished() for uuid in workflow_uuids):
        await asyncio.sleep(5)
        if time.time() - start > timeout:
            raise TimeoutError("Workflow timed out")

    # Pair every conformer of the copy with its own workflow so that a workflow
    # without data cannot shift coordinates onto the wrong conformer.
    energies = []
    for conformer, uuid in zip(optimized_mol.GetConformers(), workflow_uuids, strict=True):
        data = rowan.retrieve_workflow(uuid).data
        if data is None:
            # no result for this conformer: keep its input geometry
            continue
        final_molecule = rowan.retrieve_calculation_molecules(data["calculation_uuid"])[-1]
        positions = [atom["position"] for atom in final_molecule["atoms"]]
        conformer.SetPositions(np.array(positions))
        energies.append(final_molecule["energy"])

    # Return the copy that received the optimized coordinates (the original
    # returned the unmodified input molecule).
    return {
        "molecule": optimized_mol,
        "energies": energies if return_energies else [],
    }
686
+
687
+
688
def run_conformers(
    mol: RdkitMol,
    num_conformers=10,
    method: str = "aimnet2_wb97md3",
    mode: str = "rapid",
    return_energies: bool = False,
    timeout: int = 600,
    name: str = "Conformer API Workflow",
    folder_uuid: str | None = None,
) -> ConformerResult:
    """
    Run a conformer search for a single Molecule and wait for the result.

    :param mol: RDKit Molecule
    :param num_conformers: number of conformers to generate
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param mode: mode for conformer generation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param return_energies: whether to return energies in Hartree too
    :param timeout: time in seconds before the Workflow times out
    :param name: name for the job
    :param folder_uuid: folder UUID
    :return: dictionary with the RDKit Molecule and energies
    """
    workflow = _single_conformers(
        mol=mol,
        num_conformers=num_conformers,
        method=method,
        mode=mode,
        return_energies=return_energies,
        timeout=timeout,
        name=name,
        folder_uuid=folder_uuid,
    )
    return asyncio.run(workflow)
725
+
726
+
727
def batch_conformers(
    mols: Iterable[RdkitMol],
    num_conformers=10,
    method: str = "aimnet2_wb97md3",
    mode: str = "rapid",
    return_energies: bool = False,
    timeout: int = 600,
    name: str = "Conformer API Workflow",
    folder_uuid: str | None = None,
) -> list[ConformerResult]:
    """
    Run a conformer search for every Molecule of a batch and wait for all results.

    :param mols: RDKit Molecules
    :param num_conformers: number of conformers to generate per Molecule
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param mode: conformer generation mode.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param return_energies: whether to return energies in Hartree too
    :param timeout: time in seconds before each Workflow times out
    :param name: name for the jobs
    :param folder_uuid: folder UUID
    :return: list of dictionaries with RDKit Molecule and energies, in input order
    """

    async def _gather_all():
        pending = []
        for mol in mols:
            pending.append(
                _single_conformers(
                    mol=mol,
                    num_conformers=num_conformers,
                    method=method,
                    mode=mode,
                    return_energies=return_energies,
                    timeout=timeout,
                    name=name,
                    folder_uuid=folder_uuid,
                )
            )
        return await asyncio.gather(*pending)

    return asyncio.run(_gather_all())
769
+
770
+
771
async def _single_conformers(
    mol: RdkitMol,
    num_conformers=10,
    method: str = "aimnet2_wb97md3",
    mode: str = "rapid",
    return_energies: bool = False,
    timeout: int = 600,
    name: str = "Conformer API Workflow",
    folder_uuid: str | None = None,
) -> ConformerResult:
    """
    Submit a conformer search, poll until it finishes, and load the lowest-energy conformers.

    A Molecule without conformers is embedded first. The conformers of the
    Molecule are regenerated with RDKit and then overwritten with the
    coordinates returned by the workflow.

    :param mol: RDKit Molecule
    :param num_conformers: number of conformers to generate
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param mode: conformer generation mode.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param return_energies: whether to return energies in Hartree too
    :param timeout: time in seconds before the Workflow times out
    :param name: name for the job
    :param folder_uuid: folder UUID
    :raises NoConformersError: if the Molecule cannot be given a conformer, or the
        workflow returned no data
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :raises TimeoutError: if the workflow has not finished after ``timeout`` seconds
    :return: dictionary with RDKit molecule and energies
    """
    get_api_key()
    method = stjames.Method(method)

    if not mol.GetNumConformers():
        mol = _embed_rdkit_mol(mol)
    if not mol.GetNumConformers():
        raise NoConformersError("This molecule has no conformers")

    if method not in FAST_METHODS:
        raise MethodTooSlowError(
            "This method is too slow; try running this through our web interface."
        )

    optimization_stage = {
        "method": method.value,
        "tasks": ["optimize"],
        "corrections": [],
        "mode": "auto",
    }
    workflow_data = {
        "conf_gen_mode": "rapid",
        "mode": mode,
        "mso_mode": "manual",
        "multistage_opt_settings": {
            "mode": "manual",
            "optimization_settings": [optimization_stage],
            "solvent": None,
            "transition_state": False,
            "constraints": [],
        },
    }
    post = rowan.submit_workflow(
        name=name,
        workflow_type="conformer_search",
        initial_molecule=_rdkit_to_stjames(mol),
        workflow_data=workflow_data,
        folder_uuid=folder_uuid,
    )

    # Poll every 5 seconds until the workflow finishes or the timeout elapses.
    started_at = time.time()
    while True:
        if post.is_finished():
            break
        await asyncio.sleep(5)
        if time.time() - started_at > timeout:
            raise TimeoutError("Workflow timed out")

    data = rowan.retrieve_workflow(post.uuid).data
    if data is None:
        raise NoConformersError("This molecule has no conformers")

    # Rank (energy, uuids) pairs from lowest to highest energy.
    ranked = sorted(
        zip(data["energies"], data["conformer_uuids"], strict=True),
        key=lambda pair: pair[0],
    )

    available = len(ranked)
    if available < num_conformers:
        logging.warning(
            "Number of conformers requested is greater than number of conformers available"
        )
        num_conformers = available

    # Keep the n lowest-energy entries; the first UUID of each entry is used.
    selected = ranked[:num_conformers]
    lowest_energies = [energy for energy, _ in selected]
    lowest_n_uuids = [uuids[0] for _, uuids in selected]

    # Create conformer slots on the molecule, then overwrite their coordinates
    # with the geometries returned by the workflow.
    AllChem.EmbedMultipleConfs(mol, numConfs=num_conformers)  # type: ignore [attr-defined]

    for slot, conformer in enumerate(mol.GetConformers()):
        final_molecule = rowan.retrieve_calculation_molecules(lowest_n_uuids[slot])[-1]
        coordinates = [atom["position"] for atom in final_molecule["atoms"]]
        conformer.SetPositions(np.array(coordinates))

    energies = lowest_energies if return_energies else []
    return {"molecule": mol, "energies": energies}
872
+
873
+
874
def run_charges(
    mol: RdkitMol,
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    timeout: int = 600,
    name: str = "Charges API Workflow",
    folder_uuid: str | None = None,
) -> ChargesResults:
    """
    Compute atom-centered charges for every conformer of a Molecule and wait for the results.

    :param mol: RDKit Molecule
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the calculation in.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param timeout: timeout in seconds
    :param name: name of the job
    :param folder_uuid: folder UUID
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :return: list of dictionaries with the charges and the conformer index
    """
    workflow = _single_charges(
        mol=mol,
        method=method,
        engine=engine,
        mode=mode,
        timeout=timeout,
        name=name,
        folder_uuid=folder_uuid,
    )
    return asyncio.run(workflow)
899
+
900
+
901
def batch_charges(
    mols: Iterable[RdkitMol],
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    timeout: int = 600,
    name: str = "Charges API Workflow",
    folder_uuid: str | None = None,
) -> list[ChargesResults]:
    """
    Compute atom-centered charges for every Molecule of a batch and wait for all results.

    :param mols: RDKit Molecules
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the calculation in.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param timeout: timeout in seconds
    :param name: name of the jobs
    :param folder_uuid: folder UUID
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :return: one list of charge dictionaries per Molecule, in input order
    """

    async def _gather_all():
        pending = []
        for mol in mols:
            pending.append(
                _single_charges(
                    mol=mol,
                    method=method,
                    engine=engine,
                    mode=mode,
                    timeout=timeout,
                    name=name,
                    folder_uuid=folder_uuid,
                )
            )
        return await asyncio.gather(*pending)

    return asyncio.run(_gather_all())
933
+
934
+
935
async def _single_charges(
    mol: RdkitMol,
    method: str = "aimnet2_wb97md3",
    engine: str = "aimnet2",
    mode: str = "auto",
    timeout: int = 600,
    name: str = "Charges API Workflow",
    folder_uuid: str | None = None,
) -> ChargesResults:
    """
    Submit one charge calculation per conformer, poll until all finish, and collect the charges.

    A Molecule without conformers is embedded first.

    :param mol: RDKit Molecule
    :param method: method to use for the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/method.py
    :param engine: engine to run the calculation.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/engine.py
    :param mode: mode to run the calculation in.
        See https://github.com/rowansci/stjames-public/blob/master/stjames/mode.py
    :param timeout: timeout in seconds
    :param name: name of the job (the default now matches ``run_charges`` and
        ``batch_charges``; it was previously the energy workflow's name)
    :param folder_uuid: folder UUID
    :raises NoConformersError: if the Molecule still has no conformers after embedding
    :raises MethodTooSlowError: if the method is not one of ``FAST_METHODS``
    :raises TimeoutError: if the workflows have not all finished after ``timeout`` seconds
    :raises KeyError: if a finished workflow has no data
    :return: list of dictionaries with the Mulliken charges and the conformer index
        (position of the conformer in submission order)
    """
    get_api_key()
    method = stjames.Method(method)

    if mol.GetNumConformers() == 0:
        mol = _embed_rdkit_mol(mol)
    if mol.GetNumConformers() == 0:
        raise NoConformersError("This molecule has no conformers")

    if method not in FAST_METHODS:
        raise MethodTooSlowError(
            "This method is too slow; try running this through our web interface."
        )

    # One workflow per conformer, kept in conformer order.
    workflow_uuids = []
    for conformer in mol.GetConformers():
        cid = conformer.GetId()

        post = rowan.submit_workflow(
            name=name,
            workflow_type="basic_calculation",
            initial_molecule=_rdkit_to_stjames(mol, cid),
            workflow_data={
                "settings": {
                    "method": method.value,
                    "corrections": [],
                    "tasks": ["charge"],
                    "mode": mode,
                    "opt_settings": {"constraints": []},
                },
                "engine": engine,
            },
            folder_uuid=folder_uuid,
        )

        workflow_uuids.append(post.uuid)

    # Poll every 5 seconds until every workflow finishes or the timeout elapses.
    start = time.time()
    while not all(rowan.retrieve_workflow(uuid).is_finished() for uuid in workflow_uuids):
        await asyncio.sleep(5)
        if time.time() - start > timeout:
            raise TimeoutError("Workflow timed out")

    def grab_charges(uuid: str) -> list[float]:
        """Grab mulliken charges by UUID of workflow."""
        data = rowan.retrieve_workflow(uuid).data
        if data is None:
            raise KeyError("Workflow data not found")
        molecules = rowan.retrieve_calculation_molecules(data["calculation_uuid"])
        return molecules[-1]["mulliken_charges"]

    return [
        {"conformer_index": i, "charges": grab_charges(uuid)}
        for i, uuid in enumerate(workflow_uuids)
    ]