rdworks 0.43.2__tar.gz → 0.45.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. {rdworks-0.43.2 → rdworks-0.45.1}/PKG-INFO +1 -1
  2. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/__init__.py +1 -1
  3. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/conf.py +42 -2
  4. rdworks-0.45.1/src/rdworks/xtb/wrapper.py +508 -0
  5. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks.egg-info/PKG-INFO +1 -1
  6. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks.egg-info/SOURCES.txt +1 -1
  7. rdworks-0.45.1/tests/test_xtb.py +72 -0
  8. rdworks-0.43.2/src/rdworks/xtb/wrapper.py +0 -304
  9. rdworks-0.43.2/tests/test_xtb_wrapper.py +0 -51
  10. {rdworks-0.43.2 → rdworks-0.45.1}/LICENSE +0 -0
  11. {rdworks-0.43.2 → rdworks-0.45.1}/README.md +0 -0
  12. {rdworks-0.43.2 → rdworks-0.45.1}/pyproject.toml +0 -0
  13. {rdworks-0.43.2 → rdworks-0.45.1}/setup.cfg +0 -0
  14. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/autograph/__init__.py +0 -0
  15. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/autograph/autograph.py +0 -0
  16. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/autograph/centroid.py +0 -0
  17. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/autograph/dynamictreecut.py +0 -0
  18. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/autograph/nmrclust.py +0 -0
  19. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/autograph/rckmeans.py +0 -0
  20. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/bitqt/__init__.py +0 -0
  21. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/bitqt/bitqt.py +0 -0
  22. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/descriptor.py +0 -0
  23. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/display.py +0 -0
  24. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/ionized.py +0 -0
  25. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/matchedseries.py +0 -0
  26. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/mol.py +0 -0
  27. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/mollibr.py +0 -0
  28. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/pka.py +0 -0
  29. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Asinex_fragment.xml +0 -0
  30. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Astex_RO3.xml +0 -0
  31. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +0 -0
  32. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +0 -0
  33. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +0 -0
  34. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +0 -0
  35. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +0 -0
  36. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +0 -0
  37. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Baell2010_PAINS/makexml.py +0 -0
  38. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Brenk2008_Dundee/makexml.py +0 -0
  39. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/CNS.xml +0 -0
  40. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/BMS.xml +0 -0
  41. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/Dundee.xml +0 -0
  42. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/Glaxo.xml +0 -0
  43. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +0 -0
  44. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/LINT.xml +0 -0
  45. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/MLSMR.xml +0 -0
  46. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/PAINS.xml +0 -0
  47. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +0 -0
  48. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ChEMBL_Walters/makexml.py +0 -0
  49. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +0 -0
  50. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +0 -0
  51. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +0 -0
  52. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +0 -0
  53. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +0 -0
  54. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Hann1999_Glaxo/makexml.py +0 -0
  55. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Kazius2005/Kazius2005.xml +0 -0
  56. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/Kazius2005/makexml.py +0 -0
  57. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ZINC_druglike.xml +0 -0
  58. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ZINC_fragment.xml +0 -0
  59. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ZINC_leadlike.xml +0 -0
  60. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/fragment.xml +0 -0
  61. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ionized/simple_smarts_pattern.csv +0 -0
  62. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/ionized/smarts_pattern.csv +0 -0
  63. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/misc/makexml.py +0 -0
  64. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/misc/reactive-part-2.xml +0 -0
  65. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/misc/reactive-part-3.xml +0 -0
  66. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/predefined/misc/reactive.xml +0 -0
  67. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/readin.py +0 -0
  68. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/rgroup.py +0 -0
  69. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/scaffold.py +0 -0
  70. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/std.py +0 -0
  71. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/stereoisomers.py +0 -0
  72. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/tautomers.py +0 -0
  73. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/testdata.py +0 -0
  74. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/torsion.py +0 -0
  75. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/units.py +0 -0
  76. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/utils.py +0 -0
  77. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/xml.py +0 -0
  78. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks/xtb/__init__.py +0 -0
  79. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks.egg-info/dependency_links.txt +0 -0
  80. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks.egg-info/requires.txt +0 -0
  81. {rdworks-0.43.2 → rdworks-0.45.1}/src/rdworks.egg-info/top_level.txt +0 -0
  82. {rdworks-0.43.2 → rdworks-0.45.1}/tests/test_basics.py +0 -0
  83. {rdworks-0.43.2 → rdworks-0.45.1}/tests/test_decimals.py +0 -0
  84. {rdworks-0.43.2 → rdworks-0.45.1}/tests/test_gypsumdl.py +0 -0
  85. {rdworks-0.43.2 → rdworks-0.45.1}/tests/test_iupac_name.py +0 -0
  86. {rdworks-0.43.2 → rdworks-0.45.1}/tests/test_nn_xtb.py +0 -0
  87. {rdworks-0.43.2 → rdworks-0.45.1}/tests/test_web.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdworks
3
- Version: 0.43.2
3
+ Version: 0.45.1
4
4
  Summary: Routine tasks built on RDKit and other tools
5
5
  Author-email: Sung-Hun Bae <sunghun.bae@gmail.com>
6
6
  Maintainer-email: Sung-Hun Bae <sunghun.bae@gmail.com>
@@ -1,4 +1,4 @@
1
- __version__ = '0.43.2'
1
+ __version__ = '0.45.1'
2
2
 
3
3
  from rdworks.conf import Conf
4
4
  from rdworks.mol import Mol
@@ -217,8 +217,12 @@ class Conf:
217
217
  # wbo = Wiberg_bond_orders,
218
218
  # geometry = rdmol_opt,
219
219
  # )
220
- self.rdmol = result.geometry
221
- PE_final = result.PE
220
+ try:
221
+ self.rdmol = result.geometry
222
+ PE_final = result.PE
223
+ retcode = 0
224
+ except:
225
+ retcode = 1
222
226
 
223
227
  elif calculator.lower() == 'MMFF94'.lower() or calculator.lower() == 'MMFF'.lower():
224
228
  retcode = Chem.rdForceFieldHelpers.MMFFOptimizeMolecule(self.rdmol,
@@ -673,6 +677,42 @@ class Conf:
673
677
  return '\n'.join(lines)
674
678
 
675
679
 
680
def to_turbomole_coord(self, bohr: bool = False) -> str:
    """Return the conformer as a TURBOMOLE coord formatted string.

    Turbomole coord file format:

    - It starts with the keyword `$coord`.
    - Each line after the $coord line specifies an atom, consisting of:
        - Three real numbers representing the Cartesian coordinates (x, y, z).
        - A string for the element name.
        - Optional: an "f" label at the end to indicate that the atom's
          coordinates are frozen during optimization.
    - Coordinates can be given in Bohr (default), Angstrom (`$coord angs`),
      or fractional coordinates (`$coord frac`).
    - Optional data groups like periodicity (`$periodic`), lattice parameters
      (`$lattice`), and cell parameters (`$cell`) can also be included.
    - Regarding precision:
        The precision of the coordinates is crucial for accurate calculations,
        especially geometry optimizations, hence 15 decimal places are written.

    Args:
        bohr (bool): whether to write the coordinates in Bohr units
            (plain `$coord` header). Defaults to False, in which case
            Angstrom units (`$coord angs` header) are written.

    Returns:
        str: TURBOMOLE coord formatted text terminated by `$end`.
    """
    # CODATA 2018 Bohr radius in Angstrom.
    bohr_radius_in_angstrom = 0.529177210903

    if bohr:
        # The plain `$coord` header announces Bohr, so the stored
        # coordinates (written as-is under `$coord angs`) must be converted.
        lines = ["$coord"]
        scale = 1.0 / bohr_radius_in_angstrom
    else:
        lines = ["$coord angs"]
        scale = 1.0

    for (x, y, z), e in zip(self.positions(), self.symbols()):
        lines.append(f"{x * scale:20.15f} {y * scale:20.15f} {z * scale:20.15f} {e}")

    lines.append("$end")

    return '\n'.join(lines)
714
+
715
+
676
716
  def to_sdf(self, props:bool=True) -> str:
677
717
  """Returns the SDF-formatted strings.
678
718
 
@@ -0,0 +1,508 @@
1
+ import os
2
+ import resource
3
+ import subprocess
4
+ import json
5
+ import tempfile
6
+ import logging
7
+ import shutil
8
+ import re
9
+
10
+ from pathlib import Path
11
+ from types import SimpleNamespace
12
+
13
+ from rdkit import Chem
14
+ from rdkit.Geometry import Point3D
15
+
16
+
17
# Module-scoped logger: applications can configure or silence this module
# without touching the root logger (records still propagate to root handlers).
logger = logging.getLogger(__name__)

# xtb output is in atomic units (Eh, hartree); results are exposed in kcal/mol.
# CODATA 2018 energy conversion factors.
hartree2ev = 27.211386245988
hartree2kcalpermol = 627.50947337481
ev2kcalpermol = 23.060547830619026
25
+
26
+
27
+ class GFN2xTB:
28
def __init__(self, molecule: Chem.Mol, ncores: int = 8):
    """Prepare a GFN2-xTB calculation for one 3D conformer.

    Copies element symbols and coordinates out of `molecule`, exports the
    OpenMP/MKL threading environment variables for the current process
    and tries to lift the stack size limit.

    Args:
        molecule (Chem.Mol): rdkit molecule holding a 3D conformer.
        ncores (int): thread count exported through OMP_NUM_THREADS and
            MKL_NUM_THREADS. Defaults to 8.

    Raises:
        AssertionError: if `molecule` is not a Chem.Mol, its conformer is
            not 3D, or the `xtb` executable cannot be found.
    """
    assert isinstance(molecule, Chem.Mol), "molecule is not rdkit.Chem.Mol type"
    assert molecule.GetConformer().Is3D(), "molecule is not a 3D conformer"
    assert self.is_xtb_ready(), "xtb is not accessible"

    self.rdmol = molecule
    self.natoms = molecule.GetNumAtoms()
    self.symbols = [ atom.GetSymbol() for atom in molecule.GetAtoms() ]
    self.positions = molecule.GetConformer().GetPositions().tolist()

    # Parallelisation (os.environ is process-wide, so child processes
    # started later through subprocess inherit these values)
    os.environ['OMP_STACKSIZE'] = '4G'
    os.environ['OMP_NUM_THREADS'] = f'{ncores},1'
    os.environ['OMP_MAX_ACTIVE_LEVELS'] = '1'
    os.environ['MKL_NUM_THREADS'] = f'{ncores}'

    # unlimit the system stack.
    # setrlimit() raises ValueError when the requested soft limit exceeds
    # the hard limit or when an unprivileged process tries to raise its
    # hard limit, so fall back to the largest value that is permitted
    # instead of failing the whole constructor.
    try:
        resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
    except (ValueError, OSError):
        try:
            _, hard_limit = resource.getrlimit(resource.RLIMIT_STACK)
            resource.setrlimit(resource.RLIMIT_STACK, (hard_limit, hard_limit))
        except (ValueError, OSError):
            logger.warning('GFN2xTB() could not raise the stack size limit')
46
+
47
+
48
def version(self) -> str | None:
    """Report the version line printed by `xtb --version`.

    Returns:
        str | None: the first stripped stdout line that contains the word
            'version', or None when no such line is printed.

    Raises:
        AssertionError: if `xtb --version` exits with a non-zero code.
    """
    completed = subprocess.run(['xtb', '--version'], capture_output=True, text=True)
    assert completed.returncode == 0, "GFN2xTB() Error: xtb not available"

    stripped_lines = (raw.strip() for raw in completed.stdout.split('\n'))

    return next((text for text in stripped_lines if 'version' in text), None)
63
+
64
+
65
def is_xtb_ready(self, cmd: str = 'xtb') -> bool:
    """Tell whether the xtb executable can be located.

    Args:
        cmd (str): executable name or path to look up. Defaults to 'xtb'.

    Returns:
        bool: True if `shutil.which` resolves `cmd`, False otherwise.
    """
    resolved = shutil.which(cmd)

    return resolved is not None
72
+
73
+
74
def is_cpx_ready(self, cmd: str = 'cpx') -> bool:
    """Tell whether the CPCM-X command-line tool, `cpx`, can be located.

    Args:
        cmd (str): executable name or path to look up. Defaults to 'cpx'.

    Returns:
        bool: True if `shutil.which` resolves `cmd`, False otherwise.
    """
    resolved = shutil.which(cmd)

    return resolved is not None
81
+
82
+
83
def is_cpcmx_ready(self) -> bool:
    """Check whether xtb works with the `--cpcmx` option.

    xtb distributed by conda does not include the CPCM-X function
    (as of June 17, 2025). xtb installed from the github source codes by
    using meson and ninja includes it.

    Returns:
        bool: False if xtb reports that the CPCM-X library was not
            included, True otherwise.

    Raises:
        AssertionError: if `xtb --cpcmx` unexpectedly exits with code 0.
    """
    proc = subprocess.run(['xtb', '--cpcmx'], capture_output=True, text=True)
    # we are expecting an error because no input file is given
    assert proc.returncode != 0

    # The diagnostic may be printed on either stream, so inspect both.
    report = f'{proc.stdout}\n{proc.stderr}'

    return 'CPCM-X library was not included' not in report
102
+
103
+
104
def to_xyz(self) -> str:
    """Serialize the stored atoms as an XYZ formatted string.

    The first line is the atom count, the second line is a single blank
    character (the title line), followed by one line per atom.

    Returns:
        str: XYZ formatted string
    """
    header = [f'{self.natoms}', ' ']
    atom_rows = [
        f'{symbol:5} {px:23.14f} {py:23.14f} {pz:23.14f}'
        for symbol, (px, py, pz) in zip(self.symbols, self.positions)
    ]

    return '\n'.join(header + atom_rows)
115
+
116
+
117
def to_turbomole_coord(self, bohr: bool = False) -> str:
    """Return the stored atoms as a TURBOMOLE coord formatted string.

    Turbomole coord file format:

    - It starts with the keyword `$coord`.
    - Each line after the $coord line specifies an atom, consisting of:
        - Three real numbers representing the Cartesian coordinates (x, y, z).
        - A string for the element name.
        - Optional: an "f" label at the end to indicate that the atom's
          coordinates are frozen during optimization.
    - Coordinates can be given in Bohr (default), Angstrom (`$coord angs`),
      or fractional coordinates (`$coord frac`).
    - Optional data groups like periodicity (`$periodic`), lattice parameters
      (`$lattice`), and cell parameters (`$cell`) can also be included.
    - Regarding precision:
        The precision of the coordinates is crucial for accurate calculations,
        especially geometry optimizations, hence 15 decimal places are written.

    Args:
        bohr (bool): whether to write the coordinates in Bohr units
            (plain `$coord` header). Defaults to False, in which case
            Angstrom units (`$coord angs` header) are written.

    Returns:
        str: TURBOMOLE coord formatted text terminated by `$end`.
    """
    # CODATA 2018 Bohr radius in Angstrom.
    bohr_radius_in_angstrom = 0.529177210903

    if bohr:
        # The plain `$coord` header announces Bohr, so the stored
        # coordinates (written as-is under `$coord angs`) must be converted.
        lines = ["$coord"]
        scale = 1.0 / bohr_radius_in_angstrom
    else:
        lines = ["$coord angs"]
        scale = 1.0

    for (x, y, z), e in zip(self.positions, self.symbols):
        lines.append(f"{x * scale:20.15f} {y * scale:20.15f} {z * scale:20.15f} {e}")

    lines.append("$end")

    return '\n'.join(lines)
151
+
152
+
153
def load_xyz(self, geometry_input_path: Path) -> Chem.Mol:
    """Load a geometry from an XYZ file onto a copy of the molecule.

    The atom order in the file must match the atom order of the stored
    molecule; only the conformer coordinates of the copy are replaced.

    Args:
        geometry_input_path (Path): pathlib.Path to the xyz

    Returns:
        Chem.Mol: copy of the stored rdkit Chem.Mol with updated coordinates.

    Raises:
        AssertionError: if the atom count in the file header or an element
            symbol does not match the stored molecule.
    """
    rdmol_opt = Chem.Mol(self.rdmol)
    conformer = rdmol_opt.GetConformer()

    with open(geometry_input_path, 'r') as f:
        # line 1: number of atoms
        assert int(f.readline().strip()) == self.natoms
        # line 2: comment or title (may legitimately be blank)
        f.readline()
        # Remaining lines are atoms; blank (e.g. trailing) lines are
        # dropped so they do not break the 4-field unpacking below.
        atom_lines = [line for line in f if line.strip()]

    for idx, line in enumerate(atom_lines):
        (symbol, x, y, z) = line.split()
        atom = rdmol_opt.GetAtomWithIdx(idx)
        assert symbol == atom.GetSymbol()
        conformer.SetAtomPosition(atom.GetIdx(), Point3D(float(x), float(y), float(z)))

    return rdmol_opt
177
+
178
+
179
def load_wbo(self, wbo_path: Path) -> dict[tuple[int, int], float]:
    """Read Wiberg bond orders (WBO) from a `wbo` output file.

    singlepoint() creates a wbo output file. Each non-blank line holds
    two 1-based atom indices followed by the bond order.

    Args:
        wbo_path (Path): path to the wbo file.

    Returns:
        dict(tuple[int, int], float): { (i, j) : wbo, ... } where i and j
            are 0-based atom indices of a bond with i <= j.
    """
    bond_orders = {}

    with open(wbo_path, 'r') as handle:
        for record in handle:
            fields = record.split()
            if not fields:
                # blank line
                continue
            first, second, order = fields
            # shift the 1-based file indices to 0-based indices
            a = int(first) - 1
            b = int(second) - 1
            # store the pair with the smaller index first
            bond_orders[(min(a, b), max(a, b))] = float(order)

    return bond_orders
207
+
208
+
209
def cpx(self, verbose: bool = False) -> float | None:
    """Run `cpx --solvent water` and return Gsolv (kcal/mol).

    Gsolv is taken as the difference between the 'total energy' reported
    in solv.out and in gas.out, converted from Eh to kcal/mol.

    Warning:
        Solvation energy obtained from `xtb --cpcmx water` differs from
        `cpx --solvent water` (difference between gas.out and solv.out in
        terms of total energy). There are other correction terms not
        clearly defined in the output files.
        So, this method is not reliable and should be discarded.

    Args:
        verbose (bool): log the temporary working directory. Defaults to False.

    Returns:
        float or None: Gsolv energy in kcal/mol, or None if cpx fails or a
            total energy cannot be read from either output file.
    """
    # Both output files print the value in their SUMMARY block, e.g.
    #   :: total energy            -119.507131639760 Eh   ::
    total_energy_pattern = re.compile(r"total energy\s+(?P<energy>[-+]?\d*\.?\d+)\s+Eh")

    def read_total_energy(out_path: Path) -> float | None:
        """Return the last 'total energy' value (Eh) found in out_path, or None."""
        if not out_path.is_file():
            return None
        energy = None
        with open(out_path, 'r') as f:
            for line in f:
                m = total_energy_pattern.search(line)
                if m:
                    energy = float(m.group('energy'))
        return energy

    with tempfile.TemporaryDirectory() as temp_dir: # temp_dir is a string
        workdir = Path(temp_dir)
        if verbose:
            logger.info(f'xtb.cpx workdir= {temp_dir}')

        # cpx is run without a file argument in a directory that only
        # contains this `coord` file.
        with open(workdir / 'coord', 'w') as f:
            f.write(self.to_turbomole_coord())

        proc = subprocess.run(['cpx', '--solvent', 'water'], cwd=temp_dir, capture_output=True, text=True)
        # cpx creates the following files:
        # charges  gas.energy  solute_sigma.txt   solvent_sigma.txt   xtbtopo.mol
        # coord    gas.out     solute_sigma3.txt  solvent_sigma3.txt
        # error    solute.cosmo  solv.out  wbo

        if proc.returncode != 0:
            return None

        # read while the temporary directory still exists
        total_energy_solv = read_total_energy(workdir / 'solv.out')
        total_energy_gas = read_total_energy(workdir / 'gas.out')

    # compare against None explicitly: an energy of exactly 0.0 is a value
    if total_energy_solv is None or total_energy_gas is None:
        return None

    return (total_energy_solv - total_energy_gas) * hartree2kcalpermol
305
+
306
+
307
def singlepoint(self, water: str | None = None, verbose: bool = False) -> SimpleNamespace:
    """Calculate single point energy.

    Total energy from xtb output in atomic units (Eh, hartree) is converted to kcal/mol.

    Args:
        water (str, optional) : water solvation model ('gbsa', 'alpb' or 'cpcmx').
            alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
            gbsa: generalized Born (GB) model with Surface Area contributions.
            cpcmx: CPCM-X, used only if `is_cpcmx_ready()` is True.
            Any other value runs the calculation without a solvation option.
        verbose (bool): log the temporary working directory. Defaults to False.

    Returns:
        SimpleNamespace(PE(total energy in kcal/mol), Gsolv(kcal/mol or None), charges, wbo),
        or an empty SimpleNamespace if xtb fails or writes no xtbout.json.
    """

    with tempfile.TemporaryDirectory() as temp_dir: # temp_dir is a string
        workdir = Path(temp_dir)
        if verbose:
            logger.info(f'xtb.singlepoint workdir= {temp_dir}')

        geometry_input_path = workdir / 'geometry.xyz'
        xtbout_path = workdir / 'xtbout.json'
        # NOTE(review): stdout of xtb is captured in proc.stdout below;
        # confirm under which conditions a 'fort.6' file is written.
        stdout_path = workdir / 'fort.6'
        wbo_path = workdir / 'wbo'

        with open(geometry_input_path, 'w') as geometry:
            geometry.write(self.to_xyz())

        cmd = ['xtb', geometry_input_path.as_posix()]

        options = ['--gfn', '2', '--json']

        if isinstance(water, str):
            if water == 'gbsa':
                options += ['--gbsa', 'H2O']
                # it does not provide Gsolv contribution to the total energy
            elif water == 'alpb':
                options += ['--alpb', 'water']
                # it does not provide Gsolv contribution to the total energy
            elif water == 'cpcmx':
                if self.is_cpcmx_ready():
                    options += ['--cpcmx', 'water']
                else:
                    logger.warning('xtb.singlepoint --cpcmx is not available; no solvation option is used')

        # 'xtbout.json', 'xtbrestart', 'xtbtopo.mol', 'charges', and 'wbo' files will be
        # created in the current working directory.
        proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)

        if proc.returncode != 0 or not xtbout_path.is_file():
            # something went wrong; without xtbout.json there is no
            # energy to report
            return SimpleNamespace()

        with open(xtbout_path, 'r') as f:
            datadict = json.load(f) # takes the file object as input

        Gsolv = None

        if (water is not None) and stdout_path.is_file():
            # Free Energy contributions:                       [Eh]      [kcal/mol]
            # -------------------------------------------------------------------------
            # solvation free energy (dG_solv):             -0.92587E-03    -0.58099
            # gas phase energy (E)                         -0.52068E+01
            # -------------------------------------------------------------------------
            # total free energy (dG)                       -0.52077E+01
            with open(stdout_path, 'r') as f:
                for line in f:
                    m = re.search(r"solvation free energy \(dG_solv\)\:\s+[-+]?\d*\.?\d+E[-+]?\d*\s+(?P<kcalpermol>[-+]?\d*\.?\d+)", line)
                    # lines that do not fully match the layout are skipped
                    if m:
                        Gsolv = float(m.group('kcalpermol'))

        Wiberg_bond_orders = self.load_wbo(wbo_path)

        return SimpleNamespace(
            PE = datadict['total energy'] * hartree2kcalpermol,
            Gsolv = Gsolv,
            charges = datadict['partial charges'],
            wbo = Wiberg_bond_orders,
            )
391
+
392
+
393
+
394
def optimize(self, water: str | None = None, verbose: bool = False) -> SimpleNamespace:
    """Optimize geometry with `xtb --opt --gfn 2`.

    Known Fortran runtime error:
        At line 852 of file ../src/optimizer.f90 (unit = 6, file = 'stdout')
        Fortran runtime error: Missing comma between descriptors
        (1x,"("f7.2"%)")
                    ^
        Error termination.

    Args:
        water (str, optional) : water solvation model (choose 'gbsa' or 'alpb')
            alpb: ALPB solvation model (Analytical Linearized Poisson-Boltzmann).
            gbsa: generalized Born (GB) model with Surface Area contributions.
            'cpcmx' only logs a warning and no solvation option is passed.
        verbose (bool): log the temporary working directory. Defaults to False.

    Returns:
        SimpleNamespace(PE(total energy in kcal/mol), charges, wbo, geometry),
        or an empty SimpleNamespace if xtb fails or writes no xtbout.json.
    """
    solvation_flags = {
        'gbsa': ['--gbsa', 'H2O'],
        'alpb': ['--alpb', 'water'],
    }

    with tempfile.TemporaryDirectory() as temp_dir: # temp_dir is a string
        workdir = Path(temp_dir)
        if verbose:
            logger.info(f'xtb.optimize workdir= {temp_dir}')

        xyz_in = workdir / 'geometry.xyz'
        json_out = workdir / 'xtbout.json'
        xyz_out = workdir / 'xtbopt.xyz'
        wbo_out = workdir / 'wbo'

        with open(xyz_in, 'w') as handle:
            handle.write(self.to_xyz())

        argv = ['xtb', xyz_in.as_posix(), '--opt', '--gfn', '2', '--json']

        if isinstance(water, str):
            if water in solvation_flags:
                argv += solvation_flags[water]
            elif water == 'cpcmx':
                logger.warning('optimize with --cpcmx option is not implemented in xtb yet')

        completed = subprocess.run(argv, cwd=temp_dir, capture_output=True, text=True)

        if completed.returncode != 0 or not json_out.is_file():
            # something went wrong if it reaches here
            return SimpleNamespace()

        with open(json_out, 'r') as handle:
            results = json.load(handle)

        bond_orders = self.load_wbo(wbo_out)
        optimized = self.load_xyz(xyz_out)

        return SimpleNamespace(
            PE = results['total energy'] * hartree2kcalpermol,
            charges = results['partial charges'],
            wbo = bond_orders,
            geometry = optimized,
            )
455
+
456
+
457
def esp(self, water: str | None = None, verbose: bool = False) -> None:
    """Calculate electrostatic potential with `xtb --esp --gfn 2`.

    The output is not parsed yet: the method always returns None.

    Example (NOTE(review): `dt` and `esp` below are placeholders for data
    this method does not return yet):
        v = py3Dmol.view()
        v.addVolumetricData(dt,
                            "cube.gz", {
                                'isoval': 0.005,
                                'smoothness': 2,
                                'opacity':.9,
                                'voldata': esp,
                                'volformat': 'cube.gz',
                                'volscheme': {
                                    'gradient':'rwb',
                                    'min':-.1,
                                    'max':.1,
                                    }
                                });
        v.addModel(dt,'cube')
        v.setStyle({'stick':{}})
        v.zoomTo()
        v.show()

    Args:
        water (str, optional) : water solvation model (choose 'gbsa' or 'alpb').
        verbose (bool): log the temporary working directory. Defaults to False.

    Returns:
        None
    """
    with tempfile.TemporaryDirectory() as temp_dir: # temp_dir is a string
        workdir = Path(temp_dir)
        if verbose:
            logger.info(f'xtb.esp workdir= {temp_dir}')

        geometry_input_path = workdir / 'geometry.xyz'
        # output files: xtb_esp.cosmo, xtb_esp.dat, xtb_esp_profile.dat
        xtb_esp_dat = workdir / 'xtb_esp.dat'

        with open(geometry_input_path, 'w') as geometry:
            geometry.write(self.to_xyz())

        cmd = ['xtb', geometry_input_path.as_posix()]

        options = ['--esp', '--gfn', '2']

        if water is not None and isinstance(water, str):
            if water == 'gbsa':
                options += ['--gbsa', 'H2O']
            elif water == 'alpb':
                options += ['--alpb', 'water']

        proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)

        if proc.returncode == 0 and xtb_esp_dat.is_file():
            # TODO: parse xtb_esp.dat; nothing is read from it yet
            with open(xtb_esp_dat, 'r') as f:
                pass

    return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rdworks
3
- Version: 0.43.2
3
+ Version: 0.45.1
4
4
  Summary: Routine tasks built on RDKit and other tools
5
5
  Author-email: Sung-Hun Bae <sunghun.bae@gmail.com>
6
6
  Maintainer-email: Sung-Hun Bae <sunghun.bae@gmail.com>
@@ -80,4 +80,4 @@ tests/test_gypsumdl.py
80
80
  tests/test_iupac_name.py
81
81
  tests/test_nn_xtb.py
82
82
  tests/test_web.py
83
- tests/test_xtb_wrapper.py
83
+ tests/test_xtb.py
@@ -0,0 +1,72 @@
1
+ from rdworks import Mol
2
+ from rdworks.xtb.wrapper import GFN2xTB
3
+ from rdworks.testdata import drugs
4
+
5
+ from pathlib import Path
6
+
7
+ import re
8
+ import shutil
9
+
10
+ # In ASE, the default energy unit is eV (electron volt).
11
+ # It will be converted to kcal/mol
12
+ # CODATA 2018 energy conversion factor
13
+ hartree2ev = 27.211386245988
14
+ hartree2kcalpermol = 627.50947337481
15
+ ev2kcalpermol = 23.060547830619026
16
+
17
+
18
+ datadir = Path(__file__).parent.resolve() / "data"
19
+ workdir = Path(__file__).parent.resolve() / "outfiles"
20
+
21
+ workdir.mkdir(exist_ok=True)
22
+
23
+ name = 'Atorvastatin'
24
+ testmol = Mol(drugs[name], name).make_confs(n=50).optimize_confs()
25
+ testmol = testmol.drop_confs(similar=True, verbose=True).sort_confs()
26
+
27
+
28
def test_singlepoint():
    """Smoke-run GFN2xTB.singlepoint() in gas phase and with each water model.

    Results are only printed; nothing is asserted here.
    """
    mol = testmol.copy()

    print("number of conformers=", mol.count())
    print("number of atoms=", mol.confs[0].natoms)

    gfn2xtb = GFN2xTB(mol.confs[0].rdmol, ncores=8)

    print("GFN2xTB.version():", gfn2xtb.version())
    print()

    # gas phase
    print("GFN2xTB.singlepoint()")
    print(gfn2xtb.singlepoint())
    print()

    # implicit water models
    for model in ('gbsa', 'alpb', 'cpcmx'):
        print(f"GFN2xTB.singlepoint(water='{model}')")
        if model == 'cpcmx':
            print("GFN2xTB.is_cpcmx_ready()", gfn2xtb.is_cpcmx_ready())
        print(gfn2xtb.singlepoint(water=model))
        print()
59
+
60
+
61
def test_optimize():
    """Smoke-run GFN2xTB.optimize() on the first conformer and print the result."""
    mol = testmol.copy()
    print("number of conformers=", mol.count())
    print("GFN2xTB.optimize()")
    calculator = GFN2xTB(mol.confs[0].rdmol, ncores=8)
    result = calculator.optimize(verbose=True)
    print(result)
    print()
68
+
69
+
70
+ if __name__ == '__main__':
71
+ test_singlepoint()
72
+ test_optimize()