rdworks 0.47.1__tar.gz → 0.49.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rdworks-0.47.1 → rdworks-0.49.1}/PKG-INFO +1 -1
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/__init__.py +9 -8
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/ionized.py +87 -60
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/mol.py +31 -20
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/xtb/wrapper.py +21 -17
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks.egg-info/PKG-INFO +1 -1
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks.egg-info/SOURCES.txt +3 -5
- {rdworks-0.47.1 → rdworks-0.49.1}/tests/test_basics.py +1 -99
- rdworks-0.49.1/tests/test_ionized.py +66 -0
- rdworks-0.47.1/tests/test_decimals.py → rdworks-0.49.1/tests/test_round.py +79 -71
- rdworks-0.49.1/tests/test_torsion.py +98 -0
- rdworks-0.49.1/tests/test_xtb.py +160 -0
- rdworks-0.47.1/tests/test_gypsumdl.py +0 -15
- rdworks-0.47.1/tests/test_iupac_name.py +0 -39
- rdworks-0.47.1/tests/test_nn_xtb.py +0 -91
- rdworks-0.47.1/tests/test_web.py +0 -378
- rdworks-0.47.1/tests/test_xtb.py +0 -72
- {rdworks-0.47.1 → rdworks-0.49.1}/LICENSE +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/README.md +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/pyproject.toml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/setup.cfg +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/autograph/__init__.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/autograph/autograph.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/autograph/centroid.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/autograph/dynamictreecut.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/autograph/nmrclust.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/autograph/rckmeans.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/bitqt/__init__.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/bitqt/bitqt.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/conf.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/descriptor.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/display.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/matchedseries.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/mollibr.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/pka.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Asinex_fragment.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Astex_RO3.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Baell2010_PAINS/makexml.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Brenk2008_Dundee/makexml.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/CNS.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/BMS.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/Dundee.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/Glaxo.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/LINT.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/MLSMR.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/PAINS.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ChEMBL_Walters/makexml.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Hann1999_Glaxo/makexml.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Kazius2005/Kazius2005.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/Kazius2005/makexml.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ZINC_druglike.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ZINC_fragment.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ZINC_leadlike.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/fragment.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ionized/simple_smarts_pattern.csv +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/ionized/smarts_pattern.csv +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/misc/makexml.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/misc/reactive-part-2.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/misc/reactive-part-3.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/predefined/misc/reactive.xml +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/readin.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/rgroup.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/scaffold.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/std.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/stereoisomers.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/tautomers.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/testdata.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/torsion.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/units.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/utils.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/xml.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks/xtb/__init__.py +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks.egg-info/dependency_links.txt +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks.egg-info/requires.txt +0 -0
- {rdworks-0.47.1 → rdworks-0.49.1}/src/rdworks.egg-info/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
__version__ = '0.
|
1
|
+
__version__ = '0.49.1'
|
2
2
|
|
3
3
|
from rdworks.conf import Conf
|
4
4
|
from rdworks.mol import Mol
|
@@ -25,10 +25,11 @@ __rdkit_version__ = rdkit.rdBase.rdkitVersion
|
|
25
25
|
|
26
26
|
rdkit_logger = rdkit.RDLogger.logger().setLevel(rdkit.RDLogger.CRITICAL)
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
28
|
+
logger = logging.getLogger(__name__)
|
29
|
+
logger.setLevel(logging.INFO) # level: DEBUG < INFO < WARNING < ERROR < CRITICAL
|
30
|
+
|
31
|
+
logger_stream = logging.StreamHandler() # sys.stdout or sys.stderr
|
32
|
+
logger_format = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
|
33
|
+
datefmt='%Y-%m-%d %H:%M:%S')
|
34
|
+
logger_stream.setFormatter(logger_format)
|
35
|
+
logger.addHandler(logger_stream)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import importlib.resources
|
2
|
+
from types import SimpleNamespace
|
2
3
|
import pandas as pd
|
3
4
|
|
4
5
|
from rdkit import Chem
|
@@ -6,6 +7,11 @@ from rdkit import Chem
|
|
6
7
|
# adapted from https://github.com/dptech-corp/Uni-pKa/enumerator
|
7
8
|
|
8
9
|
class IonizedStates:
|
10
|
+
"""Knowledge-based enumeration of (de)protonated states"""
|
11
|
+
|
12
|
+
smarts_path = importlib.resources.files('rdworks.predefined.ionized')
|
13
|
+
ionization_patterns = pd.read_csv(smarts_path / 'simple_smarts_pattern.csv')
|
14
|
+
|
9
15
|
# Unreasonable chemical structures
|
10
16
|
unreasonable_patterns = [
|
11
17
|
Chem.MolFromSmarts(s) for s in [
|
@@ -31,67 +37,38 @@ class IonizedStates:
|
|
31
37
|
"[N+1](=O)-[O]-[H]",
|
32
38
|
]]
|
33
39
|
|
34
|
-
smarts_path = importlib.resources.files('rdworks.predefined.ionized')
|
35
|
-
protonation_patterns = pd.read_csv(smarts_path / 'simple_smarts_pattern.csv')
|
36
40
|
|
37
|
-
def __init__(self, smiles:str):
|
41
|
+
def __init__(self, smiles: str, charge_min: int = -2, charge_max: int = 2):
|
38
42
|
self.smiles = Chem.CanonSmiles(smiles)
|
43
|
+
self.charge_max = charge_max
|
44
|
+
self.charge_min = charge_min
|
45
|
+
|
39
46
|
self.rdmol = Chem.MolFromSmiles(self.smiles)
|
40
47
|
self.rdmol_H = Chem.AddHs(self.rdmol)
|
41
48
|
self.charge = Chem.GetFormalCharge(self.rdmol_H)
|
42
|
-
|
43
|
-
self.charge_min = -2
|
49
|
+
|
44
50
|
# initial states
|
45
51
|
self.states = {self.smiles : (self.rdmol_H, self.charge)}
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
self.protonate(self.smiles)
|
52
|
+
|
53
|
+
# initial ionization sites
|
54
|
+
self.sites = {self.smiles: self.set_ionization_sites(self.smiles)}
|
50
55
|
|
51
|
-
|
52
|
-
|
53
|
-
|
56
|
+
# pKa pairs:
|
57
|
+
# HA(acid) + H2O == A-(base) + H3O+ or HA+(acid) + H2O == A(base) + H3O+
|
58
|
+
self.pairs = []
|
54
59
|
|
55
|
-
|
56
|
-
def get_states_by_charge(self) -> dict:
|
60
|
+
# iteratively build an ensemble of ionized states
|
57
61
|
self.ensemble()
|
58
|
-
data = {}
|
59
|
-
for smiles, (romol, charge) in self.states.items():
|
60
|
-
if charge in data:
|
61
|
-
data[charge].append(smiles)
|
62
|
-
else:
|
63
|
-
data[charge] = [smiles]
|
64
62
|
|
65
|
-
return data
|
66
|
-
|
67
|
-
def get_states(self) -> list:
|
68
|
-
return [smiles for smiles in self.states]
|
69
|
-
|
70
|
-
|
71
|
-
def get_states_mol(self) -> list[Chem.Mol]:
|
72
|
-
return [romol for smiles, (romol, charge) in self.states.items()]
|
73
|
-
|
74
|
-
|
75
|
-
def get_num_states(self) -> int:
|
76
|
-
return len(self.states)
|
77
63
|
|
78
64
|
|
79
65
|
@staticmethod
|
80
|
-
def
|
81
|
-
Chem.SanitizeMol(rdmol)
|
82
|
-
rdmol = Chem.MolFromSmiles(Chem.MolToSmiles(rdmol))
|
83
|
-
rdmol_H = Chem.AddHs(rdmol)
|
84
|
-
rdmol = Chem.RemoveHs(rdmol_H)
|
85
|
-
return Chem.CanonSmiles(Chem.MolToSmiles(rdmol))
|
86
|
-
|
87
|
-
|
88
|
-
@staticmethod
|
89
|
-
def set_protonation_sites(smiles:str) -> tuple:
|
66
|
+
def set_ionization_sites(smiles: str) -> tuple:
|
90
67
|
subject = Chem.MolFromSmiles(smiles)
|
91
68
|
subject = Chem.AddHs(subject)
|
92
69
|
charge = Chem.GetFormalCharge(subject)
|
93
70
|
indices = [] # atom indices of protonation/deprotonation site(s)
|
94
|
-
for i, name, smarts, smarts_index, acid_or_base in IonizedStates.
|
71
|
+
for i, name, smarts, smarts_index, acid_or_base in IonizedStates.ionization_patterns.itertuples():
|
95
72
|
pattern = Chem.MolFromSmarts(smarts)
|
96
73
|
matches = subject.GetSubstructMatches(pattern)
|
97
74
|
# returns a list of tuples, where each tuple contains the indices
|
@@ -100,21 +77,34 @@ class IonizedStates:
|
|
100
77
|
if len(matches) > 0:
|
101
78
|
smarts_index = int(smarts_index)
|
102
79
|
indices += [(match[smarts_index], acid_or_base) for match in matches]
|
80
|
+
|
103
81
|
return (list(set(indices)), subject, charge)
|
82
|
+
|
83
|
+
|
84
|
+
@staticmethod
|
85
|
+
def clean_smiles(rdmol: Chem.Mol) -> str:
|
86
|
+
Chem.SanitizeMol(rdmol)
|
87
|
+
rdmol = Chem.MolFromSmiles(Chem.MolToSmiles(rdmol))
|
88
|
+
rdmol_H = Chem.AddHs(rdmol)
|
89
|
+
rdmol = Chem.RemoveHs(rdmol_H)
|
90
|
+
return Chem.CanonSmiles(Chem.MolToSmiles(rdmol))
|
104
91
|
|
105
92
|
|
106
93
|
@staticmethod
|
107
|
-
def reasonable(romol:Chem.Mol) -> bool:
|
94
|
+
def reasonable(romol: Chem.Mol) -> bool:
|
108
95
|
return all([len(romol.GetSubstructMatches(p)) == 0 for p in IonizedStates.unreasonable_patterns])
|
96
|
+
|
109
97
|
|
110
|
-
|
111
|
-
def protonate(self, smiles:str) -> int:
|
98
|
+
def ionize(self, smiles: str | None = None) -> int:
|
112
99
|
num_added_states = 0
|
113
100
|
|
114
|
-
if smiles
|
115
|
-
|
101
|
+
if smiles is None:
|
102
|
+
smiles = self.smiles
|
103
|
+
|
104
|
+
if smiles not in self.sites:
|
105
|
+
self.sites[smiles] = self.set_ionization_sites(smiles)
|
116
106
|
|
117
|
-
(indices, subject, charge) = self.
|
107
|
+
(indices, subject, charge) = self.sites[smiles]
|
118
108
|
|
119
109
|
if (charge >= self.charge_max) or (charge <= self.charge_min):
|
120
110
|
# formal charge will be increased or decreased by protonation/deprotonation
|
@@ -140,6 +130,9 @@ class IonizedStates:
|
|
140
130
|
edmol.RemoveAtom(bonded_H_indices[0])
|
141
131
|
|
142
132
|
elif acid_or_base == 'B': # protonate
|
133
|
+
# note that protonation at tertiary nitrogen may result in stereoisomers
|
134
|
+
# current implementation ignores the stereochemistry
|
135
|
+
# use rdworks.complete_stereoisomers() function to complete the stereoisomers
|
143
136
|
B = edmol.GetAtomWithIdx(i)
|
144
137
|
assert B.GetAtomicNum() > 1, f"Cannot protonate an atom (idx={i}; {B.GetAtomicNum()})"
|
145
138
|
charge = B.GetFormalCharge() + 1
|
@@ -148,23 +141,57 @@ class IonizedStates:
|
|
148
141
|
B.SetNumExplicitHs(nH+1)
|
149
142
|
edmol = Chem.AddHs(edmol)
|
150
143
|
|
151
|
-
#
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
if self.reasonable(
|
157
|
-
if
|
144
|
+
# clean up and save SMILES
|
145
|
+
ionized_smiles = IonizedStates.clean_smiles(edmol)
|
146
|
+
ionized_mol = Chem.MolFromSmiles(ionized_smiles)
|
147
|
+
ionized_mol = Chem.AddHs(ionized_mol)
|
148
|
+
ionized_charge = Chem.GetFormalCharge(ionized_mol)
|
149
|
+
if self.reasonable(ionized_mol):
|
150
|
+
if ionized_smiles in self.states:
|
158
151
|
continue
|
159
|
-
self.states[
|
152
|
+
self.states[ionized_smiles] = (ionized_mol, ionized_charge)
|
160
153
|
num_added_states += 1
|
154
|
+
|
155
|
+
# store acid-base pair information for pKa
|
156
|
+
if acid_or_base == 'A':
|
157
|
+
self.pairs.append((i, smiles, ionized_smiles))
|
158
|
+
elif acid_or_base == 'B':
|
159
|
+
self.pairs.append((i, ionized_smiles, smiles))
|
161
160
|
|
162
161
|
return num_added_states
|
163
|
-
|
162
|
+
|
164
163
|
|
165
164
|
def ensemble(self) -> None:
|
165
|
+
# populate initial states
|
166
|
+
self.ionize()
|
167
|
+
|
168
|
+
# propagate
|
166
169
|
num_added_states = None
|
167
170
|
while num_added_states is None or num_added_states > 0:
|
168
|
-
states = self.states.copy()
|
171
|
+
states = self.states.copy() # dictionary
|
172
|
+
# self.ionize(smiles) below will change self.states
|
173
|
+
# so we cannot iterate self.states. Instead we will
|
174
|
+
# iterate over a copy of the self.states
|
169
175
|
for smiles in states:
|
170
|
-
num_added_states = self.
|
176
|
+
num_added_states = self.ionize(smiles)
|
177
|
+
|
178
|
+
|
179
|
+
def count(self) -> int:
|
180
|
+
return len(self.states)
|
181
|
+
|
182
|
+
|
183
|
+
def get_sites(self) -> dict:
|
184
|
+
return self.sites
|
185
|
+
|
186
|
+
|
187
|
+
def get_smiles(self) -> list[str]:
|
188
|
+
return [smiles for smiles in self.states]
|
189
|
+
|
190
|
+
|
191
|
+
def get_rdmol(self) -> list[Chem.Mol]:
|
192
|
+
return [romol for smiles, (romol, charge) in self.states.items()]
|
193
|
+
|
194
|
+
|
195
|
+
def get_pairs(self) -> list:
|
196
|
+
return self.pairs
|
197
|
+
|
@@ -46,7 +46,8 @@ from rdworks.display import render_svg, render_png
|
|
46
46
|
|
47
47
|
from scour.scour import scourString
|
48
48
|
|
49
|
-
|
49
|
+
|
50
|
+
logger = logging.getLogger(__name__)
|
50
51
|
|
51
52
|
|
52
53
|
class Mol:
|
@@ -328,11 +329,7 @@ class Mol:
|
|
328
329
|
return self
|
329
330
|
|
330
331
|
|
331
|
-
def make_confs(self,
|
332
|
-
n:int = 50,
|
333
|
-
method:str = 'ETKDG',
|
334
|
-
calculator:str | Callable = 'MMFF94',
|
335
|
-
) -> Self:
|
332
|
+
def make_confs(self, n: int = 50, method: str = 'ETKDG', **kwargs) -> Self:
|
336
333
|
"""Generates 3D conformers.
|
337
334
|
|
338
335
|
Args:
|
@@ -348,12 +345,8 @@ class Mol:
|
|
348
345
|
High-Quality Conformer Generation with CONFORGE: Algorithm and Performance Assessment.
|
349
346
|
J. Chem. Inf. Model. 63, 5549-5570 (2023).
|
350
347
|
"""
|
351
|
-
|
352
|
-
|
353
|
-
# rot_bonds = rd_descriptor_f['RotBonds'](self.rdmol)
|
354
|
-
# n = min(max(1, int(8.481 * (rot_bonds **1.642))), 1000)
|
355
|
-
# n = max(1, math.ceil(n * n_rel)) # ensures that n is at least 1
|
356
|
-
|
348
|
+
verbose = kwargs.get('verbose', False)
|
349
|
+
|
357
350
|
self.confs = []
|
358
351
|
|
359
352
|
if method.upper() == 'ETKDG':
|
@@ -426,6 +419,7 @@ class Mol:
|
|
426
419
|
os.remove(tmp_filename)
|
427
420
|
|
428
421
|
# energy evaluations for ranking
|
422
|
+
calculator = kwargs.get('calculator', 'MMFF94')
|
429
423
|
for conf in self.confs:
|
430
424
|
conf.potential_energy(calculator) # default: MMFF94
|
431
425
|
|
@@ -436,8 +430,15 @@ class Mol:
|
|
436
430
|
for conf in self.confs:
|
437
431
|
conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})
|
438
432
|
|
433
|
+
# rename conformers
|
439
434
|
self = self.rename()
|
440
435
|
|
436
|
+
if verbose:
|
437
|
+
rot_bonds = rd_descriptor_f['RotBonds'](self.rdmol)
|
438
|
+
nrb_suggested = int(8.481 * (rot_bonds **1.642))
|
439
|
+
logger.info(f"make_confs() rotatable bonds {rot_bonds} (suggested conformers {nrb_suggested}) generated {self.count()}")
|
440
|
+
logger.info(f"make_confs() updated potential energies E_tot(kcal/mol) and E_rel(kcal/mol) by {calculator}")
|
441
|
+
|
441
442
|
return self
|
442
443
|
|
443
444
|
|
@@ -499,10 +500,15 @@ class Mol:
|
|
499
500
|
Returns:
|
500
501
|
Self: modified self.
|
501
502
|
"""
|
503
|
+
verbose = kwargs.get('verbose', False)
|
504
|
+
|
502
505
|
if calculator is not None:
|
503
506
|
# re-calculate potential energies
|
507
|
+
if verbose :
|
508
|
+
logger.info(f"sort_cons() calculate potential energy by {calculator}")
|
509
|
+
|
504
510
|
for conf in self.confs:
|
505
|
-
PE = conf.potential_energy(calculator
|
511
|
+
PE = conf.potential_energy(calculator, **kwargs) # sets `E_tot(kcal/mol)`
|
506
512
|
|
507
513
|
if all(['E_tot(kcal/mol)' in conf.props for conf in self.confs]):
|
508
514
|
sort_by = 'E_tot(kcal/mol)'
|
@@ -767,7 +773,7 @@ class Mol:
|
|
767
773
|
cluster: bool | None =None,
|
768
774
|
k: int | None = None,
|
769
775
|
window: float | None = None,
|
770
|
-
|
776
|
+
**kwargs) -> Self:
|
771
777
|
"""Drop conformers that meet some condition(s).
|
772
778
|
|
773
779
|
Args:
|
@@ -792,6 +798,8 @@ class Mol:
|
|
792
798
|
Self: modified self.
|
793
799
|
"""
|
794
800
|
|
801
|
+
verbose = kwargs.get('verbose', False)
|
802
|
+
|
795
803
|
reasons = [f'stereo flipped',
|
796
804
|
f'unconverged',
|
797
805
|
f'similar({similar_rmsd})',
|
@@ -805,13 +813,13 @@ class Mol:
|
|
805
813
|
mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == self.smiles for _ in self.confs]
|
806
814
|
self.confs = list(itertools.compress(self.confs, mask))
|
807
815
|
if verbose:
|
808
|
-
|
816
|
+
logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
|
809
817
|
|
810
818
|
if unconverged and self.count() > 0:
|
811
819
|
mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in self.confs]
|
812
820
|
self.confs = list(itertools.compress(self.confs, mask))
|
813
821
|
if verbose:
|
814
|
-
|
822
|
+
logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
|
815
823
|
|
816
824
|
if similar and self.count() > 1:
|
817
825
|
# it is observed that there are essentially identical conformers
|
@@ -831,7 +839,7 @@ class Mol:
|
|
831
839
|
mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(self.confs)]
|
832
840
|
self.confs = list(itertools.compress(self.confs, mask))
|
833
841
|
if verbose:
|
834
|
-
|
842
|
+
logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
|
835
843
|
|
836
844
|
# note: it will retain the conformers with lower index
|
837
845
|
# so, it should be sorted before dropping
|
@@ -856,9 +864,12 @@ class Mol:
|
|
856
864
|
mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in self.confs]
|
857
865
|
self.confs = list(itertools.compress(self.confs, mask))
|
858
866
|
if verbose:
|
859
|
-
|
867
|
+
logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
|
860
868
|
|
861
869
|
if (k or window) and self.count() > 0:
|
870
|
+
# confs must be sorted by energies
|
871
|
+
if not all(['E_rel(kcal/mol)' in _.props for _ in self.confs]):
|
872
|
+
self = self.sort_confs(**kwargs)
|
862
873
|
if k:
|
863
874
|
mask_k = [i < k for i,_ in enumerate(self.confs)]
|
864
875
|
else:
|
@@ -871,7 +882,7 @@ class Mol:
|
|
871
882
|
mask = [(x and y) for (x,y) in zip(mask_k, mask_window)]
|
872
883
|
self.confs = list(itertools.compress(self.confs, mask))
|
873
884
|
if verbose:
|
874
|
-
|
885
|
+
logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
|
875
886
|
|
876
887
|
return self
|
877
888
|
|
@@ -974,7 +985,7 @@ class Mol:
|
|
974
985
|
|
975
986
|
|
976
987
|
def torsion_energies(self,
|
977
|
-
calculator: str | Callable,
|
988
|
+
calculator: str | Callable = 'MMFF94',
|
978
989
|
torsion_key: int | None = None,
|
979
990
|
simplify: bool = True,
|
980
991
|
fmax: float = 0.05,
|
@@ -14,7 +14,8 @@ from rdkit import Chem
|
|
14
14
|
from rdkit.Geometry import Point3D
|
15
15
|
|
16
16
|
|
17
|
-
logger = logging.getLogger()
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
18
19
|
|
19
20
|
# In ASE, the default energy unit is eV (electron volt).
|
20
21
|
# It will be converted to kcal/mol
|
@@ -25,7 +26,7 @@ ev2kcalpermol = 23.060547830619026
|
|
25
26
|
|
26
27
|
|
27
28
|
class GFN2xTB:
|
28
|
-
def __init__(self, molecule: Chem.Mol, ncores: int =
|
29
|
+
def __init__(self, molecule: Chem.Mol, ncores: int | None = None):
|
29
30
|
assert isinstance(molecule, Chem.Mol), "molecule is not rdkit.Chem.Mol type"
|
30
31
|
assert molecule.GetConformer().Is3D(), "molecule is not a 3D conformer"
|
31
32
|
assert self.is_xtb_ready(), "xtb is not accessible"
|
@@ -35,6 +36,9 @@ class GFN2xTB:
|
|
35
36
|
self.symbols = [ atom.GetSymbol() for atom in molecule.GetAtoms() ]
|
36
37
|
self.positions = molecule.GetConformer().GetPositions().tolist()
|
37
38
|
|
39
|
+
if ncores is None:
|
40
|
+
ncores = os.cpu_count()
|
41
|
+
|
38
42
|
# Parallelisation
|
39
43
|
os.environ['OMP_STACKSIZE'] = '4G'
|
40
44
|
os.environ['OMP_NUM_THREADS'] = f'{ncores},1'
|
@@ -107,7 +111,7 @@ class GFN2xTB:
|
|
107
111
|
Returns:
|
108
112
|
str | None: version statement.
|
109
113
|
"""
|
110
|
-
if GFN2xTB.
|
114
|
+
if GFN2xTB.is_xtb_ready():
|
111
115
|
cmd = ['xtb', '--version']
|
112
116
|
proc = subprocess.run(cmd, capture_output=True, text=True)
|
113
117
|
assert proc.returncode == 0, "GFN2xTB() Error: xtb not available"
|
@@ -338,12 +342,9 @@ class GFN2xTB:
|
|
338
342
|
|
339
343
|
with tempfile.TemporaryDirectory() as temp_dir: # tmpdir is a string
|
340
344
|
workdir = Path(temp_dir)
|
341
|
-
if verbose:
|
342
|
-
logger.info(f'xtb.singlepoint workdir= {temp_dir}')
|
343
345
|
|
344
346
|
geometry_input_path = workdir / 'geometry.xyz'
|
345
347
|
xtbout_path = workdir / 'xtbout.json'
|
346
|
-
stdout_path = workdir / 'fort.6'
|
347
348
|
wbo_path = workdir / 'wbo'
|
348
349
|
geometry_output_path = workdir / 'xtbtopo.mol'
|
349
350
|
|
@@ -354,8 +355,6 @@ class GFN2xTB:
|
|
354
355
|
|
355
356
|
options = ['--gfn', '2', '--json']
|
356
357
|
|
357
|
-
Gsolv = None
|
358
|
-
|
359
358
|
if water is not None and isinstance(water, str):
|
360
359
|
if water == 'gbsa':
|
361
360
|
options += ['--gbsa', 'H2O']
|
@@ -366,6 +365,9 @@ class GFN2xTB:
|
|
366
365
|
elif water == 'cpcmx' and self.is_cpcmx_option_ready():
|
367
366
|
options += ['--cpcmx', 'water']
|
368
367
|
|
368
|
+
if verbose:
|
369
|
+
logger.info(f"singlepoint() {' '.join(cmd+options)}")
|
370
|
+
|
369
371
|
# 'xtbout.json', 'xtbrestart', 'xtbtopo.mol', 'charges', and 'wbo' files will be
|
370
372
|
# created in the current working directory.
|
371
373
|
proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
|
@@ -381,19 +383,20 @@ class GFN2xTB:
|
|
381
383
|
if xtbout_path.is_file():
|
382
384
|
with open(xtbout_path, 'r') as f:
|
383
385
|
datadict = json.load(f) # takes the file object as input
|
386
|
+
|
387
|
+
Gsolv = None
|
384
388
|
|
385
|
-
if
|
389
|
+
if water is not None:
|
386
390
|
# Free Energy contributions: [Eh] [kcal/mol]
|
387
391
|
# -------------------------------------------------------------------------
|
388
392
|
# solvation free energy (dG_solv): -0.92587E-03 -0.58099
|
389
393
|
# gas phase energy (E) -0.52068E+01
|
390
394
|
# -------------------------------------------------------------------------
|
391
395
|
# total free energy (dG) -0.52077E+01
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
Gsolv = float(m.group('kcalpermol'))
|
396
|
+
for line in proc.stdout.splitlines():
|
397
|
+
if 'solvation free energy' in line:
|
398
|
+
m = re.search(r"solvation free energy \(dG_solv\)\:\s+[-+]?\d*\.?\d+E[-+]?\d*\s+(?P<kcalpermol>[-+]?\d*\.?\d+)", line)
|
399
|
+
Gsolv = float(m.group('kcalpermol'))
|
397
400
|
|
398
401
|
Wiberg_bond_orders = self.load_wbo(wbo_path)
|
399
402
|
|
@@ -429,8 +432,6 @@ class GFN2xTB:
|
|
429
432
|
"""
|
430
433
|
with tempfile.TemporaryDirectory() as temp_dir: # tmpdir is a string
|
431
434
|
workdir = Path(temp_dir)
|
432
|
-
if verbose:
|
433
|
-
logger.info(f'xtb.optimize workdir= {temp_dir}')
|
434
435
|
|
435
436
|
geometry_input_path = workdir / 'geometry.xyz'
|
436
437
|
xtbout_path = workdir / 'xtbout.json'
|
@@ -452,6 +453,9 @@ class GFN2xTB:
|
|
452
453
|
elif water == 'cpcmx':
|
453
454
|
logger.warning('optimize with --cpcmx option is not implemented in xtb yet')
|
454
455
|
|
456
|
+
if verbose:
|
457
|
+
logger.info(f"optimize() {' '.join(cmd+options)}")
|
458
|
+
|
455
459
|
proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
|
456
460
|
|
457
461
|
if proc.returncode == 0 and xtbout_path.is_file():
|
@@ -523,4 +527,4 @@ class GFN2xTB:
|
|
523
527
|
with open(xtb_esp_dat, 'r') as f:
|
524
528
|
pass
|
525
529
|
|
526
|
-
return None
|
530
|
+
return None
|
@@ -75,9 +75,7 @@ src/rdworks/predefined/misc/reactive.xml
|
|
75
75
|
src/rdworks/xtb/__init__.py
|
76
76
|
src/rdworks/xtb/wrapper.py
|
77
77
|
tests/test_basics.py
|
78
|
-
tests/
|
79
|
-
tests/
|
80
|
-
tests/
|
81
|
-
tests/test_nn_xtb.py
|
82
|
-
tests/test_web.py
|
78
|
+
tests/test_ionized.py
|
79
|
+
tests/test_round.py
|
80
|
+
tests/test_torsion.py
|
83
81
|
tests/test_xtb.py
|
@@ -51,24 +51,6 @@ drug_names = [
|
|
51
51
|
"Methixene", "Ethopropazine", "Aspirin", "Fluconazole", "Linezolid",
|
52
52
|
]
|
53
53
|
|
54
|
-
# Lahey, S.-L. J., Thien Phuc, T. N. & Rowley, C. N.
|
55
|
-
# Benchmarking Force Field and the ANI Neural Network Potentials for the
|
56
|
-
# Torsional Potential Energy Surface of Biaryl Drug Fragments.
|
57
|
-
# J. Chem. Inf. Model. 60, 6258–6268 (2020)
|
58
|
-
|
59
|
-
torsion_dataset_smiles = [
|
60
|
-
"C1(C2=CC=CN2)=CC=CC=C1",
|
61
|
-
"C1(C2=NC=CN2)=CC=CC=C1",
|
62
|
-
"C1(N2C=CC=C2)=NC=CC=N1",
|
63
|
-
"C1(C2=NC=NC=N2)=CC=CC=C1",
|
64
|
-
"C1(N2C=CC=C2)=CC=CC=C1",
|
65
|
-
"O=C(N1)C=CC=C1C2=COC=C2",
|
66
|
-
"C1(C2=NC=CC=N2)=NC=CC=N1",
|
67
|
-
"O=C(N1)C=CC=C1C2=NC=CN2",
|
68
|
-
]
|
69
|
-
|
70
|
-
torsion_dataset_names=["07", "09","20", "39", "10", "23", "12", "29"]
|
71
|
-
|
72
54
|
|
73
55
|
def test_init_mol():
|
74
56
|
mol = Mol(drug_smiles[0], drug_names[0])
|
@@ -458,86 +440,6 @@ def test_optimize_confs():
|
|
458
440
|
mol = mol.make_confs().optimize_confs(calculator='MMFF94')
|
459
441
|
|
460
442
|
|
461
|
-
def test_xtb_wrapper():
|
462
|
-
from rdworks.xtb.wrapper import GFN2xTB
|
463
|
-
assert GFN2xTB.is_xtb_ready() == True
|
464
|
-
assert GFN2xTB.is_cpx_ready() == True
|
465
|
-
assert GFN2xTB.is_cpcmx_option_ready() == True
|
466
|
-
assert GFN2xTB.is_ready() == True
|
467
|
-
assert GFN2xTB.version() is not None
|
468
|
-
|
469
|
-
|
470
|
-
def test_torsion_fragment():
|
471
|
-
from rdworks.torsion import create_torsion_fragment
|
472
|
-
mol = Mol(molecule="CC(C)C1=C(C(=C(N1CC[C@H](C[C@H](CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4",
|
473
|
-
name="atorvastatin").make_confs(n=1)
|
474
|
-
ta = mol.torsion_atoms()
|
475
|
-
assert len(ta) == 12
|
476
|
-
# {0: (0, 1, 3, 7), 1: (3, 4, 32, 33), 2: (4, 5, 26, 27), 3: (7, 6, 19, 20),
|
477
|
-
# 4: (3, 7, 8, 9), 5: (7, 8, 9, 10), 6: (8, 9, 10, 18), 7: (18, 10, 11, 12),
|
478
|
-
# 8: (10, 11, 12, 17), 9: (17, 12, 13, 14), 10: (12, 13, 14, 15), 11: (36, 35, 34, 32)}
|
479
|
-
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(mol.confs[0].rdmol, ta[6])
|
480
|
-
assert frag_ijkl == (5, 6, 7, 12)
|
481
|
-
assert frag_created == True
|
482
|
-
assert wbo_filtered == True
|
483
|
-
|
484
|
-
mol2 = Mol(molecule='CC(=O)Nc1ccc(O)cc1', name='acetaminophen.3').make_confs(n=1)
|
485
|
-
ta2 = mol2.torsion_atoms()
|
486
|
-
# {0: (5, 4, 3, 1)}
|
487
|
-
assert len(ta2) == 1
|
488
|
-
|
489
|
-
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(mol2.confs[0].rdmol, ta2[0])
|
490
|
-
# expects no fragmentation
|
491
|
-
assert frag == mol2.confs[0].rdmol
|
492
|
-
assert frag_ijkl == ta2[0]
|
493
|
-
assert frag_created == False
|
494
|
-
assert wbo_filtered == False
|
495
|
-
|
496
|
-
|
497
|
-
def test_torsion_fragment_from_conf():
|
498
|
-
from rdworks.torsion import create_torsion_fragment
|
499
|
-
mol = Mol(molecule="CC(C)C1=C(C(=C(N1CC[C@H](C[C@H](CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4",
|
500
|
-
name="atorvastatin").make_confs(n=1)
|
501
|
-
ref_conf = mol.confs[0]
|
502
|
-
ta = ref_conf.torsion_atoms()
|
503
|
-
assert len(ta) == 12
|
504
|
-
# {0: (0, 1, 3, 7), 1: (3, 4, 32, 33), 2: (4, 5, 26, 27), 3: (7, 6, 19, 20),
|
505
|
-
# 4: (3, 7, 8, 9), 5: (7, 8, 9, 10), 6: (8, 9, 10, 18), 7: (18, 10, 11, 12),
|
506
|
-
# 8: (10, 11, 12, 17), 9: (17, 12, 13, 14), 10: (12, 13, 14, 15), 11: (36, 35, 34, 32)}
|
507
|
-
frag, frag_ijkl, frag_created, wbo_filtered = create_torsion_fragment(ref_conf.rdmol, ta[6])
|
508
|
-
assert frag_ijkl == (5, 6, 7, 12)
|
509
|
-
assert frag_created == True
|
510
|
-
assert wbo_filtered == True
|
511
|
-
|
512
|
-
ref_conf = ref_conf.torsion_energies(calculator='MMFF94', torsion_key=6, interval=15)
|
513
|
-
|
514
|
-
mol2 = Mol(molecule='CC(=O)Nc1ccc(O)cc1', name='acetaminophen.3').make_confs(n=1)
|
515
|
-
ref_conf2 = mol2.confs[0]
|
516
|
-
ta2 = ref_conf2.torsion_atoms()
|
517
|
-
# {0: (5, 4, 3, 1)}
|
518
|
-
assert len(ta2) == 1
|
519
|
-
frag, frag_ijkl, frag_created, wbo_filtered = create_torsion_fragment(ref_conf2.rdmol, ta2[0])
|
520
|
-
# expects no fragmentation
|
521
|
-
assert frag == ref_conf2.rdmol
|
522
|
-
assert frag_ijkl == ta2[0]
|
523
|
-
assert frag_created == False
|
524
|
-
assert wbo_filtered == False
|
525
|
-
|
526
|
-
ref_conf2 = ref_conf2.torsion_energies(calculator='MMFF94', interval=15)
|
527
|
-
ref_conf3 = ref_conf2.torsion_energies_one(calculator='MMFF94', indices=frag_ijkl)
|
528
|
-
|
529
|
-
|
530
|
-
def test_torsion_energies():
|
531
|
-
libr = MolLibr(torsion_dataset_smiles, torsion_dataset_names)
|
532
|
-
with open(workdir / 'test_torsion_energies.html', 'w') as f:
|
533
|
-
for mol in libr[:1]:
|
534
|
-
mol = mol.make_confs().drop_confs(similar=True, similar_rmsd=0.3).sort_confs().rename()
|
535
|
-
mol = mol.optimize_confs(calculator='MMFF94').torsion_energies(calculator='MMFF94',
|
536
|
-
interval=15)
|
537
|
-
f.write(mol.to_html())
|
538
|
-
print(mol.dumps('torsion', decimals=2))
|
539
|
-
|
540
|
-
|
541
443
|
def test_workflow():
|
542
444
|
state_mol = Mol('Cc1nc2cc(Cl)nc(Cl)c2nc1C', 'A-1250')
|
543
445
|
state_mol = state_mol.make_confs(n=50, method='ETKDG')
|
@@ -563,4 +465,4 @@ def test_serialization():
|
|
563
465
|
rebuilt = Mol().deserialize(serialized)
|
564
466
|
assert rebuilt.count() == 10
|
565
467
|
assert rebuilt.name == name
|
566
|
-
assert rebuilt == mol
|
468
|
+
assert rebuilt == mol
|