rdworks 0.46.1__tar.gz → 0.48.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rdworks-0.46.1 → rdworks-0.48.1}/PKG-INFO +1 -1
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/__init__.py +1 -1
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/ionized.py +83 -59
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/torsion.py +1 -1
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/xtb/wrapper.py +49 -31
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks.egg-info/PKG-INFO +1 -1
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks.egg-info/SOURCES.txt +3 -5
- {rdworks-0.46.1 → rdworks-0.48.1}/tests/test_basics.py +1 -95
- rdworks-0.48.1/tests/test_ionized.py +34 -0
- rdworks-0.46.1/tests/test_decimals.py → rdworks-0.48.1/tests/test_round.py +79 -71
- rdworks-0.48.1/tests/test_torsion.py +98 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/tests/test_xtb.py +10 -6
- rdworks-0.46.1/tests/test_gypsumdl.py +0 -15
- rdworks-0.46.1/tests/test_iupac_name.py +0 -39
- rdworks-0.46.1/tests/test_nn_xtb.py +0 -91
- rdworks-0.46.1/tests/test_web.py +0 -378
- {rdworks-0.46.1 → rdworks-0.48.1}/LICENSE +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/README.md +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/pyproject.toml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/setup.cfg +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/autograph/__init__.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/autograph/autograph.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/autograph/centroid.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/autograph/dynamictreecut.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/autograph/nmrclust.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/autograph/rckmeans.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/bitqt/__init__.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/bitqt/bitqt.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/conf.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/descriptor.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/display.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/matchedseries.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/mol.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/mollibr.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/pka.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Asinex_fragment.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Astex_RO3.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Baell2010_PAINS/makexml.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Brenk2008_Dundee/makexml.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/CNS.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/BMS.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/Dundee.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/Glaxo.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/LINT.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/MLSMR.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/PAINS.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ChEMBL_Walters/makexml.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Hann1999_Glaxo/makexml.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Kazius2005/Kazius2005.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/Kazius2005/makexml.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ZINC_druglike.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ZINC_fragment.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ZINC_leadlike.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/fragment.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ionized/simple_smarts_pattern.csv +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/ionized/smarts_pattern.csv +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/misc/makexml.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/misc/reactive-part-2.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/misc/reactive-part-3.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/predefined/misc/reactive.xml +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/readin.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/rgroup.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/scaffold.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/std.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/stereoisomers.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/tautomers.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/testdata.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/units.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/utils.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/xml.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks/xtb/__init__.py +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks.egg-info/dependency_links.txt +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks.egg-info/requires.txt +0 -0
- {rdworks-0.46.1 → rdworks-0.48.1}/src/rdworks.egg-info/top_level.txt +0 -0
@@ -1,4 +1,5 @@
|
|
1
1
|
import importlib.resources
|
2
|
+
from types import SimpleNamespace
|
2
3
|
import pandas as pd
|
3
4
|
|
4
5
|
from rdkit import Chem
|
@@ -6,6 +7,11 @@ from rdkit import Chem
|
|
6
7
|
# adapted from https://github.com/dptech-corp/Uni-pKa/enumerator
|
7
8
|
|
8
9
|
class IonizedStates:
|
10
|
+
"""Knowledge-based enumeration of (de)protonated states"""
|
11
|
+
|
12
|
+
smarts_path = importlib.resources.files('rdworks.predefined.ionized')
|
13
|
+
ionization_patterns = pd.read_csv(smarts_path / 'simple_smarts_pattern.csv')
|
14
|
+
|
9
15
|
# Unreasonable chemical structures
|
10
16
|
unreasonable_patterns = [
|
11
17
|
Chem.MolFromSmarts(s) for s in [
|
@@ -31,67 +37,38 @@ class IonizedStates:
|
|
31
37
|
"[N+1](=O)-[O]-[H]",
|
32
38
|
]]
|
33
39
|
|
34
|
-
smarts_path = importlib.resources.files('rdworks.predefined.ionized')
|
35
|
-
protonation_patterns = pd.read_csv(smarts_path / 'simple_smarts_pattern.csv')
|
36
40
|
|
37
|
-
def __init__(self, smiles:str):
|
41
|
+
def __init__(self, smiles: str, charge_min: int = -2, charge_max: int = 2):
|
38
42
|
self.smiles = Chem.CanonSmiles(smiles)
|
43
|
+
self.charge_max = charge_max
|
44
|
+
self.charge_min = charge_min
|
45
|
+
|
39
46
|
self.rdmol = Chem.MolFromSmiles(self.smiles)
|
40
47
|
self.rdmol_H = Chem.AddHs(self.rdmol)
|
41
48
|
self.charge = Chem.GetFormalCharge(self.rdmol_H)
|
42
|
-
|
43
|
-
self.charge_min = -2
|
49
|
+
|
44
50
|
# initial states
|
45
51
|
self.states = {self.smiles : (self.rdmol_H, self.charge)}
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
self.protonate(self.smiles)
|
52
|
+
|
53
|
+
# initial ionization sites
|
54
|
+
self.sites = {self.smiles: self.set_ionization_sites(self.smiles)}
|
50
55
|
|
51
|
-
|
52
|
-
|
53
|
-
|
56
|
+
# pKa pairs:
|
57
|
+
# HA(acid) + H2O == A-(base) + H3O+ or HA+(acid) + H2O == A(base) + H3O+
|
58
|
+
self.pairs = []
|
54
59
|
|
55
|
-
|
56
|
-
def get_states_by_charge(self) -> dict:
|
60
|
+
# iteratively build an ensemble of ionized states
|
57
61
|
self.ensemble()
|
58
|
-
data = {}
|
59
|
-
for smiles, (romol, charge) in self.states.items():
|
60
|
-
if charge in data:
|
61
|
-
data[charge].append(smiles)
|
62
|
-
else:
|
63
|
-
data[charge] = [smiles]
|
64
62
|
|
65
|
-
return data
|
66
|
-
|
67
|
-
def get_states(self) -> list:
|
68
|
-
return [smiles for smiles in self.states]
|
69
|
-
|
70
|
-
|
71
|
-
def get_states_mol(self) -> list[Chem.Mol]:
|
72
|
-
return [romol for smiles, (romol, charge) in self.states.items()]
|
73
|
-
|
74
|
-
|
75
|
-
def get_num_states(self) -> int:
|
76
|
-
return len(self.states)
|
77
63
|
|
78
64
|
|
79
65
|
@staticmethod
|
80
|
-
def
|
81
|
-
Chem.SanitizeMol(rdmol)
|
82
|
-
rdmol = Chem.MolFromSmiles(Chem.MolToSmiles(rdmol))
|
83
|
-
rdmol_H = Chem.AddHs(rdmol)
|
84
|
-
rdmol = Chem.RemoveHs(rdmol_H)
|
85
|
-
return Chem.CanonSmiles(Chem.MolToSmiles(rdmol))
|
86
|
-
|
87
|
-
|
88
|
-
@staticmethod
|
89
|
-
def set_protonation_sites(smiles:str) -> tuple:
|
66
|
+
def set_ionization_sites(smiles: str) -> tuple:
|
90
67
|
subject = Chem.MolFromSmiles(smiles)
|
91
68
|
subject = Chem.AddHs(subject)
|
92
69
|
charge = Chem.GetFormalCharge(subject)
|
93
70
|
indices = [] # atom indices of protonation/deprotonation site(s)
|
94
|
-
for i, name, smarts, smarts_index, acid_or_base in IonizedStates.
|
71
|
+
for i, name, smarts, smarts_index, acid_or_base in IonizedStates.ionization_patterns.itertuples():
|
95
72
|
pattern = Chem.MolFromSmarts(smarts)
|
96
73
|
matches = subject.GetSubstructMatches(pattern)
|
97
74
|
# returns a list of tuples, where each tuple contains the indices
|
@@ -100,21 +77,34 @@ class IonizedStates:
|
|
100
77
|
if len(matches) > 0:
|
101
78
|
smarts_index = int(smarts_index)
|
102
79
|
indices += [(match[smarts_index], acid_or_base) for match in matches]
|
80
|
+
|
103
81
|
return (list(set(indices)), subject, charge)
|
82
|
+
|
83
|
+
|
84
|
+
@staticmethod
|
85
|
+
def clean_smiles(rdmol: Chem.Mol) -> str:
|
86
|
+
Chem.SanitizeMol(rdmol)
|
87
|
+
rdmol = Chem.MolFromSmiles(Chem.MolToSmiles(rdmol))
|
88
|
+
rdmol_H = Chem.AddHs(rdmol)
|
89
|
+
rdmol = Chem.RemoveHs(rdmol_H)
|
90
|
+
return Chem.CanonSmiles(Chem.MolToSmiles(rdmol))
|
104
91
|
|
105
92
|
|
106
93
|
@staticmethod
|
107
|
-
def reasonable(romol:Chem.Mol) -> bool:
|
94
|
+
def reasonable(romol: Chem.Mol) -> bool:
|
108
95
|
return all([len(romol.GetSubstructMatches(p)) == 0 for p in IonizedStates.unreasonable_patterns])
|
96
|
+
|
109
97
|
|
110
|
-
|
111
|
-
def protonate(self, smiles:str) -> int:
|
98
|
+
def ionize(self, smiles: str | None = None) -> int:
|
112
99
|
num_added_states = 0
|
113
100
|
|
114
|
-
if smiles
|
115
|
-
|
101
|
+
if smiles is None:
|
102
|
+
smiles = self.smiles
|
103
|
+
|
104
|
+
if smiles not in self.sites:
|
105
|
+
self.sites[smiles] = self.set_ionization_sites(smiles)
|
116
106
|
|
117
|
-
(indices, subject, charge) = self.
|
107
|
+
(indices, subject, charge) = self.sites[smiles]
|
118
108
|
|
119
109
|
if (charge >= self.charge_max) or (charge <= self.charge_min):
|
120
110
|
# formal charge will be increased or decreased by protonation/deprotonation
|
@@ -149,22 +139,56 @@ class IonizedStates:
|
|
149
139
|
edmol = Chem.AddHs(edmol)
|
150
140
|
|
151
141
|
# Clean up and save SMILES
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
if self.reasonable(
|
157
|
-
if
|
142
|
+
ionized_smiles = IonizedStates.clean_smiles(edmol)
|
143
|
+
ionized_mol = Chem.MolFromSmiles(ionized_smiles)
|
144
|
+
ionized_mol = Chem.AddHs(ionized_mol)
|
145
|
+
ionized_charge = Chem.GetFormalCharge(ionized_mol)
|
146
|
+
if self.reasonable(ionized_mol):
|
147
|
+
if ionized_smiles in self.states:
|
158
148
|
continue
|
159
|
-
self.states[
|
149
|
+
self.states[ionized_smiles] = (ionized_mol, ionized_charge)
|
160
150
|
num_added_states += 1
|
151
|
+
|
152
|
+
# store acid-base pair information for pKa
|
153
|
+
if acid_or_base == 'A':
|
154
|
+
self.pairs.append((i, smiles, ionized_smiles))
|
155
|
+
elif acid_or_base == 'B':
|
156
|
+
self.pairs.append((i, ionized_smiles, smiles))
|
161
157
|
|
162
158
|
return num_added_states
|
163
|
-
|
159
|
+
|
164
160
|
|
165
161
|
def ensemble(self) -> None:
|
162
|
+
# populate initial states
|
163
|
+
self.ionize()
|
164
|
+
|
165
|
+
# propagate
|
166
166
|
num_added_states = None
|
167
167
|
while num_added_states is None or num_added_states > 0:
|
168
|
-
states = self.states.copy()
|
168
|
+
states = self.states.copy() # dictionary
|
169
|
+
# self.ionize(smiles) below will change self.states
|
170
|
+
# so we cannot iterate self.states. Instead we will
|
171
|
+
# iterate over a copy of the self.states
|
169
172
|
for smiles in states:
|
170
|
-
num_added_states = self.
|
173
|
+
num_added_states = self.ionize(smiles)
|
174
|
+
|
175
|
+
|
176
|
+
def count(self) -> int:
|
177
|
+
return len(self.states)
|
178
|
+
|
179
|
+
|
180
|
+
def get_sites(self) -> dict:
|
181
|
+
return self.sites
|
182
|
+
|
183
|
+
|
184
|
+
def get_smiles(self) -> list[str]:
|
185
|
+
return [smiles for smiles in self.states]
|
186
|
+
|
187
|
+
|
188
|
+
def get_rdmol(self) -> list[Chem.Mol]:
|
189
|
+
return [romol for smiles, (romol, charge) in self.states.items()]
|
190
|
+
|
191
|
+
|
192
|
+
def get_pairs(self) -> list:
|
193
|
+
return self.pairs
|
194
|
+
|
@@ -462,7 +462,7 @@ def create_torsion_fragment(rdmol: Chem.Mol,
|
|
462
462
|
|
463
463
|
# fragmented
|
464
464
|
WBO_filtered = False
|
465
|
-
if GFN2xTB
|
465
|
+
if GFN2xTB.is_ready():
|
466
466
|
# filter candidate(s) by Wiberg bond order (WBO) if xTB is available
|
467
467
|
jk = tuple(sorted([j, k]))
|
468
468
|
wbo_passed_candidates = {}
|
@@ -45,42 +45,28 @@ class GFN2xTB:
|
|
45
45
|
resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
|
46
46
|
|
47
47
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
Returns:
|
52
|
-
str | None: version statement.
|
53
|
-
"""
|
54
|
-
cmd = ['xtb', '--version']
|
55
|
-
proc = subprocess.run(cmd, capture_output=True, text=True)
|
56
|
-
assert proc.returncode == 0, "GFN2xTB() Error: xtb not available"
|
57
|
-
for line in proc.stdout.split('\n'):
|
58
|
-
line = line.strip()
|
59
|
-
if 'version' in line:
|
60
|
-
return line
|
61
|
-
|
62
|
-
return None
|
63
|
-
|
64
|
-
|
65
|
-
def is_xtb_ready(self, cmd: str = 'xtb') -> bool:
|
48
|
+
@staticmethod
|
49
|
+
def is_xtb_ready() -> bool:
|
66
50
|
"""Check if xtb is available.
|
67
51
|
|
68
52
|
Returns:
|
69
53
|
bool: True if `xtb` is available, False otherwise.
|
70
54
|
"""
|
71
|
-
return shutil.which(
|
55
|
+
return shutil.which('xtb') is not None
|
72
56
|
|
73
57
|
|
74
|
-
|
58
|
+
@staticmethod
|
59
|
+
def is_cpx_ready() -> bool:
|
75
60
|
"""Checks if the CPCM-X command-line tool, `cpx`, is accessible in the system.
|
76
61
|
|
77
62
|
Returns:
|
78
63
|
bool: True if the cpx is found, False otherwise.
|
79
64
|
"""
|
80
|
-
return shutil.which(
|
65
|
+
return shutil.which('cpx') is not None
|
81
66
|
|
82
67
|
|
83
|
-
|
68
|
+
@staticmethod
|
69
|
+
def is_cpcmx_option_ready() -> bool:
|
84
70
|
"""Checks if xtb works with the `--cpcmx` option.
|
85
71
|
|
86
72
|
xtb distributed by the conda does not include CPCM-X function (as of June 17, 2025).
|
@@ -89,18 +75,50 @@ class GFN2xTB:
|
|
89
75
|
Returns:
|
90
76
|
bool: True if the --cpcmx option is working, False otherwise.
|
91
77
|
"""
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
line
|
98
|
-
|
99
|
-
|
78
|
+
if GFN2xTB.is_xtb_ready():
|
79
|
+
cmd = ['xtb', '--cpcmx']
|
80
|
+
proc = subprocess.run(cmd, capture_output=True, text=True)
|
81
|
+
# we are expecting an error because no input file is given
|
82
|
+
assert proc.returncode != 0
|
83
|
+
for line in proc.stdout.split('\n'):
|
84
|
+
line = line.strip()
|
85
|
+
if 'CPCM-X library was not included' in line:
|
86
|
+
return False
|
100
87
|
|
101
88
|
return True
|
102
89
|
|
103
90
|
|
91
|
+
@staticmethod
|
92
|
+
def is_ready() -> bool:
|
93
|
+
"""Check if `xtb` and `cpx` are accessible and `xtb --cpcmx` are available.
|
94
|
+
|
95
|
+
Returns:
|
96
|
+
bool: True if both `xtb` and `cpx` are accessible, False otherwise.
|
97
|
+
"""
|
98
|
+
return all([GFN2xTB.is_xtb_ready(),
|
99
|
+
GFN2xTB.is_cpx_ready(),
|
100
|
+
GFN2xTB.is_cpcmx_option_ready()])
|
101
|
+
|
102
|
+
|
103
|
+
@staticmethod
|
104
|
+
def version() -> str | None:
|
105
|
+
"""Check xtb version.
|
106
|
+
|
107
|
+
Returns:
|
108
|
+
str | None: version statement.
|
109
|
+
"""
|
110
|
+
if GFN2xTB.is_ready():
|
111
|
+
cmd = ['xtb', '--version']
|
112
|
+
proc = subprocess.run(cmd, capture_output=True, text=True)
|
113
|
+
assert proc.returncode == 0, "GFN2xTB() Error: xtb not available"
|
114
|
+
for line in proc.stdout.split('\n'):
|
115
|
+
line = line.strip()
|
116
|
+
if 'version' in line:
|
117
|
+
return line
|
118
|
+
|
119
|
+
return None
|
120
|
+
|
121
|
+
|
104
122
|
def to_xyz(self) -> str:
|
105
123
|
"""Export to XYZ formatted string.
|
106
124
|
|
@@ -345,7 +363,7 @@ class GFN2xTB:
|
|
345
363
|
elif water == 'alpb':
|
346
364
|
options += ['--alpb', 'water']
|
347
365
|
# it does not provide Gsolv contribution to the total energy
|
348
|
-
elif water == 'cpcmx' and self.
|
366
|
+
elif water == 'cpcmx' and self.is_cpcmx_option_ready():
|
349
367
|
options += ['--cpcmx', 'water']
|
350
368
|
|
351
369
|
# 'xtbout.json', 'xtbrestart', 'xtbtopo.mol', 'charges', and 'wbo' files will be
|
@@ -75,9 +75,7 @@ src/rdworks/predefined/misc/reactive.xml
|
|
75
75
|
src/rdworks/xtb/__init__.py
|
76
76
|
src/rdworks/xtb/wrapper.py
|
77
77
|
tests/test_basics.py
|
78
|
-
tests/
|
79
|
-
tests/
|
80
|
-
tests/
|
81
|
-
tests/test_nn_xtb.py
|
82
|
-
tests/test_web.py
|
78
|
+
tests/test_ionized.py
|
79
|
+
tests/test_round.py
|
80
|
+
tests/test_torsion.py
|
83
81
|
tests/test_xtb.py
|
@@ -51,24 +51,6 @@ drug_names = [
|
|
51
51
|
"Methixene", "Ethopropazine", "Aspirin", "Fluconazole", "Linezolid",
|
52
52
|
]
|
53
53
|
|
54
|
-
# Lahey, S.-L. J., Thien Phuc, T. N. & Rowley, C. N.
|
55
|
-
# Benchmarking Force Field and the ANI Neural Network Potentials for the
|
56
|
-
# Torsional Potential Energy Surface of Biaryl Drug Fragments.
|
57
|
-
# J. Chem. Inf. Model. 60, 6258–6268 (2020)
|
58
|
-
|
59
|
-
torsion_dataset_smiles = [
|
60
|
-
"C1(C2=CC=CN2)=CC=CC=C1",
|
61
|
-
"C1(C2=NC=CN2)=CC=CC=C1",
|
62
|
-
"C1(N2C=CC=C2)=NC=CC=N1",
|
63
|
-
"C1(C2=NC=NC=N2)=CC=CC=C1",
|
64
|
-
"C1(N2C=CC=C2)=CC=CC=C1",
|
65
|
-
"O=C(N1)C=CC=C1C2=COC=C2",
|
66
|
-
"C1(C2=NC=CC=N2)=NC=CC=N1",
|
67
|
-
"O=C(N1)C=CC=C1C2=NC=CN2",
|
68
|
-
]
|
69
|
-
|
70
|
-
torsion_dataset_names=["07", "09","20", "39", "10", "23", "12", "29"]
|
71
|
-
|
72
54
|
|
73
55
|
def test_init_mol():
|
74
56
|
mol = Mol(drug_smiles[0], drug_names[0])
|
@@ -458,82 +440,6 @@ def test_optimize_confs():
|
|
458
440
|
mol = mol.make_confs().optimize_confs(calculator='MMFF94')
|
459
441
|
|
460
442
|
|
461
|
-
def test_xtb_wrapper():
|
462
|
-
from rdworks.xtb.wrapper import GFN2xTB
|
463
|
-
assert GFN2xTB().version()
|
464
|
-
|
465
|
-
|
466
|
-
def test_torsion_fragment():
|
467
|
-
from rdworks.torsion import create_torsion_fragment
|
468
|
-
mol = Mol(molecule="CC(C)C1=C(C(=C(N1CC[C@H](C[C@H](CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4",
|
469
|
-
name="atorvastatin").make_confs(n=1)
|
470
|
-
ta = mol.torsion_atoms()
|
471
|
-
assert len(ta) == 12
|
472
|
-
# {0: (0, 1, 3, 7), 1: (3, 4, 32, 33), 2: (4, 5, 26, 27), 3: (7, 6, 19, 20),
|
473
|
-
# 4: (3, 7, 8, 9), 5: (7, 8, 9, 10), 6: (8, 9, 10, 18), 7: (18, 10, 11, 12),
|
474
|
-
# 8: (10, 11, 12, 17), 9: (17, 12, 13, 14), 10: (12, 13, 14, 15), 11: (36, 35, 34, 32)}
|
475
|
-
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(mol.confs[0].rdmol, ta[6])
|
476
|
-
assert frag_ijkl == (5, 6, 7, 12)
|
477
|
-
assert frag_created == True
|
478
|
-
assert wbo_filtered == True
|
479
|
-
|
480
|
-
mol2 = Mol(molecule='CC(=O)Nc1ccc(O)cc1', name='acetaminophen.3').make_confs(n=1)
|
481
|
-
ta2 = mol2.torsion_atoms()
|
482
|
-
# {0: (5, 4, 3, 1)}
|
483
|
-
assert len(ta2) == 1
|
484
|
-
|
485
|
-
(frag, frag_ijkl, frag_created, wbo_filtered) = create_torsion_fragment(mol2.confs[0].rdmol, ta2[0])
|
486
|
-
# expects no fragmentation
|
487
|
-
assert frag == mol2.confs[0].rdmol
|
488
|
-
assert frag_ijkl == ta2[0]
|
489
|
-
assert frag_created == False
|
490
|
-
assert wbo_filtered == False
|
491
|
-
|
492
|
-
|
493
|
-
def test_torsion_fragment_from_conf():
|
494
|
-
from rdworks.torsion import create_torsion_fragment
|
495
|
-
mol = Mol(molecule="CC(C)C1=C(C(=C(N1CC[C@H](C[C@H](CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4",
|
496
|
-
name="atorvastatin").make_confs(n=1)
|
497
|
-
ref_conf = mol.confs[0]
|
498
|
-
ta = ref_conf.torsion_atoms()
|
499
|
-
assert len(ta) == 12
|
500
|
-
# {0: (0, 1, 3, 7), 1: (3, 4, 32, 33), 2: (4, 5, 26, 27), 3: (7, 6, 19, 20),
|
501
|
-
# 4: (3, 7, 8, 9), 5: (7, 8, 9, 10), 6: (8, 9, 10, 18), 7: (18, 10, 11, 12),
|
502
|
-
# 8: (10, 11, 12, 17), 9: (17, 12, 13, 14), 10: (12, 13, 14, 15), 11: (36, 35, 34, 32)}
|
503
|
-
frag, frag_ijkl, frag_created, wbo_filtered = create_torsion_fragment(ref_conf.rdmol, ta[6])
|
504
|
-
assert frag_ijkl == (5, 6, 7, 12)
|
505
|
-
assert frag_created == True
|
506
|
-
assert wbo_filtered == True
|
507
|
-
|
508
|
-
ref_conf = ref_conf.torsion_energies(calculator='MMFF94', torsion_key=6, interval=15)
|
509
|
-
|
510
|
-
mol2 = Mol(molecule='CC(=O)Nc1ccc(O)cc1', name='acetaminophen.3').make_confs(n=1)
|
511
|
-
ref_conf2 = mol2.confs[0]
|
512
|
-
ta2 = ref_conf2.torsion_atoms()
|
513
|
-
# {0: (5, 4, 3, 1)}
|
514
|
-
assert len(ta2) == 1
|
515
|
-
frag, frag_ijkl, frag_created, wbo_filtered = create_torsion_fragment(ref_conf2.rdmol, ta2[0])
|
516
|
-
# expects no fragmentation
|
517
|
-
assert frag == ref_conf2.rdmol
|
518
|
-
assert frag_ijkl == ta2[0]
|
519
|
-
assert frag_created == False
|
520
|
-
assert wbo_filtered == False
|
521
|
-
|
522
|
-
ref_conf2 = ref_conf2.torsion_energies(calculator='MMFF94', interval=15)
|
523
|
-
ref_conf3 = ref_conf2.torsion_energies_one(calculator='MMFF94', indices=frag_ijkl)
|
524
|
-
|
525
|
-
|
526
|
-
def test_torsion_energies():
|
527
|
-
libr = MolLibr(torsion_dataset_smiles, torsion_dataset_names)
|
528
|
-
with open(workdir / 'test_torsion_energies.html', 'w') as f:
|
529
|
-
for mol in libr[:1]:
|
530
|
-
mol = mol.make_confs().drop_confs(similar=True, similar_rmsd=0.3).sort_confs().rename()
|
531
|
-
mol = mol.optimize_confs(calculator='MMFF94').torsion_energies(calculator='MMFF94',
|
532
|
-
interval=15)
|
533
|
-
f.write(mol.to_html())
|
534
|
-
print(mol.dumps('torsion', decimals=2))
|
535
|
-
|
536
|
-
|
537
443
|
def test_workflow():
|
538
444
|
state_mol = Mol('Cc1nc2cc(Cl)nc(Cl)c2nc1C', 'A-1250')
|
539
445
|
state_mol = state_mol.make_confs(n=50, method='ETKDG')
|
@@ -559,4 +465,4 @@ def test_serialization():
|
|
559
465
|
rebuilt = Mol().deserialize(serialized)
|
560
466
|
assert rebuilt.count() == 10
|
561
467
|
assert rebuilt.name == name
|
562
|
-
assert rebuilt == mol
|
468
|
+
assert rebuilt == mol
|
@@ -0,0 +1,34 @@
|
|
1
|
+
from rdworks import IonizedStates
|
2
|
+
|
3
|
+
|
4
|
+
def test_ionizedstate():
|
5
|
+
smiles = 'O=C(NCCCC)[C@H](CCC1)N1[C@@H](CC)C2=NN=C(CC3=CC=C(C)C=C3)O2'
|
6
|
+
x = IonizedStates(smiles)
|
7
|
+
|
8
|
+
assert x.count() == 7
|
9
|
+
|
10
|
+
d = x.get_sites()
|
11
|
+
print('sites:')
|
12
|
+
for k, v in d.items():
|
13
|
+
print(k, v)
|
14
|
+
print()
|
15
|
+
|
16
|
+
indices = d['CCCCNC(=O)[C@@H]1CCCN1[C@@H](CC)c1nnc(Cc2ccc(C)cc2)o1'][0]
|
17
|
+
|
18
|
+
assert (11, 'B') in indices
|
19
|
+
assert (16, 'B') in indices
|
20
|
+
assert (17, 'B') in indices
|
21
|
+
|
22
|
+
expected = ['CCCCNC(=O)[C@@H]1CCCN1[C@@H](CC)c1nnc(Cc2ccc(C)cc2)o1',
|
23
|
+
'CCCCNC(=O)[C@@H]1CCCN1[C@@H](CC)c1[nH+]nc(Cc2ccc(C)cc2)o1',
|
24
|
+
'CCCCNC(=O)[C@@H]1CCC[NH+]1[C@@H](CC)c1nnc(Cc2ccc(C)cc2)o1',
|
25
|
+
'CCCCNC(=O)[C@@H]1CCCN1[C@@H](CC)c1n[nH+]c(Cc2ccc(C)cc2)o1',
|
26
|
+
'CCCCNC(=O)[C@@H]1CCC[NH+]1[C@@H](CC)c1[nH+]nc(Cc2ccc(C)cc2)o1',
|
27
|
+
'CCCCNC(=O)[C@@H]1CCCN1[C@@H](CC)c1[nH+][nH+]c(Cc2ccc(C)cc2)o1',
|
28
|
+
'CCCCNC(=O)[C@@H]1CCC[NH+]1[C@@H](CC)c1n[nH+]c(Cc2ccc(C)cc2)o1']
|
29
|
+
results = x.get_smiles()
|
30
|
+
assert set(expected).intersection(set(results)) == set(expected)
|
31
|
+
|
32
|
+
|
33
|
+
if __name__ == '__main__':
|
34
|
+
test_ionizedstate()
|