rdworks 0.52.1__tar.gz → 0.54.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rdworks-0.52.1 → rdworks-0.54.2}/PKG-INFO +1 -1
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/__init__.py +3 -3
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/microstates.py +23 -15
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/mol.py +32 -1
- rdworks-0.54.2/src/rdworks/stereoisomers.py +67 -0
- rdworks-0.54.2/src/rdworks/workflow.py +85 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/xtb/wrapper.py +7 -6
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks.egg-info/PKG-INFO +1 -1
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks.egg-info/SOURCES.txt +1 -1
- {rdworks-0.52.1 → rdworks-0.54.2}/tests/test_basics.py +13 -1
- {rdworks-0.52.1 → rdworks-0.54.2}/tests/test_ionized.py +25 -25
- rdworks-0.52.1/src/rdworks/stereoisomers.py +0 -127
- rdworks-0.52.1/src/rdworks/tautomers.py +0 -20
- {rdworks-0.52.1 → rdworks-0.54.2}/LICENSE +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/README.md +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/pyproject.toml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/setup.cfg +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/autograph/__init__.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/autograph/autograph.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/autograph/centroid.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/autograph/dynamictreecut.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/autograph/nmrclust.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/autograph/rckmeans.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/bitqt/__init__.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/bitqt/bitqt.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/conf.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/descriptor.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/display.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/ionized.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/matchedseries.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/mollibr.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/pka.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Asinex_fragment.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Astex_RO3.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Baell2010_PAINS/makexml.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Brenk2008_Dundee/makexml.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/CNS.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/BMS.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/Dundee.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/Glaxo.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/LINT.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/MLSMR.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/PAINS.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ChEMBL_Walters/makexml.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Hann1999_Glaxo/makexml.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Kazius2005/Kazius2005.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/Kazius2005/makexml.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ZINC_druglike.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ZINC_fragment.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ZINC_leadlike.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/fragment.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ionized/simple_smarts_pattern.csv +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/ionized/smarts_pattern.csv +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/misc/makexml.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/misc/reactive-part-2.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/misc/reactive-part-3.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/predefined/misc/reactive.xml +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/readin.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/rgroup.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/scaffold.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/std.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/testdata.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/torsion.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/units.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/utils.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/xml.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks/xtb/__init__.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks.egg-info/dependency_links.txt +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks.egg-info/requires.txt +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/src/rdworks.egg-info/top_level.txt +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/tests/test_qupkake.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/tests/test_round.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/tests/test_torsion.py +0 -0
- {rdworks-0.52.1 → rdworks-0.54.2}/tests/test_xtb.py +0 -0
@@ -1,11 +1,11 @@
|
|
1
|
-
__version__ = '0.
|
1
|
+
__version__ = '0.54.2'
|
2
2
|
|
3
3
|
from rdworks.conf import Conf
|
4
4
|
from rdworks.mol import Mol
|
5
5
|
from rdworks.mollibr import MolLibr
|
6
6
|
|
7
|
-
from rdworks.
|
8
|
-
|
7
|
+
from rdworks.workflow import complete_stereoisomers, complete_tautomers
|
8
|
+
|
9
9
|
from rdworks.ionized import IonizedStates
|
10
10
|
|
11
11
|
from rdworks.readin import read_csv, merge_csv, read_dataframe, read_smi, read_sdf, read_mae
|
@@ -1,12 +1,16 @@
|
|
1
1
|
import numpy as np
|
2
2
|
import math
|
3
3
|
import itertools
|
4
|
+
import logging
|
4
5
|
|
5
6
|
from types import SimpleNamespace
|
6
7
|
from rdworks import Conf, Mol
|
7
8
|
from rdworks.xtb.wrapper import GFN2xTB
|
8
9
|
|
9
10
|
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
|
10
14
|
kT = 0.001987 * 298.0 # (kcal/mol K), standard condition
|
11
15
|
C = math.log(10) * kT
|
12
16
|
|
@@ -23,9 +27,10 @@ class Microstates():
|
|
23
27
|
self.mols = []
|
24
28
|
self.reference = None
|
25
29
|
|
26
|
-
|
30
|
+
|
31
|
+
def enumerate(self) -> None:
|
32
|
+
# Qu pKake results must be stored at .confs
|
27
33
|
for conf in self.origin:
|
28
|
-
print(conf.props)
|
29
34
|
pka = conf.props.get('pka', None)
|
30
35
|
if pka is None:
|
31
36
|
# no protonation/deprotonation sites
|
@@ -49,7 +54,7 @@ class Microstates():
|
|
49
54
|
|
50
55
|
for (p, d) in PD:
|
51
56
|
conf = self.origin.confs[0].copy()
|
52
|
-
conf = conf.protonate(p).deprotonate(d).optimize(calculator=calculator)
|
57
|
+
conf = conf.protonate(p).deprotonate(d).optimize(calculator=self.calculator)
|
53
58
|
charge = len(p) - len(d)
|
54
59
|
self.states.append(SimpleNamespace(
|
55
60
|
charge=charge,
|
@@ -83,7 +88,7 @@ class Microstates():
|
|
83
88
|
return float(np.dot(p, pe_array))
|
84
89
|
|
85
90
|
|
86
|
-
def
|
91
|
+
def populate(self) -> None:
|
87
92
|
for microstate in self.states:
|
88
93
|
mol = Mol(microstate.conf).make_confs(n=4).optimize_confs()
|
89
94
|
# mol = mol.drop_confs(similar=True, similar_rmsd=0.3, verbose=True)
|
@@ -94,23 +99,21 @@ class Microstates():
|
|
94
99
|
conf = conf.optimize(calculator=self.calculator, verbose=True)
|
95
100
|
# GFN2xTB requires 3D coordinates
|
96
101
|
# xtb = GFN2xTB(conf.rdmol).singlepoint(water='cpcmx', verbose=True)
|
97
|
-
|
98
|
-
|
102
|
+
PE.append(conf.potential_energy(calculator=self.calculator))
|
103
|
+
# xtb = GFN2xTB(conf.rdmol).singlepoint(verbose=True)
|
99
104
|
# SimpleNamespace(
|
100
105
|
# PE = datadict['total energy'] * hartree2kcalpermol,
|
101
106
|
# Gsolv = Gsolv,
|
102
107
|
# charges = datadict['partial charges'],
|
103
108
|
# wbo = Wiberg_bond_orders,
|
104
109
|
# )
|
105
|
-
print("PE=", PE)
|
106
110
|
microstate.PE = self.Boltzmann_weighted_average(PE)
|
107
|
-
|
108
|
-
|
111
|
+
logger.info(f"PE= {PE}")
|
112
|
+
logger.info(f"Boltzmann weighted= {microstate.PE}")
|
109
113
|
self.mols.append(mol)
|
110
|
-
print("microstate.energy", microstate)
|
111
114
|
|
112
115
|
|
113
|
-
def
|
116
|
+
def get_populations(self, pH: float) -> list[tuple]:
|
114
117
|
# set the lowest dG as the reference
|
115
118
|
self.reference = self.states[np.argmin([microstate.PE for microstate in self.states])]
|
116
119
|
for microstate in self.states:
|
@@ -120,7 +123,7 @@ class Microstates():
|
|
120
123
|
dG.append((microstate.PE - self.reference.PE) + microstate.delta_m * C * pH)
|
121
124
|
dG = np.array(dG)
|
122
125
|
|
123
|
-
|
126
|
+
logger.info(f"dG= {dG}")
|
124
127
|
Boltzmann_factors = np.exp(-dG/kT)
|
125
128
|
Z = np.sum(Boltzmann_factors)
|
126
129
|
p = Boltzmann_factors/Z
|
@@ -128,11 +131,16 @@ class Microstates():
|
|
128
131
|
# [(0, p0), (1, p1), ...]
|
129
132
|
|
130
133
|
return idx_p
|
134
|
+
|
135
|
+
|
136
|
+
def get_ensemble(self) -> list[Mol]:
|
137
|
+
return self.mols
|
138
|
+
|
139
|
+
|
140
|
+
def get_mol(self, idx: int) -> Mol:
|
141
|
+
return self.mols[idx]
|
131
142
|
|
132
143
|
|
133
144
|
def count(self) -> int:
|
134
145
|
return len(self.states)
|
135
146
|
|
136
|
-
|
137
|
-
def get_mol(self, idx: int) -> Mol:
|
138
|
-
return self.mols[idx]
|
@@ -28,7 +28,6 @@ from rdkit.Chem import (
|
|
28
28
|
Draw, rdDepictor, inchi,
|
29
29
|
rdDistGeom, rdMolAlign, rdMolTransforms, rdmolops
|
30
30
|
)
|
31
|
-
from rdkit.Chem.Draw import rdMolDraw2D
|
32
31
|
from rdkit.ML.Cluster import Butina
|
33
32
|
from PIL import Image
|
34
33
|
|
@@ -43,6 +42,8 @@ from rdworks.autograph import NMRCLUST, DynamicTreeCut, RCKmeans, AutoGraph
|
|
43
42
|
from rdworks.bitqt import BitQT
|
44
43
|
from rdworks.torsion import create_torsion_fragment, get_torsion_atoms
|
45
44
|
from rdworks.display import render_svg, render_png
|
45
|
+
from rdworks.stereoisomers import enumerate_stereoisomers, enumerate_ring_bond_stereoisomers
|
46
|
+
|
46
47
|
|
47
48
|
from scour.scour import scourString
|
48
49
|
|
@@ -329,6 +330,36 @@ class Mol:
|
|
329
330
|
return self
|
330
331
|
|
331
332
|
|
333
|
+
def count_stereoisomers(self) -> int:
|
334
|
+
"""Counts number of all possible stereoisomers ignoring the current stereochemistry.
|
335
|
+
|
336
|
+
Returns:
|
337
|
+
int: number of stereoisomers.
|
338
|
+
"""
|
339
|
+
|
340
|
+
ring_bond_stereo_info = self.get_ring_bond_stereo()
|
341
|
+
mol = self.copy()
|
342
|
+
# remove stereochemistry
|
343
|
+
mol = mol.remove_stereo()
|
344
|
+
rdmols = enumerate_stereoisomers(mol.rdmol)
|
345
|
+
# ring bond stereo is not properly enumerated
|
346
|
+
# cis/trans information is lost if stereochemistry is removed,
|
347
|
+
# which cannot be enumerated by EnumerateStereoisomers() function
|
348
|
+
# so enumerate_ring_bond_stereoisomers() is introduced
|
349
|
+
if len(ring_bond_stereo_info) > 0:
|
350
|
+
ring_cis_trans = []
|
351
|
+
for rdmol in rdmols:
|
352
|
+
ring_cis_trans += enumerate_ring_bond_stereoisomers(rdmol,
|
353
|
+
ring_bond_stereo_info,
|
354
|
+
override=True)
|
355
|
+
if len(ring_cis_trans) > 0:
|
356
|
+
rdmols = ring_cis_trans
|
357
|
+
|
358
|
+
unique_rdmols = set([Chem.MolToSmiles(rdmol) for rdmol in rdmols])
|
359
|
+
|
360
|
+
return len(unique_rdmols)
|
361
|
+
|
362
|
+
|
332
363
|
def make_confs(self, n: int = 50, method: str = 'ETKDG', **kwargs) -> Self:
|
333
364
|
"""Generates 3D conformers.
|
334
365
|
|
@@ -0,0 +1,67 @@
|
|
1
|
+
from rdkit import Chem
|
2
|
+
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
|
3
|
+
|
4
|
+
|
5
|
+
def enumerate_stereoisomers(rdmol: Chem.Mol) -> list[Chem.Mol]:
|
6
|
+
"""Returns enumerated stereoisomers.
|
7
|
+
|
8
|
+
Args:
|
9
|
+
rdmol (Chem.Mol): input molecule.
|
10
|
+
|
11
|
+
Returns:
|
12
|
+
List[Chem.Mol]: a list of enumerated stereoisomers.
|
13
|
+
"""
|
14
|
+
return list(EnumerateStereoisomers(
|
15
|
+
rdmol,
|
16
|
+
options=StereoEnumerationOptions(
|
17
|
+
tryEmbedding=False,
|
18
|
+
onlyUnassigned=True,
|
19
|
+
maxIsomers=1024,
|
20
|
+
rand=None,
|
21
|
+
unique=True,
|
22
|
+
onlyStereoGroups=False,
|
23
|
+
)))
|
24
|
+
|
25
|
+
|
26
|
+
def enumerate_ring_bond_stereoisomers(rdmol: Chem.Mol,
|
27
|
+
ring_bond_stereo_info: list[tuple],
|
28
|
+
override: bool = False) -> list[Chem.Mol]:
|
29
|
+
"""Enumerates unspecified double bond stereochemistry (cis/trans).
|
30
|
+
|
31
|
+
<pre>
|
32
|
+
a1 a4 a1
|
33
|
+
\ / \
|
34
|
+
a2=a3 a2=a3
|
35
|
+
\
|
36
|
+
a4
|
37
|
+
</pre>
|
38
|
+
|
39
|
+
Args:
|
40
|
+
rdmol (Chem.Mol): input molecule.
|
41
|
+
ring_bond_stereo_info (List[Tuple]):
|
42
|
+
ring_bond_stereo_info will be set when .remove_stereo() is called.
|
43
|
+
bond_stereo_info = [(bond_idx, bond_stereo_descriptor), ..] where
|
44
|
+
bond_stereo_descriptor is `Chem.StereoDescriptor.Bond_Cis` or
|
45
|
+
`Chem.StereoDescriptor.Bond_Trans`, or `Chem.StereoDescriptor.NoValue`.
|
46
|
+
override (bool, optional): _description_. Defaults to False.
|
47
|
+
|
48
|
+
Returns:
|
49
|
+
List[Chem.Mol]: list of enumerated stereoisomers.
|
50
|
+
"""
|
51
|
+
isomers = []
|
52
|
+
for bond_idx, bond_stereo_desc in ring_bond_stereo_info:
|
53
|
+
if (bond_stereo_desc == Chem.StereoDescriptor.NoValue) or override:
|
54
|
+
bond = rdmol.GetBondWithIdx(bond_idx)
|
55
|
+
(a2,a3) = (bond.GetBeginAtom(), bond.GetEndAtom())
|
56
|
+
a2_idx = a2.GetIdx()
|
57
|
+
a3_idx = a3.GetIdx()
|
58
|
+
a1_idx = sorted([(a.GetIdx(), a.GetAtomicNum()) for a in a2.GetNeighbors() if a.GetIdx() != a3_idx], key=lambda x: x[1], reverse=True)[0][0]
|
59
|
+
a4_idx = sorted([(a.GetIdx(), a.GetAtomicNum()) for a in a3.GetNeighbors() if a.GetIdx() != a2_idx], key=lambda x: x[1], reverse=True)[0][0]
|
60
|
+
bond.SetStereoAtoms(a1_idx, a4_idx) # need to set reference atoms
|
61
|
+
# cis
|
62
|
+
bond.SetStereo(Chem.BondStereo.STEREOCIS)
|
63
|
+
isomers.append(Chem.Mol(rdmol))
|
64
|
+
# trans
|
65
|
+
bond.SetStereo(Chem.BondStereo.STEREOTRANS)
|
66
|
+
isomers.append(Chem.Mol(rdmol))
|
67
|
+
return isomers
|
@@ -0,0 +1,85 @@
|
|
1
|
+
|
2
|
+
from rdworks.stereoisomers import enumerate_stereoisomers, enumerate_ring_bond_stereoisomers
|
3
|
+
from rdworks.mol import Mol
|
4
|
+
from rdworks.mollibr import MolLibr
|
5
|
+
|
6
|
+
from rdkit import Chem
|
7
|
+
from rdkit.Chem.MolStandardize import rdMolStandardize
|
8
|
+
|
9
|
+
|
10
|
+
def complete_stereoisomers(molecular_input: str | Chem.Mol | Mol,
|
11
|
+
name: str | None = None,
|
12
|
+
std: bool = False,
|
13
|
+
override: bool = False,
|
14
|
+
**kwargs) -> MolLibr:
|
15
|
+
"""Completes stereoisomers and returns a rdworks.MolLibr.
|
16
|
+
|
17
|
+
Args:
|
18
|
+
molecular_input (Union[Mol, str, Chem.Mol]): input molecule.
|
19
|
+
name (Optional[str], optional): name of the molecule. Defaults to None.
|
20
|
+
std (bool, optional): whether to standardize the input. Defaults to False.
|
21
|
+
override (bool, optional): whether to override input stereoisomers. Defaults to False.
|
22
|
+
|
23
|
+
Raises:
|
24
|
+
TypeError: if `molecular_input` is not rdworks.Mol, SMILES, or rdkit.Chem.Mol object.
|
25
|
+
|
26
|
+
Returns:
|
27
|
+
MolLibr: a library of complete stereoisomers.
|
28
|
+
"""
|
29
|
+
if isinstance(molecular_input, Mol):
|
30
|
+
if name:
|
31
|
+
mol = molecular_input.rename(name)
|
32
|
+
else:
|
33
|
+
mol = molecular_input
|
34
|
+
elif isinstance(molecular_input, str) or isinstance(molecular_input, Chem.Mol):
|
35
|
+
mol = Mol(molecular_input, name, std)
|
36
|
+
else:
|
37
|
+
raise TypeError('complete_stereoisomers() expects rdworks.Mol, SMILES or rdkit.Chem.Mol object')
|
38
|
+
|
39
|
+
ring_bond_stereo_info = mol.get_ring_bond_stereo()
|
40
|
+
|
41
|
+
if override:
|
42
|
+
mol = mol.remove_stereo()
|
43
|
+
|
44
|
+
rdmols = enumerate_stereoisomers(mol.rdmol)
|
45
|
+
# ring bond stereo is not properly enumerated
|
46
|
+
# cis/trans information is lost if stereochemistry is removed,
|
47
|
+
# which cannot be enumerated by EnumerateStereoisomers() function
|
48
|
+
# so enumerate_ring_bond_stereoisomers() is introduced
|
49
|
+
if len(ring_bond_stereo_info) > 0:
|
50
|
+
ring_cis_trans = []
|
51
|
+
for rdmol in rdmols:
|
52
|
+
ring_cis_trans += enumerate_ring_bond_stereoisomers(rdmol,
|
53
|
+
ring_bond_stereo_info,
|
54
|
+
override=override)
|
55
|
+
if len(ring_cis_trans) > 0:
|
56
|
+
rdmols = ring_cis_trans
|
57
|
+
|
58
|
+
if len(rdmols) > 1:
|
59
|
+
libr = MolLibr(rdmols).unique().rename(mol.name, sep='.').compute(**kwargs)
|
60
|
+
else:
|
61
|
+
libr = MolLibr(rdmols).rename(mol.name).compute(**kwargs)
|
62
|
+
|
63
|
+
for _ in libr:
|
64
|
+
_.props.update(mol.props)
|
65
|
+
|
66
|
+
return libr
|
67
|
+
|
68
|
+
|
69
|
+
|
70
|
+
def complete_tautomers(mol: Mol, **kwargs) -> MolLibr:
|
71
|
+
"""Returns a library of enumerated tautomers.
|
72
|
+
|
73
|
+
Args:
|
74
|
+
mol (Mol): input molecule.
|
75
|
+
|
76
|
+
Returns:
|
77
|
+
MolLibr: a library of enumerated tautomers.
|
78
|
+
"""
|
79
|
+
enumerator = rdMolStandardize.TautomerEnumerator()
|
80
|
+
rdmols = list(enumerator.Enumerate(mol.rdmol))
|
81
|
+
|
82
|
+
if len(rdmols) > 1:
|
83
|
+
return MolLibr(rdmols).unique().rename(mol.name, sep='.').compute(**kwargs)
|
84
|
+
|
85
|
+
return MolLibr(rdmols).compute(**kwargs)
|
@@ -79,7 +79,8 @@ class GFN2xTB:
|
|
79
79
|
proc = subprocess.run(['xtb', test_geometry, '--opt'],
|
80
80
|
cwd=temp_dir,
|
81
81
|
capture_output=True,
|
82
|
-
text=True
|
82
|
+
text=True,
|
83
|
+
encoding='utf-8')
|
83
84
|
assert proc.returncode == 0
|
84
85
|
|
85
86
|
return True
|
@@ -122,7 +123,7 @@ $ cp -r xtb-dist/share /usr/local/ """)
|
|
122
123
|
if GFN2xTB.is_xtb_ready():
|
123
124
|
with tempfile.TemporaryDirectory() as temp_dir: # tmpdir is a string
|
124
125
|
cmd = ['xtb', '--cpcmx']
|
125
|
-
proc = subprocess.run(cmd, cwd=temp_dir, capture_output=True, text=True)
|
126
|
+
proc = subprocess.run(cmd, cwd=temp_dir, capture_output=True, text=True, encoding='utf-8')
|
126
127
|
# we are expecting an error because no input file is given
|
127
128
|
assert proc.returncode != 0
|
128
129
|
for line in proc.stdout.split('\n'):
|
@@ -156,7 +157,7 @@ $ cp -r xtb-dist/share /usr/local/ """)
|
|
156
157
|
if GFN2xTB.is_xtb_ready():
|
157
158
|
with tempfile.TemporaryDirectory() as temp_dir: # tmpdir is a string
|
158
159
|
cmd = ['xtb', '--version']
|
159
|
-
proc = subprocess.run(cmd, cwd=temp_dir, capture_output=True, text=True)
|
160
|
+
proc = subprocess.run(cmd, cwd=temp_dir, capture_output=True, text=True, encoding='utf-8')
|
160
161
|
assert proc.returncode == 0, "GFN2xTB() Error: xtb not available"
|
161
162
|
match = re.search('xtb\s+version\s+(?P<version>[\d.]+)', proc.stdout)
|
162
163
|
if match:
|
@@ -358,7 +359,7 @@ $ cp -r xtb-dist/share /usr/local/ """)
|
|
358
359
|
|
359
360
|
# 'xtbout.json', 'xtbrestart', 'xtbtopo.mol', 'charges', and 'wbo' files will be
|
360
361
|
# created in the current working directory.
|
361
|
-
proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
|
362
|
+
proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True, encoding='utf-8')
|
362
363
|
# if proc.returncode == 0:
|
363
364
|
# print("Standard Output:")
|
364
365
|
# print(proc.stdout)
|
@@ -461,7 +462,7 @@ $ cp -r xtb-dist/share /usr/local/ """)
|
|
461
462
|
if verbose:
|
462
463
|
logger.info(f"optimize() {' '.join(cmd+options)}")
|
463
464
|
|
464
|
-
proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
|
465
|
+
proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True, encoding='utf-8')
|
465
466
|
|
466
467
|
if proc.returncode == 0 and xtbout_path.is_file():
|
467
468
|
with open(xtbout_path, 'r') as f:
|
@@ -527,7 +528,7 @@ $ cp -r xtb-dist/share /usr/local/ """)
|
|
527
528
|
elif water == 'alpb':
|
528
529
|
options += ['--alpb', 'water']
|
529
530
|
|
530
|
-
proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True)
|
531
|
+
proc = subprocess.run(cmd + options, cwd=temp_dir, capture_output=True, text=True, encoding='utf-8')
|
531
532
|
# output files: xtb_esp.cosmo, xtb_esp.dat, xtb_esp_profile.dat
|
532
533
|
|
533
534
|
if proc.returncode == 0 and xtb_esp_dat.is_file():
|
@@ -16,11 +16,11 @@ src/rdworks/rgroup.py
|
|
16
16
|
src/rdworks/scaffold.py
|
17
17
|
src/rdworks/std.py
|
18
18
|
src/rdworks/stereoisomers.py
|
19
|
-
src/rdworks/tautomers.py
|
20
19
|
src/rdworks/testdata.py
|
21
20
|
src/rdworks/torsion.py
|
22
21
|
src/rdworks/units.py
|
23
22
|
src/rdworks/utils.py
|
23
|
+
src/rdworks/workflow.py
|
24
24
|
src/rdworks/xml.py
|
25
25
|
src/rdworks.egg-info/PKG-INFO
|
26
26
|
src/rdworks.egg-info/SOURCES.txt
|
@@ -400,7 +400,19 @@ def test_complete_stereoisomers():
|
|
400
400
|
for mol in MolLibr(drug_smiles, drug_names):
|
401
401
|
isomer_libr += rdworks.complete_stereoisomers(mol)
|
402
402
|
assert isomer_libr.count() >= 25
|
403
|
-
|
403
|
+
|
404
|
+
|
405
|
+
def test_count_stereoisomers():
|
406
|
+
"""count all possible stereoisomers ignoring current stereochemistry assignment"""
|
407
|
+
m = Mol('Cc1nc2c(-c3ccc(Cl)cc3F)nc(N3CCN(C(=O)C(F)F)CC3)cn2c(=O)c1C')
|
408
|
+
assert m.count_stereoisomers() == 1
|
409
|
+
m = Mol('CN1C=C([C@H]2CN(C3=NC(C4=CC=C(Cl)C=C4F)=C4N=C5CCCCN5C(=O)C4=C3)CCO2)C=N1')
|
410
|
+
assert m.count_stereoisomers() == 2
|
411
|
+
m = Mol('Cc1nc2c(-c3ccc(Cl)cc3F)nc(N3CCN(S(=O)(=O)N4C[C@@H](C)O[C@@H](C)C4)CC3)cn2c(=O)c1C')
|
412
|
+
assert m.count_stereoisomers() == 3
|
413
|
+
m = Mol('Cc1cc([C@H]2CN(c3cc4nc(C)c(C)c(=O)n4c(-c4ccc(Cl)cc4F)n3)C[C@@H](C)O2)ccn1')
|
414
|
+
assert m.count_stereoisomers() == 4
|
415
|
+
|
404
416
|
|
405
417
|
def test_cluster():
|
406
418
|
libr = rdworks.read_smi(datadir / "cdk2.smi.gz", progress=False)
|
@@ -36,31 +36,31 @@ def test_ionizedstate():
|
|
36
36
|
assert set(expected).intersection(set(results)) == set(expected)
|
37
37
|
|
38
38
|
|
39
|
-
def test_gypsum_dl():
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
39
|
+
# def test_gypsum_dl():
|
40
|
+
# import gypsum_dl
|
41
|
+
# smiles = 'O=C(NCCCC)[C@H](CCC1)N1[C@@H](CC)C2=NN=C(CC3=CC=C(C)C=C3)O2'
|
42
|
+
# state_smiles = list(
|
43
|
+
# gypsum_dl.GypsumDL(smiles,
|
44
|
+
# min_ph=6.4,
|
45
|
+
# max_ph=8.4,
|
46
|
+
# pka_precision=1.0,
|
47
|
+
# thoroughness=3,
|
48
|
+
# max_variants_per_compound=5,
|
49
|
+
# second_embed=False,
|
50
|
+
# skip_optimize_geometry=False,
|
51
|
+
# skip_alternate_ring_conformations=False,
|
52
|
+
# skip_adding_hydrogen=False,
|
53
|
+
# skip_making_tautomers=False,
|
54
|
+
# skip_enumerate_chiral_mol=False,
|
55
|
+
# skip_enumerate_double_bonds=False,
|
56
|
+
# let_tautomers_change_chirality=False,
|
57
|
+
# use_durrant_lab_filters=True,
|
58
|
+
# job_manager='serial',
|
59
|
+
# num_processors=1,
|
60
|
+
# ))
|
61
|
+
# for smi in state_smiles:
|
62
|
+
# print(smi)
|
63
63
|
|
64
64
|
if __name__ == '__main__':
|
65
65
|
test_ionizedstate()
|
66
|
-
test_gypsum_dl()
|
66
|
+
# test_gypsum_dl()
|
@@ -1,127 +0,0 @@
|
|
1
|
-
from typing import List, Tuple, Union, Optional
|
2
|
-
|
3
|
-
from rdkit import Chem
|
4
|
-
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
|
5
|
-
|
6
|
-
from .mol import Mol
|
7
|
-
from .mollibr import MolLibr
|
8
|
-
|
9
|
-
|
10
|
-
def _enum_stereoisomers(rdmol:Chem.Mol) -> List[Chem.Mol]:
|
11
|
-
"""Returns enumerated stereoisomers.
|
12
|
-
|
13
|
-
Args:
|
14
|
-
rdmol (Chem.Mol): input molecule.
|
15
|
-
|
16
|
-
Returns:
|
17
|
-
List[Chem.Mol]: a list of enumerated stereoisomers.
|
18
|
-
"""
|
19
|
-
return list(EnumerateStereoisomers(
|
20
|
-
rdmol,
|
21
|
-
options=StereoEnumerationOptions(
|
22
|
-
tryEmbedding=False,
|
23
|
-
onlyUnassigned=True,
|
24
|
-
maxIsomers=1024,
|
25
|
-
rand=None,
|
26
|
-
unique=True,
|
27
|
-
onlyStereoGroups=False,
|
28
|
-
)))
|
29
|
-
|
30
|
-
|
31
|
-
def _enum_ring_bond_stereo(rdmol:Chem.Mol, ring_bond_stereo_info:List[Tuple],
|
32
|
-
override:bool=False) -> List[Chem.Mol]:
|
33
|
-
"""Enumerates unspecified double bond stereochemistry (cis/trans).
|
34
|
-
|
35
|
-
<pre>
|
36
|
-
a1 a4 a1
|
37
|
-
\ / \
|
38
|
-
a2=a3 a2=a3
|
39
|
-
\
|
40
|
-
a4
|
41
|
-
</pre>
|
42
|
-
|
43
|
-
Args:
|
44
|
-
rdmol (Chem.Mol): input molecule.
|
45
|
-
ring_bond_stereo_info (List[Tuple]):
|
46
|
-
ring_bond_stereo_info will be set when .remove_stereo() is called.
|
47
|
-
bond_stereo_info = [(bond_idx, bond_stereo_descriptor), ..] where
|
48
|
-
bond_stereo_descriptor is `Chem.StereoDescriptor.Bond_Cis` or
|
49
|
-
`Chem.StereoDescriptor.Bond_Trans`, or `Chem.StereoDescriptor.NoValue`.
|
50
|
-
override (bool, optional): _description_. Defaults to False.
|
51
|
-
|
52
|
-
Returns:
|
53
|
-
List[Chem.Mol]: list of enumerated stereoisomers.
|
54
|
-
"""
|
55
|
-
isomers = []
|
56
|
-
for bond_idx, bond_stereo_desc in ring_bond_stereo_info:
|
57
|
-
if (bond_stereo_desc == Chem.StereoDescriptor.NoValue) or override:
|
58
|
-
bond = rdmol.GetBondWithIdx(bond_idx)
|
59
|
-
(a2,a3) = (bond.GetBeginAtom(), bond.GetEndAtom())
|
60
|
-
a2_idx = a2.GetIdx()
|
61
|
-
a3_idx = a3.GetIdx()
|
62
|
-
a1_idx = sorted([(a.GetIdx(), a.GetAtomicNum()) for a in a2.GetNeighbors() if a.GetIdx() != a3_idx], key=lambda x: x[1], reverse=True)[0][0]
|
63
|
-
a4_idx = sorted([(a.GetIdx(), a.GetAtomicNum()) for a in a3.GetNeighbors() if a.GetIdx() != a2_idx], key=lambda x: x[1], reverse=True)[0][0]
|
64
|
-
bond.SetStereoAtoms(a1_idx, a4_idx) # need to set reference atoms
|
65
|
-
# cis
|
66
|
-
bond.SetStereo(Chem.BondStereo.STEREOCIS)
|
67
|
-
isomers.append(Chem.Mol(rdmol))
|
68
|
-
# trans
|
69
|
-
bond.SetStereo(Chem.BondStereo.STEREOTRANS)
|
70
|
-
isomers.append(Chem.Mol(rdmol))
|
71
|
-
return isomers
|
72
|
-
|
73
|
-
|
74
|
-
def complete_stereoisomers(molecular_input:Union[Mol, str, Chem.Mol], name:Optional[str]=None,
|
75
|
-
std:bool=False, override:bool=False, **kwargs) -> MolLibr:
|
76
|
-
"""Completes stereoisomers and returns a rdworks.MolLibr.
|
77
|
-
|
78
|
-
Args:
|
79
|
-
molecular_input (Union[Mol, str, Chem.Mol]): input molecule.
|
80
|
-
name (Optional[str], optional): name of the molecule. Defaults to None.
|
81
|
-
std (bool, optional): whether to standardize the input. Defaults to False.
|
82
|
-
override (bool, optional): whether to override input stereoisomers. Defaults to False.
|
83
|
-
|
84
|
-
Raises:
|
85
|
-
TypeError: if `molecular_input` is not rdworks.Mol, SMILES, or rdkit.Chem.Mol object.
|
86
|
-
|
87
|
-
Returns:
|
88
|
-
MolLibr: a library of complete stereoisomers.
|
89
|
-
"""
|
90
|
-
if isinstance(molecular_input, Mol):
|
91
|
-
if name:
|
92
|
-
mol = molecular_input.rename(name)
|
93
|
-
else:
|
94
|
-
mol = molecular_input
|
95
|
-
elif isinstance(molecular_input, str) or isinstance(molecular_input, Chem.Mol):
|
96
|
-
mol = Mol(molecular_input, name, std)
|
97
|
-
else:
|
98
|
-
raise TypeError('complete_stereoisomers() expects rdworks.Mol, SMILES or rdkit.Chem.Mol object')
|
99
|
-
|
100
|
-
ring_bond_stereo_info = mol.get_ring_bond_stereo()
|
101
|
-
|
102
|
-
if override:
|
103
|
-
mol = mol.remove_stereo()
|
104
|
-
|
105
|
-
rdmols = _enum_stereoisomers(mol.rdmol)
|
106
|
-
# ring bond stereo is not properly enumerated
|
107
|
-
# cis/trans information is lost if stereochemistry is removed,
|
108
|
-
# which cannot be enumerated by EnumerateStereoisomers() function
|
109
|
-
# so _enum_bond_stereo() is introduced
|
110
|
-
if len(ring_bond_stereo_info) > 0:
|
111
|
-
ring_cis_trans = []
|
112
|
-
for rdmol in rdmols:
|
113
|
-
ring_cis_trans += _enum_ring_bond_stereo(rdmol,
|
114
|
-
ring_bond_stereo_info,
|
115
|
-
override=override)
|
116
|
-
if len(ring_cis_trans) > 0:
|
117
|
-
rdmols = ring_cis_trans
|
118
|
-
|
119
|
-
if len(rdmols) > 1:
|
120
|
-
libr = MolLibr(rdmols).unique().rename(mol.name, sep='.').compute(**kwargs)
|
121
|
-
else:
|
122
|
-
libr = MolLibr(rdmols).rename(mol.name).compute(**kwargs)
|
123
|
-
|
124
|
-
for _ in libr:
|
125
|
-
_.props.update(mol.props)
|
126
|
-
|
127
|
-
return libr
|
@@ -1,20 +0,0 @@
|
|
1
|
-
from rdkit.Chem.MolStandardize import rdMolStandardize
|
2
|
-
|
3
|
-
from .mol import Mol
|
4
|
-
from .mollibr import MolLibr
|
5
|
-
|
6
|
-
|
7
|
-
def complete_tautomers(mol:Mol, **kwargs) -> MolLibr:
|
8
|
-
"""Returns a library of enumerated tautomers.
|
9
|
-
|
10
|
-
Args:
|
11
|
-
mol (Mol): input molecule.
|
12
|
-
|
13
|
-
Returns:
|
14
|
-
MolLibr: a library of enumerated tautomers.
|
15
|
-
"""
|
16
|
-
enumerator = rdMolStandardize.TautomerEnumerator()
|
17
|
-
rdmols = list(enumerator.Enumerate(mol.rdmol))
|
18
|
-
if len(rdmols) > 1:
|
19
|
-
return MolLibr(rdmols).unique().rename(mol.name, sep='.').compute(**kwargs)
|
20
|
-
return MolLibr(rdmols).compute(**kwargs)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|