hjxdl 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdl/_version.py +2 -2
- hdl/datasets/city_code.json +2576 -0
- hdl/datasets/defined_BaseFeatures.fdef +236 -0
- hdl/datasets/las.tsv +0 -0
- hdl/datasets/route_template.json +113 -0
- hdl/datasets/vocab.txt +591 -0
- hdl/ju/__init__.py +0 -0
- hdl/ju/setup.py +55 -0
- hdl/jupyfuncs/__init__.py +0 -0
- hdl/jupyfuncs/chem/__init__.py +0 -0
- hdl/jupyfuncs/chem/mol.py +548 -0
- hdl/jupyfuncs/chem/norm.py +268 -0
- hdl/jupyfuncs/chem/pdb_ext.py +94 -0
- hdl/jupyfuncs/chem/scaffold.py +25 -0
- hdl/jupyfuncs/chem/shape.py +241 -0
- hdl/jupyfuncs/chem/tokenizers.py +2 -0
- hdl/jupyfuncs/dbtools/__init__.py +0 -0
- hdl/jupyfuncs/dbtools/pg.py +42 -0
- hdl/jupyfuncs/dbtools/query_info.py +150 -0
- hdl/jupyfuncs/dl/__init__.py +0 -0
- hdl/jupyfuncs/dl/cp.py +54 -0
- hdl/jupyfuncs/dl/dataframe.py +38 -0
- hdl/jupyfuncs/dl/fp.py +49 -0
- hdl/jupyfuncs/dl/list.py +20 -0
- hdl/jupyfuncs/dl/model_utils.py +97 -0
- hdl/jupyfuncs/dl/tensor.py +159 -0
- hdl/jupyfuncs/dl/uncs.py +112 -0
- hdl/jupyfuncs/llm/__init__.py +0 -0
- hdl/jupyfuncs/llm/extract.py +123 -0
- hdl/jupyfuncs/llm/openapi.py +94 -0
- hdl/jupyfuncs/network/__init__.py +0 -0
- hdl/jupyfuncs/network/proxy.py +20 -0
- hdl/jupyfuncs/path/__init__.py +0 -0
- hdl/jupyfuncs/path/glob.py +285 -0
- hdl/jupyfuncs/path/strings.py +65 -0
- hdl/jupyfuncs/show/__init__.py +0 -0
- hdl/jupyfuncs/show/pbar.py +50 -0
- hdl/jupyfuncs/show/plot.py +259 -0
- hdl/jupyfuncs/utils/__init__.py +0 -0
- hdl/jupyfuncs/utils/wrappers.py +8 -0
- hdl/utils/weather/__init__.py +0 -0
- hdl/utils/weather/weather.py +68 -0
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/METADATA +1 -1
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/RECORD +46 -5
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/WHEEL +1 -1
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,268 @@
|
|
1
|
+
import functools
|
2
|
+
import logging
|
3
|
+
|
4
|
+
from rdkit import Chem
|
5
|
+
from rdkit.Chem import AllChem
|
6
|
+
|
7
|
+
|
8
|
+
log = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
__all__ = [
|
12
|
+
"Normalizer",
|
13
|
+
"Normalization",
|
14
|
+
"NORMALIZATIONS",
|
15
|
+
]
|
16
|
+
|
17
|
+
|
18
|
+
def memoized_property(fget):
    """Wrap ``fget`` as a read-only property whose value is computed once per instance.

    The computed value is stored on the instance under the attribute name
    ``"_" + fget.__name__``; later reads return that stored attribute instead
    of calling ``fget`` again.
    """
    cache_attr = "_" + fget.__name__

    @functools.wraps(fget)
    def cached_getter(self):
        already_cached = hasattr(self, cache_attr)
        if not already_cached:
            computed = fget(self)
            setattr(self, cache_attr, computed)
        return getattr(self, cache_attr)

    return property(cached_getter)
|
29
|
+
|
30
|
+
|
31
|
+
class Normalization(object):
    """A named normalization rule expressed as a reaction SMARTS string."""

    def __init__(self, name, transform):
        """Record the rule's label and its reaction SMARTS text.

        :param string name: A name for this Normalization.
        :param string transform: Reaction SMARTS defining the transformation;
            stored as ``transform_str`` and only turned into a reaction object
            when the ``transform`` property is read.
        """
        log.debug("Initializing Normalization: %s", name)
        self.transform_str = transform
        self.name = name

    @memoized_property
    def transform(self):
        """Reaction built from ``transform_str`` (computed on first access, then cached)."""
        log.debug("Loading Normalization transform: %s", self.name)
        reaction = AllChem.ReactionFromSmarts(self.transform_str)
        return reaction

    def __repr__(self):
        return "Normalization(%r, %r)" % (self.name, self.transform_str)

    def __str__(self):
        return self.name
|
53
|
+
|
54
|
+
|
55
|
+
# Ordered table of (name, reaction SMARTS) pairs; the Normalizer applies the
# resulting Normalization objects in exactly this order.
NORMALIZATIONS = [
    Normalization(rule_name, rule_smarts)
    for rule_name, rule_smarts in (
        (
            "Nitro to N+(O-)=O",
            "[*:1][N,P,As,Sb:2](=[O,S,Se,Te:3])=[O,S,Se,Te:4]>>[*:1][*+1:2]([*-1:3])=[*:4]",
        ),
        (
            "Sulfone to S(=O)(=O)",
            "[S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3])",
        ),
        (
            "Pyridine oxide to n+O-",
            "[n:1]=[O:2]>>[n+:1][O-:2]",
        ),
        (
            "Azide to N=N+=N-",
            "[*,H:1][N:2]=[N:3]#[N:4]>>[*,H:1][N:2]=[N+:3]=[N-:4]",
        ),
        (
            "Diazo/azo to =N+=N-",
            "[*:1]=[N:2]#[N:3]>>[*:1]=[N+:2]=[N-:3]",
        ),
        (
            "Sulfoxide to -S+(O-)-",
            "[!O:1][S+0;X3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]",
        ),
        (
            "Phosphate to P(O-)=O",
            "[O,S,Se,Te;-1:1][P+;D4:2][O,S,Se,Te;-1:3]>>[*+0:1]=[P+0;D5:2][*-1:3]",
        ),
        (
            "Amidinium to C(=NH2+)NH2",
            "[C,S;X3+1:1]([NX3:2])[NX3!H0:3]>>[*+0:1]([N:2])=[N+:3]",
        ),
        (
            "Normalize hydrazine-diazonium",
            "[CX4:1][NX3H:2]-[NX3H:3][CX4:4][NX2+:5]#[NX1:6]>>[CX4:1][NH0:2]=[NH+:3][C:4][N+0:5]=[NH:6]",
        ),
        (
            "Recombine 1,3-separated charges",
            "[N,P,As,Sb,O,S,Se,Te;-1:1]-[A:2]=[N,P,As,Sb,O,S,Se,Te;+1:3]>>[*-0:1]=[*:2]-[*+0:3]",
        ),
        (
            "Recombine 1,3-separated charges",
            "[n,o,p,s;-1:1]:[a:2]=[N,O,P,S;+1:3]>>[*-0:1]:[*:2]-[*+0:3]",
        ),
        (
            "Recombine 1,3-separated charges",
            "[N,O,P,S;-1:1]-[a:2]:[n,o,p,s;+1:3]>>[*-0:1]=[*:2]:[*+0:3]",
        ),
        (
            "Recombine 1,5-separated charges",
            "[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[A:3]-[A:4]=[N,P,As,Sb,O,S,Se,Te;+1:5]>>[*-0:1]=[*:2]-[*:3]=[*:4]-[*+0:5]",
        ),
        (
            "Recombine 1,5-separated charges",
            "[n,o,p,s;-1:1]:[a:2]:[a:3]:[c:4]=[N,O,P,S;+1:5]>>[*-0:1]:[*:2]:[*:3]:[c:4]-[*+0:5]",
        ),
        (
            "Recombine 1,5-separated charges",
            "[N,O,P,S;-1:1]-[c:2]:[a:3]:[a:4]:[n,o,p,s;+1:5]>>[*-0:1]=[c:2]:[*:3]:[*:4]:[*+0:5]",
        ),
        (
            "Normalize 1,3 conjugated cation",
            "[N,O;+0!H0:1]-[A:2]=[N!$(*[O-]),O;+1H0:3]>>[*+1:1]=[*:2]-[*+0:3]",
        ),
        (
            "Normalize 1,3 conjugated cation",
            "[n;+0!H0:1]:[c:2]=[N!$(*[O-]),O;+1H0:3]>>[*+1:1]:[*:2]-[*+0:3]",
        ),
        (
            "Normalize 1,3 conjugated cation",
            "[N,O;+0!H0:1]-[c:2]:[n!$(*[O-]),o;+1H0:3]>>[*+1:1]=[*:2]:[*+0:3]",
        ),
        (
            "Normalize 1,5 conjugated cation",
            "[N,O;+0!H0:1]-[A:2]=[A:3]-[A:4]=[N!$(*[O-]),O;+1H0:5]>>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]",
        ),
        (
            "Normalize 1,5 conjugated cation",
            "[n;+0!H0:1]:[a:2]:[a:3]:[c:4]=[N!$(*[O-]),O;+1H0:5]>>[n+1:1]:[*:2]:[*:3]:[*:4]-[*+0:5]",
        ),
        (
            "Normalize 1,5 conjugated cation",
            "[N,O;+0!H0:1]-[c:2]:[a:3]:[a:4]:[n!$(*[O-]),o;+1H0:5]>>[*+1:1]=[c:2]:[*:3]:[*:4]:[*+0:5]",
        ),
        (
            "Normalize 1,5 conjugated cation",
            "[n;+0!H0:1]1:[a:2]:[a:3]:[a:4]:[n!$(*[O-]);+1H0:5]1>>[n+1:1]1:[*:2]:[*:3]:[*:4]:[n+0:5]1",
        ),
        (
            "Normalize 1,5 conjugated cation",
            "[n;+0!H0:1]:[a:2]:[a:3]:[a:4]:[n!$(*[O-]);+1H0:5]>>[n+1:1]:[*:2]:[*:3]:[*:4]:[n+0:5]",
        ),
        (
            "Charge normalization",
            "[F,Cl,Br,I,At;-1:1]=[O:2]>>[*-0:1][O-:2]",
        ),
        (
            "Charge recombination",
            "[N,P,As,Sb;-1:1]=[C+;v3:2]>>[*+0:1]#[C+0:2]",
        ),
        (
            "Nitro to N+(O-)=O",
            "[N;X3:1](=[O:2])=[O:3]>>[*+1:1]([*-1:2])=[*:3]",
        ),
        (
            "Diazonium N",
            "[*:1]-[N;X2:2]#[N;X1:3]>>[*:1]-[*+1:2]#[*:3]",
        ),
        (
            "Quaternary N",
            "[N;X4;v4;+0:1]>>[*+1:1]",
        ),
        (
            "Trivalent O",
            "[*:1]=[O;X2;v3;+0:2]-[#6:3]>>[*:1]=[*+1:2]-[*:3]",
        ),
        (
            "Sulfoxide to -S+(O-)",
            "[!O:1][S+0;D3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]",
        ),
        (
            "Sulfoxide to -S+(O-) 2",
            "[!O:1][SH1+1;D3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]",
        ),
        (
            "Trivalent S",
            "[O:1]=[S;D2;+0:2]-[#6:3]>>[*:1]=[*+1:2]-[*:3]",
        ),
        (
            "Bad amide tautomer1",
            "[C:1]([OH1;D1:2])=;!@[NH1:3]>>[C:1](=[OH0:2])-[NH2:3]",
        ),
        (
            "Bad amide tautomer2",
            "[C:1]([OH1;D1:2])=;!@[NH0:3]>>[C:1](=[OH0:2])-[NH1:3]",
        ),
        (
            "Halogen with no neighbors",
            "[F,Cl,Br,I;X0;+0:1]>>[*-1:1]",
        ),
        (
            "Odd pyridine/pyridazine oxide structure",
            "[C,N;-;D2,D3:1]-[N+2;D3:2]-[O-;D1:3]>>[*-0:1]=[*+1:2]-[*-:3]",
        ),
        (
            "qunimade2",
            "[n&H0:1][n&H1:2][n&H1,c;R2:3][c&H1,n&H1:4][c,n&H1:5](=[S,N,O:7])[n&H1:6]>>[n&H0:1][n&H1:2][n&H0,c;R2:3][c&H1,n&H0:4][c,n&H0:5]([S,N,O:7])[n&H0:6]",
        ),
        (
            "qunimade",
            "[c,n&H0,n&H1:2][n&H0,n&H1,c:3][c,n&H0,n&H1:4][c,n&H0,n&H1:5](=[S,N,O:1])>>[c,n&H0:2][n&H0,c:3][c,n&H0:4][c,n&H0:5]([S,N,O:1])",
        ),
    )
]
|
198
|
+
|
199
|
+
|
200
|
+
class Normalizer(object):
    """A class for applying Normalization transforms.

    Each fragment of the input molecule is processed separately. Every
    transform in ``normalizations`` is tried once, in list order; a transform
    that matches is re-applied to its own products until it stops matching or
    ``MAX_TRANSFORM_ATTEMPTS`` rounds have run.
    """

    # Upper bound on how many rounds a single transform is re-applied to its products.
    MAX_TRANSFORM_ATTEMPTS = 20

    def __init__(self, normalizations=NORMALIZATIONS):
        """Initialize a Normalizer with an optional custom list of Normalization transforms.

        :param normalizations: A list of Normalization transforms to apply, in order.
            The list is stored by reference and is not copied.
        """
        log.debug("Initializing Normalizer")
        self.normalizations = normalizations

    def __call__(self, mol):
        """Calling a Normalizer instance like a function is the same as calling its normalize(mol) method."""
        return self.normalize(mol)

    def normalize(self, mol):
        """Apply the configured Normalization transforms to every fragment of a molecule.

        The molecule is split into fragments, each fragment is normalized on its
        own, and the results are combined into one molecule and sanitized.

        :param mol: The molecule to normalize.
        :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
        :return: The normalized molecule. A molecule with no fragments is
            returned as-is.
        :rtype: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
        """
        log.debug("Running Normalizer")
        # Normalize each fragment separately to get around quirky RunReactants behaviour
        fragments = [
            self._normalize_fragment(fragment)
            for fragment in Chem.GetMolFrags(mol, asMols=True)
        ]
        if not fragments:
            # Nothing to normalize; popping from the empty list would raise IndexError.
            return mol
        # Join normalized fragments into a single molecule again. The last
        # fragment is used as the seed, so it ends up first in the combined molecule.
        outmol = fragments.pop()
        for fragment in fragments:
            outmol = Chem.CombineMols(outmol, fragment)
        Chem.SanitizeMol(outmol)
        return outmol

    def _normalize_fragment(self, mol):
        """Run every normalization once, in order, carrying forward each successful product."""
        for normalization in self.normalizations:
            product = self._apply_transform(mol, normalization.transform)
            if product:
                mol = product
        return mol

    def _apply_transform(self, mol, rule):
        """Repeatedly apply normalization transform to molecule until no changes occur.

        It is possible for multiple products to be produced when a rule is applied. The rule is applied repeatedly to
        each of the products, until no further changes occur or after ``MAX_TRANSFORM_ATTEMPTS`` rounds. If there are
        multiple unique products after the final application, the first product (sorted alphabetically by SMILES) is
        chosen.

        :param mol: The molecule (fragment) to transform.
        :param rule: Reaction object exposing ``RunReactants``.
        :return: The transformed molecule, or ``None`` if the rule never applied.
        """
        mols = [mol]
        for n in range(self.MAX_TRANSFORM_ATTEMPTS):
            products = {}
            for candidate in mols:
                for outcome in rule.RunReactants((candidate,)):
                    product = outcome[0]
                    # Keep only products that sanitize cleanly, keyed by SMILES to de-duplicate.
                    if Chem.SanitizeMol(product, catchErrors=True) == 0:
                        products[
                            Chem.MolToSmiles(product, isomericSmiles=True)
                        ] = product
            if not products:
                # If n == 0, the rule was not applicable and we return None
                return mols[0] if n > 0 else None
            mols = [products[s] for s in sorted(products)]
        # The attempt budget ran out while the rule was still producing products.
        # Return the latest result instead of falling through to an implicit
        # None, which the caller would treat as "rule not applicable".
        return mols[0]
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# Extract ligand and pdb
|
2
|
+
|
3
|
+
import sys
|
4
|
+
from prody import *
|
5
|
+
from rdkit import Chem
|
6
|
+
from rdkit.Chem import AllChem
|
7
|
+
from io import StringIO
|
8
|
+
import pypdb
|
9
|
+
|
10
|
+
|
11
|
+
__all__ = [
|
12
|
+
'get_pdb_components',
|
13
|
+
'process_ligand',
|
14
|
+
'write_pdb',
|
15
|
+
'write_sdf'
|
16
|
+
]
|
17
|
+
|
18
|
+
def get_pdb_components(pdb_id):
    """
    Parse a structure and split it into protein and non-protein selections
    :param pdb_id: value handed to ``parsePDB``
    :return: tuple ``(protein, ligand)`` where ``protein`` is the 'protein'
        selection and ``ligand`` is everything that is neither protein nor water
    """
    structure = parsePDB(pdb_id)
    ligand_query = 'not protein and not water'
    return structure.select('protein'), structure.select(ligand_query)
|
28
|
+
|
29
|
+
|
30
|
+
def process_ligand(ligand, res_name):
    """
    Build an RDKit molecule with bond orders for one ligand residue.

    The atoms named ``res_name`` are selected from ``ligand``, the residue's
    SMILES is looked up through pypdb and turned into a template molecule, the
    selected atoms are written as PDB text into an in-memory stream, that text
    is parsed by RDKit, and bond orders are copied over from the template.
    :param ligand: ligand as generated by prody
    :param res_name: residue name of ligand to extract
    :return: molecule with bond orders assigned
    """
    pdb_buffer = StringIO()
    residue_atoms = ligand.select("resname {}".format(res_name))
    het_description = pypdb.describe_chemical("{}".format(res_name))
    ligand_record = het_description["describeHet"]["ligandInfo"]["ligand"]
    template_mol = AllChem.MolFromSmiles(ligand_record["smiles"])
    writePDBStream(pdb_buffer, residue_atoms)
    pdb_mol = AllChem.MolFromPDBBlock(pdb_buffer.getvalue())
    return AllChem.AssignBondOrdersFromTemplate(template_mol, pdb_mol)
|
53
|
+
|
54
|
+
|
55
|
+
def write_pdb(protein, pdb_name):
    """
    Save a prody protein as ``<pdb_name>_protein.pdb`` and print the file name
    :param protein: protein object from prody
    :param pdb_name: base name for the pdb file
    :return: None
    """
    target_path = "{}_protein.pdb".format(pdb_name)
    writePDB(target_path, protein)
    print("wrote " + target_path)
|
65
|
+
|
66
|
+
|
67
|
+
def write_sdf(new_mol, pdb_name, res_name):
    """
    Write an RDKit molecule to ``<pdb_name>_<res_name>_ligand.sdf``
    :param new_mol: RDKit molecule to write
    :param pdb_name: base name used as the first part of the file name
    :param res_name: residue name used as the second part of the file name
    :return: None
    """
    outfile_name = f"{pdb_name}_{res_name}_ligand.sdf"
    writer = Chem.SDWriter(outfile_name)
    try:
        writer.write(new_mol)
    finally:
        # Close explicitly so the record is flushed and the file handle released
        # even if write() raises; the writer was previously left open.
        writer.close()
    print(f"wrote {outfile_name}")
|
79
|
+
|
80
|
+
|
81
|
+
def main(pdb_name):
    """
    Split a pdb into protein and ligands, write the protein pdb and one
    ligand sdf file per distinct residue name
    :param pdb_name: id from the pdb, doesn't need to have an extension
    :return: None
    """
    protein, ligand = get_pdb_components(pdb_name)
    write_pdb(protein, pdb_name)

    if ligand is None:
        # NOTE(review): prody's select() is understood to return None when no
        # atoms match; without this guard getResnames() fails with AttributeError.
        print(f"no ligand found in {pdb_name}")
        return

    # sorted() makes the processing order deterministic (plain set order is not).
    for res in sorted(set(ligand.getResnames())):
        new_mol = process_ligand(ligand, res)
        write_sdf(new_mol, pdb_name, res)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from rdkit.Chem.Scaffolds import rdScaffoldNetwork
|
2
|
+
from rdkit import RDLogger
|
3
|
+
RDLogger.DisableLog('rdApp.info')
|
4
|
+
|
5
|
+
|
6
|
+
def create_sn(
    mols,
    includeGenericScaffolds=False,
    includeGenericBondScaffolds=False,
    includeScaffoldsWithAttachments=True,
    includeScaffoldsWithoutAttachments=False,
    pruneBeforeFragmenting=True,
    keepOnlyFirstFragment=True
):
    """Create a scaffold network for ``mols`` with RDKit's rdScaffoldNetwork.

    Each keyword argument is copied onto the ``ScaffoldNetworkParams`` field of
    the same name; ``collectMolCounts`` is always switched on. RDKit's
    'rdApp.info' log channel is disabled before the network is built.

    Returns the object produced by ``rdScaffoldNetwork.CreateScaffoldNetwork``.
    """
    RDLogger.DisableLog('rdApp.info')
    params = rdScaffoldNetwork.ScaffoldNetworkParams()
    settings = (
        ('collectMolCounts', True),
        ('includeGenericScaffolds', includeGenericScaffolds),
        ('includeScaffoldsWithoutAttachments', includeScaffoldsWithoutAttachments),
        ('keepOnlyFirstFragment', keepOnlyFirstFragment),
        ('includeGenericBondScaffolds', includeGenericBondScaffolds),
        ('includeScaffoldsWithAttachments', includeScaffoldsWithAttachments),
        ('pruneBeforeFragmenting', pruneBeforeFragmenting),
    )
    for option, value in settings:
        setattr(params, option, value)
    return rdScaffoldNetwork.CreateScaffoldNetwork(mols, params)
|
@@ -0,0 +1,241 @@
|
|
1
|
+
# shape
|
2
|
+
|
3
|
+
import os
|
4
|
+
import subprocess
|
5
|
+
from copy import deepcopy
|
6
|
+
|
7
|
+
from rdkit import Chem
|
8
|
+
from rdkit.Chem import AllChem
|
9
|
+
from pyshapeit import AlignMol
|
10
|
+
import multiprocess as mp
|
11
|
+
|
12
|
+
from rdkit import RDLogger
|
13
|
+
|
14
|
+
from jupyfuncs.pbar import tqdm
|
15
|
+
from jupyfuncs.norm import Normalizer
|
16
|
+
|
17
|
+
# from rdkit.Chem.Draw import IPythonConsole
|
18
|
+
from rdkit.Chem import PyMol
|
19
|
+
from rdkit.Chem.Subshape import SubshapeBuilder, SubshapeObjects
|
20
|
+
from PIL import ImageFile
|
21
|
+
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
22
|
+
|
23
|
+
lg = RDLogger.logger()
|
24
|
+
lg.setLevel(4)
|
25
|
+
|
26
|
+
__all__ = [
|
27
|
+
'get_mols_from_smi',
|
28
|
+
'get_aligned_mol',
|
29
|
+
'get_aligned_sdf',
|
30
|
+
'show_alignment',
|
31
|
+
'pymol_running'
|
32
|
+
]
|
33
|
+
|
34
|
+
|
35
|
+
def get_mols_from_smi(probe_smifile):
    """Read a text file with one SMILES per line and return the parsed molecules.

    Blank lines and lines RDKit cannot parse are skipped. Exceptions raised
    while parsing a line are printed and that line is skipped.

    Args:
        probe_smifile: path of the SMILES file.

    Returns:
        list of RDKit molecules, in file order.
    """
    mols = []
    with open(probe_smifile) as f:
        # Iterate the file lazily instead of loading every line with readlines().
        for line in f:
            smi = line.strip()
            if not smi:
                # An empty SMILES parses to an atom-less molecule rather than
                # None, so blank lines would otherwise slip through as empty mols.
                continue
            mol = None
            try:
                mol = Chem.MolFromSmiles(smi)
            except Exception as e:
                # Best effort: report the problem and move on to the next line.
                print(e)
            if mol is not None:
                mols.append(mol)
    return mols
|
48
|
+
|
49
|
+
|
50
|
+
def get_aligned_mol(
    ref_mol, probe_mol, num_confs, num_cpu
):
    """Embed conformers of ``probe_mol`` and return the one scoring best against ``ref_mol``.

    ``ref_mol`` gets its '_Name' property set to 'ref' and ``probe_mol`` gets
    conformers embedded in place. Every conformer is round-tripped through a
    mol block into a fresh molecule named 'probe' and scored with ``AlignMol``.
    A copy of the highest-scoring molecule is returned; if no score exceeds 0,
    a copy of ``probe_mol`` (taken after embedding) is returned instead.
    """
    ref_mol.SetProp('_Name', 'ref')

    AllChem.EmbedMultipleConfs(probe_mol, numConfs=num_confs, numThreads=num_cpu)

    best_score = 0
    best_mol = deepcopy(probe_mol)

    conformer_count = probe_mol.GetNumConformers()
    for conf_id in range(conformer_count):
        mol_block = Chem.MolToMolBlock(probe_mol, confId=conf_id)
        candidate = Chem.MolFromMolBlock(mol_block)
        candidate.SetProp('_Name', 'probe')

        candidate_score = AlignMol(ref_mol, candidate)
        if candidate_score > best_score:
            best_score, best_mol = candidate_score, deepcopy(candidate)

    return best_mol
|
82
|
+
|
83
|
+
|
84
|
+
def get_aligned_mol_mp(config):
    """Call ``get_aligned_mol`` with the items of ``config`` as positional arguments."""
    aligned = get_aligned_mol(*config)
    return aligned
|
88
|
+
|
89
|
+
|
90
|
+
def gen_configs(ref_mol, mols, num_confs, num_cpu):
    """Return one ``(ref_mol, probe_mol, num_confs, num_cpu)`` tuple per item of ``mols``."""
    return [(ref_mol, probe, num_confs, num_cpu) for probe in mols]
|
95
|
+
|
96
|
+
|
97
|
+
def get_aligned_sdf(
    ref_sdf: str,
    probe_smifile: str,
    num_confs=150,
    num_cpu=5,
    num_workers=10,
    output_sdf=None,
    print_info=True,
    norm_mol=True
):
    """Align every molecule of a SMILES file to a reference and write them to an SD file.

    Args:
        ref_sdf: path of the reference SD file; its first record is the reference.
        probe_smifile: path of a file with one probe SMILES per line.
        num_confs: number of conformers embedded per probe molecule.
        num_cpu: thread count handed to the conformer embedding.
        num_workers: number of worker processes in the pool.
        output_sdf: output path; defaults to ``probe_smifile`` + '.sdf'.
        print_info: currently unused by this function.
        norm_mol: when true, each aligned molecule is passed through a
            ``Normalizer`` before being written.

    Returns:
        Absolute path of the written SD file.

    Raises:
        ValueError: if the first record of ``ref_sdf`` cannot be read.
    """
    ref_sdf = os.path.abspath(ref_sdf)
    ref_mol = Chem.SDMolSupplier(ref_sdf)[0]
    if ref_mol is None:
        # Fail here with a clear message instead of deep inside a worker process.
        raise ValueError(f"could not read a reference molecule from {ref_sdf}")

    if not output_sdf:
        output_sdf = os.path.abspath(probe_smifile) + '.sdf'
    else:
        output_sdf = os.path.abspath(output_sdf)

    mols = get_mols_from_smi(probe_smifile)

    configs = gen_configs(
        ref_mol, mols, num_confs=num_confs, num_cpu=num_cpu
    )

    # The context manager shuts the pool down once all results are collected
    # (or on error); the pool was previously never closed.
    with mp.Pool(num_workers) as pool:
        aligned_mols = list(
            tqdm(
                pool.imap(get_aligned_mol_mp, configs),
                total=len(mols),
                desc='All mols'
            )
        )

    normer = Normalizer() if norm_mol else None
    sdwriter = Chem.SDWriter(output_sdf)
    try:
        for mol in aligned_mols:
            if normer is not None:
                mol = normer(mol)
            sdwriter.write(mol)
    finally:
        # close() flushes pending records and releases the file even on failure.
        sdwriter.close()
    return output_sdf
|
138
|
+
|
139
|
+
# out_aligned = output_sdf + 'ali.sdf'
|
140
|
+
# score_file = output_sdf + 'score.csv'
|
141
|
+
|
142
|
+
# command = f'shape-it -r {ref_sdf} -d {output_sdf} -o {out_aligned} -s {score_file}'
|
143
|
+
# out_info = subprocess.getoutput(command)
|
144
|
+
# if print_info:
|
145
|
+
# print(out_info)
|
146
|
+
|
147
|
+
# if norm_mol:
|
148
|
+
# fix_path = out_aligned + 'fix.sdf'
|
149
|
+
# mols = Chem.SDMolSupplier(out_aligned)
|
150
|
+
# sdwriter = Chem.SDWriter(fix_path)
|
151
|
+
# for mol in mols:
|
152
|
+
# mol = normer(mol)
|
153
|
+
# sdwriter.write(mol)
|
154
|
+
# sdwriter.flush()
|
155
|
+
# sdwriter.close()
|
156
|
+
# return fix_path
|
157
|
+
# else:
|
158
|
+
# return out_aligned
|
159
|
+
|
160
|
+
# shape-it -r ref_sdf -d output_sdf -o out_aligned -s score_file
|
161
|
+
|
162
|
+
|
163
|
+
def show_alignment(
    ref_mol,
    probe_mol,
    gen_confs,
    num_confs=200,
    num_cpu=5,
):
    """Render the best-scoring probe conformer over a reference molecule in PyMOL.

    Args:
        ref_mol: RDKit molecule, or path of an '.sdf' file whose first record is used.
        probe_mol: RDKit molecule, path of an '.sdf' file, or a SMILES string.
        gen_confs: when true, embed ``num_confs`` conformers on the probe first.
        num_confs: number of conformers to embed when ``gen_confs`` is true.
        num_cpu: thread count handed to the conformer embedding.

    Returns:
        The image returned by the PyMOL viewer's ``GetPNG()``.

    Raises:
        TypeError: if ``ref_mol`` is neither a molecule nor an '.sdf' path.
        ValueError: if a molecule cannot be loaded, or no probe conformer gives
            a positive alignment score.
    """
    # Resolve both inputs before any slow work so bad arguments fail with a
    # clear message instead of an UnboundLocalError further down.
    if isinstance(ref_mol, Chem.Mol):
        mol1 = ref_mol
    elif isinstance(ref_mol, str) and ref_mol.endswith('.sdf'):
        mol1 = Chem.SDMolSupplier(ref_mol)[0]
    else:
        raise TypeError("ref_mol must be an RDKit Mol or a path ending in '.sdf'")
    if mol1 is None:
        raise ValueError(f"could not read a reference molecule from {ref_mol!r}")

    if isinstance(probe_mol, Chem.Mol):
        mol2 = probe_mol
    elif isinstance(probe_mol, str) and probe_mol.endswith('.sdf'):
        mol2 = Chem.SDMolSupplier(probe_mol)[0]
    else:
        mol2 = Chem.MolFromSmiles(probe_mol)
    if mol2 is None:
        raise ValueError(f"could not build a probe molecule from {probe_mol!r}")

    # should install pymol-open-source
    if not pymol_running():
        subprocess.Popen(['pymol', '-cKRQ'])

    if gen_confs:
        AllChem.EmbedMultipleConfs(
            mol2,
            numConfs=num_confs,
            numThreads=num_cpu
        )

    best_score = 0
    probe = None
    for i in range(mol2.GetNumConformers()):
        candidate = Chem.MolFromMolBlock(
            Chem.MolToMolBlock(mol2, confId=i)
        )
        sim_score = AlignMol(mol1, candidate)
        if sim_score > best_score:
            best_score = sim_score
            probe = deepcopy(candidate)

    if probe is None:
        # Happens when the probe has no conformers (e.g. a SMILES input with
        # gen_confs false) or when no conformer scores above 0.
        raise ValueError(
            "no probe conformer produced a positive alignment score; "
            "pass gen_confs=True or supply a probe with 3D coordinates"
        )

    mol1.SetProp('_Name', 'ref')
    probe.SetProp('_Name', 'probe')

    AllChem.CanonicalizeConformer(mol1.GetConformer())
    AllChem.CanonicalizeConformer(probe.GetConformer())

    builder = SubshapeBuilder.SubshapeBuilder()
    builder.gridDims = (20., 20., 10)
    builder.gridSpacing = 0.5
    builder.winRad = 4.

    refShape = builder.GenerateSubshapeShape(mol1)
    probeShape = builder.GenerateSubshapeShape(probe)

    v = PyMol.MolViewer()

    # NOTE(review): called again after canonicalization, presumably to re-align
    # the probe onto the reference in place; the score is not used. Confirm.
    AlignMol(mol1, probe)

    v.DeleteAll()

    v.ShowMol(mol1, name='ref', showOnly=False)
    SubshapeObjects.DisplaySubshape(v, refShape, 'ref_Shape')
    v.server.do('set transparency=0.5')

    v.ShowMol(probe, name='probe', showOnly=False)
    SubshapeObjects.DisplaySubshape(v, probeShape, 'prob_Shape')
    v.server.do('set transparency=0.5')

    return v.GetPNG()
|
234
|
+
|
235
|
+
|
236
|
+
def pymol_running() -> bool:
    """Report whether the output of ``ps aux | grep pymol`` contains the '-cKRQ' flags."""
    process_listing = subprocess.getoutput('ps aux | grep pymol')
    return '-cKRQ' in process_listing
|
File without changes
|
@@ -0,0 +1,42 @@
|
|
1
|
+
import psycopg
|
2
|
+
from psycopg import sql
|
3
|
+
|
4
|
+
|
5
|
+
def connect_by_infofile(info_file: str) -> psycopg.Connection:
    """Create a postgres connection

    Args:
        info_file (str):
            the path of the connection info like
            host=127.0.0.1 dbname=dbname port=5432 user=postgres password=lala
            Only the first line of the file is used.

    Returns:
        psycopg.Connection:
            the connection instance should be closed after committing.
    """
    # Read inside a context manager so the file handle is released right away;
    # it was previously left open until garbage collection.
    with open(info_file) as f:
        conninfo = f.readline().strip()
    return psycopg.connect(conninfo)
|
21
|
+
|
22
|
+
|
23
|
+
def get_item_by_idx(
    idx: int,
    info_file: str,
    by: str = 'id',
    table: str = 'reaction_id'
):
    """Fetch the ``reaction_id`` of the first row whose ``by`` column equals ``idx``.

    Args:
        idx (int): value to look up; it is passed to the query as a string.
        info_file (str): path of the connection info file, see
            ``connect_by_infofile``.
        by (str): name of the column to filter on. The default was previously
            the builtin function ``id`` rather than the string 'id', which
            could not be used as a column identifier.
        table (str): name of the table to query.

    Returns:
        The ``reaction_id`` value of the first matching row.

    Raises:
        LookupError: if no row matches.
    """
    # Build the query first so an invalid identifier fails before a connection
    # is opened. Identifiers are quoted by psycopg; the value is a bound parameter.
    query = sql.SQL(
        "select reaction_id from {table} where {by} = %s"
    ).format(
        table=sql.Identifier(table),
        by=sql.Identifier(by)
    )

    conn = connect_by_infofile(
        info_file
    )
    try:
        row = conn.execute(query, [str(idx)]).fetchone()
    finally:
        # Always release the connection; it was previously never closed.
        conn.close()

    if row is None:
        raise LookupError(f"no row in {table!r} where {by!r} = {idx!r}")
    return row[0]
|