hjxdl 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdl/_version.py +2 -2
- hdl/datasets/city_code.json +2576 -0
- hdl/datasets/defined_BaseFeatures.fdef +236 -0
- hdl/datasets/las.tsv +0 -0
- hdl/datasets/route_template.json +113 -0
- hdl/datasets/vocab.txt +591 -0
- hdl/ju/__init__.py +0 -0
- hdl/ju/setup.py +55 -0
- hdl/jupyfuncs/__init__.py +0 -0
- hdl/jupyfuncs/chem/__init__.py +0 -0
- hdl/jupyfuncs/chem/mol.py +548 -0
- hdl/jupyfuncs/chem/norm.py +268 -0
- hdl/jupyfuncs/chem/pdb_ext.py +94 -0
- hdl/jupyfuncs/chem/scaffold.py +25 -0
- hdl/jupyfuncs/chem/shape.py +241 -0
- hdl/jupyfuncs/chem/tokenizers.py +2 -0
- hdl/jupyfuncs/dbtools/__init__.py +0 -0
- hdl/jupyfuncs/dbtools/pg.py +42 -0
- hdl/jupyfuncs/dbtools/query_info.py +150 -0
- hdl/jupyfuncs/dl/__init__.py +0 -0
- hdl/jupyfuncs/dl/cp.py +54 -0
- hdl/jupyfuncs/dl/dataframe.py +38 -0
- hdl/jupyfuncs/dl/fp.py +49 -0
- hdl/jupyfuncs/dl/list.py +20 -0
- hdl/jupyfuncs/dl/model_utils.py +97 -0
- hdl/jupyfuncs/dl/tensor.py +159 -0
- hdl/jupyfuncs/dl/uncs.py +112 -0
- hdl/jupyfuncs/llm/__init__.py +0 -0
- hdl/jupyfuncs/llm/extract.py +123 -0
- hdl/jupyfuncs/llm/openapi.py +94 -0
- hdl/jupyfuncs/network/__init__.py +0 -0
- hdl/jupyfuncs/network/proxy.py +20 -0
- hdl/jupyfuncs/path/__init__.py +0 -0
- hdl/jupyfuncs/path/glob.py +285 -0
- hdl/jupyfuncs/path/strings.py +65 -0
- hdl/jupyfuncs/show/__init__.py +0 -0
- hdl/jupyfuncs/show/pbar.py +50 -0
- hdl/jupyfuncs/show/plot.py +259 -0
- hdl/jupyfuncs/utils/__init__.py +0 -0
- hdl/jupyfuncs/utils/wrappers.py +8 -0
- hdl/utils/weather/__init__.py +0 -0
- hdl/utils/weather/weather.py +68 -0
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/METADATA +1 -1
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/RECORD +46 -5
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/WHEEL +1 -1
- {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,548 @@
|
|
1
|
+
# Jupyter funcs
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
import itertools
|
5
|
+
from copy import deepcopy
|
6
|
+
# from collections import defaultdict
|
7
|
+
|
8
|
+
from rdkit import Chem
|
9
|
+
from rdkit.Chem.Draw import IPythonConsole
|
10
|
+
from rdkit.Chem.Draw.IPythonConsole import addMolToView
|
11
|
+
# from rdkit.Chem import rdDepictor
|
12
|
+
from rdkit.Chem.Draw import rdMolDraw2D
|
13
|
+
from IPython.display import SVG
|
14
|
+
from rdkit.Chem import AllChem
|
15
|
+
from ipywidgets import (
|
16
|
+
interact,
|
17
|
+
# interactive,
|
18
|
+
fixed,
|
19
|
+
)
|
20
|
+
from rdkit.Chem.rdRGroupDecomposition import (
|
21
|
+
RGroupDecomposition,
|
22
|
+
# RGroupDecompositionParameters,
|
23
|
+
# RGroupMatching,
|
24
|
+
# RGroupScore,
|
25
|
+
# RGroupLabels,
|
26
|
+
# RGroupCoreAlignment,
|
27
|
+
RGroupLabelling
|
28
|
+
)
|
29
|
+
import pandas as pd
|
30
|
+
from rdkit.Chem import PandasTools
|
31
|
+
from rdkit.Chem import Draw
|
32
|
+
from IPython.display import HTML
|
33
|
+
# from rdkit import rdBase
|
34
|
+
from IPython.display import display
|
35
|
+
|
36
|
+
from rdkit import Chem
|
37
|
+
from rdkit.Chem import rdmolops
|
38
|
+
from rdkit.Chem import Draw
|
39
|
+
from rdkit.Chem.Draw import IPythonConsole
|
40
|
+
from rdkit.Chem import rdRGroupDecomposition
|
41
|
+
from rdkit.Chem import rdqueries
|
42
|
+
from rdkit.Chem import rdDepictor
|
43
|
+
from rdkit.Chem.Draw import rdMolDraw2D
|
44
|
+
from rdkit.Chem.MolStandardize import rdMolStandardize
|
45
|
+
from rdkit import Geometry
|
46
|
+
rdDepictor.SetPreferCoordGen(True)
|
47
|
+
import pandas as pd
|
48
|
+
from PIL import Image as pilImage
|
49
|
+
from io import BytesIO
|
50
|
+
from IPython.display import SVG, Image
|
51
|
+
from ipywidgets import interact
|
52
|
+
import molvs as mv
|
53
|
+
|
54
|
+
|
55
|
+
IPythonConsole.ipython_useSVG = True
|
56
|
+
IPythonConsole.molSize = (450, 350)
|
57
|
+
params = Chem.SubstructMatchParameters()
|
58
|
+
params.aromaticMatchesConjugated = True
|
59
|
+
|
60
|
+
__all__ = [
|
61
|
+
'draw_mol',
|
62
|
+
'draw_confs',
|
63
|
+
'show_decomp',
|
64
|
+
'get_ids_folds',
|
65
|
+
'show_pharmacophore',
|
66
|
+
'mol_without_indices',
|
67
|
+
'norm_colors',
|
68
|
+
'drawmol_with_hi',
|
69
|
+
'draw_mols_surfs',
|
70
|
+
]
|
71
|
+
|
72
|
+
|
73
|
+
# Named colour palettes, each a list of (R, G, B) triples on the 0-255 scale.
COLORS = {
    # "Tol" colormap from https://davidmathlogic.com/colorblind
    'tol': [
        (51, 34, 136),
        (17, 119, 51),
        (68, 170, 153),
        (136, 204, 238),
        (221, 204, 119),
        (204, 102, 119),
        (170, 68, 153),
        (136, 34, 85)
    ],
    # "IBM" colormap from https://davidmathlogic.com/colorblind
    'ibm': [
        (100, 143, 255),
        (120, 94, 240),
        (220, 38, 127),
        (254, 97, 0),
        (255, 176, 0)
    ],
    # Okabe_Ito colormap from https://jfly.uni-koeln.de/color/
    'okabe': [
        (230, 159, 0),
        (86, 180, 233),
        (0, 158, 115),
        (240, 228, 66),
        (0, 114, 178),
        (213, 94, 0),
        (204, 121, 167)
    ]
}


# def get_his_for_onemol(mol_sm, pat_sm):
#     atom_ids = []
#     bond_ids = []
#     m = Chem.MolFromSmiles(mol_sm)
#     pt = Chem.MolFromSmiles(pat_sm)
#     hi_id = m.GetSubstructMatches(pt, params=params)
#     if len(m.GetSubstructMatches(pt, params=params)) == 0:
#         Chem.Kekulize(m)
#         hi_id = m.GetSubstructMatches(pt, params=params)
#     if len(hi_id) == 0:
#         return
#     atom_ids.append(itertools.chain.from_iterable(hi_id))


# def get_match_his(mol_sms, pat_sms):
#     highlightatoms = defaultdict(list)
#     highlightbonds = defaultdict(list)
#     for i in range(len(df)):
#         try:
#             mm = df.iloc[i, 0][2:-2]
#             pm = df.iloc[i, 2]
#             m = Chem.MolFromSmiles(mm)
#             pt = Chem.MolFromSmiles(pm)
#             hi_id = m.GetSubstructMatches(pt, params=params)
#             if len(m.GetSubstructMatches(pt, params=params)) == 0:
#                 Chem.Kekulize(m)
#                 hi_id = m.GetSubstructMatches(pt, params=params)
#             mols.append(m)
#             hi_ids.append(hi_id)
#         except:
#             pass
#     pass


def norm_colors(colors=COLORS):
    """Return a copy of ``colors`` with every channel divided by 255.

    Args:
        colors: Mapping of palette name to a sequence of colour tuples.
            Defaults to the module-level ``COLORS``.

    Returns:
        A new dict mapping each palette name to a list of tuples whose
        components are the input components divided by 255. The input
        mapping is left untouched.
    """
    # Build a fresh structure from the *argument*; the previous version
    # deep-copied the global COLORS and silently ignored custom palettes.
    return {
        name: [tuple(channel / 255 for channel in rgb) for rgb in palette]
        for name, palette in colors.items()
    }
|
146
|
+
|
147
|
+
|
148
|
+
def drawmol_with_hi(
    mol,
    legend,
    atom_hi_dict,
    bond_hi_dict,
    atomrads_dict,
    widthmults_dict,
    width=350,
    height=200,
):
    """Draw ``mol`` with highlights on a Cairo canvas and return the drawing text.

    The legend and the four highlight arguments are forwarded unchanged, in
    order, to ``DrawMoleculeWithHighlights``.

    Args:
        mol: Molecule to draw.
        legend: Legend passed to the drawer.
        atom_hi_dict: Atom highlight argument for the drawer.
        bond_hi_dict: Bond highlight argument for the drawer.
        atomrads_dict: Highlight radii argument for the drawer.
        widthmults_dict: Bond width multiplier argument for the drawer.
        width: Canvas width.
        height: Canvas height.

    Returns:
        Whatever ``GetDrawingText()`` yields for the finished Cairo drawing
        (presumably PNG data, as the original local name ``png`` suggests).
    """
    canvas = rdMolDraw2D.MolDraw2DCairo(width, height)
    canvas.ClearDrawing()
    highlight_args = (
        atom_hi_dict,
        bond_hi_dict,
        atomrads_dict,
        widthmults_dict,
    )
    canvas.DrawMoleculeWithHighlights(mol, legend, *highlight_args)
    canvas.FinishDrawing()
    return canvas.GetDrawingText()
|
170
|
+
|
171
|
+
|
172
|
+
def show_atom_number(mol, label='atomNote'):
    """Return a deep copy of ``mol`` whose atoms carry their index as a property.

    Args:
        mol: Molecule to annotate; it is not modified.
        label: Name of the atom property that receives ``str(atom.GetIdx())``.

    Returns:
        The annotated copy.
    """
    annotated = deepcopy(mol)
    for atom in annotated.GetAtoms():
        index_text = str(atom.GetIdx())
        atom.SetProp(label, index_text)
    return annotated
|
177
|
+
|
178
|
+
|
179
|
+
def moltosvg(mol, molSize=(500, 500), kekulize=True):
    """Render ``mol`` with an SVG drawer and return the SVG text.

    Args:
        mol: Molecule to draw.
        molSize: Sequence whose first two items are the canvas width and height.
        kekulize: Accepted for interface compatibility; not used by the body.

    Returns:
        The drawing text with every ``'svg:'`` substring removed.
    """
    svg_drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0], molSize[1])
    svg_drawer.DrawMolecule(mol)
    svg_drawer.FinishDrawing()
    # Strip the namespace prefix so the markup is plain <svg> tags.
    return svg_drawer.GetDrawingText().replace('svg:', '')
|
186
|
+
|
187
|
+
|
188
|
+
def draw_mol(mol):
    """Return an IPython ``SVG`` of ``mol`` with atom indices shown as notes."""
    labelled = show_atom_number(mol)
    markup = moltosvg(labelled)
    return SVG(markup)
|
190
|
+
|
191
|
+
|
192
|
+
def drawit(m, p, confId=-1):
    """Load one conformer of ``m`` into the viewer ``p`` and show it.

    Args:
        m: Molecule converted to a mol block.
        p: Viewer object (``draw_confs`` passes a ``py3Dmol.view``); its
            models are replaced.
        confId: Conformer id forwarded to ``Chem.MolToMolBlock``.

    Returns:
        The result of ``p.show()``.
    """
    mol_block = Chem.MolToMolBlock(m, confId=confId)
    # Replace whatever the viewer displayed before with this conformer.
    p.removeAllModels()
    p.addModel(mol_block, 'sdf')
    stick_style = {'stick': {}}
    p.setStyle(stick_style)
    p.setBackgroundColor('0xeeeeee')
    p.zoomTo()
    return p.show()
|
200
|
+
|
201
|
+
|
202
|
+
def draw_confs(m):
    """Create an interactive widget for browsing the conformers of ``m``.

    A 500x500 ``py3Dmol`` view is created and ``drawit`` is wired to an
    ``interact`` control whose ``confId`` range is
    ``(0, m.GetNumConformers() - 1)``.

    Returns:
        The value returned by ``ipywidgets.interact``.
    """
    import py3Dmol

    viewer = py3Dmol.view(width=500, height=500)
    last_conformer = m.GetNumConformers() - 1
    return interact(
        drawit,
        m=fixed(m),
        p=fixed(viewer),
        confId=(0, last_conformer),
    )
|
209
|
+
|
210
|
+
|
211
|
+
def do_decomp(mols, cores, options):
    """Run an R-group decomposition of ``mols`` against ``cores``.

    Note that ``options.rgroupLabelling`` is overwritten with
    ``RGroupLabelling.AtomMap`` on the object the caller passed in.

    Args:
        mols: Iterable of molecules added to the decomposition one by one.
        cores: Cores handed to ``RGroupDecomposition``.
        options: Decomposition parameters object (mutated, see above).

    Returns:
        The processed ``RGroupDecomposition`` instance.
    """
    options.rgroupLabelling = RGroupLabelling.AtomMap
    decomposition = RGroupDecomposition(cores, options)
    for candidate in mols:
        decomposition.Add(candidate)
    decomposition.Process()
    return decomposition
|
218
|
+
|
219
|
+
|
220
|
+
def show_decomp(mols, cores, options, item=False):
    """Decompose ``mols`` on ``cores`` and present the R-groups.

    Args:
        mols: Molecules to decompose.
        cores: Cores for the decomposition; ``cores[0]`` is shown in the table.
        options: Parameters object forwarded to ``do_decomp``.
        item: When truthy return a text summary, otherwise an HTML table.

    Returns:
        If ``item`` is truthy, a single string of space-separated
        ``"<group>:<SMILES>"`` entries covering every group of every row.
        Otherwise an ``IPython.display.HTML`` object built from a DataFrame
        of the R-group columns plus ``'mol'`` and ``'input core'`` columns.
    """
    decomp = do_decomp(mols, cores, options)

    if not item:
        table = decomp.GetRGroupsAsColumns()
        table['mol'] = mols
        table['input core'] = cores[0]
        frame = pd.DataFrame(table)
        PandasTools.ChangeMoleculeRendering(frame)
        return HTML(frame.to_html())

    pieces = []
    for row in decomp.GetRGroupsAsRows():
        for group in row:
            smiles = Chem.MolToSmiles(row[group])
            pieces.append('{}:{}'.format(group, smiles))
    return ' '.join(pieces)
|
238
|
+
|
239
|
+
|
240
|
+
def get_ids_folds(id_list, num_folds, need_shuffle=False):
    """Split ``id_list`` into ``num_folds`` cross-validation folds.

    Every id ends up in exactly one held-out block. When the number of ids
    is not a multiple of ``num_folds`` the first ``len(id_list) % num_folds``
    blocks receive one extra id, so block sizes differ by at most one.
    (The previous implementation clamped the end of the last block to
    ``num_ids - 1`` and therefore lost the final id, and it discarded any
    remainder ids.)

    Args:
        id_list: Sequence of ids. It is copied, so the caller's object is
            never reordered.
        num_folds: Number of folds to create.
        need_shuffle: Shuffle the copied ids before splitting.

    Returns:
        A list with one ``(train_ids, held_out_ids)`` tuple per fold, both
        members being lists. ``train_ids`` is the concatenation of all other
        blocks in their original order.

    Raises:
        AssertionError: If there are fewer ids than folds.
        ZeroDivisionError: If ``num_folds`` is 0.
    """
    ids = list(id_list)  # work on a copy so shuffling never touches the input
    if need_shuffle:
        from random import shuffle
        shuffle(ids)

    num_ids = len(ids)
    assert num_ids >= num_folds, "need at least one id per fold"

    base_size, remainder = divmod(num_ids, num_folds)

    blocks = []
    start = 0
    for fold in range(num_folds):
        # The first `remainder` folds absorb the ids that do not divide evenly.
        end = start + base_size + (1 if fold < remainder else 0)
        blocks.append(ids[start:end])
        start = end

    return [
        (
            list(itertools.chain.from_iterable(blocks[:i] + blocks[i + 1:])),
            held_out,
        )
        for i, held_out in enumerate(blocks)
    ]
|
267
|
+
|
268
|
+
|
269
|
+
# Feature families that show_pharmacophore reports by default.
keep = ["Donor", "Acceptor", "Aromatic", "Hydrophobe", "LumpedHydrophobe"]


def _default_fdef_path(filename='defined_BaseFeatures.fdef'):
    """Locate the bundled feature-definition file relative to this module.

    The historical location (``../datasets``) is tried first; the package's
    top-level ``datasets`` directory (``../../datasets``) is the fallback.
    If neither exists the historical path is returned unchanged.
    """
    here = os.path.dirname(__file__)
    candidates = [
        os.path.join(here, "..", "datasets", filename),
        os.path.join(here, "..", "..", "datasets", filename),
    ]
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return candidates[0]


def show_pharmacophore(
    sdf_path,
    keep=keep,
    fdf_dir=_default_fdef_path()
):
    """Print and display the pharmacophore features of the first molecule in an SD file.

    For every retained feature one line is printed (index, family, type and
    atom ids) and an image of the molecule with those atoms highlighted is
    displayed.

    Args:
        sdf_path: Path of the SD file; only its first record is used.
        keep: Feature families to retain.
        fdf_dir: Path of the feature-definition (``.fdef``) file handed to
            ``AllChem.BuildFeatureFactory``.

    Raises:
        IndexError: If the SD file contains no records.
    """
    # Index the supplier directly instead of parsing every record in the file.
    template_mol = Chem.SDMolSupplier(sdf_path)[0]
    factory = AllChem.BuildFeatureFactory(fdf_dir)
    selected_feats = [
        feat
        for feat in factory.GetFeaturesForMol(template_mol)
        if feat.GetFamily() in keep
    ]

    for index, feat in enumerate(selected_feats):
        atomids = feat.GetAtomIds()
        print(
            "pharamcophore index:{0}; feature:{1}; type:{2}; atom id:{3}".format(
                index,
                feat.GetFamily(),
                feat.GetType(),
                atomids
            )
        )
        display(
            Draw.MolToImage(
                template_mol,
                highlightAtoms=list(atomids),
                highlightColor=[0, 1, 0],
                useSVG=True
            )
        )
|
308
|
+
|
309
|
+
|
310
|
+
def mol_without_indices(
    mol_input: Chem.Mol,
    remove_indices=None,
    keep_properties=None
):
    """Rebuild ``mol_input`` without the atoms listed in ``remove_indices``.

    A new molecule is assembled atom by atom. Only the symbol, formal charge,
    explicit hydrogen count and the selected properties of each atom are
    carried over. Bonds between two surviving atoms are recreated with their
    original bond type; bonds touching a removed atom are dropped, and when
    the surviving end of such a bond is a nitrogen its explicit hydrogen
    count is increased by one.

    Atoms whose symbol starts with ``'*'``, or is ``'R'`` optionally followed
    by a number (e.g. ``'R1'`` — presumably dummy-atom labels), become ``'*'``
    atoms tagged with ``molAtomMapNumber`` (and, for the ``R`` form,
    ``dummyLabel`` / ``_MolFileRLabel``). Element symbols that merely begin
    with ``R`` (``Ru``, ``Rh``, ``Rb``, ...) are copied as ordinary atoms;
    they used to raise ``ValueError`` in ``int(symbol[1:])``.

    Args:
        mol_input: Source molecule; it is not modified.
        remove_indices: Iterable of atom indices to leave out (default: none).
        keep_properties: Names of atom properties to copy. Only ``str`` and
            ``int`` values are transferred (default: none).

    Returns:
        A new ``Chem.Mol``.
    """
    removed = set(remove_indices) if remove_indices is not None else set()
    wanted_props = list(keep_properties) if keep_properties is not None else []

    # First pass: snapshot (symbol, charge, explicit Hs, props) for every atom.
    atom_list = []
    for atom in mol_input.GetAtoms():
        # Fetch the property dict once per atom rather than twice per name.
        available = atom.GetPropsAsDict() if wanted_props else {}
        props = {
            name: available[name] for name in wanted_props if name in available
        }
        symbol = atom.GetSymbol()

        if symbol.startswith('*'):
            atom_symbol = '*'
            props['molAtomMapNumber'] = atom.GetAtomMapNum()
        elif symbol == 'R' or (
            symbol.startswith('R') and symbol[1:].isdecimal()
        ):
            atom_symbol = '*'
            if len(symbol) > 1:
                atom_map_num = int(symbol[1:])
            else:
                atom_map_num = atom.GetAtomMapNum()
            props['dummyLabel'] = 'R' + str(atom_map_num)
            props['_MolFileRLabel'] = str(atom_map_num)
            props['molAtomMapNumber'] = atom_map_num
        else:
            atom_symbol = symbol

        atom_list.append(
            (
                atom_symbol,
                atom.GetFormalCharge(),
                atom.GetNumExplicitHs(),
                props
            )
        )

    # Second pass: add the surviving atoms and remember old -> new indices.
    mol = Chem.RWMol(Chem.Mol())
    idx_map = {}
    for old_idx, (atom_symbol, charge, num_hs, props) in enumerate(atom_list):
        if old_idx in removed:
            continue
        new_atom = Chem.Atom(atom_symbol)
        new_atom.SetFormalCharge(charge)
        new_atom.SetNumExplicitHs(num_hs)
        for name, value in props.items():
            if isinstance(value, str):
                new_atom.SetProp(name, value)
            elif isinstance(value, int):
                new_atom.SetIntProp(name, value)
        idx_map[old_idx] = len(idx_map)
        mol.AddAtom(new_atom)

    # Third pass: recreate bonds, patching nitrogens that lost a neighbour.
    for bond in mol_input.GetBonds():
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        begin_removed = begin in removed
        end_removed = end in removed

        if not begin_removed and not end_removed:
            mol.AddBond(idx_map[begin], idx_map[end], bond.GetBondType())
            continue
        if begin_removed and end_removed:
            continue

        survivor = end if begin_removed else begin
        if atom_list[survivor][0] == 'N':
            # The cut bond is replaced by one extra explicit hydrogen.
            kept_atom = mol.GetAtomWithIdx(idx_map[survivor])
            kept_atom.SetNumExplicitHs(kept_atom.GetNumExplicitHs() + 1)

    return Chem.Mol(mol)
|
410
|
+
|
411
|
+
|
412
|
+
def draw_mols_surfs(
    mols,
    width=400,
    height=400,
    surface=True,
    surface_opacity=0.5
):
    """Show several molecules together in one ``py3Dmol`` view.

    Args:
        mols: Iterable of molecules, each added with ``addMolToView``.
        width: Viewer width.
        height: Viewer height.
        surface: When truthy, add a ``py3Dmol.SAS`` surface.
        surface_opacity: Opacity used for that surface.

    Returns:
        The result of ``view.show()``.
    """
    import py3Dmol

    viewer = py3Dmol.view(width=width, height=height)
    viewer.setBackgroundColor('0xeeeeee')
    viewer.removeAllModels()

    for mol in mols:
        addMolToView(mol, viewer)

    # NOTE(review): the surface is added once, after all molecules are loaded.
    if surface:
        surface_style = {'opacity': surface_opacity}
        viewer.addSurface(py3Dmol.SAS, surface_style)

    viewer.zoomTo()
    return viewer.show()
|
433
|
+
|
434
|
+
|
435
|
+
def draw_rxn(
    rxn_smiles,
    use_smiles: bool = True,
):
    """Display a reaction as an image in the notebook.

    Args:
        rxn_smiles: Reaction string given to ``AllChem.ReactionFromSmarts``.
        use_smiles: Forwarded as ``useSmiles``.

    Returns:
        None. The drawing is shown with ``display`` on a 2000x500 Cairo
        canvas, with ``highlightByReactant=True``.
    """
    reaction = AllChem.ReactionFromSmarts(rxn_smiles, useSmiles=use_smiles)
    canvas = Draw.MolDraw2DCairo(2000, 500)
    canvas.DrawReaction(reaction, highlightByReactant=True)
    display(Image(canvas.GetDrawingText()))
|
444
|
+
|
445
|
+
|
446
|
+
def react(rxn_smarts, reagents):
    """Apply a reaction SMARTS to reagents given as SMILES.

    Args:
        rxn_smarts: Reaction SMARTS string.
        reagents: Iterable of reagent SMILES, in reactant-template order.

    Returns:
        The value of ``RunReactants`` on success. If anything raises, the
        exception is printed and an empty list is returned instead.
    """
    try:
        reaction = AllChem.ReactionFromSmarts(rxn_smarts)
        reagent_mols = [Chem.MolFromSmiles(smi) for smi in reagents]
        return reaction.RunReactants(reagent_mols)
    except Exception as e:
        # Best effort by design: report the problem and yield no products.
        print(e)
        return []
|
457
|
+
|
458
|
+
|
459
|
+
def match_pattern(mol, patt):
    """Return ``mol.HasSubstructMatch(patt)``, or ``False`` when ``mol`` is falsy."""
    return mol.HasSubstructMatch(patt) if mol else False
|
464
|
+
|
465
|
+
|
466
|
+
def split_rxn_smiles(smi):
    """Split a ``reactants>agents>products`` reaction SMILES in two.

    Args:
        smi: Reaction SMILES containing exactly two ``'>'`` separators.

    Returns:
        ``(reagents, products)``. A non-empty middle section is appended to
        the first section with ``'.'``. If splitting fails for any reason the
        exception is printed and ``('', '')`` is returned.
    """
    try:
        left, middle, products = smi.split('>')
        reagents = '.'.join([left, middle]) if len(middle) > 0 else left
        return reagents, products
    except Exception as e:
        print(e)
        return '', ''
|
477
|
+
|
478
|
+
|
479
|
+
def _product_inchikeys(product_smiles):
    """Return ``(stereo-stripped SMILES, InChIKey)`` pairs for parsable products."""
    keyed = []
    for smi in product_smiles:
        flat = smi.replace('@', '')
        product_mol = Chem.MolFromSmiles(flat)
        if product_mol is None:
            # An unparsable product can never match; skip it rather than
            # handing None to MolToInchiKey.
            continue
        keyed.append((flat, Chem.MolToInchiKey(product_mol)))
    return keyed


def find_mprod(rxn_smi):
    """Find the acid/amine pair whose amide coupling yields a recorded product.

    The reactant side of ``rxn_smi`` is searched for carboxylic acids and for
    amines. Every acid/amine combination is run through an amide-forming
    reaction template, and the first predicted product whose InChIKey equals
    that of a recorded product is reported. All ``'@'`` characters are
    removed from the SMILES involved before they are used.

    Args:
        rxn_smi: Reaction SMILES, split with ``split_rxn_smiles``.

    Returns:
        ``(acid_smiles, amine_smiles, product_smiles)`` for the first match,
        or ``None`` when nothing matches.
    """
    # ref: https://github.com/LiamWilbraham/uspto-analysis/blob/master/reaction-stats-uspto.ipynb
    rxn_smarts = '[C:1](=[O:2])-[OD1].[N!H0:3]>>[C:1](=[O:2])[N:3]'
    patt_acid = Chem.MolFromSmarts('[CX3](=O)[OX2H1]')
    patt_amine = Chem.MolFromSmarts('[N;H3,H2,H1]')  # ammonia or primary/secondary amine

    # Split once; both halves come from the same call.
    reactant_part, product_part = split_rxn_smiles(rxn_smi)
    reactants = reactant_part.split('.')
    products = product_part.split('.')

    cooh = [
        r.replace('@', '')
        for r in reactants
        if match_pattern(Chem.MolFromSmiles(r), patt_acid)
    ]
    amine = [
        r.replace('@', '')
        for r in reactants
        if match_pattern(Chem.MolFromSmiles(r), patt_amine)
    ]

    # Parsed lazily and only once, instead of once per predicted product.
    product_keys = None

    for cooh_i, amine_i in itertools.product(cooh, amine):
        for product_set in react(rxn_smarts, (cooh_i, amine_i)):
            if product_keys is None:
                product_keys = _product_inchikeys(products)
            predicted_key = Chem.MolToInchiKey(product_set[0])
            for product_smi, recorded_key in product_keys:
                if predicted_key == recorded_key:
                    return cooh_i, amine_i, product_smi
    return None
|
516
|
+
|
517
|
+
|
518
|
+
def get_largest_mol(smiles, to_smiles=False):
    """Return the fragment of ``smiles`` with the most atoms.

    Args:
        smiles: SMILES string, possibly containing several fragments.
        to_smiles: When truthy, return the fragment as a SMILES string
            passed through ``molvs.standardize_smiles`` instead of a molecule.

    Returns:
        ``None`` if the SMILES cannot be parsed; otherwise the largest
        fragment as a molecule, or as a standardized SMILES string.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        return None

    fragments = rdmolops.GetMolFrags(parsed, asMols=True)
    # Fall back to the parsed molecule itself when there are no fragments.
    biggest = max(fragments, default=parsed, key=lambda frag: frag.GetNumAtoms())

    if not to_smiles:
        return biggest
    return mv.standardize_smiles(Chem.MolToSmiles(biggest))
|
527
|
+
|
528
|
+
|
529
|
+
def standardize_tautomer(mol, max_tautomers=1000):
    """Return the canonical tautomer of ``mol``.

    Args:
        mol: Molecule to canonicalize.
        max_tautomers: Value assigned to ``CleanupParameters.maxTautomers``.

    Returns:
        The result of ``TautomerEnumerator.Canonicalize``.
    """
    cleanup = rdMolStandardize.CleanupParameters()
    cleanup.maxTautomers = max_tautomers
    return rdMolStandardize.TautomerEnumerator(cleanup).Canonicalize(mol)
|
535
|
+
|
536
|
+
|
537
|
+
def reorder_tautomers(m):
    """List the tautomers of ``m`` with the canonical one first.

    Returns:
        A list starting with the canonical tautomer, followed by every
        enumerated tautomer whose SMILES differs from the canonical SMILES,
        sorted as ``(SMILES, molecule)`` pairs.
    """
    enumerator = rdMolStandardize.TautomerEnumerator()
    canonical = enumerator.Canonicalize(m)
    canonical_smiles = Chem.MolToSmiles(canonical)

    others = []
    for tautomer in enumerator.Enumerate(m):
        tautomer_smiles = Chem.MolToSmiles(tautomer)
        if tautomer_smiles != canonical_smiles:
            others.append((tautomer_smiles, tautomer))
    others.sort()

    return [canonical] + [tautomer for _, tautomer in others]
|