hjxdl 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. hdl/_version.py +2 -2
  2. hdl/datasets/city_code.json +2576 -0
  3. hdl/datasets/defined_BaseFeatures.fdef +236 -0
  4. hdl/datasets/las.tsv +0 -0
  5. hdl/datasets/route_template.json +113 -0
  6. hdl/datasets/vocab.txt +591 -0
  7. hdl/ju/__init__.py +0 -0
  8. hdl/ju/setup.py +55 -0
  9. hdl/jupyfuncs/__init__.py +0 -0
  10. hdl/jupyfuncs/chem/__init__.py +0 -0
  11. hdl/jupyfuncs/chem/mol.py +548 -0
  12. hdl/jupyfuncs/chem/norm.py +268 -0
  13. hdl/jupyfuncs/chem/pdb_ext.py +94 -0
  14. hdl/jupyfuncs/chem/scaffold.py +25 -0
  15. hdl/jupyfuncs/chem/shape.py +241 -0
  16. hdl/jupyfuncs/chem/tokenizers.py +2 -0
  17. hdl/jupyfuncs/dbtools/__init__.py +0 -0
  18. hdl/jupyfuncs/dbtools/pg.py +42 -0
  19. hdl/jupyfuncs/dbtools/query_info.py +150 -0
  20. hdl/jupyfuncs/dl/__init__.py +0 -0
  21. hdl/jupyfuncs/dl/cp.py +54 -0
  22. hdl/jupyfuncs/dl/dataframe.py +38 -0
  23. hdl/jupyfuncs/dl/fp.py +49 -0
  24. hdl/jupyfuncs/dl/list.py +20 -0
  25. hdl/jupyfuncs/dl/model_utils.py +97 -0
  26. hdl/jupyfuncs/dl/tensor.py +159 -0
  27. hdl/jupyfuncs/dl/uncs.py +112 -0
  28. hdl/jupyfuncs/llm/__init__.py +0 -0
  29. hdl/jupyfuncs/llm/extract.py +123 -0
  30. hdl/jupyfuncs/llm/openapi.py +94 -0
  31. hdl/jupyfuncs/network/__init__.py +0 -0
  32. hdl/jupyfuncs/network/proxy.py +20 -0
  33. hdl/jupyfuncs/path/__init__.py +0 -0
  34. hdl/jupyfuncs/path/glob.py +285 -0
  35. hdl/jupyfuncs/path/strings.py +65 -0
  36. hdl/jupyfuncs/show/__init__.py +0 -0
  37. hdl/jupyfuncs/show/pbar.py +50 -0
  38. hdl/jupyfuncs/show/plot.py +259 -0
  39. hdl/jupyfuncs/utils/__init__.py +0 -0
  40. hdl/jupyfuncs/utils/wrappers.py +8 -0
  41. hdl/utils/llm/chat.py +4 -0
  42. {hjxdl-0.1.12.dist-info → hjxdl-0.1.14.dist-info}/METADATA +1 -1
  43. {hjxdl-0.1.12.dist-info → hjxdl-0.1.14.dist-info}/RECORD +45 -6
  44. {hjxdl-0.1.12.dist-info → hjxdl-0.1.14.dist-info}/WHEEL +1 -1
  45. {hjxdl-0.1.12.dist-info → hjxdl-0.1.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,548 @@
1
+ # Jupyter funcs
2
+ import os
3
+ import re
4
+ import itertools
5
+ from copy import deepcopy
6
+ # from collections import defaultdict
7
+
8
+ from rdkit import Chem
9
+ from rdkit.Chem.Draw import IPythonConsole
10
+ from rdkit.Chem.Draw.IPythonConsole import addMolToView
11
+ # from rdkit.Chem import rdDepictor
12
+ from rdkit.Chem.Draw import rdMolDraw2D
13
+ from IPython.display import SVG
14
+ from rdkit.Chem import AllChem
15
+ from ipywidgets import (
16
+ interact,
17
+ # interactive,
18
+ fixed,
19
+ )
20
+ from rdkit.Chem.rdRGroupDecomposition import (
21
+ RGroupDecomposition,
22
+ # RGroupDecompositionParameters,
23
+ # RGroupMatching,
24
+ # RGroupScore,
25
+ # RGroupLabels,
26
+ # RGroupCoreAlignment,
27
+ RGroupLabelling
28
+ )
29
+ import pandas as pd
30
+ from rdkit.Chem import PandasTools
31
+ from rdkit.Chem import Draw
32
+ from IPython.display import HTML
33
+ # from rdkit import rdBase
34
+ from IPython.display import display
35
+
36
+ from rdkit import Chem
37
+ from rdkit.Chem import rdmolops
38
+ from rdkit.Chem import Draw
39
+ from rdkit.Chem.Draw import IPythonConsole
40
+ from rdkit.Chem import rdRGroupDecomposition
41
+ from rdkit.Chem import rdqueries
42
+ from rdkit.Chem import rdDepictor
43
+ from rdkit.Chem.Draw import rdMolDraw2D
44
+ from rdkit.Chem.MolStandardize import rdMolStandardize
45
+ from rdkit import Geometry
46
+ rdDepictor.SetPreferCoordGen(True)
47
+ import pandas as pd
48
+ from PIL import Image as pilImage
49
+ from io import BytesIO
50
+ from IPython.display import SVG, Image
51
+ from ipywidgets import interact
52
+ import molvs as mv
53
+
54
+
55
# Notebook rendering defaults for RDKit molecules: SVG output, 450x350 canvas.
IPythonConsole.molSize = (450, 350)
IPythonConsole.ipython_useSVG = True

# Module-level substructure-match parameters with aromaticMatchesConjugated
# switched on.
params = Chem.SubstructMatchParameters()
params.aromaticMatchesConjugated = True

# Names exported by a star-import of this module.
__all__ = [
    'draw_mol', 'draw_confs',
    'show_decomp',
    'get_ids_folds',
    'show_pharmacophore',
    'mol_without_indices',
    'norm_colors',
    'drawmol_with_hi', 'draw_mols_surfs',
]
71
+
72
+
73
# Colour-blind-friendly palettes as 0-255 RGB triples, keyed by palette name.
COLORS = dict(
    # "Tol" colormap from https://davidmathlogic.com/colorblind
    tol=[
        (51, 34, 136), (17, 119, 51), (68, 170, 153), (136, 204, 238),
        (221, 204, 119), (204, 102, 119), (170, 68, 153), (136, 34, 85),
    ],
    # "IBM" colormap from https://davidmathlogic.com/colorblind
    ibm=[
        (100, 143, 255), (120, 94, 240), (220, 38, 127), (254, 97, 0),
        (255, 176, 0),
    ],
    # Okabe_Ito colormap from https://jfly.uni-koeln.de/color/
    okabe=[
        (230, 159, 0), (86, 180, 233), (0, 158, 115), (240, 228, 66),
        (0, 114, 178), (213, 94, 0), (204, 121, 167),
    ],
)
104
+
105
+
106
+ # def get_his_for_onemol(mol_sm, pat_sm):
107
+ # atom_ids = []
108
+ # bond_ids = []
109
+ # m = Chem.MolFromSmiles(mol_sm)
110
+ # pt = Chem.MolFromSmiles(pat_sm)
111
+ # hi_id = m.GetSubstructMatches(pt, params=params)
112
+ # if len(m.GetSubstructMatches(pt, params=params)) == 0:
113
+ # Chem.Kekulize(m)
114
+ # hi_id = m.GetSubstructMatches(pt, params=params)
115
+ # if len(hi_id) == 0:
116
+ # return
117
+ # atom_ids.append(itertools.chain.from_iterable(hi_id))
118
+
119
+
120
+ # def get_match_his(mol_sms, pat_sms):
121
+ # highlightatoms = defaultdict(list)
122
+ # highlightbonds = defaultdict(list)
123
+ # for i in range(len(df)):
124
+ # try:
125
+ # mm = df.iloc[i, 0][2:-2]
126
+ # pm = df.iloc[i, 2]
127
+ # m = Chem.MolFromSmiles(mm)
128
+ # pt = Chem.MolFromSmiles(pm)
129
+ # hi_id = m.GetSubstructMatches(pt, params=params)
130
+ # if len(m.GetSubstructMatches(pt, params=params)) == 0:
131
+ # Chem.Kekulize(m)
132
+ # hi_id = m.GetSubstructMatches(pt, params=params)
133
+ # mols.append(m)
134
+ # hi_ids.append(hi_id)
135
+ # except:
136
+ # pass
137
+ # pass
138
+
139
+
140
def norm_colors(colors=None):
    """Return a copy of a palette mapping with every channel scaled to 0-1.

    Args:
        colors: Mapping of palette name to a sequence of colour tuples whose
            channels are on the 0-255 scale. When omitted (or ``None``) the
            module-level ``COLORS`` palettes are used.

    Returns:
        A new dict with the same keys; each value is a list of tuples with
        every channel divided by 255. The input mapping is left untouched.
    """
    # Honour the caller's palettes; fall back to COLORS only when none given.
    if colors is None:
        colors = COLORS
    return {
        name: [tuple(channel / 255 for channel in rgb) for rgb in palette]
        for name, palette in colors.items()
    }
146
+
147
+
148
def drawmol_with_hi(
    mol,
    legend,
    atom_hi_dict,
    bond_hi_dict,
    atomrads_dict,
    widthmults_dict,
    width=350,
    height=200,
):
    """Draw ``mol`` with highlights on a Cairo canvas and return its text.

    The four highlight arguments are forwarded unchanged, in order, to
    ``DrawMoleculeWithHighlights`` after ``mol`` and ``legend``.

    Returns:
        Whatever ``GetDrawingText()`` yields for the finished
        ``width`` x ``height`` drawing (presumably PNG data).
    """
    canvas = rdMolDraw2D.MolDraw2DCairo(width, height)
    canvas.ClearDrawing()
    highlight_args = (
        atom_hi_dict,
        bond_hi_dict,
        atomrads_dict,
        widthmults_dict,
    )
    canvas.DrawMoleculeWithHighlights(mol, legend, *highlight_args)
    canvas.FinishDrawing()
    return canvas.GetDrawingText()
170
+
171
+
172
def show_atom_number(mol, label='atomNote'):
    """Return a deep copy of ``mol`` whose atoms carry their index as a prop.

    Each atom of the copy gets the property ``label`` set to the string form
    of ``atom.GetIdx()``; the molecule passed in is not modified.
    """
    annotated = deepcopy(mol)
    for atom in annotated.GetAtoms():
        index_text = str(atom.GetIdx())
        atom.SetProp(label, index_text)
    return annotated
177
+
178
+
179
def moltosvg(mol, molSize=(500, 500), kekulize=True):
    """Render ``mol`` to SVG markup with the ``svg:`` prefixes stripped.

    Args:
        mol: Molecule handed to ``MolDraw2DSVG.DrawMolecule``.
        molSize: Indexable pair giving canvas width and height.
        kekulize: Accepted for call compatibility; this body does not use it.

    Returns:
        The drawing text with every ``'svg:'`` substring removed.
    """
    width, height = molSize[0], molSize[1]
    canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
    canvas.DrawMolecule(mol)
    canvas.FinishDrawing()
    markup = canvas.GetDrawingText()
    return markup.replace('svg:', '')
186
+
187
+
188
def draw_mol(mol):
    """Return an IPython ``SVG`` of ``mol`` with atom indices annotated."""
    numbered = show_atom_number(mol)
    markup = moltosvg(numbered)
    return SVG(markup)
190
+
191
+
192
def drawit(m, p, confId=-1):
    """Load one conformer of ``m`` into viewer ``p`` and show it.

    The viewer is cleared, given the mol block of conformer ``confId`` as an
    ``'sdf'`` model, styled as sticks on a ``0xeeeeee`` background and zoomed
    to fit.

    Returns:
        The result of ``p.show()``.
    """
    mol_block = Chem.MolToMolBlock(m, confId=confId)
    p.removeAllModels()
    p.addModel(mol_block, 'sdf')
    stick_style = {'stick': {}}
    p.setStyle(stick_style)
    p.setBackgroundColor('0xeeeeee')
    p.zoomTo()
    return p.show()
200
+
201
+
202
def draw_confs(m):
    """Build an interactive widget to step through the conformers of ``m``.

    A 500x500 py3Dmol view is created and ``drawit`` is wrapped with
    ``interact``; ``confId`` ranges from 0 to ``m.GetNumConformers() - 1``.

    Returns:
        The object returned by ``ipywidgets.interact``.
    """
    import py3Dmol

    viewer = py3Dmol.view(width=500, height=500)
    last_conf = m.GetNumConformers() - 1
    return interact(
        drawit,
        m=fixed(m),
        p=fixed(viewer),
        confId=(0, last_conf),
    )
209
+
210
+
211
def do_decomp(mols, cores, options):
    """Run an R-group decomposition of ``mols`` against ``cores``.

    Note that ``options.rgroupLabelling`` is overwritten with
    ``RGroupLabelling.AtomMap`` on the object passed in.

    Returns:
        The processed ``RGroupDecomposition`` instance.
    """
    options.rgroupLabelling = RGroupLabelling.AtomMap
    decomposition = RGroupDecomposition(cores, options)
    for molecule in mols:
        decomposition.Add(molecule)
    decomposition.Process()
    return decomposition
218
+
219
+
220
def show_decomp(mols, cores, options, item=False):
    """Decompose ``mols`` on ``cores`` and present the R groups.

    Args:
        mols: Molecules to decompose.
        cores: Core molecules; ``cores[0]`` is shown in the table output.
        options: Decomposition parameters passed on to ``do_decomp``.
        item: When true, return a flat string; otherwise an HTML table.

    Returns:
        If ``item`` is true, a space-joined string of ``group:SMILES`` entries
        over all rows. Otherwise an ``HTML`` object rendering a DataFrame of
        the R-group columns plus ``'mol'`` and ``'input core'`` columns.
    """
    decomp = do_decomp(mols, cores, options)

    if not item:
        table = decomp.GetRGroupsAsColumns()
        table['mol'] = mols
        table['input core'] = cores[0]
        frame = pd.DataFrame(table)
        PandasTools.ChangeMoleculeRendering(frame)
        return HTML(frame.to_html())

    labelled = []
    for row in decomp.GetRGroupsAsRows():
        for group in row:
            group_smiles = Chem.MolToSmiles(row[group])
            labelled.append('{}:{}'.format(group, group_smiles))
    return ' '.join(labelled)
238
+
239
+
240
def get_ids_folds(id_list, num_folds, need_shuffle=False):
    """Split ids into ``num_folds`` (train, validation) cross-validation pairs.

    Every id lands in exactly one validation block. When the number of ids is
    not a multiple of ``num_folds`` the first ``len(id_list) % num_folds``
    blocks receive one extra id, so block sizes differ by at most one.

    Args:
        id_list: Sequence of ids to partition.
        num_folds: Number of folds; must not exceed ``len(id_list)``.
        need_shuffle: Shuffle a copy of the ids (with ``random.shuffle``)
            before splitting. The caller's ``id_list`` is never reordered.

    Returns:
        A list of ``num_folds`` tuples ``(train_ids, validation_ids)``, both
        plain lists; ``train_ids`` is the concatenation of all other blocks in
        their original order.

    Raises:
        AssertionError: If there are fewer ids than folds.
    """
    # Work on a copy so shuffling cannot leak out to the caller.
    ids = list(id_list)
    if need_shuffle:
        from random import shuffle
        shuffle(ids)

    num_ids = len(ids)
    assert num_ids >= num_folds, "need at least one id per fold"

    base_size, remainder = divmod(num_ids, num_folds)

    blocks = []
    start = 0
    for fold in range(num_folds):
        # Spread leftover ids over the leading folds instead of dropping them.
        end = start + base_size + (1 if fold < remainder else 0)
        blocks.append(ids[start:end])
        start = end

    return [
        (
            list(itertools.chain.from_iterable(
                blocks[:fold] + blocks[fold + 1:]
            )),
            blocks[fold],
        )
        for fold in range(num_folds)
    ]
267
+
268
+
269
# Feature families shown by default in show_pharmacophore.
keep = ["Donor", "Acceptor", "Aromatic", "Hydrophobe", "LumpedHydrophobe"]


def _default_fdef_path():
    """Locate the bundled ``defined_BaseFeatures.fdef`` next to this package.

    The historical default looked one directory above this module; the data
    directory may instead sit two levels up, so both spots are probed and the
    first existing file wins. If neither exists the historical path is
    returned unchanged.
    """
    here = os.path.dirname(__file__)
    candidates = [
        os.path.join(here, "..", "datasets", 'defined_BaseFeatures.fdef'),
        os.path.join(
            here, "..", "..", "datasets", 'defined_BaseFeatures.fdef'
        ),
    ]
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    return candidates[0]


def show_pharmacophore(
    sdf_path,
    keep=keep,
    fdf_dir=_default_fdef_path()
):
    """Print and display the pharmacophore features of an SDF's first record.

    Args:
        sdf_path: Path of the SDF file; only its first molecule is used
            (the whole file is still read).
        keep: Feature family names to report.
        fdf_dir: Path of the feature-definition (``.fdef``) file used to
            build the feature factory. Defaults to the bundled file.

    Returns:
        None. For each kept feature one line is printed and one image of the
        molecule, with the feature's atoms highlighted, is displayed.
    """
    template_mol = list(Chem.SDMolSupplier(sdf_path))[0]
    factory = AllChem.BuildFeatureFactory(fdf_dir)
    kept_feats = [
        feat for feat in factory.GetFeaturesForMol(template_mol)
        if feat.GetFamily() in keep
    ]

    message = "pharamcophore index:{0}; feature:{1}; type:{2}; atom id:{3}"
    for index, feat in enumerate(kept_feats):
        atomids = feat.GetAtomIds()
        print(
            message.format(index, feat.GetFamily(), feat.GetType(), atomids)
        )
        display(
            Draw.MolToImage(
                template_mol,
                highlightAtoms=list(atomids),
                highlightColor=[0, 1, 0],
                useSVG=True
            )
        )
308
+
309
+
310
def mol_without_indices(
    mol_input: Chem.Mol,
    remove_indices=(),
    keep_properties=()
):
    """Rebuild ``mol_input`` atom by atom, leaving out the given atom indices.

    Each kept atom is recreated from its symbol, formal charge and explicit
    hydrogen count. Bonds between two kept atoms are copied with their bond
    type; bonds touching a removed atom are dropped. When such a dropped bond
    ends on a kept atom whose symbol is ``'N'``, that atom gains one explicit
    hydrogen per dropped bond.

    Symbols starting with ``'*'`` become ``'*'`` atoms carrying their atom-map
    number. Symbols of the form ``'R'`` or ``'R<digits>'`` become ``'*'``
    atoms labelled via ``dummyLabel``, ``_MolFileRLabel`` and
    ``molAtomMapNumber``. Element symbols that merely start with ``R`` (such
    as ``Ru``) are treated as ordinary atoms.

    Args:
        mol_input: Source molecule.
        remove_indices: Iterable of atom indices (in ``mol_input``) to omit.
        keep_properties: Names of atom properties to carry over. Only values
            that are ``str`` or ``int`` are set on the new atoms; other value
            types are skipped.

    Returns:
        A new ``Chem.Mol``; no sanitization is performed in this function.
    """
    removed = set(remove_indices)  # O(1) membership in the loops below

    # Snapshot atoms as (symbol, formal charge, explicit Hs, props).
    atom_list = []
    for atom in mol_input.GetAtoms():
        atom_props = atom.GetPropsAsDict()
        props = {
            name: atom_props[name]
            for name in keep_properties
            if name in atom_props
        }
        symbol = atom.GetSymbol()

        if symbol.startswith('*'):
            atom_symbol = '*'
            props['molAtomMapNumber'] = atom.GetAtomMapNum()
        elif symbol == 'R' or (
            symbol.startswith('R') and symbol[1:].isdecimal()
        ):
            # R-group label: the number comes from the label itself, or from
            # the atom-map number when the label is a bare 'R'.
            atom_symbol = '*'
            if len(symbol) > 1:
                atom_map_num = int(symbol[1:])
            else:
                atom_map_num = atom.GetAtomMapNum()
            props['dummyLabel'] = 'R' + str(atom_map_num)
            props['_MolFileRLabel'] = str(atom_map_num)
            props['molAtomMapNumber'] = atom_map_num
        else:
            atom_symbol = symbol

        atom_list.append(
            (
                atom_symbol,
                atom.GetFormalCharge(),
                atom.GetNumExplicitHs(),
                props
            )
        )

    bond_list = [
        (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), bond.GetBondType())
        for bond in mol_input.GetBonds()
    ]

    mol = Chem.RWMol(Chem.Mol())

    # Add the kept atoms and remember old index -> new index.
    idx_map = {}
    new_idx = 0
    for atom_index, (sym, charge, num_hs, props) in enumerate(atom_list):
        if atom_index in removed:
            continue
        atom = Chem.Atom(sym)
        atom.SetFormalCharge(charge)
        atom.SetNumExplicitHs(num_hs)
        for property_name, value in props.items():
            if isinstance(value, str):
                atom.SetProp(property_name, value)
            elif isinstance(value, int):
                atom.SetIntProp(property_name, value)
        mol.AddAtom(atom)
        idx_map[atom_index] = new_idx
        new_idx += 1

    for begin, end, bond_type in bond_list:
        begin_removed = begin in removed
        end_removed = end in removed

        if not begin_removed and not end_removed:
            mol.AddBond(idx_map[begin], idx_map[end], bond_type)
            continue
        if begin_removed and end_removed:
            continue

        # Exactly one end was removed: a kept nitrogen gets an explicit H in
        # place of the bond that was cut.
        keep_index = end if begin_removed else begin
        if atom_list[keep_index][0] == 'N':
            kept_atom = mol.GetAtomWithIdx(idx_map[keep_index])
            old_num_explicit_Hs = kept_atom.GetNumExplicitHs()
            mol.GetAtomWithIdx(idx_map[keep_index]).SetNumExplicitHs(
                old_num_explicit_Hs + 1
            )

    return Chem.Mol(mol)
410
+
411
+
412
def draw_mols_surfs(
    mols,
    width=400,
    height=400,
    surface=True,
    surface_opacity=0.5
):
    """Show several molecules in one py3Dmol view, optionally with a surface.

    Args:
        mols: Iterable of molecules, each added via ``addMolToView``.
        width: View width passed to ``py3Dmol.view``.
        height: View height passed to ``py3Dmol.view``.
        surface: When true, add one ``py3Dmol.SAS`` surface after all
            molecules have been added.
        surface_opacity: Value used for the surface's ``'opacity'`` option.

    Returns:
        The result of ``view.show()``.
    """
    import py3Dmol

    viewer = py3Dmol.view(width=width, height=height)
    viewer.setBackgroundColor('0xeeeeee')
    viewer.removeAllModels()

    for molecule in mols:
        addMolToView(molecule, viewer)

    if surface:
        surface_style = {'opacity': surface_opacity}
        viewer.addSurface(py3Dmol.SAS, surface_style)

    viewer.zoomTo()
    return viewer.show()
433
+
434
+
435
def draw_rxn(
    rxn_smiles,
    use_smiles: bool = True,
):
    """Display a reaction as a 2000x500 image in the notebook.

    Args:
        rxn_smiles: Reaction string handed to ``AllChem.ReactionFromSmarts``.
        use_smiles: Forwarded as ``useSmiles``; when true the string is
            parsed as reaction SMILES rather than SMARTS.

    Returns:
        None; the image is shown through ``IPython.display.display``.
    """
    rxn = AllChem.ReactionFromSmarts(rxn_smiles, useSmiles=use_smiles)
    d2d = Draw.MolDraw2DCairo(2000, 500)
    d2d.DrawReaction(rxn, highlightByReactant=True)
    # Finalise the canvas before reading it back, as drawmol_with_hi does;
    # without this the drawing text may be empty or incomplete.
    d2d.FinishDrawing()
    png = d2d.GetDrawingText()
    display(Image(png))
444
+
445
+
446
def react(rxn_smarts, reagents):
    """Apply a reaction SMARTS to reagent SMILES and return the products.

    Args:
        rxn_smarts: Reaction SMARTS string.
        reagents: Iterable of reagent SMILES, in reactant-template order.

    Returns:
        The result of ``RunReactants``. If anything raises, the exception is
        printed and an empty list is returned instead.
    """
    try:
        reaction = AllChem.ReactionFromSmarts(rxn_smarts)
        reagent_mols = [Chem.MolFromSmiles(smiles) for smiles in reagents]
        return reaction.RunReactants(reagent_mols)
    except Exception as err:
        print(err)
        return []
457
+
458
+
459
def match_pattern(mol, patt):
    """Return ``mol.HasSubstructMatch(patt)``, or ``False`` for a falsy mol."""
    if not mol:
        return False
    return mol.HasSubstructMatch(patt)
464
+
465
+
466
def split_rxn_smiles(smi):
    """Split a ``reactants>agents>products`` string into two SMILES strings.

    Reactants and agents are merged with ``'.'``; an empty field is skipped,
    so the merged string never starts or ends with a stray ``'.'``.

    Args:
        smi: Reaction SMILES containing exactly two ``'>'`` separators.

    Returns:
        ``(reagents, products)``. If ``smi`` is not a string with exactly
        three ``'>'``-separated fields, the error is printed and
        ``('', '')`` is returned.
    """
    try:
        reactants, agents, products = smi.split('>')
    except (AttributeError, TypeError, ValueError) as e:
        # Not a string, or not exactly three fields: keep the best-effort
        # contract of reporting and returning empty strings.
        print(e)
        return '', ''

    reagents = '.'.join(part for part in (reactants, agents) if part)
    return reagents, products
477
+
478
+
479
def find_mprod(rxn_smi):
    """Find the acid/amine pair whose amide coupling gives a listed product.

    Reactants matching a carboxylic-acid pattern and reactants matching an
    amine pattern are paired; each pair is run through an amide-formation
    reaction SMARTS, and the first generated product whose InChIKey equals
    that of a product in ``rxn_smi`` wins. All ``'@'`` characters are
    stripped from the SMILES before use.

    Args:
        rxn_smi: Reaction SMILES of the form ``reactants>agents>products``.

    Returns:
        ``(acid_smiles, amine_smiles, product_smiles)`` for the first match
        (all with ``'@'`` removed), or ``None`` when nothing matches.
    """
    # ref: https://github.com/LiamWilbraham/uspto-analysis/blob/master/reaction-stats-uspto.ipynb
    rxn_smarts = '[C:1](=[O:2])-[OD1].[N!H0:3]>>[C:1](=[O:2])[N:3]'
    patt_acid = Chem.MolFromSmarts('[CX3](=O)[OX2H1]')
    # ammonia or primary/secondary amine
    patt_amine = Chem.MolFromSmarts('[N;H3,H2,H1]')

    # Split once and reuse both halves.
    reagent_smi, product_smi = split_rxn_smiles(rxn_smi)
    reactants = reagent_smi.split('.')

    cooh = [
        r.replace('@', '')
        for r in reactants
        if match_pattern(Chem.MolFromSmiles(r), patt_acid)
    ]
    amine = [
        r.replace('@', '')
        for r in reactants
        if match_pattern(Chem.MolFromSmiles(r), patt_amine)
    ]
    if not cooh or not amine:
        return None

    # InChIKeys of the recorded products, computed once. Products that fail
    # to parse are skipped rather than passed on as None.
    product_keys = []
    for smiles in product_smi.split('.'):
        flat_smiles = smiles.replace('@', '')
        product_mol = Chem.MolFromSmiles(flat_smiles)
        if product_mol is None:
            continue
        product_keys.append((flat_smiles, Chem.MolToInchiKey(product_mol)))
    if not product_keys:
        return None

    for cooh_i, amine_i in itertools.product(cooh, amine):
        for product_set in react(rxn_smarts, (cooh_i, amine_i)):
            candidate_key = Chem.MolToInchiKey(product_set[0])
            for flat_smiles, key in product_keys:
                if candidate_key == key:
                    return cooh_i, amine_i, flat_smiles
    return None
516
+
517
+
518
def get_largest_mol(smiles, to_smiles=False):
    """Return the fragment of ``smiles`` with the most atoms.

    Args:
        smiles: Input SMILES, possibly with several ``'.'``-separated parts.
        to_smiles: When true, return ``molvs.standardize_smiles`` applied to
            the fragment's SMILES instead of the molecule object.

    Returns:
        ``None`` if the SMILES cannot be parsed; otherwise the largest
        fragment as a molecule, or as a standardized SMILES string.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    fragments = rdmolops.GetMolFrags(mol, asMols=True)
    biggest = max(
        fragments,
        default=mol,
        key=lambda frag: frag.GetNumAtoms(),
    )
    if not to_smiles:
        return biggest
    return mv.standardize_smiles(Chem.MolToSmiles(biggest))
527
+
528
+
529
def standardize_tautomer(mol, max_tautomers=1000):
    """Return the canonical tautomer of ``mol``.

    Args:
        mol: Molecule to canonicalize.
        max_tautomers: Value assigned to ``CleanupParameters.maxTautomers``
            before the enumerator is built.
    """
    cleanup = rdMolStandardize.CleanupParameters()
    cleanup.maxTautomers = max_tautomers
    return rdMolStandardize.TautomerEnumerator(cleanup).Canonicalize(mol)
535
+
536
+
537
def reorder_tautomers(m):
    """List the tautomers of ``m`` with the canonical one first.

    Args:
        m: Molecule whose tautomers are enumerated.

    Returns:
        A list starting with the canonical tautomer, followed by every
        enumerated tautomer whose SMILES differs from the canonical SMILES,
        ordered by SMILES string.
    """
    enumerator = rdMolStandardize.TautomerEnumerator()
    canon = enumerator.Canonicalize(m)
    csmi = Chem.MolToSmiles(canon)

    others = [
        (Chem.MolToSmiles(taut), taut)
        for taut in enumerator.Enumerate(m)
    ]
    others = [pair for pair in others if pair[0] != csmi]
    # Sort on the SMILES only: comparing whole tuples would fall back to
    # comparing molecule objects whenever two SMILES are equal.
    others.sort(key=lambda pair: pair[0])

    return [canon] + [taut for _, taut in others]