hjxdl 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. hdl/_version.py +2 -2
  2. hdl/datasets/city_code.json +2576 -0
  3. hdl/datasets/defined_BaseFeatures.fdef +236 -0
  4. hdl/datasets/las.tsv +0 -0
  5. hdl/datasets/route_template.json +113 -0
  6. hdl/datasets/vocab.txt +591 -0
  7. hdl/ju/__init__.py +0 -0
  8. hdl/ju/setup.py +55 -0
  9. hdl/jupyfuncs/__init__.py +0 -0
  10. hdl/jupyfuncs/chem/__init__.py +0 -0
  11. hdl/jupyfuncs/chem/mol.py +548 -0
  12. hdl/jupyfuncs/chem/norm.py +268 -0
  13. hdl/jupyfuncs/chem/pdb_ext.py +94 -0
  14. hdl/jupyfuncs/chem/scaffold.py +25 -0
  15. hdl/jupyfuncs/chem/shape.py +241 -0
  16. hdl/jupyfuncs/chem/tokenizers.py +2 -0
  17. hdl/jupyfuncs/dbtools/__init__.py +0 -0
  18. hdl/jupyfuncs/dbtools/pg.py +42 -0
  19. hdl/jupyfuncs/dbtools/query_info.py +150 -0
  20. hdl/jupyfuncs/dl/__init__.py +0 -0
  21. hdl/jupyfuncs/dl/cp.py +54 -0
  22. hdl/jupyfuncs/dl/dataframe.py +38 -0
  23. hdl/jupyfuncs/dl/fp.py +49 -0
  24. hdl/jupyfuncs/dl/list.py +20 -0
  25. hdl/jupyfuncs/dl/model_utils.py +97 -0
  26. hdl/jupyfuncs/dl/tensor.py +159 -0
  27. hdl/jupyfuncs/dl/uncs.py +112 -0
  28. hdl/jupyfuncs/llm/__init__.py +0 -0
  29. hdl/jupyfuncs/llm/extract.py +123 -0
  30. hdl/jupyfuncs/llm/openapi.py +94 -0
  31. hdl/jupyfuncs/network/__init__.py +0 -0
  32. hdl/jupyfuncs/network/proxy.py +20 -0
  33. hdl/jupyfuncs/path/__init__.py +0 -0
  34. hdl/jupyfuncs/path/glob.py +285 -0
  35. hdl/jupyfuncs/path/strings.py +65 -0
  36. hdl/jupyfuncs/show/__init__.py +0 -0
  37. hdl/jupyfuncs/show/pbar.py +50 -0
  38. hdl/jupyfuncs/show/plot.py +259 -0
  39. hdl/jupyfuncs/utils/__init__.py +0 -0
  40. hdl/jupyfuncs/utils/wrappers.py +8 -0
  41. hdl/utils/weather/__init__.py +0 -0
  42. hdl/utils/weather/weather.py +68 -0
  43. {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/METADATA +1 -1
  44. {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/RECORD +46 -5
  45. {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/WHEEL +1 -1
  46. {hjxdl-0.1.13.dist-info → hjxdl-0.1.15.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,268 @@
1
+ import functools
2
+ import logging
3
+
4
+ from rdkit import Chem
5
+ from rdkit.Chem import AllChem
6
+
7
+
8
+ log = logging.getLogger(__name__)
9
+
10
+
11
+ __all__ = [
12
+ "Normalizer",
13
+ "Normalization",
14
+ "NORMALIZATIONS",
15
+ ]
16
+
17
+
18
def memoized_property(fget):
    """Turn ``fget`` into a read-only property whose value is computed once.

    On first access the getter is called and its result is stored on the
    instance under the attribute ``"_" + fget.__name__``; every later access
    returns that stored attribute instead of calling the getter again.
    """
    cache_slot = "_" + fget.__name__

    @functools.wraps(fget)
    def cached_getter(instance):
        # A fresh sentinel distinguishes "not cached yet" from any real value,
        # including None.
        sentinel = object()
        if getattr(instance, cache_slot, sentinel) is sentinel:
            setattr(instance, cache_slot, fget(instance))
        return getattr(instance, cache_slot)

    return property(cached_getter)
29
+
30
+
31
class Normalization(object):
    """A named normalization rule expressed as a reaction SMARTS string.

    The SMARTS text is kept as given; the reaction object built from it is
    created lazily on first access to :attr:`transform` and then reused.
    """

    def __init__(self, name, transform):
        """
        :param string name: A name for this Normalization
        :param string transform: Reaction SMARTS to define the transformation.
        """
        log.debug("Initializing Normalization: %s", name)
        self.transform_str = transform
        self.name = name

    @memoized_property
    def transform(self):
        # Built on demand so that constructing the rule list stays cheap.
        log.debug("Loading Normalization transform: %s", self.name)
        reaction = AllChem.ReactionFromSmarts(self.transform_str)
        return reaction

    def __repr__(self):
        template = "Normalization({!r}, {!r})"
        return template.format(self.name, self.transform_str)

    def __str__(self):
        return self.name
53
+
54
+
55
# Default rule list used by Normalizer when no custom list is passed.
# Each entry pairs a display name with a reaction SMARTS string of the form
# "reactant>>product". Normalizer walks the list front to back, so the
# position of an entry decides when it is tried.
# Names are labels for logging/repr only and are not unique: several names
# (e.g. "Nitro to N+(O-)=O", "Recombine 1,3-separated charges") appear more
# than once with different SMARTS.
NORMALIZATIONS = [
    Normalization(
        "Nitro to N+(O-)=O",
        "[*:1][N,P,As,Sb:2](=[O,S,Se,Te:3])=[O,S,Se,Te:4]>>[*:1][*+1:2]([*-1:3])=[*:4]",
    ),
    Normalization(
        "Sulfone to S(=O)(=O)", "[S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3])"
    ),
    Normalization("Pyridine oxide to n+O-", "[n:1]=[O:2]>>[n+:1][O-:2]"),
    Normalization(
        "Azide to N=N+=N-", "[*,H:1][N:2]=[N:3]#[N:4]>>[*,H:1][N:2]=[N+:3]=[N-:4]"
    ),
    Normalization("Diazo/azo to =N+=N-", "[*:1]=[N:2]#[N:3]>>[*:1]=[N+:2]=[N-:3]"),
    Normalization(
        "Sulfoxide to -S+(O-)-",
        "[!O:1][S+0;X3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]",
    ),
    Normalization(
        "Phosphate to P(O-)=O",
        "[O,S,Se,Te;-1:1][P+;D4:2][O,S,Se,Te;-1:3]>>[*+0:1]=[P+0;D5:2][*-1:3]",
    ),
    Normalization(
        "Amidinium to C(=NH2+)NH2",
        "[C,S;X3+1:1]([NX3:2])[NX3!H0:3]>>[*+0:1]([N:2])=[N+:3]",
    ),
    Normalization(
        "Normalize hydrazine-diazonium",
        "[CX4:1][NX3H:2]-[NX3H:3][CX4:4][NX2+:5]#[NX1:6]>>[CX4:1][NH0:2]=[NH+:3][C:4][N+0:5]=[NH:6]",
    ),
    Normalization(
        "Recombine 1,3-separated charges",
        "[N,P,As,Sb,O,S,Se,Te;-1:1]-[A:2]=[N,P,As,Sb,O,S,Se,Te;+1:3]>>[*-0:1]=[*:2]-[*+0:3]",
    ),
    Normalization(
        "Recombine 1,3-separated charges",
        "[n,o,p,s;-1:1]:[a:2]=[N,O,P,S;+1:3]>>[*-0:1]:[*:2]-[*+0:3]",
    ),
    Normalization(
        "Recombine 1,3-separated charges",
        "[N,O,P,S;-1:1]-[a:2]:[n,o,p,s;+1:3]>>[*-0:1]=[*:2]:[*+0:3]",
    ),
    Normalization(
        "Recombine 1,5-separated charges",
        "[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[A:3]-[A:4]=[N,P,As,Sb,O,S,Se,Te;+1:5]>>[*-0:1]=[*:2]-[*:3]=[*:4]-[*+0:5]",
    ),
    Normalization(
        "Recombine 1,5-separated charges",
        "[n,o,p,s;-1:1]:[a:2]:[a:3]:[c:4]=[N,O,P,S;+1:5]>>[*-0:1]:[*:2]:[*:3]:[c:4]-[*+0:5]",
    ),
    Normalization(
        "Recombine 1,5-separated charges",
        "[N,O,P,S;-1:1]-[c:2]:[a:3]:[a:4]:[n,o,p,s;+1:5]>>[*-0:1]=[c:2]:[*:3]:[*:4]:[*+0:5]",
    ),
    Normalization(
        "Normalize 1,3 conjugated cation",
        "[N,O;+0!H0:1]-[A:2]=[N!$(*[O-]),O;+1H0:3]>>[*+1:1]=[*:2]-[*+0:3]",
    ),
    Normalization(
        "Normalize 1,3 conjugated cation",
        "[n;+0!H0:1]:[c:2]=[N!$(*[O-]),O;+1H0:3]>>[*+1:1]:[*:2]-[*+0:3]",
    ),
    Normalization(
        "Normalize 1,3 conjugated cation",
        "[N,O;+0!H0:1]-[c:2]:[n!$(*[O-]),o;+1H0:3]>>[*+1:1]=[*:2]:[*+0:3]",
    ),
    Normalization(
        "Normalize 1,5 conjugated cation",
        "[N,O;+0!H0:1]-[A:2]=[A:3]-[A:4]=[N!$(*[O-]),O;+1H0:5]>>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]",
    ),
    Normalization(
        "Normalize 1,5 conjugated cation",
        "[n;+0!H0:1]:[a:2]:[a:3]:[c:4]=[N!$(*[O-]),O;+1H0:5]>>[n+1:1]:[*:2]:[*:3]:[*:4]-[*+0:5]",
    ),
    Normalization(
        "Normalize 1,5 conjugated cation",
        "[N,O;+0!H0:1]-[c:2]:[a:3]:[a:4]:[n!$(*[O-]),o;+1H0:5]>>[*+1:1]=[c:2]:[*:3]:[*:4]:[*+0:5]",
    ),
    Normalization(
        "Normalize 1,5 conjugated cation",
        "[n;+0!H0:1]1:[a:2]:[a:3]:[a:4]:[n!$(*[O-]);+1H0:5]1>>[n+1:1]1:[*:2]:[*:3]:[*:4]:[n+0:5]1",
    ),
    Normalization(
        "Normalize 1,5 conjugated cation",
        "[n;+0!H0:1]:[a:2]:[a:3]:[a:4]:[n!$(*[O-]);+1H0:5]>>[n+1:1]:[*:2]:[*:3]:[*:4]:[n+0:5]",
    ),
    Normalization(
        "Charge normalization",
        "[F,Cl,Br,I,At;-1:1]=[O:2]>>[*-0:1][O-:2]"),
    Normalization(
        "Charge recombination", "[N,P,As,Sb;-1:1]=[C+;v3:2]>>[*+0:1]#[C+0:2]"
    ),
    Normalization(
        "Nitro to N+(O-)=O",
        "[N;X3:1](=[O:2])=[O:3]>>[*+1:1]([*-1:2])=[*:3]"),
    Normalization(
        "Diazonium N",
        "[*:1]-[N;X2:2]#[N;X1:3]>>[*:1]-[*+1:2]#[*:3]",
    ),
    Normalization(
        "Quaternary N",
        "[N;X4;v4;+0:1]>>[*+1:1]",
    ),
    Normalization(
        "Trivalent O",
        "[*:1]=[O;X2;v3;+0:2]-[#6:3]>>[*:1]=[*+1:2]-[*:3]",
    ),
    Normalization(
        "Sulfoxide to -S+(O-)",
        "[!O:1][S+0;D3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]",
    ),
    Normalization(
        "Sulfoxide to -S+(O-) 2",
        "[!O:1][SH1+1;D3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]",
    ),
    Normalization(
        "Trivalent S",
        "[O:1]=[S;D2;+0:2]-[#6:3]>>[*:1]=[*+1:2]-[*:3]",
    ),

    Normalization(
        "Bad amide tautomer1",
        "[C:1]([OH1;D1:2])=;!@[NH1:3]>>[C:1](=[OH0:2])-[NH2:3]",
    ),
    Normalization(
        "Bad amide tautomer2",
        "[C:1]([OH1;D1:2])=;!@[NH0:3]>>[C:1](=[OH0:2])-[NH1:3]",
    ),
    Normalization(
        "Halogen with no neighbors", "[F,Cl,Br,I;X0;+0:1]>>[*-1:1]",
    ),
    Normalization(
        "Odd pyridine/pyridazine oxide structure",
        "[C,N;-;D2,D3:1]-[N+2;D3:2]-[O-;D1:3]>>[*-0:1]=[*+1:2]-[*-:3]",
    ),
    Normalization(
        "qunimade2",
        "[n&H0:1][n&H1:2][n&H1,c;R2:3][c&H1,n&H1:4][c,n&H1:5](=[S,N,O:7])[n&H1:6]>>[n&H0:1][n&H1:2][n&H0,c;R2:3][c&H1,n&H0:4][c,n&H0:5]([S,N,O:7])[n&H0:6]"
    ),
    Normalization(
        "qunimade",
        "[c,n&H0,n&H1:2][n&H0,n&H1,c:3][c,n&H0,n&H1:4][c,n&H0,n&H1:5](=[S,N,O:1])>>[c,n&H0:2][n&H0,c:3][c,n&H0:4][c,n&H0:5]([S,N,O:1])"
    ),
]
198
+
199
+
200
class Normalizer(object):
    """Apply a series of :class:`Normalization` transforms to a molecule.

    The transforms are tried once each, in list order, on every fragment of
    the molecule. A single transform is applied repeatedly (at most 20 rounds)
    until it no longer produces a product before moving on to the next one.
    There is no restart from the first transform after a later one fires.
    """

    def __init__(self, normalizations=NORMALIZATIONS):
        """Initialize a Normalizer with an optional custom list of transforms.

        :param normalizations: A list of :class:`Normalization` transforms to
            apply, in order. Defaults to the module-level ``NORMALIZATIONS``.
        """
        log.debug("Initializing Normalizer")
        self.normalizations = normalizations

    def __call__(self, mol):
        """Calling a Normalizer instance like a function is the same as calling its normalize(mol) method."""
        return self.normalize(mol)

    def normalize(self, mol):
        """Normalize every fragment of ``mol`` and recombine the results.

        :param mol: The molecule to normalize.
        :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
        :return: A new, sanitized molecule built from the normalized
            fragments. A molecule without any fragments is returned as a
            plain copy.
        :rtype: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
        """
        log.debug("Running Normalizer")
        # Normalize each fragment separately to get around quirky RunReactants behaviour
        fragments = [
            self._normalize_fragment(fragment)
            for fragment in Chem.GetMolFrags(mol, asMols=True)
        ]
        if not fragments:
            # Nothing to normalize; previously this raised IndexError on pop().
            return Chem.Mol(mol)
        # Join normalized fragments into a single molecule again. The last
        # fragment is used as the seed, matching the historical atom order.
        outmol = fragments.pop()
        for fragment in fragments:
            outmol = Chem.CombineMols(outmol, fragment)
        Chem.SanitizeMol(outmol)
        return outmol

    def _normalize_fragment(self, mol):
        """Run each configured transform once, in order, over one fragment."""
        for normalization in self.normalizations:
            product = self._apply_transform(mol, normalization.transform)
            # None means the rule did not apply; keep the current molecule.
            if product is not None:
                mol = product
        return mol

    def _apply_transform(self, mol, rule):
        """Repeatedly apply a normalization transform until it stops firing.

        A rule can yield several products per round. Products that fail
        sanitization are discarded; the rest are de-duplicated by isomeric
        SMILES and fed into the next round. At most 20 rounds are run.

        :param mol: The molecule (single fragment) to transform.
        :param rule: Reaction object exposing ``RunReactants``.
        :return: ``None`` if the rule never produced a valid product,
            otherwise the first product (sorted alphabetically by SMILES) of
            the last round that produced any.
        """
        mols = [mol]
        for n in range(20):
            products = {}
            for current in mols:
                for outcome in rule.RunReactants((current,)):
                    product = outcome[0]
                    if Chem.SanitizeMol(product, catchErrors=True) == 0:
                        smiles = Chem.MolToSmiles(product, isomericSmiles=True)
                        products[smiles] = product
            if not products:
                # If n == 0, the rule was not applicable and we return None
                return mols[0] if n > 0 else None
            mols = [products[s] for s in sorted(products)]
        # The rule was still firing when the round limit was hit. Return the
        # latest result instead of implicitly returning None, which made the
        # caller throw away every change made so far.
        return mols[0]
@@ -0,0 +1,94 @@
1
+ # Extract ligand and pdb
2
+
3
+ import sys
4
+ from prody import *
5
+ from rdkit import Chem
6
+ from rdkit.Chem import AllChem
7
+ from io import StringIO
8
+ import pypdb
9
+
10
+
11
+ __all__ = [
12
+ 'get_pdb_components',
13
+ 'process_ligand',
14
+ 'write_pdb',
15
+ 'write_sdf'
16
+ ]
17
+
18
def get_pdb_components(pdb_id):
    """
    Parse a protein-ligand structure and split it into two selections.

    :param pdb_id: value handed to ``parsePDB``
    :return: tuple ``(protein, ligand)`` where ``protein`` is the
        ``'protein'`` selection and ``ligand`` is everything that is neither
        protein nor water
    """
    structure = parsePDB(pdb_id)
    protein_part = structure.select('protein')
    ligand_part = structure.select('not protein and not water')
    return protein_part, ligand_part
28
+
29
+
30
def process_ligand(ligand, res_name):
    """
    Build an RDKit molecule with bond orders for one ligand residue.

    The atoms named ``res_name`` are selected from ``ligand`` and written as a
    PDB block into an in-memory buffer. A template molecule is created from
    the SMILES that pypdb reports for ``res_name``, and its bond orders are
    copied onto the molecule read back from the PDB block.

    :param ligand: ligand as generated by prody
    :param res_name: residue name of ligand to extract
    :return: molecule with bond orders assigned
    """
    pdb_buffer = StringIO()
    residue_atoms = ligand.select(f"resname {res_name}")
    # NOTE(review): the nested keys below assume the response layout of the
    # pypdb version this was written against — confirm.
    description = pypdb.describe_chemical(f"{res_name}")
    ligand_smiles = description["describeHet"]["ligandInfo"]["ligand"]["smiles"]
    template = AllChem.MolFromSmiles(ligand_smiles)
    writePDBStream(pdb_buffer, residue_atoms)
    pdb_mol = AllChem.MolFromPDBBlock(pdb_buffer.getvalue())
    return AllChem.AssignBondOrdersFromTemplate(template, pdb_mol)
53
+
54
+
55
def write_pdb(protein, pdb_name):
    """
    Save a prody protein as ``<pdb_name>_protein.pdb`` and report the path.

    :param protein: protein object from prody
    :param pdb_name: base name for the pdb file
    :return: None
    """
    target = f"{pdb_name}_protein.pdb"
    writePDB(target, protein)
    print(f"wrote {target}")
65
+
66
+
67
def write_sdf(new_mol, pdb_name, res_name):
    """
    Write an RDKit molecule to ``<pdb_name>_<res_name>_ligand.sdf``.

    :param new_mol: RDKit molecule to write
    :param pdb_name: base name used as the file-name prefix
    :param res_name: residue name used in the file name
    :return: None
    """
    outfile_name = f"{pdb_name}_{res_name}_ligand.sdf"
    writer = Chem.SDWriter(outfile_name)
    try:
        writer.write(new_mol)
    finally:
        # Close explicitly so the record is flushed to disk and the handle is
        # released even if write() raises.
        writer.close()
    print(f"wrote {outfile_name}")
79
+
80
+
81
def main(pdb_name):
    """
    Split a PDB entry into protein and ligands and write them to disk.

    The protein is written to ``<pdb_name>_protein.pdb``. Each distinct
    residue name in the ligand selection gets its own SD file.

    :param pdb_name: id from the pdb, doesn't need to have an extension
    :return: None
    """
    protein, ligand = get_pdb_components(pdb_name)
    write_pdb(protein, pdb_name)

    if ligand is None:
        # prody's select() gives None when nothing matches, i.e. the entry has
        # no non-protein, non-water atoms; there is nothing more to write.
        return

    # Sorted so the files are produced in a reproducible order.
    for res in sorted(set(ligand.getResnames())):
        new_mol = process_ligand(ligand, res)
        write_sdf(new_mol, pdb_name, res)
@@ -0,0 +1,25 @@
1
+ from rdkit.Chem.Scaffolds import rdScaffoldNetwork
2
+ from rdkit import RDLogger
3
+ RDLogger.DisableLog('rdApp.info')
4
+
5
+
6
def create_sn(
    mols,
    includeGenericScaffolds=False,
    includeGenericBondScaffolds=False,
    includeScaffoldsWithAttachments=True,
    includeScaffoldsWithoutAttachments=False,
    pruneBeforeFragmenting=True,
    keepOnlyFirstFragment=True
):
    """Build an RDKit scaffold network for ``mols``.

    Every keyword argument is copied onto the ``ScaffoldNetworkParams``
    attribute of the same name; ``collectMolCounts`` is always switched on.
    Returns whatever ``CreateScaffoldNetwork`` returns.
    """
    RDLogger.DisableLog('rdApp.info')
    params = rdScaffoldNetwork.ScaffoldNetworkParams()
    # Insertion order mirrors the order in which the attributes are assigned.
    settings = {
        'collectMolCounts': True,
        'includeGenericScaffolds': includeGenericScaffolds,
        'includeScaffoldsWithoutAttachments': includeScaffoldsWithoutAttachments,
        'keepOnlyFirstFragment': keepOnlyFirstFragment,
        'includeGenericBondScaffolds': includeGenericBondScaffolds,
        'includeScaffoldsWithAttachments': includeScaffoldsWithAttachments,
        'pruneBeforeFragmenting': pruneBeforeFragmenting,
    }
    for attribute, value in settings.items():
        setattr(params, attribute, value)
    return rdScaffoldNetwork.CreateScaffoldNetwork(mols, params)
@@ -0,0 +1,241 @@
1
+ # shape
2
+
3
+ import os
4
+ import subprocess
5
+ from copy import deepcopy
6
+
7
+ from rdkit import Chem
8
+ from rdkit.Chem import AllChem
9
+ from pyshapeit import AlignMol
10
+ import multiprocess as mp
11
+
12
+ from rdkit import RDLogger
13
+
14
+ from jupyfuncs.pbar import tqdm
15
+ from jupyfuncs.norm import Normalizer
16
+
17
+ # from rdkit.Chem.Draw import IPythonConsole
18
+ from rdkit.Chem import PyMol
19
+ from rdkit.Chem.Subshape import SubshapeBuilder, SubshapeObjects
20
+ from PIL import ImageFile
21
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
22
+
23
+ lg = RDLogger.logger()
24
+ lg.setLevel(4)
25
+
26
+ __all__ = [
27
+ 'get_mols_from_smi',
28
+ 'get_aligned_mol',
29
+ 'get_aligned_sdf',
30
+ 'show_alignment',
31
+ 'pymol_running'
32
+ ]
33
+
34
+
35
def get_mols_from_smi(probe_smifile):
    """Read a text file with one SMILES per line into RDKit molecules.

    Blank lines are skipped, as are lines that ``Chem.MolFromSmiles`` cannot
    turn into a molecule. Any exception raised while parsing a line is
    printed and that line is skipped.

    :param probe_smifile: path of the SMILES file
    :return: list of parsed molecules, in file order
    """
    mols = []
    with open(probe_smifile) as f:
        # Iterate the handle directly instead of loading every line first.
        for line in f:
            smi = line.strip()
            if not smi:
                # An empty string would otherwise be parsed into an atom-less
                # molecule and passed on to conformer generation.
                continue
            try:
                mol = Chem.MolFromSmiles(smi)
            except Exception as e:
                print(e)
                continue
            if mol is not None:
                mols.append(mol)
    return mols
48
+
49
+
50
def get_aligned_mol(
    ref_mol, probe_mol, num_confs, num_cpu
):
    """Return the probe conformer that scores best against ``ref_mol``.

    Conformers are embedded into ``probe_mol`` (``num_confs`` of them, using
    ``num_cpu`` threads). Each conformer is round-tripped through a mol block
    into its own molecule and passed to ``AlignMol`` together with the
    reference; the molecule with the highest returned score is kept.
    If no conformer scores above 0, a copy of ``probe_mol`` is returned.

    Note that ``ref_mol`` gets its ``_Name`` property set to ``'ref'``.
    """
    ref_mol.SetProp('_Name', 'ref')

    AllChem.EmbedMultipleConfs(
        probe_mol, numConfs=num_confs, numThreads=num_cpu
    )

    best_score = 0
    best_mol = deepcopy(probe_mol)

    for conf_id in range(probe_mol.GetNumConformers()):
        conf_block = Chem.MolToMolBlock(probe_mol, confId=conf_id)
        candidate = Chem.MolFromMolBlock(conf_block)
        candidate.SetProp('_Name', 'probe')

        candidate_score = AlignMol(ref_mol, candidate)
        if candidate_score > best_score:
            best_score = candidate_score
            best_mol = deepcopy(candidate)

    return best_mol
82
+
83
+
84
def get_aligned_mol_mp(config):
    """Single-argument adapter around ``get_aligned_mol`` for pool mapping.

    ``config`` is unpacked positionally, so it must hold the arguments in the
    order ``(ref_mol, probe_mol, num_confs, num_cpu)``.
    """
    return get_aligned_mol(*config)
88
+
89
+
90
def gen_configs(ref_mol, mols, num_confs, num_cpu):
    """Build one ``(ref_mol, probe_mol, num_confs, num_cpu)`` tuple per probe.

    The tuples are returned as a list in the iteration order of ``mols``.
    """
    return [(ref_mol, probe, num_confs, num_cpu) for probe in mols]
95
+
96
+
97
def get_aligned_sdf(
    ref_sdf: str,
    probe_smifile: str,
    num_confs=150,
    num_cpu=5,
    num_workers=10,
    output_sdf=None,
    print_info=True,
    norm_mol=True
):
    """Align every molecule of a SMILES file to a reference and write an SDF.

    :param ref_sdf: path of the SD file whose first record is the reference
    :param probe_smifile: path of a file with one probe SMILES per line
    :param num_confs: conformers generated per probe molecule
    :param num_cpu: threads used for conformer embedding of one molecule
    :param num_workers: number of worker processes in the pool
    :param output_sdf: output path; defaults to ``<probe_smifile>.sdf``
    :param print_info: currently unused; kept so existing callers keep working
    :param norm_mol: run each aligned molecule through ``Normalizer`` before
        writing it
    :return: absolute path of the written SD file
    """
    ref_sdf = os.path.abspath(ref_sdf)
    ref_mol = Chem.SDMolSupplier(ref_sdf)[0]
    if not output_sdf:
        output_sdf = os.path.abspath(probe_smifile) + '.sdf'
    else:
        output_sdf = os.path.abspath(output_sdf)

    mols = get_mols_from_smi(probe_smifile)

    configs = gen_configs(
        ref_mol, mols, num_confs=num_confs, num_cpu=num_cpu
    )

    pool = mp.Pool(num_workers)
    try:
        aligned_mols = list(
            tqdm(
                pool.imap(get_aligned_mol_mp, configs),
                total=len(mols),
                desc='All mols'
            )
        )
    finally:
        # All results are collected by now (or an error occurred), so the
        # workers can be stopped. Without this they outlive the call.
        pool.terminate()
        pool.join()

    normer = Normalizer() if norm_mol else None
    sdwriter = Chem.SDWriter(output_sdf)
    try:
        for mol in aligned_mols:
            if normer is not None:
                mol = normer(mol)
            sdwriter.write(mol)
        sdwriter.flush()
    finally:
        # Release the file handle even when normalization or writing fails.
        sdwriter.close()
    return output_sdf
138
+
139
+ # out_aligned = output_sdf + 'ali.sdf'
140
+ # score_file = output_sdf + 'score.csv'
141
+
142
+ # command = f'shape-it -r {ref_sdf} -d {output_sdf} -o {out_aligned} -s {score_file}'
143
+ # out_info = subprocess.getoutput(command)
144
+ # if print_info:
145
+ # print(out_info)
146
+
147
+ # if norm_mol:
148
+ # fix_path = out_aligned + 'fix.sdf'
149
+ # mols = Chem.SDMolSupplier(out_aligned)
150
+ # sdwriter = Chem.SDWriter(fix_path)
151
+ # for mol in mols:
152
+ # mol = normer(mol)
153
+ # sdwriter.write(mol)
154
+ # sdwriter.flush()
155
+ # sdwriter.close()
156
+ # return fix_path
157
+ # else:
158
+ # return out_aligned
159
+
160
+ # shape-it -r ref_sdf -d output_sdf -o out_aligned -s score_file
161
+
162
+
163
def show_alignment(
    ref_mol,
    probe_mol,
    gen_confs,
    num_confs=200,
    num_cpu=5,
):
    """Render the best-scoring probe conformer on top of a reference in PyMOL.

    :param ref_mol: an RDKit ``Mol`` or the path of an ``.sdf`` file (first
        record is used)
    :param probe_mol: an RDKit ``Mol``, the path of an ``.sdf`` file, or a
        SMILES string
    :param gen_confs: when true, embed ``num_confs`` conformers into the probe
    :param num_confs: number of conformers to embed
    :param num_cpu: threads used for embedding
    :return: the image returned by the PyMOL viewer's ``GetPNG()``
    :raises TypeError: if ``ref_mol`` is neither a ``Mol`` nor an ``.sdf`` path
    :raises ValueError: if the probe SMILES cannot be parsed or the probe has
        no conformers to align
    """
    # Resolve both inputs first so bad arguments fail with a clear error
    # instead of an unbound-variable error further down.
    if isinstance(ref_mol, Chem.Mol):
        mol1 = ref_mol
    elif isinstance(ref_mol, str) and ref_mol.endswith('.sdf'):
        mol1 = Chem.SDMolSupplier(ref_mol)[0]
    else:
        raise TypeError(
            "ref_mol must be an RDKit Mol or a path ending in '.sdf'"
        )

    if isinstance(probe_mol, Chem.Mol):
        mol2 = probe_mol
    elif isinstance(probe_mol, str) and probe_mol.endswith('.sdf'):
        mol2 = Chem.SDMolSupplier(probe_mol)[0]
    else:
        mol2 = Chem.MolFromSmiles(probe_mol)
        if mol2 is None:
            raise ValueError(f"could not parse probe SMILES: {probe_mol!r}")

    # should install pymol-open-source
    if not pymol_running():
        subprocess.Popen(['pymol', '-cKRQ'])

    if gen_confs:
        AllChem.EmbedMultipleConfs(
            mol2,
            numConfs=num_confs,
            numThreads=num_cpu
        )

    best_score = 0
    probe = None
    for i in range(mol2.GetNumConformers()):
        candidate = Chem.MolFromMolBlock(
            Chem.MolToMolBlock(mol2, confId=i)
        )
        sim_score = AlignMol(mol1, candidate)
        # Always accept the first conformer so a probe is selected even when
        # no score exceeds 0; after that only strictly better ones replace it.
        if probe is None or sim_score > best_score:
            best_score = sim_score
            probe = deepcopy(candidate)

    if probe is None:
        raise ValueError(
            "probe molecule has no conformers; pass gen_confs=True or "
            "supply a molecule with 3D coordinates"
        )

    mol1.SetProp('_Name', 'ref')
    probe.SetProp('_Name', 'probe')

    AllChem.CanonicalizeConformer(mol1.GetConformer())
    AllChem.CanonicalizeConformer(probe.GetConformer())

    builder = SubshapeBuilder.SubshapeBuilder()
    builder.gridDims = (20., 20., 10)
    builder.gridSpacing = 0.5
    builder.winRad = 4.

    refShape = builder.GenerateSubshapeShape(mol1)
    probeShape = builder.GenerateSubshapeShape(probe)

    v = PyMol.MolViewer()

    # Called again after canonicalizing both conformers; the score itself is
    # not used. NOTE(review): presumably AlignMol moves the probe onto the
    # reference in place — confirm against pyshapeit.
    AlignMol(mol1, probe)

    v.DeleteAll()

    v.ShowMol(mol1, name='ref', showOnly=False)
    SubshapeObjects.DisplaySubshape(v, refShape, 'ref_Shape')
    v.server.do('set transparency=0.5')

    v.ShowMol(probe, name='probe', showOnly=False)
    SubshapeObjects.DisplaySubshape(v, probeShape, 'prob_Shape')
    v.server.do('set transparency=0.5')

    return v.GetPNG()
234
+
235
+
236
def pymol_running() -> bool:
    """Report whether the process list shows a PyMOL started with ``-cKRQ``.

    The check looks for the flag string ``-cKRQ`` in the output of
    ``ps aux | grep pymol``.
    """
    process_listing = subprocess.getoutput('ps aux | grep pymol')
    return '-cKRQ' in process_listing
@@ -0,0 +1,2 @@
1
# Regular expression that splits a SMILES string into tokens: bracket atoms,
# one- and two-letter organic atoms, aromatic atoms, branches, bond symbols,
# and ring-closure digits (single digit or %NN).
# The earlier literal was corrupted: the digit classes used en-dashes
# ("[0–9]"), which match only "0", "–" and "9"; two alternatives were empty,
# which let the pattern match the empty string; and the backslash and slash
# bond symbols were fused into one two-character alternative. The empty slots
# are filled with the SMILES "-" and "~" bond symbols. A raw string is used so
# the regex escapes are not read as string escapes.
SMI_REGEX_PATTERN = \
    r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>|\*|\$|\%[0-9]{2}|[0-9])"
File without changes
@@ -0,0 +1,42 @@
1
+ import psycopg
2
+ from psycopg import sql
3
+
4
+
5
def connect_by_infofile(info_file: str) -> psycopg.Connection:
    """Create a postgres connection

    Args:
        info_file (str):
            the path of the connection info like
            host=127.0.0.1 dbname=dbname port=5432 user=postgres password=lala
            Only the first line of the file is used.

    Returns:
        psycopg.Connection:
            the connection instance should be closed after committing.
    """
    # Read inside a context manager so the file handle is not leaked; the
    # trailing newline is stripped before the string is handed to psycopg.
    with open(info_file) as f:
        conninfo = f.readline().strip()
    return psycopg.connect(conninfo)
21
+
22
+
23
def get_item_by_idx(
    idx: int,
    info_file: str,
    by: str = 'id',
    table: str = 'reaction_id'
):
    """Look up the ``reaction_id`` of the row whose ``by`` column equals ``idx``.

    Args:
        idx (int):
            value to search for; it is sent to the database as a string.
        info_file (str):
            path of the connection-info file, see ``connect_by_infofile``.
        by (str):
            name of the column to filter on. Defaults to ``'id'`` (the
            previous default was the builtin function ``id``, which
            ``sql.Identifier`` rejects).
        table (str):
            name of the table to query.

    Returns:
        The ``reaction_id`` value of the first matching row, or ``None`` when
        no row matches.
    """
    # Table and column names are composed as identifiers; the value itself is
    # passed as a bound parameter.
    query = sql.SQL(
        "select reaction_id from {table} where {by} = %s"
    ).format(
        table=sql.Identifier(table),
        by=sql.Identifier(by)
    )
    # The connection context manager closes the connection on exit, so it is
    # no longer left open after every lookup.
    with connect_by_infofile(info_file) as conn:
        row = conn.execute(query, [str(idx)]).fetchone()
    return None if row is None else row[0]