cnotebook 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cnotebook/align.py ADDED
@@ -0,0 +1,454 @@
1
+ import logging
2
+ from typing import Callable, Literal
3
+ from abc import ABCMeta, abstractmethod
4
+ from openeye import oegraphsim, oechem, oedepict
5
+
6
+ log = logging.getLogger("cnotebook")
7
+
8
+
9
+ ########################################################################################################################
10
+ # Fingerprint generation
11
+ ########################################################################################################################
12
+
13
# Lookup table built by scanning the oegraphsim namespace: every attribute named "OEFPAtomType_<X>"
# is registered under the lowercase key "<x>" and maps to that attribute's value
atom_fp_typemap = {
    name.replace("OEFPAtomType_", "").lower(): getattr(oegraphsim, name)
    for name in dir(oegraphsim)
    if name.startswith("OEFPAtomType_")
}
18
+
19
# Lookup table built by scanning the oegraphsim namespace: every attribute named "OEFPBondType_<X>"
# is registered under the lowercase key "<x>" and maps to that attribute's value
bond_fp_typemap = {
    name.replace("OEFPBondType_", "").lower(): getattr(oegraphsim, name)
    for name in dir(oegraphsim)
    if name.startswith("OEFPBondType_")
}
24
+
25
+
26
def get_atom_mask(atom_type):
    """
    Build an OEFingerprint atom type bitmask from a "|" delimited string.

    Each member of ``atom_type`` is looked up in ``atom_fp_typemap``. Members are matched
    case-insensitively, surrounding whitespace is ignored, and the "OEFPAtomType_" prefix is optional.

    :param atom_type: Delimited string of OEFPAtomTypes
    :return: Bitmask for OpenEye fingerprint atom types
    :rtype: int
    :raises KeyError: If a member is not present in ``atom_fp_typemap``
    :raises ValueError: If the combined mask is equal to ``OEFPAtomType_None``
    """
    atom_mask = oegraphsim.OEFPAtomType_None

    for m in atom_type.split("|"):
        key = m.strip().lower().replace("oefpatomtype_", "")
        mask = atom_fp_typemap.get(key)
        if mask is None:
            raise KeyError(f'{m} is not a known OEAtomFPType')
        atom_mask = atom_mask | mask

    # Only hand back a mask that actually selects something
    if atom_mask != oegraphsim.OEFPAtomType_None:
        return atom_mask

    raise ValueError("No atom fingerprint types configured")
47
+
48
+
49
def get_bond_mask(bond_type):
    """
    Build an OEFingerprint bond type bitmask from a "|" delimited string.

    Each member of ``bond_type`` is looked up in ``bond_fp_typemap``. Members are matched
    case-insensitively, surrounding whitespace is ignored, and the "OEFPBondType_" prefix is optional.

    :param bond_type: Delimited string of OEFPBondTypes
    :return: Bitmask for OpenEye fingerprint bond types
    :rtype: int
    :raises KeyError: If a member is not present in ``bond_fp_typemap``
    :raises ValueError: If the combined mask is equal to ``OEFPBondType_None``
    """
    bond_mask = oegraphsim.OEFPBondType_None

    for m in bond_type.split("|"):
        key = m.strip().lower().replace("oefpbondtype_", "")
        mask = bond_fp_typemap.get(key)
        if mask is None:
            raise KeyError(f'{m} is not a known OEBondFPType')
        bond_mask = bond_mask | mask

    # Only hand back a mask that actually selects something
    if bond_mask != oegraphsim.OEFPBondType_None:
        return bond_mask

    raise ValueError("No bond fingerprint types configured")
71
+
72
+
73
def fingerprint_maker(
        fptype: str,
        num_bits: int,
        min_distance: int,
        max_distance: int,
        atom_type: str | int,
        bond_type: str | int
) -> Callable[[oechem.OEMolBase], oegraphsim.OEFingerPrint]:
    """
    Create a function that generates a fingerprint from a molecule

    The fingerprint type is matched case-insensitively. Atom and bond types are resolved to bitmasks
    before the fingerprint type is inspected, so an unknown atom/bond type name raises even for the
    "maccs" and "lingo" types, which do not use the masks, size, or distances.

    :param fptype: Fingerprint type ("path", "circular", "tree", "maccs" or "lingo")
    :param num_bits: Number of bits in the fingerprint
    :param min_distance: Minimum distance/radius for path/circular/tree
    :param max_distance: Maximum distance/radius for path/circular/tree
    :param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
    :param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
    :return: Function that generates a fingerprint from a molecule
    :raises KeyError: If the fingerprint type is not one of the supported names
    """
    kind = fptype.lower()

    # Strings are parsed into bitmasks; anything else is passed through untouched
    atom_mask = get_atom_mask(atom_type) if isinstance(atom_type, str) else atom_type
    bond_mask = get_bond_mask(bond_type) if isinstance(bond_type, str) else bond_type

    def _sized(build):
        # Wrap a builder that takes size, distance range and atom/bond masks
        def _make_fp(mol):
            fp = oegraphsim.OEFingerPrint()
            build(fp, mol, num_bits, min_distance, max_distance, atom_mask, bond_mask)
            return fp
        return _make_fp

    def _fixed(build):
        # Wrap a builder that only needs the molecule
        def _make_fp(mol):
            fp = oegraphsim.OEFingerPrint()
            build(fp, mol)
            return fp
        return _make_fp

    if kind == "path":
        return _sized(oegraphsim.OEMakePathFP)
    if kind == "circular":
        return _sized(oegraphsim.OEMakeCircularFP)
    if kind == "tree":
        return _sized(oegraphsim.OEMakeTreeFP)
    if kind == "maccs":
        return _fixed(oegraphsim.OEMakeMACCS166FP)
    if kind == "lingo":
        return _fixed(oegraphsim.OEMakeLingoFP)

    raise KeyError(f'Unknown fingerprint type {fptype} (valid: path / tree / circular / maccs / lingo)')
128
+
129
+
130
+ ########################################################################################################################
131
+ # Small molecule 2D structure aligners
132
+ ########################################################################################################################
133
+
134
class Aligner(metaclass=ABCMeta):
    """Abstract base class for 2D molecule aligners.

    Aligners transform molecule 2D coordinates to align with a reference
    structure or pattern. Subclasses must implement :meth:`validate` and
    :meth:`align` methods.

    The aligner is callable - calling it with a molecule or display object
    will validate and then align the molecule if validation passes.
    """

    def __call__(self, mol_or_disp: oechem.OEMolBase | oedepict.OE2DMolDisplay) -> bool:
        """Validate the molecule and, if validation passes, align it.

        :param mol_or_disp: Molecule to align, or an object whose ``GetMolecule()``
            returns the molecule to align.
        :returns: The result of :meth:`align` when :meth:`validate` passes, otherwise False.
        """
        # Anything that is not a molecule is asked for its molecule
        mol = mol_or_disp if isinstance(mol_or_disp, oechem.OEMolBase) else mol_or_disp.GetMolecule()

        # Generating a SMILES string is wasted work unless the message will actually be emitted
        if log.isEnabledFor(logging.DEBUG):
            try:
                log.debug("Aligner called for molecule: %s", oechem.OEMolToSmiles(mol) if mol else "None")
            except TypeError:
                # Fall back to the object's own string form
                log.debug("Aligner called for molecule: %s", mol)

        if not self.validate(mol):
            log.debug("Molecule failed validation, skipping alignment")
            return False

        result = self.align(mol)
        log.debug("Alignment result: %s", result)
        return result

    @abstractmethod
    def align(self, mol: oechem.OEMolBase) -> bool:
        """Align the molecule to the reference.

        :param mol: Molecule to align (will be modified in place).
        :returns: True if alignment was successful.
        """
        raise NotImplementedError

    @abstractmethod
    def validate(self, mol: oechem.OEMolBase) -> bool:
        """Validate that the molecule can be aligned.

        :param mol: Molecule to validate.
        :returns: True if the molecule can be aligned.
        """
        raise NotImplementedError
181
+
182
+
183
class OESubSearchAligner(Aligner):
    """Aligner using substructure search for 2D molecule alignment."""

    def __init__(self, ref: oechem.OESubSearch | oechem.OEMolBase | str, **_kwargs):
        """Create a substructure-based aligner.

        A warning is logged (nothing is raised) when the resulting substructure
        search is not valid, e.g. because a SMARTS string could not be parsed.

        :param ref: Reference for alignment. Can be:

            - ``OESubSearch``: Pre-configured substructure search object.
            - ``OEMolBase``: Molecule to use as substructure pattern.
            - ``str``: SMARTS pattern string.

        :param _kwargs: Additional keyword arguments (ignored, for API compatibility).
        """
        # Reference molecule with 2D coordinates (only set when ``ref`` is a molecule)
        self.refmol = None

        if isinstance(ref, (oechem.OESubSearch, str)):
            self.ss = oechem.OESubSearch(ref)

        else:
            self.refmol = oechem.OEGraphMol(ref)
            # Ensure the reference molecule has proper 2D depiction coordinates
            oedepict.OEPrepareDepiction(self.refmol, False)
            self.ss = oechem.OESubSearch(self.refmol, oechem.OEExprOpts_DefaultAtoms, oechem.OEExprOpts_DefaultBonds)

        # Surface a broken pattern up front instead of silently never matching anything
        if not self.ss.IsValid():
            log.warning("Substructure search for substructure-based alignment is not valid")

    def validate(self, mol: oechem.OEMolBase) -> bool:
        """
        Validate that the molecule has a match to this substructure search.

        :param mol: Molecule to search (passed through ``OEPrepareSearch`` first).
        :returns: True if there is a match to this substructure search.
        """
        oechem.OEPrepareSearch(mol, self.ss)
        return self.ss.SingleMatch(mol)

    def align(self, mol: oechem.OEMolBase) -> bool:
        """
        Align molecule to the substructure pattern.

        :param mol: Molecule to align.
        :returns: True if the alignment result reports itself as valid.
        """
        oechem.OEPrepareSearch(mol, self.ss)
        alignres = oedepict.OEPrepareAlignedDepiction(mol, self.ss)
        result = alignres.IsValid()
        log.debug("OEPrepareAlignedDepiction (substructure) returned: %s", result)
        return result
231
+
232
+
233
+ class OEMCSSearchAligner(Aligner):
234
+ """Aligner using Maximum Common Substructure (MCS) search for 2D molecule alignment."""
235
+
236
+ def __init__(
237
+ self,
238
+ ref: oechem.OEMCSSearch | oechem.OEMolBase,
239
+ *,
240
+ func: Literal["atoms", "bonds", "atoms_and_cycles", "bonds_and_cycles"] = "bonds_and_cycles",
241
+ min_atoms: int = 1,
242
+ **_kwargs
243
+ ):
244
+ """Create an MCS-based aligner.
245
+
246
+ :param ref: Reference for alignment. Can be:
247
+
248
+ - ``OEMCSSearch``: Pre-configured MCS search object.
249
+ - ``OEMolBase``: Reference molecule for MCS calculation.
250
+
251
+ :param func: MCS evaluation function to use:
252
+
253
+ - ``"atoms"``: Maximize atom count.
254
+ - ``"bonds"``: Maximize bond count.
255
+ - ``"atoms_and_cycles"``: Maximize atoms while preserving complete cycles.
256
+ - ``"bonds_and_cycles"``: Maximize bonds while preserving complete cycles.
257
+
258
+ :param min_atoms: Minimum number of atoms required in the MCS.
259
+ :param _kwargs: Additional keyword arguments (ignored, for API compatibility).
260
+ """
261
+ self.refmol = None
262
+
263
+ if isinstance(ref, oechem.OEMCSSearch):
264
+ self.mcss = oechem.OEMCSSearch(ref)
265
+
266
+ else:
267
+ self.refmol = ref.CreateCopy()
268
+ # Work on a private copy of the reference and make sure it has 2D depiction coordinates
269
+ oedepict.OEPrepareDepiction(self.refmol, False)
270
+
271
+ # Approximate MCS search initialized from the reference with the default atom/bond expression options
272
+ self.mcss = oechem.OEMCSSearch(oechem.OEMCSType_Approximate)
273
+ self.mcss.Init(self.refmol, oechem.OEExprOpts_DefaultAtoms, oechem.OEExprOpts_DefaultBonds)
274
+
275
+ if func == "atoms":
276
+ self.mcss.SetMCSFunc(oechem.OEMCSMaxAtoms())
277
+ elif func == "bonds":
278
+ self.mcss.SetMCSFunc(oechem.OEMCSMaxBonds())
279
+ elif func == "atoms_and_cycles":
280
+ self.mcss.SetMCSFunc(oechem.OEMCSMaxAtomsCompleteCycles())
281
+ elif func == "bonds_and_cycles":
282
+ self.mcss.SetMCSFunc(oechem.OEMCSMaxBondsCompleteCycles())
283
+ else:
284
+ raise ValueError(f'Unknown MCS evaluation function name: {func}')
285
+
286
+ # Minimum atom count for the MCS. NOTE(review): confirm whether this and the func selection above are meant to apply to a caller-supplied OEMCSSearch as well
287
+ self.mcss.SetMinAtoms(min_atoms)
288
+
289
+ def validate(self, mol: oechem.OEMolBase) -> bool:
290
+ """
291
+ Validate that a maximum common substructure exists in a query molecule.
292
+
293
+ :param mol: Molecule to search.
294
+ :returns: True if the molecule contains the maximum common substructure.
295
+ """
296
+ return self.mcss.SingleMatch(mol)
297
+
298
+ def align(self, mol: oechem.OEMolBase) -> bool:
299
+ """
300
+ Align molecule using the maximum common substructure.
301
+
302
+ :param mol: Molecule to align.
303
+ :returns: True if the alignment was successful.
304
+ """
305
+ alignres = oedepict.OEPrepareAlignedDepiction(mol, self.mcss)
306
+ result = alignres.IsValid()
307
+ log.debug("OEPrepareAlignedDepiction (MCS) returned: %s", result)
308
+ return result
309
+
310
+
311
class OEFingerprintAligner(Aligner):
    """Aligner using fingerprint similarity and overlap for 2D molecule alignment.

    A molecule is only aligned when its fingerprint is similar enough to the
    reference fingerprint; the alignment itself is driven by the fingerprint
    overlap between the reference and the molecule.
    """

    def __init__(
            self,
            refmol: oechem.OEMolBase,
            *,
            threshold: float = 0.4,
            fptype: str = "tree",
            num_bits: int = 4096,
            min_distance: int = 0,
            max_distance: int = 4,
            atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
            bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond
    ):
        """Create a fingerprint-based aligner.

        If the reference molecule is not valid a warning is logged and the
        aligner is left without a reference fingerprint, in which case
        :meth:`validate` and :meth:`align` always return False.

        :param refmol: Reference molecule for alignment (a copy is stored).
        :param threshold: Minimum Tanimoto similarity required to attempt alignment.
        :param fptype: Fingerprint type ("path", "circular", or "tree").
        :param num_bits: Number of bits in the fingerprint.
        :param min_distance: Minimum path/radius distance for fingerprint.
        :param max_distance: Maximum path/radius distance for fingerprint.
        :param atom_type: Atom type for fingerprint generation. Can be an integer
            constant or a string name (e.g., "default", "aromaticity").
        :param bond_type: Bond type for fingerprint generation. Can be an integer
            constant or a string name (e.g., "default", "inring").
        """
        self.threshold = threshold

        # Callable used for both the reference and every candidate molecule
        self.make_fp = fingerprint_maker(
            fptype=fptype,
            num_bits=num_bits,
            min_distance=min_distance,
            max_distance=max_distance,
            atom_type=atom_type,
            bond_type=bond_type,
        )

        # Private copy of the reference; fingerprint data stays None without a valid reference
        self.refmol = oechem.OEGraphMol(refmol)
        self.reffp = None
        self.fptype = None

        if not self.refmol.IsValid():
            log.warning("Reference molecule for fingerprint-based alignment is not valid")
            return

        # Ensure the reference molecule has proper 2D depiction coordinates (but retain existing coordinates)
        oedepict.OEPrepareDepiction(self.refmol, False)
        self.reffp = self.make_fp(self.refmol)
        self.fptype = self.reffp.GetFPTypeBase()

    def validate(self, mol: oechem.OEMolBase) -> bool:
        """Check that the molecule is similar enough to the reference.

        :param mol: Molecule to fingerprint and compare.
        :returns: True if the Tanimoto similarity to the reference fingerprint is at
            least the threshold; False if there is no reference fingerprint.
        """
        if self.reffp is None:
            return False

        candidate_fp = self.make_fp(mol)
        sim = oegraphsim.OETanimoto(candidate_fp, self.reffp)
        log.debug("Fingerprint Tanimoto similarity: %.3f (threshold: %.3f)", sim, self.threshold)
        return sim >= self.threshold

    def align(self, mol: oechem.OEMolBase) -> bool:
        """Align the molecule to the reference using their fingerprint overlap.

        :param mol: Molecule to align.
        :returns: The value returned by ``OEPrepareMultiAlignedDepiction``; False if
            no fingerprint type is available.
        """
        if self.fptype is None:
            return False

        overlaps = oegraphsim.OEGetFPOverlap(self.refmol, mol, self.fptype)
        result = oedepict.OEPrepareMultiAlignedDepiction(mol, self.refmol, overlaps)

        log.debug("OEPrepareMultiAlignedDepiction (FP) returned: %s", result)
        return result
389
+
390
+
391
# Registry mapping each canonical alignment method name to the aligner class implementing it
_ALIGNERS = {
    "substructure": OESubSearchAligner,
    "fingerprint": OEFingerprintAligner,
    "mcss": OEMCSSearchAligner,
}
397
+
398
+
399
def create_aligner(
        ref: oechem.OEMolBase | oechem.OESubSearch | oechem.OEMCSSearch | str,
        method: Literal["substructure", "ss", "mcss", "fp", "fingerprint"] = None,
        **kwargs
) -> Aligner:
    """
    Create an aligner for the given reference.

    The aligner class is chosen from the reference type: SMARTS strings and ``OESubSearch`` objects
    always get a substructure aligner and ``OEMCSSearch`` objects always get an MCS aligner. The
    ``method`` argument is only consulted when the reference is a molecule, where anything other
    than "substructure" or "mcss" (including None) selects the fingerprint aligner.

    :param ref: Alignment reference - can be a molecule, substructure search, MCS search, or SMARTS string.
    :param method: Alignment method ("substructure"/"ss", "mcss", "fingerprint"/"fp"), case-insensitive.
                   If None, the method is auto-detected based on the reference type.
    :param kwargs: Keyword arguments passed to the aligner constructor.
    :returns: Configured aligner instance.
    :raises ValueError: If ``method`` is given but is not a recognized name.
    :raises TypeError: If ``ref`` is not one of the supported reference types.
    """
    # Collapse aliases onto their canonical names; this is validated even if the reference type
    # ends up deciding the aligner on its own
    if method is not None:
        aliases = {
            "substructure": "substructure",
            "ss": "substructure",
            "fingerprint": "fingerprint",
            "fp": "fingerprint",
            "mcss": "mcss",
        }
        canonical = aliases.get(method.lower())
        if canonical is None:
            raise ValueError(f'Unknown depiction alignment method: {method}. Valid options: "substructure"/"ss", "mcss", "fingerprint"/"fp".')
        method = canonical

    # References that already imply a specific aligner
    if isinstance(ref, str):
        log.debug("Using substructure aligner for SMARTS string alignment reference")
        return OESubSearchAligner(ref, **kwargs)

    if isinstance(ref, oechem.OESubSearch):
        log.debug("Using substructure aligner for oechem.OESubSearch alignment reference")
        return OESubSearchAligner(ref, **kwargs)

    if isinstance(ref, oechem.OEMCSSearch):
        log.debug("Using MCS aligner for oechem.OEMCSSearch alignment reference")
        return OEMCSSearchAligner(ref, **kwargs)

    if not isinstance(ref, oechem.OEMolBase):
        raise TypeError(f'Unsupported alignment reference type: {type(ref)}.')

    # Molecule reference: honor the requested method, falling back to fingerprints
    if method == "substructure":
        log.debug("Using substructure aligner for oechem.OEMolBase alignment reference")
        return OESubSearchAligner(ref, **kwargs)

    if method == "mcss":
        log.debug("Using MCS aligner for oechem.OEMolBase alignment reference")
        return OEMCSSearchAligner(ref, **kwargs)

    log.debug("Using fingerprint aligner for oechem.OEMolBase alignment reference")
    return OEFingerprintAligner(ref, **kwargs)