cnotebook 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnotebook/__init__.py +400 -0
- cnotebook/align.py +454 -0
- cnotebook/context.py +523 -0
- cnotebook/grid/__init__.py +55 -0
- cnotebook/grid/grid.py +1649 -0
- cnotebook/helpers.py +201 -0
- cnotebook/ipython_ext.py +56 -0
- cnotebook/marimo_ext.py +272 -0
- cnotebook/pandas_ext.py +1156 -0
- cnotebook/polars_ext.py +1235 -0
- cnotebook/render.py +200 -0
- cnotebook-2.1.0.dist-info/METADATA +336 -0
- cnotebook-2.1.0.dist-info/RECORD +16 -0
- cnotebook-2.1.0.dist-info/WHEEL +5 -0
- cnotebook-2.1.0.dist-info/licenses/LICENSE +21 -0
- cnotebook-2.1.0.dist-info/top_level.txt +1 -0
cnotebook/align.py
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Callable, Literal
|
|
3
|
+
from abc import ABCMeta, abstractmethod
|
|
4
|
+
from openeye import oegraphsim, oechem, oedepict
|
|
5
|
+
|
|
6
|
+
log = logging.getLogger("cnotebook")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
########################################################################################################################
|
|
10
|
+
# Fingerprint generation
|
|
11
|
+
########################################################################################################################
|
|
12
|
+
|
|
13
|
+
# Atom type lookup built at import time: every name in oegraphsim that starts with
# "OEFPAtomType_" is keyed by that name with the "OEFPAtomType_" text removed and lower-cased.
atom_fp_typemap = {
    name.replace("OEFPAtomType_", "").lower(): getattr(oegraphsim, name)
    for name in dir(oegraphsim)
    if name.startswith("OEFPAtomType_")
}
|
|
18
|
+
|
|
19
|
+
# Bond type lookup built at import time: every name in oegraphsim that starts with
# "OEFPBondType_" is keyed by that name with the "OEFPBondType_" text removed and lower-cased.
bond_fp_typemap = {
    name.replace("OEFPBondType_", "").lower(): getattr(oegraphsim, name)
    for name in dir(oegraphsim)
    if name.startswith("OEFPBondType_")
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_atom_mask(atom_type):
    """
    Combine "|" delimited atom type names into one OEFingerprint atom type bitmask

    Each member of the atom_type string is stripped of surrounding whitespace, lower-cased, and has the
    text "oefpatomtype_" removed before being looked up in ``atom_fp_typemap``, so members are
    case-insensitive and the "OEFPAtomType_" prefix is optional.

    :param atom_type: Delimited string of OEFPAtomTypes
    :return: Bitmask for OpenEye fingerprint atom types
    :rtype: int
    :raises KeyError: If a member does not resolve to an entry in ``atom_fp_typemap``
    :raises ValueError: If the combined mask equals ``OEFPAtomType_None``
    """
    combined = oegraphsim.OEFPAtomType_None

    for member in atom_type.split("|"):
        key = member.strip().lower().replace("oefpatomtype_", "")
        flag = atom_fp_typemap.get(key)
        if flag is None:
            raise KeyError(f'{member} is not a known OEAtomFPType')
        combined = combined | flag

    # A mask that is still "None" means nothing usable was requested
    if combined == oegraphsim.OEFPAtomType_None:
        raise ValueError("No atom fingerprint types configured")

    return combined
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_bond_mask(bond_type):
    """
    Combine "|" delimited bond type names into one OEFingerprint bond type bitmask

    Each member of the bond_type string is stripped of surrounding whitespace, lower-cased, and has the
    text "oefpbondtype_" removed before being looked up in ``bond_fp_typemap``, so members are
    case-insensitive and the "OEFPBondType_" prefix is optional.

    :param bond_type: Delimited string of OEFPBondTypes
    :return: Bitmask for OpenEye fingerprint bond types
    :rtype: int
    :raises KeyError: If a member does not resolve to an entry in ``bond_fp_typemap``
    :raises ValueError: If the combined mask equals ``OEFPBondType_None``
    """
    combined = oegraphsim.OEFPBondType_None

    for member in bond_type.split("|"):
        key = member.strip().lower().replace("oefpbondtype_", "")
        flag = bond_fp_typemap.get(key)
        if flag is None:
            raise KeyError(f'{member} is not a known OEBondFPType')
        combined = combined | flag

    # A mask that is still "None" means nothing usable was requested
    if combined == oegraphsim.OEFPBondType_None:
        raise ValueError("No bond fingerprint types configured")

    return combined
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def fingerprint_maker(
|
|
74
|
+
fptype: str,
|
|
75
|
+
num_bits: int,
|
|
76
|
+
min_distance: int,
|
|
77
|
+
max_distance: int,
|
|
78
|
+
atom_type: str | int,
|
|
79
|
+
bond_type: str | int
|
|
80
|
+
) -> Callable[[oechem.OEMolBase], oegraphsim.OEFingerPrint]:
|
|
81
|
+
"""
|
|
82
|
+
Create a function that generates a fingerprint from a molecule
|
|
83
|
+
:param fptype: Fingerprint type
|
|
84
|
+
:param num_bits: Number of bits in the fingerprint
|
|
85
|
+
:param min_distance: Minimum distance/radius for path/circular/tree
|
|
86
|
+
:param max_distance: Maximum distance/radius for path/circular/tree
|
|
87
|
+
:param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
|
|
88
|
+
:param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
|
|
89
|
+
:return: Function that generates a fingerprint from a molecule
|
|
90
|
+
"""
|
|
91
|
+
# Be forgiving with case
|
|
92
|
+
_fptype = fptype.lower()
|
|
93
|
+
|
|
94
|
+
# Convert atom type and bond type strings to masks if necessary
|
|
95
|
+
atom_mask = get_atom_mask(atom_type) if isinstance(atom_type, str) else atom_type
|
|
96
|
+
bond_mask = get_bond_mask(bond_type) if isinstance(bond_type, str) else bond_type
|
|
97
|
+
if _fptype == "path":
|
|
98
|
+
def _make_path_fp(mol):
|
|
99
|
+
fp = oegraphsim.OEFingerPrint()
|
|
100
|
+
oegraphsim.OEMakePathFP(fp, mol, num_bits, min_distance, max_distance, atom_mask, bond_mask)
|
|
101
|
+
return fp
|
|
102
|
+
return _make_path_fp
|
|
103
|
+
elif _fptype == "circular":
|
|
104
|
+
def _make_circular_fp(mol):
|
|
105
|
+
fp = oegraphsim.OEFingerPrint()
|
|
106
|
+
oegraphsim.OEMakeCircularFP(fp, mol, num_bits, min_distance, max_distance, atom_mask, bond_mask)
|
|
107
|
+
return fp
|
|
108
|
+
return _make_circular_fp
|
|
109
|
+
elif _fptype == "tree":
|
|
110
|
+
def _make_tree_fp(mol):
|
|
111
|
+
fp = oegraphsim.OEFingerPrint()
|
|
112
|
+
oegraphsim.OEMakeTreeFP(fp, mol, num_bits, min_distance, max_distance, atom_mask, bond_mask)
|
|
113
|
+
return fp
|
|
114
|
+
return _make_tree_fp
|
|
115
|
+
elif _fptype == "maccs":
|
|
116
|
+
def _make_maccs(mol):
|
|
117
|
+
fp = oegraphsim.OEFingerPrint()
|
|
118
|
+
oegraphsim.OEMakeMACCS166FP(fp, mol)
|
|
119
|
+
return fp
|
|
120
|
+
return _make_maccs
|
|
121
|
+
elif _fptype == "lingo":
|
|
122
|
+
def _make_lingo(mol):
|
|
123
|
+
fp = oegraphsim.OEFingerPrint()
|
|
124
|
+
oegraphsim.OEMakeLingoFP(fp, mol)
|
|
125
|
+
return fp
|
|
126
|
+
return _make_lingo
|
|
127
|
+
raise KeyError(f'Unknown fingerprint type {fptype} (valid: path / tree / circular / maccs / lingo)')
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
########################################################################################################################
|
|
131
|
+
# Small molecule 2D structure aligners
|
|
132
|
+
########################################################################################################################
|
|
133
|
+
|
|
134
|
+
class Aligner(metaclass=ABCMeta):
    """Abstract base class for 2D molecule aligners.

    Aligners transform molecule 2D coordinates to align with a reference
    structure or pattern. Subclasses must implement :meth:`validate` and
    :meth:`align` methods.

    The aligner is callable - calling it with a molecule or display object
    will validate and then align the molecule if validation passes.
    """

    def __call__(self, mol_or_disp: oechem.OEMolBase | oedepict.OE2DMolDisplay) -> bool:
        """Validate the molecule and align it if validation passes.

        :param mol_or_disp: Molecule, or a display object whose molecule is obtained via ``GetMolecule()``.
        :returns: The result of :meth:`align` if :meth:`validate` passes, otherwise False.
        """
        # Get the molecule
        mol = mol_or_disp if isinstance(mol_or_disp, oechem.OEMolBase) else mol_or_disp.GetMolecule()

        # The SMILES string is only needed for the debug message, so do not pay for generating it
        # on every call when debug logging is switched off
        if log.isEnabledFor(logging.DEBUG):
            try:
                log.debug("Aligner called for molecule: %s", oechem.OEMolToSmiles(mol) if mol else "None")
            except TypeError:
                # Fall back to logging the object itself if it cannot be converted to SMILES
                log.debug("Aligner called for molecule: %s", mol)

        # If the molecule validates against the aligner
        if self.validate(mol):
            result = self.align(mol)
            log.debug("Alignment result: %s", result)
            return result

        log.debug("Molecule failed validation, skipping alignment")
        return False

    @abstractmethod
    def align(self, mol: oechem.OEMolBase) -> bool:
        """Align the molecule to the reference.

        :param mol: Molecule to align (will be modified in place).
        :returns: True if alignment was successful.
        """
        raise NotImplementedError

    @abstractmethod
    def validate(self, mol: oechem.OEMolBase) -> bool:
        """Validate that the molecule can be aligned.

        :param mol: Molecule to validate.
        :returns: True if the molecule can be aligned.
        """
        raise NotImplementedError
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class OESubSearchAligner(Aligner):
    """Aligner that depicts molecules aligned to a substructure search pattern."""

    def __init__(self, ref: oechem.OESubSearch | oechem.OEMolBase | str, **_kwargs):
        """Create a substructure-based aligner.

        :param ref: Reference for alignment. Can be:

            - ``OESubSearch``: Pre-configured substructure search object.
            - ``OEMolBase``: Molecule to use as substructure pattern.
            - ``str``: SMARTS pattern string.

        :param _kwargs: Additional keyword arguments (ignored, for API compatibility).
        """
        # Only set when the reference is given as a molecule
        self.refmol = None

        if not isinstance(ref, (oechem.OESubSearch, str)):
            # Molecule reference: work on a private copy, prepare its depiction, and use it as the pattern
            self.refmol = oechem.OEGraphMol(ref)
            oedepict.OEPrepareDepiction(self.refmol, False)
            self.ss = oechem.OESubSearch(
                self.refmol,
                oechem.OEExprOpts_DefaultAtoms,
                oechem.OEExprOpts_DefaultBonds
            )
        else:
            # Existing search object or SMARTS string: hand it straight to OESubSearch
            self.ss = oechem.OESubSearch(ref)

    def validate(self, mol: oechem.OEMolBase) -> bool:
        """
        Check whether the substructure search matches the molecule.

        :param mol: Molecule to search.
        :returns: True if there is a match to this substructure search.
        """
        oechem.OEPrepareSearch(mol, self.ss)
        has_match = self.ss.SingleMatch(mol)
        return has_match

    def align(self, mol: oechem.OEMolBase) -> bool:
        """
        Depict the molecule aligned to the substructure pattern.

        :param mol: Molecule to align.
        :returns: True if the alignment was successful.
        """
        oechem.OEPrepareSearch(mol, self.ss)
        succeeded = oedepict.OEPrepareAlignedDepiction(mol, self.ss).IsValid()
        log.debug("OEPrepareAlignedDepiction (substructure) returned: %s", succeeded)
        return succeeded
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class OEMCSSearchAligner(Aligner):
|
|
234
|
+
"""Aligner using Maximum Common Substructure (MCS) search for 2D molecule alignment."""
|
|
235
|
+
|
|
236
|
+
def __init__(
|
|
237
|
+
self,
|
|
238
|
+
ref: oechem.OEMCSSearch | oechem.OEMolBase,
|
|
239
|
+
*,
|
|
240
|
+
func: Literal["atoms", "bonds", "atoms_and_cycles", "bonds_and_cycles"] = "bonds_and_cycles",
|
|
241
|
+
min_atoms: int = 1,
|
|
242
|
+
**_kwargs
|
|
243
|
+
):
|
|
244
|
+
"""Create an MCS-based aligner.
|
|
245
|
+
|
|
246
|
+
:param ref: Reference for alignment. Can be:
|
|
247
|
+
|
|
248
|
+
- ``OEMCSSearch``: Pre-configured MCS search object.
|
|
249
|
+
- ``OEMolBase``: Reference molecule for MCS calculation.
|
|
250
|
+
|
|
251
|
+
:param func: MCS evaluation function to use:
|
|
252
|
+
|
|
253
|
+
- ``"atoms"``: Maximize atom count.
|
|
254
|
+
- ``"bonds"``: Maximize bond count.
|
|
255
|
+
- ``"atoms_and_cycles"``: Maximize atoms while preserving complete cycles.
|
|
256
|
+
- ``"bonds_and_cycles"``: Maximize bonds while preserving complete cycles.
|
|
257
|
+
|
|
258
|
+
:param min_atoms: Minimum number of atoms required in the MCS.
|
|
259
|
+
:param _kwargs: Additional keyword arguments (ignored, for API compatibility).
|
|
260
|
+
"""
|
|
261
|
+
self.refmol = None
|
|
262
|
+
|
|
263
|
+
if isinstance(ref, oechem.OEMCSSearch):
|
|
264
|
+
self.mcss = oechem.OEMCSSearch(ref)
|
|
265
|
+
|
|
266
|
+
else:
|
|
267
|
+
self.refmol = ref.CreateCopy()
|
|
268
|
+
# Ensure the reference molecule has proper 2D depiction coordinates
|
|
269
|
+
oedepict.OEPrepareDepiction(self.refmol, False)
|
|
270
|
+
|
|
271
|
+
# Currently just using default parameters
|
|
272
|
+
self.mcss = oechem.OEMCSSearch(oechem.OEMCSType_Approximate)
|
|
273
|
+
self.mcss.Init(self.refmol, oechem.OEExprOpts_DefaultAtoms, oechem.OEExprOpts_DefaultBonds)
|
|
274
|
+
|
|
275
|
+
if func == "atoms":
|
|
276
|
+
self.mcss.SetMCSFunc(oechem.OEMCSMaxAtoms())
|
|
277
|
+
elif func == "bonds":
|
|
278
|
+
self.mcss.SetMCSFunc(oechem.OEMCSMaxBonds())
|
|
279
|
+
elif func == "atoms_and_cycles":
|
|
280
|
+
self.mcss.SetMCSFunc(oechem.OEMCSMaxAtomsCompleteCycles())
|
|
281
|
+
elif func == "bonds_and_cycles":
|
|
282
|
+
self.mcss.SetMCSFunc(oechem.OEMCSMaxBondsCompleteCycles())
|
|
283
|
+
else:
|
|
284
|
+
raise ValueError(f'Unknown MCS evaluation function name: {func}')
|
|
285
|
+
|
|
286
|
+
# Other options
|
|
287
|
+
self.mcss.SetMinAtoms(min_atoms)
|
|
288
|
+
|
|
289
|
+
def validate(self, mol: oechem.OEMolBase) -> bool:
|
|
290
|
+
"""
|
|
291
|
+
Validate that a maximum common substructure exists in a query molecule.
|
|
292
|
+
|
|
293
|
+
:param mol: Molecule to search.
|
|
294
|
+
:returns: True if the molecule contains the maximum common substructure.
|
|
295
|
+
"""
|
|
296
|
+
return self.mcss.SingleMatch(mol)
|
|
297
|
+
|
|
298
|
+
def align(self, mol: oechem.OEMolBase) -> bool:
|
|
299
|
+
"""
|
|
300
|
+
Align molecule using the maximum common substructure.
|
|
301
|
+
|
|
302
|
+
:param mol: Molecule to align.
|
|
303
|
+
:returns: True if the alignment was successful.
|
|
304
|
+
"""
|
|
305
|
+
alignres = oedepict.OEPrepareAlignedDepiction(mol, self.mcss)
|
|
306
|
+
result = alignres.IsValid()
|
|
307
|
+
log.debug("OEPrepareAlignedDepiction (MCS) returned: %s", result)
|
|
308
|
+
return result
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class OEFingerprintAligner(Aligner):
    """Aligner using fingerprint similarity and overlap for 2D molecule alignment.

    This aligner uses molecular fingerprints to identify common structural
    features between molecules and aligns based on the fingerprint overlap.
    """

    def __init__(
            self,
            refmol: oechem.OEMolBase,
            *,
            threshold: float = 0.4,
            fptype: str = "tree",
            num_bits: int = 4096,
            min_distance: int = 0,
            max_distance: int = 4,
            atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
            bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond,
            **_kwargs
    ):
        """Create a fingerprint-based aligner.

        :param refmol: Reference molecule for alignment.
        :param threshold: Minimum Tanimoto similarity required to attempt alignment.
        :param fptype: Fingerprint type ("path", "circular", or "tree").
        :param num_bits: Number of bits in the fingerprint.
        :param min_distance: Minimum path/radius distance for fingerprint.
        :param max_distance: Maximum path/radius distance for fingerprint.
        :param atom_type: Atom type for fingerprint generation. Can be an integer
            constant or a string name (e.g., "default", "aromaticity").
        :param bond_type: Bond type for fingerprint generation. Can be an integer
            constant or a string name (e.g., "default", "inring").
        :param _kwargs: Additional keyword arguments (ignored, for API compatibility
            with the other aligner constructors).
        """
        # Similarity threshold to apply alignment
        self.threshold = threshold

        # Fingerprint maker
        self.make_fp = fingerprint_maker(
            fptype=fptype,
            num_bits=num_bits,
            min_distance=min_distance,
            max_distance=max_distance,
            atom_type=atom_type,
            bond_type=bond_type
        )

        # Reference molecule and fingerprint; reffp and fptype stay None if the reference is invalid
        self.refmol = oechem.OEGraphMol(refmol)
        self.reffp = None
        self.fptype = None

        if self.refmol.IsValid():
            # Ensure the reference molecule has proper 2D depiction coordinates (but retain existing coordinates)
            oedepict.OEPrepareDepiction(self.refmol, False)
            self.reffp = self.make_fp(self.refmol)
            self.fptype = self.reffp.GetFPTypeBase()

        else:
            log.warning("Reference molecule for fingerprint-based alignment is not valid")

    def validate(self, mol: oechem.OEMolBase) -> bool:
        """Check whether the molecule is similar enough to the reference to attempt alignment.

        :param mol: Molecule to compare against the reference.
        :returns: True if the Tanimoto similarity to the reference fingerprint is at least the
            threshold; False if it is lower or if no reference fingerprint was created.
        """
        if self.reffp is None:
            return False

        fp = self.make_fp(mol)
        sim = oegraphsim.OETanimoto(fp, self.reffp)
        log.debug("Fingerprint Tanimoto similarity: %.3f (threshold: %.3f)", sim, self.threshold)
        return sim >= self.threshold

    def align(self, mol: oechem.OEMolBase) -> bool:
        """Align the molecule to the reference using the fingerprint overlap.

        :param mol: Molecule to align.
        :returns: The result of ``OEPrepareMultiAlignedDepiction``; False if no reference
            fingerprint type is available.
        """
        if self.fptype is None:
            return False

        overlaps = oegraphsim.OEGetFPOverlap(self.refmol, mol, self.fptype)
        result = oedepict.OEPrepareMultiAlignedDepiction(mol, self.refmol, overlaps)

        log.debug("OEPrepareMultiAlignedDepiction (FP) returned: %s", result)
        return result
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
# Registry mapping each canonical alignment method name to its aligner class
_ALIGNERS = dict(
    substructure=OESubSearchAligner,
    fingerprint=OEFingerprintAligner,
    mcss=OEMCSSearchAligner,
)
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def create_aligner(
        ref: oechem.OEMolBase | oechem.OESubSearch | oechem.OEMCSSearch | str,
        method: Literal["substructure", "ss", "mcss", "fp", "fingerprint"] = None,
        **kwargs
) -> Aligner:
    """
    Build the aligner appropriate for an alignment reference.

    The aligner class is chosen from the reference type: SMARTS strings and ``OESubSearch`` objects
    get a substructure aligner, ``OEMCSSearch`` objects get an MCS aligner. Only for molecule
    references does ``method`` select the aligner, with the fingerprint aligner used when no
    substructure or MCS method was requested.

    :param ref: Alignment reference - can be a molecule, substructure search, MCS search, or SMARTS string.
    :param method: Alignment method ("substructure"/"ss", "mcss", "fingerprint"/"fp"), case-insensitive.
        If None, the method is auto-detected based on the reference type.
    :param kwargs: Keyword arguments passed to the aligner constructor.
    :returns: Configured aligner instance.
    :raises ValueError: If ``method`` is given but is not a recognised name.
    :raises TypeError: If ``ref`` is not a supported reference type.
    """
    # Map every accepted spelling onto its canonical method name
    if method is not None:
        aliases = {
            "substructure": "substructure",
            "ss": "substructure",
            "fingerprint": "fingerprint",
            "fp": "fingerprint",
            "mcss": "mcss",
        }
        canonical = aliases.get(method.lower())
        if canonical is None:
            raise ValueError(f'Unknown depiction alignment method: {method}. Valid options: "substructure"/"ss", "mcss", "fingerprint"/"fp".')
        method = canonical

    # References that already determine the aligner type
    if isinstance(ref, str):
        # SMARTS string - use substructure aligner
        log.debug("Using substructure aligner for SMARTS string alignment reference")
        return OESubSearchAligner(ref, **kwargs)

    if isinstance(ref, oechem.OESubSearch):
        log.debug("Using substructure aligner for oechem.OESubSearch alignment reference")
        return OESubSearchAligner(ref, **kwargs)

    if isinstance(ref, oechem.OEMCSSearch):
        log.debug("Using MCS aligner for oechem.OEMCSSearch alignment reference")
        return OEMCSSearchAligner(ref, **kwargs)

    if isinstance(ref, oechem.OEMolBase):
        # Molecule reference: honour the requested method, falling back to the fingerprint aligner
        if method == "substructure":
            log.debug("Using substructure aligner for oechem.OEMolBase alignment reference")
            return OESubSearchAligner(ref, **kwargs)

        if method == "mcss":
            log.debug("Using MCS aligner for oechem.OEMolBase alignment reference")
            return OEMCSSearchAligner(ref, **kwargs)

        log.debug("Using fingerprint aligner for oechem.OEMolBase alignment reference")
        return OEFingerprintAligner(ref, **kwargs)

    raise TypeError(f'Unsupported alignment reference type: {type(ref)}.')
|