cnotebook 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnotebook/__init__.py +104 -0
- cnotebook/align.py +390 -0
- cnotebook/context.py +491 -0
- cnotebook/helpers.py +69 -0
- cnotebook/ipython_ext.py +255 -0
- cnotebook/marimo_ext.py +34 -0
- cnotebook/pandas_ext.py +900 -0
- cnotebook/render.py +198 -0
- cnotebook-1.0.1.dist-info/METADATA +275 -0
- cnotebook-1.0.1.dist-info/RECORD +13 -0
- cnotebook-1.0.1.dist-info/WHEEL +5 -0
- cnotebook-1.0.1.dist-info/licenses/LICENSE +21 -0
- cnotebook-1.0.1.dist-info/top_level.txt +1 -0
cnotebook/__init__.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from .pandas_ext import render_dataframe
|
|
3
|
+
from .context import cnotebook_context
|
|
4
|
+
from .ipython_ext import (
|
|
5
|
+
register_ipython_formatters as _register_ipython_formatters,
|
|
6
|
+
render_molecule_grid
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
# Only import formatter registration from the Pandas module, otherwise have users import functionality from there
|
|
10
|
+
# to avoid confusion
|
|
11
|
+
from cnotebook.pandas_ext import register_pandas_formatters as _register_pandas_formatters
|
|
12
|
+
|
|
13
|
+
__version__ = '1.0.1'
|
|
14
|
+
|
|
15
|
+
###########################################
|
|
16
|
+
|
|
17
|
+
def is_jupyter_notebook() -> bool:
    """
    Check whether the code is running inside a Jupyter (ZMQ-based) IPython shell

    The check compares the class name of the active IPython shell against 'ZMQInteractiveShell',
    which is the shell used by the Jupyter notebook and qtconsole. A terminal IPython session
    ('TerminalInteractiveShell'), any other shell type, and a plain Python interpreter all
    report False.

    :return: True only if the active shell is a ZMQInteractiveShell
    """
    # noinspection PyBroadException
    try:
        from IPython import get_ipython
        shell_name = get_ipython().__class__.__name__

    except Exception:
        # IPython is unavailable or could not be queried: probably a standard Python interpreter
        return False

    # Every shell other than the ZMQ one (terminal IPython, unknown types) is not a notebook
    return shell_name == 'ZMQInteractiveShell'
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def is_marimo_notebook() -> bool:
    """
    Check whether the code is running inside a marimo notebook

    Any failure, either importing marimo or asking it whether a notebook is running, is
    treated as "not a marimo notebook".

    :return: Result of marimo.running_in_notebook(), or False if marimo cannot be used
    """
    # noinspection PyBroadException
    try:
        # noinspection PyUnresolvedReferences
        import marimo
    except Exception:
        return False

    # noinspection PyBroadException
    try:
        return marimo.running_in_notebook()
    except Exception:
        return False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Register the formatters for whichever notebook environment imported the package
# Note: All registration function calls are idempotent
if is_jupyter_notebook():
    # Active shell is a ZMQInteractiveShell: install both the IPython and the Pandas formatters
    _register_ipython_formatters()
    _register_pandas_formatters()

elif is_marimo_notebook():
    # The module is imported but nothing from it is referenced here
    # NOTE(review): presumably marimo_ext registers its formatters as an import side effect - confirm
    from . import marimo_ext
|
|
55
|
+
|
|
56
|
+
# Configure logging
|
|
57
|
+
log = logging.getLogger("cnotebook")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class LevelSpecificFormatter(logging.Formatter):
    """
    A logging formatter that prefixes DEBUG records with their level name

    Records at the DEBUG level are rendered with DEBUG_FORMAT and every other level with
    NORMAL_FORMAT. The choice is made by delegating DEBUG records to a second, dedicated
    formatter rather than by rewriting this formatter's private ``_style._fmt`` attribute on
    each call, so formatting a record never mutates state shared between handlers.
    """
    NORMAL_FORMAT = "%(message)s"
    DEBUG_FORMAT = "%(levelname)s: %(message)s"

    def __init__(self):
        super().__init__(fmt=self.NORMAL_FORMAT, datefmt=None, style='%')

        # Dedicated formatter for DEBUG records (the instance itself handles all other levels)
        self._debug_formatter = logging.Formatter(fmt=self.DEBUG_FORMAT, datefmt=None, style='%')

    def format(self, record: logging.LogRecord) -> str:
        """
        Format a log record for printing
        :param record: Record to format
        :return: Formatted record
        """
        if record.levelno == logging.DEBUG:
            return self._debug_formatter.format(record)

        # Call the original formatter class to do the grunt work
        return super().format(record)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
###################################################
# Default console handler for the package logger  #
###################################################

# The handler is looked up by name before a new one is created so that executing this module
# again (for example through importlib.reload) reuses the existing handler instead of stacking
# a duplicate on the logger, which would emit every message more than once.
_DEFAULT_HANDLER_NAME = "cnotebook-default"

ch = next((_h for _h in log.handlers if _h.get_name() == _DEFAULT_HANDLER_NAME), None)

if ch is None:
    ch = logging.StreamHandler()
    ch.set_name(_DEFAULT_HANDLER_NAME)
    log.addHandler(ch)

# The handler lets everything through; the effective verbosity is controlled by the logger level
ch.setLevel(logging.DEBUG)

# Always install a fresh formatter so a re-executed LevelSpecificFormatter definition takes effect
ch.setFormatter(LevelSpecificFormatter())

log.setLevel(logging.INFO)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def enable_debugging() -> None:
    """
    Switch the package logger to the DEBUG level

    Convenience wrapper so callers do not need to import logging or look up the package
    logger themselves.
    """
    log.setLevel(logging.DEBUG)
|
cnotebook/align.py
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Callable, Literal
|
|
3
|
+
from abc import ABCMeta, abstractmethod
|
|
4
|
+
from openeye import oegraphsim, oechem, oedepict
|
|
5
|
+
|
|
6
|
+
log = logging.getLogger("cnotebook")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
########################################################################################################################
|
|
10
|
+
# Fingerprint generation
|
|
11
|
+
########################################################################################################################
|
|
12
|
+
|
|
13
|
+
# Dynamic creation of a typemap for OpenEye atom type fingerprints
|
|
14
|
+
# Maps the lower-cased suffix of every oegraphsim.OEFPAtomType_* attribute to the attribute's value
atom_fp_typemap = {
    name.replace("OEFPAtomType_", "").lower(): getattr(oegraphsim, name)
    for name in dir(oegraphsim)
    if name.startswith("OEFPAtomType_")
}
|
|
18
|
+
|
|
19
|
+
# Dynamic creation of a typemap for OpenEye bond type fingerprints
|
|
20
|
+
# Maps the lower-cased suffix of every oegraphsim.OEFPBondType_* attribute to the attribute's value
bond_fp_typemap = {
    name.replace("OEFPBondType_", "").lower(): getattr(oegraphsim, name)
    for name in dir(oegraphsim)
    if name.startswith("OEFPBondType_")
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_atom_mask(atom_type):
    """
    Build an OEFingerprint atom type bitmask from a "|" delimited string

    Each member of the string names an entry of the OEFPAtomType_ namespace. Members are
    matched case-insensitively, surrounding whitespace is ignored, and the "OEFPAtomType_"
    prefix is optional.

    :param atom_type: Delimited string of OEFPAtomTypes
    :return: Bitmask for OpenEye fingerprint atom types
    :rtype: int
    :raises KeyError: If a member of the string is not found in the atom type map
    :raises ValueError: If the combined mask equals OEFPAtomType_None
    """
    combined = oegraphsim.OEFPAtomType_None

    for member in atom_type.split("|"):
        # Normalize the member to the key format used by the type map
        key = member.strip().lower().replace("oefpatomtype_", "")
        flag = atom_fp_typemap.get(key)

        if flag is None:
            raise KeyError(f'{member} is not a known OEAtomFPType')

        combined = combined | flag

    # A mask with nothing set cannot be used to build a fingerprint
    if combined == oegraphsim.OEFPAtomType_None:
        raise ValueError("No atom fingerprint types configured")

    return combined
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_bond_mask(bond_type):
    """
    Build an OEFingerprint bond type bitmask from a "|" delimited string

    Each member of the string names an entry of the OEFPBondType_ namespace. Members are
    matched case-insensitively, surrounding whitespace is ignored, and the "OEFPBondType_"
    prefix is optional.

    :param bond_type: Delimited string of OEFPBondTypes
    :return: Bitmask for OpenEye fingerprint bond types
    :rtype: int
    :raises KeyError: If a member of the string is not found in the bond type map
    :raises ValueError: If the combined mask equals OEFPBondType_None
    """
    combined = oegraphsim.OEFPBondType_None

    for member in bond_type.split("|"):
        # Normalize the member to the key format used by the type map
        key = member.strip().lower().replace("oefpbondtype_", "")
        flag = bond_fp_typemap.get(key)

        if flag is None:
            raise KeyError(f'{member} is not a known OEBondFPType')

        combined = combined | flag

    # A mask with nothing set cannot be used to build a fingerprint
    if combined == oegraphsim.OEFPBondType_None:
        raise ValueError("No bond fingerprint types configured")

    return combined
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def fingerprint_maker(
        fptype: str,
        num_bits: int,
        min_distance: int,
        max_distance: int,
        atom_type: str | int,
        bond_type: str | int
) -> Callable[[oechem.OEMolBase], oegraphsim.OEFingerPrint]:
    """
    Create a function that generates a fingerprint from a molecule

    The size, distance and atom/bond type parameters are only passed to the path, circular and
    tree fingerprint functions; the MACCS and LINGO functions are called with the fingerprint
    and molecule alone. String atom/bond types are nevertheless converted to masks for every
    fingerprint type.

    :param fptype: Fingerprint type (case-insensitive): path / tree / circular / maccs / lingo
    :param num_bits: Number of bits in the fingerprint
    :param min_distance: Minimum distance/radius for path/circular/tree
    :param max_distance: Maximum distance/radius for path/circular/tree
    :param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
    :param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
    :return: Function that generates a fingerprint from a molecule
    :raises KeyError: If the fingerprint type is not one of the supported names
    """
    # Be forgiving with case
    kind = fptype.lower()

    # Convert atom type and bond type strings to masks if necessary
    atom_mask = atom_type if not isinstance(atom_type, str) else get_atom_mask(atom_type)
    bond_mask = bond_type if not isinstance(bond_type, str) else get_bond_mask(bond_type)

    # Fingerprints configured by bit count, distance range and atom/bond masks
    configurable = {
        "path": oegraphsim.OEMakePathFP,
        "circular": oegraphsim.OEMakeCircularFP,
        "tree": oegraphsim.OEMakeTreeFP,
    }

    # Fingerprints that take no configuration beyond the molecule
    fixed = {
        "maccs": oegraphsim.OEMakeMACCS166FP,
        "lingo": oegraphsim.OEMakeLingoFP,
    }

    if kind in configurable:
        fill_configurable = configurable[kind]

        def _make_configurable_fp(mol):
            fp = oegraphsim.OEFingerPrint()
            fill_configurable(fp, mol, num_bits, min_distance, max_distance, atom_mask, bond_mask)
            return fp

        return _make_configurable_fp

    if kind in fixed:
        fill_fixed = fixed[kind]

        def _make_fixed_fp(mol):
            fp = oegraphsim.OEFingerPrint()
            fill_fixed(fp, mol)
            return fp

        return _make_fixed_fp

    raise KeyError(f'Unknown fingerprint type {fptype} (valid: path / tree / circular / maccs / lingo)')
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
########################################################################################################################
|
|
131
|
+
# Small molecule 2D structure aligners
|
|
132
|
+
########################################################################################################################
|
|
133
|
+
|
|
134
|
+
class Aligner(metaclass=ABCMeta):
    """
    Base class for a 2D molecule aligner

    Calling an aligner first validates the molecule and only then aligns it. Subclasses
    provide both steps by implementing validate() and align().
    """
    def __call__(self, mol_or_disp: oechem.OEMolBase | oedepict.OE2DMolDisplay) -> bool:
        """
        Align a molecule if it validates against this aligner
        :param mol_or_disp: Molecule, or a display object from which the molecule is taken
        :return: Result of align() if the molecule validates, otherwise False
        """
        # Get the molecule
        if isinstance(mol_or_disp, oechem.OEMolBase):
            mol = mol_or_disp
        else:
            mol = mol_or_disp.GetMolecule()

        # Molecules that do not validate against the aligner are left untouched
        if not self.validate(mol):
            return False

        return self.align(mol)

    @abstractmethod
    def align(self, mol: oechem.OEMolBase) -> bool:
        """
        Align the molecule
        :param mol: Molecule to align
        :return: True if the alignment was successful
        """
        raise NotImplementedError

    @abstractmethod
    def validate(self, mol: oechem.OEMolBase) -> bool:
        """
        Decide whether the molecule should be aligned by this aligner
        :param mol: Molecule to check
        :return: True if align() should be attempted
        """
        raise NotImplementedError
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# FIXME: Bug reported to OpenEye on 11/30/2025 regarding using OESubSearch matches in OEPrepareMultiAlignedDepiction
|
|
159
|
+
# class OESubSearchAligner(Aligner):
|
|
160
|
+
# """
|
|
161
|
+
# 2D molecule substructure alignment
|
|
162
|
+
# """
|
|
163
|
+
# def __init__(self, ref: oechem.OESubSearch | oechem.OEMolBase, **_kwargs):
|
|
164
|
+
#
|
|
165
|
+
# # Reference molecule with 2D coordinates
|
|
166
|
+
# self.refmol = None
|
|
167
|
+
#
|
|
168
|
+
# # In the future, make these configurable
|
|
169
|
+
# self.alignment_options = oedepict.OEAlignmentOptions()
|
|
170
|
+
#
|
|
171
|
+
# if isinstance(ref, (oechem.OESubSearch, str)):
|
|
172
|
+
# self.ss = oechem.OESubSearch(ref)
|
|
173
|
+
#
|
|
174
|
+
# else:
|
|
175
|
+
# self.refmol = oechem.OEGraphMol(ref)
|
|
176
|
+
# self.ss = oechem.OESubSearch(self.refmol, oechem.OEExprOpts_DefaultAtoms, oechem.OEExprOpts_DefaultBonds)
|
|
177
|
+
#
|
|
178
|
+
# def validate(self, mol: oechem.OEMolBase) -> bool:
|
|
179
|
+
# """
|
|
180
|
+
# Validate that the molecule has a match to this substructure search
|
|
181
|
+
# :param mol: Molecule to search
|
|
182
|
+
# :return: True if there is a match to this substructure search
|
|
183
|
+
# """
|
|
184
|
+
# oechem.OEPrepareSearch(mol, self.ss)
|
|
185
|
+
# return self.ss.SingleMatch(mol)
|
|
186
|
+
#
|
|
187
|
+
# def align(self, mol: oechem.OEMolBase) -> bool:
|
|
188
|
+
# """
|
|
189
|
+
# Align to this substructure trying the following
|
|
190
|
+
#
|
|
191
|
+
# 1. If we had a reference molecule, then try to maximize the alignment to that reference molecule using the
|
|
192
|
+
# OpenEye multiple aligner (this works VERY WELL even if there are multiple matches to the reference)
|
|
193
|
+
#
|
|
194
|
+
# 2. Standard aligned depiction, which fails if there are multiple matches to the substructure
|
|
195
|
+
#
|
|
196
|
+
# :param mol: Molecule to align
|
|
197
|
+
# :return: True if the alignment was successful
|
|
198
|
+
# """
|
|
199
|
+
# ok = False
|
|
200
|
+
#
|
|
201
|
+
# if self.refmol is not None:
|
|
202
|
+
# oechem.OEPrepareSearch(mol, self.ss)
|
|
203
|
+
# ok = oedepict.OEPrepareMultiAlignedDepiction(
|
|
204
|
+
# mol,
|
|
205
|
+
# self.refmol,
|
|
206
|
+
# self.ss.Match(mol)
|
|
207
|
+
# )
|
|
208
|
+
#
|
|
209
|
+
# if not ok:
|
|
210
|
+
# alignres = oedepict.OEPrepareAlignedDepiction(
|
|
211
|
+
# mol,
|
|
212
|
+
# self.ss,
|
|
213
|
+
# self.alignment_options
|
|
214
|
+
# )
|
|
215
|
+
#
|
|
216
|
+
# ok = alignres.IsValid()
|
|
217
|
+
#
|
|
218
|
+
# return ok
|
|
219
|
+
#
|
|
220
|
+
#
|
|
221
|
+
# FIXME: Bug reported to OpenEye on 11/30/2025 regarding using OEMCSSearch matches in OEPrepareMultiAlignedDepiction
|
|
222
|
+
# class OEMCSSearchAligner(Aligner):
|
|
223
|
+
# """
|
|
224
|
+
# 2D molecule MCS alignment
|
|
225
|
+
# """
|
|
226
|
+
# def __init__(
|
|
227
|
+
# self,
|
|
228
|
+
# ref: oechem.OEMCSSearch | oechem.OEMolBase,
|
|
229
|
+
# *,
|
|
230
|
+
# func: Literal["atoms", "bonds", "atoms_and_cycles", "bonds_and_cycles"] = "bonds_and_cycles",
|
|
231
|
+
# min_atoms: int = 1,
|
|
232
|
+
# **_kwargs
|
|
233
|
+
# ):
|
|
234
|
+
#
|
|
235
|
+
# self.refmol = None
|
|
236
|
+
#
|
|
237
|
+
# # In the future, make these configurable
|
|
238
|
+
# self.alignment_options = oedepict.OEAlignmentOptions()
|
|
239
|
+
#
|
|
240
|
+
# if isinstance(ref, oechem.OEMCSSearch):
|
|
241
|
+
# self.mcss = oechem.OEMCSSearch(ref)
|
|
242
|
+
#
|
|
243
|
+
# else:
|
|
244
|
+
# self.refmol = ref.CreateCopy()
|
|
245
|
+
#
|
|
246
|
+
# # Currently just using default parameters
|
|
247
|
+
# self.mcss = oechem.OEMCSSearch(oechem.OEMCSType_Approximate)
|
|
248
|
+
# self.mcss.Init(self.refmol, oechem.OEExprOpts_DefaultAtoms, oechem.OEExprOpts_DefaultBonds)
|
|
249
|
+
#
|
|
250
|
+
# if func == "atoms":
|
|
251
|
+
# self.mcss.SetMCSFunc(oechem.OEMCSMaxAtoms())
|
|
252
|
+
# elif func == "bonds":
|
|
253
|
+
# self.mcss.SetMCSFunc(oechem.OEMCSMaxBonds())
|
|
254
|
+
# elif func == "atoms_and_cycles":
|
|
255
|
+
# self.mcss.SetMCSFunc(oechem.OEMCSMaxAtomsCompleteCycles())
|
|
256
|
+
# elif func == "bonds_and_cycles":
|
|
257
|
+
# self.mcss.SetMCSFunc(oechem.OEMCSMaxBondsCompleteCycles())
|
|
258
|
+
# else:
|
|
259
|
+
# raise ValueError(f'Unknown MCS evaluation function name: {func}')
|
|
260
|
+
#
|
|
261
|
+
# # Other options
|
|
262
|
+
# self.mcss.SetMinAtoms(min_atoms)
|
|
263
|
+
#
|
|
264
|
+
# def validate(self, mol: oechem.OEMolBase) -> bool:
|
|
265
|
+
# """
|
|
266
|
+
# Validate that a maximum common substructure exists in a query molecule (within a threshold)
|
|
267
|
+
# :param mol: Molecule to search
|
|
268
|
+
# :return: True if the molecule contains the maximum common substructure
|
|
269
|
+
# """
|
|
270
|
+
# return self.mcss.SingleMatch(mol)
|
|
271
|
+
#
|
|
272
|
+
# def align(self, mol: oechem.OEMolBase) -> bool:
|
|
273
|
+
# ok = False
|
|
274
|
+
#
|
|
275
|
+
# if self.refmol is not None:
|
|
276
|
+
#
|
|
277
|
+
# ok = oedepict.OEPrepareMultiAlignedDepiction(
|
|
278
|
+
# mol,
|
|
279
|
+
# self.refmol,
|
|
280
|
+
# self.mcss.Match(mol)
|
|
281
|
+
# )
|
|
282
|
+
#
|
|
283
|
+
# if not ok:
|
|
284
|
+
#
|
|
285
|
+
# alignres = oedepict.OEPrepareAlignedDepiction(
|
|
286
|
+
# mol,
|
|
287
|
+
# self.mcss,
|
|
288
|
+
# self.alignment_options
|
|
289
|
+
# )
|
|
290
|
+
#
|
|
291
|
+
# ok = alignres.IsValid()
|
|
292
|
+
#
|
|
293
|
+
# return ok
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
class OEFingerprintAligner(Aligner):
    """
    Fingerprint aligner

    A molecule is aligned to the reference only if the Tanimoto similarity between its
    fingerprint and the reference fingerprint reaches the configured threshold. The alignment
    itself is driven by the fingerprint overlaps between the molecule and the reference.
    """
    # Just using the default tree fingerprint type for the alignment

    def __init__(
            self,
            refmol: oechem.OEMolBase,
            *,
            threshold: float = 0.4,
            fptype: str = "tree",
            num_bits: int = 4096,
            min_distance: int = 0,
            max_distance: int = 4,
            atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
            bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond
    ):
        """
        Configure the aligner for a reference molecule
        :param refmol: Reference molecule (a copy is stored)
        :param threshold: Minimum Tanimoto similarity required to apply the alignment
        :param fptype: Fingerprint type
        :param num_bits: Number of bits in the fingerprint
        :param min_distance: Minimum distance/radius for path/circular/tree
        :param max_distance: Maximum distance/radius for path/circular/tree
        :param atom_type: Atom type string delimited by "|" OR int bitmask
        :param bond_type: Bond type string delimited by "|" OR int bitmask
        """
        # Similarity threshold to apply alignment
        self.threshold = threshold

        # Fingerprint maker
        fp_settings = dict(
            fptype=fptype,
            num_bits=num_bits,
            min_distance=min_distance,
            max_distance=max_distance,
            atom_type=atom_type,
            bond_type=bond_type,
        )
        self.make_fp = fingerprint_maker(**fp_settings)

        # Reference molecule and fingerprint (both fingerprint attributes stay None for an invalid reference)
        self.refmol = refmol.CreateCopy()

        if self.refmol.IsValid():
            self.reffp = self.make_fp(self.refmol)
            self.fptype = self.reffp.GetFPTypeBase()
        else:
            self.reffp = None
            self.fptype = None

    def validate(self, mol: oechem.OEMolBase) -> bool:
        """
        Check that the molecule is similar enough to the reference
        :param mol: Molecule to compare to the reference
        :return: True if the Tanimoto similarity is at least the threshold, False if there is no reference fingerprint
        """
        if self.reffp is None:
            return False

        similarity = oegraphsim.OETanimoto(self.make_fp(mol), self.reffp)
        return similarity >= self.threshold

    def align(self, mol: oechem.OEMolBase) -> bool:
        """
        Align the molecule to the reference using their fingerprint overlaps
        :param mol: Molecule to align
        :return: Result of the multi-aligned depiction, or False if there is no reference fingerprint type
        """
        if self.fptype is None:
            return False

        shared = oegraphsim.OEGetFPOverlap(mol, self.refmol, self.fptype)
        return oedepict.OEPrepareMultiAlignedDepiction(mol, self.refmol, shared)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# Registry mapping an alignment method name to the Aligner subclass implementing it
# (the "substructure" and "mcss" entries are disabled along with their commented-out classes)
_ALIGNERS = {
    # "substructure": OESubSearchAligner,
    "fingerprint": OEFingerprintAligner,
    # "mcss": OEMCSSearchAligner
}
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def create_aligner(
        ref: oechem.OEMolBase,
        method: Literal["fp", "fingerprint"] | None = None,
        **kwargs
) -> Aligner:
    """
    Create an aligner for the given reference molecule.

    Note: Only fingerprint alignment is currently supported. Substructure and MCS aligners
    are disabled due to OpenEye bugs reported on 11/30/2025.

    :param ref: Alignment reference molecule
    :param method: Alignment method, case-insensitive (only "fp" or "fingerprint" supported); None
                   selects the fingerprint aligner
    :param kwargs: Keyword arguments for the OEFingerprintAligner
    :return: Configured aligner instance
    :raises ValueError: If a method other than "fp" / "fingerprint" is requested
    :raises TypeError: If the reference is not an oechem.OEMolBase
    """
    # Validate the requested method; it only gates the call because a single aligner exists
    if method is not None and method.lower() not in ("fingerprint", "fp"):
        raise ValueError(f'Unknown depiction alignment method: {method}. Only "fingerprint"/"fp" is currently supported.')

    # Only fingerprint aligner is currently available
    if not isinstance(ref, oechem.OEMolBase):
        raise TypeError(f'Unsupported alignment reference type: {type(ref)}. Only oechem.OEMolBase is currently supported.')

    log.debug("Using fingerprint aligner for oechem.OEMolBase alignment reference")
    return OEFingerprintAligner(ref, **kwargs)
|