cnotebook 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cnotebook/__init__.py ADDED
@@ -0,0 +1,104 @@
1
+ import logging
2
+ from .pandas_ext import render_dataframe
3
+ from .context import cnotebook_context
4
+ from .ipython_ext import (
5
+ register_ipython_formatters as _register_ipython_formatters,
6
+ render_molecule_grid
7
+ )
8
+
9
+ # Only import formatter registration from the Pandas module, otherwise have users import functionality from there
10
+ # to avoid confusion
11
+ from cnotebook.pandas_ext import register_pandas_formatters as _register_pandas_formatters
12
+
13
+ __version__ = '1.0.1'
14
+
15
+ ###########################################
16
+
17
def is_jupyter_notebook() -> bool:
    """
    Check whether the code is running inside a Jupyter kernel
    :return: True only when the active IPython shell class is named ``ZMQInteractiveShell``
             (Jupyter notebook or qtconsole); False for terminal IPython, any other shell,
             or when the shell cannot be determined at all
    """
    # noinspection PyBroadException
    try:
        from IPython import get_ipython
        shell_name = get_ipython().__class__.__name__
    except Exception:
        # Any failure while probing IPython is treated as "not a notebook"
        return False

    # Every shell other than the ZMQ-based one is reported as not being a notebook
    return shell_name == 'ZMQInteractiveShell'
34
+
35
+
36
def is_marimo_notebook() -> bool:
    """
    Check whether the code is running inside a marimo notebook
    :return: Whatever ``marimo.running_in_notebook()`` reports, or False if marimo cannot be
             imported or the check itself raises
    """
    # noinspection PyBroadException
    try:
        # noinspection PyUnresolvedReferences
        import marimo
        in_notebook = marimo.running_in_notebook()
    except Exception:
        # Best-effort detection: any failure means "not marimo"
        in_notebook = False

    return in_notebook
45
+
46
+
47
# Register the formatters
# Note: All registration function calls are idempotent
# These statements run at import time, so simply importing the package configures rendering.
# Jupyter is probed first; marimo is only considered when the Jupyter check fails.
if is_jupyter_notebook():
    _register_ipython_formatters()
    _register_pandas_formatters()

elif is_marimo_notebook():
    # Imported only for its import-time effects; the name is not used again in this module
    # NOTE(review): presumably marimo_ext registers the marimo formatters on import - confirm
    from . import marimo_ext

# Configure logging
# Package-wide logger; the same "cnotebook" name is used by the other modules of the package
log = logging.getLogger("cnotebook")
58
+
59
+
60
class LevelSpecificFormatter(logging.Formatter):
    """
    Logging formatter that prefixes only DEBUG records with their level name

    Records at ``logging.DEBUG`` are rendered with ``DEBUG_FORMAT``; records at every other
    level are rendered with ``NORMAL_FORMAT``.

    The format strings are read once, when the formatter is constructed.
    """
    NORMAL_FORMAT = "%(message)s"
    DEBUG_FORMAT = "%(levelname)s: %(message)s"

    def __init__(self):
        super().__init__(fmt=self.NORMAL_FORMAT, datefmt=None, style='%')

        # A dedicated delegate renders DEBUG records. Swapping the private ``_style._fmt`` of this
        # formatter on every call would leave the last record's format behind as shared state,
        # which leaks into later formatMessage() calls and is unsafe if the formatter is shared.
        self._debug_formatter = logging.Formatter(fmt=self.DEBUG_FORMAT, datefmt=None, style='%')

    def format(self, record: logging.LogRecord) -> str:
        """
        Format a log record for printing
        :param record: Record to format
        :return: Formatted record
        """
        if record.levelno == logging.DEBUG:
            return self._debug_formatter.format(record)

        # Every other level uses the format this formatter was constructed with
        return super().format(record)
85
+
86
+
87
##################################################
# Console handler for the package logger         #
##################################################
# NOTE(review): this banner used to read "Example of how to use it", but the statements below are
# live configuration executed at import time, not an example.

# StreamHandler() without an explicit stream writes to sys.stderr
ch = logging.StreamHandler()
# The handler itself lets DEBUG records through; the logger level set below is the effective filter
ch.setLevel(logging.DEBUG)

ch.setFormatter(LevelSpecificFormatter())
log.addHandler(ch)

# Default verbosity is INFO
log.setLevel(logging.INFO)
98
+
99
+
100
def enable_debugging():
    """
    Convenience function for enabling the debug log

    Lowers the level of the module-level ``log`` logger ("cnotebook") to ``logging.DEBUG``.
    """
    log.setLevel(logging.DEBUG)
cnotebook/align.py ADDED
@@ -0,0 +1,390 @@
1
+ import logging
2
+ from typing import Callable, Literal
3
+ from abc import ABCMeta, abstractmethod
4
+ from openeye import oegraphsim, oechem, oedepict
5
+
6
+ log = logging.getLogger("cnotebook")
7
+
8
+
9
+ ########################################################################################################################
10
+ # Fingerprint generation
11
+ ########################################################################################################################
12
+
13
# Lookup table for OpenEye fingerprint atom types, built by introspecting oegraphsim:
# lower-cased constant name without the "OEFPAtomType_" prefix -> constant value
atom_fp_typemap = {
    name.replace("OEFPAtomType_", "").lower(): getattr(oegraphsim, name)
    for name in dir(oegraphsim)
    if name.startswith("OEFPAtomType_")
}
18
+
19
# Lookup table for OpenEye fingerprint bond types, built by introspecting oegraphsim:
# lower-cased constant name without the "OEFPBondType_" prefix -> constant value
bond_fp_typemap = {
    name.replace("OEFPBondType_", "").lower(): getattr(oegraphsim, name)
    for name in dir(oegraphsim)
    if name.startswith("OEFPBondType_")
}
24
+
25
+
26
def get_atom_mask(atom_type):
    """
    Get the OEFingerprint atom type masks from "|" delimited strings

    The atom_type string is composed of "|" delimited members from the OEFPAtomType_ namespace. These are
    case-insensitive and only optionally need to be prefixed by "OEFPAtomType_".

    :param atom_type: Delimited string of OEFPAtomTypes
    :return: Bitmask for OpenEye fingerprint atom types
    :rtype: int
    :raises KeyError: If a member is not a key of atom_fp_typemap
    :raises ValueError: If the combined mask is still OEFPAtomType_None
    """
    atom_mask = oegraphsim.OEFPAtomType_None

    for member in atom_type.split("|"):
        # Normalize the member to the key format of atom_fp_typemap (lower case, no prefix)
        key = member.strip().lower().replace("oefpatomtype_", "")
        mask = atom_fp_typemap.get(key)

        if mask is None:
            # Report the member as given, using the real namespace name (OEFPAtomType)
            raise KeyError(f'{member} is not a known OEFPAtomType')

        atom_mask |= mask

    # Check validity
    if atom_mask == oegraphsim.OEFPAtomType_None:
        raise ValueError("No atom fingerprint types configured")

    return atom_mask
47
+
48
+
49
def get_bond_mask(bond_type):
    """
    Get the OEFingerprint bond type masks from "|" delimited strings

    The bond_type string is composed of "|" delimited members from the OEFPBondType_ namespace. These are
    case-insensitive and only optionally need to be prefixed by "OEFPBondType_".

    :param bond_type: Delimited string of OEFPBondTypes
    :return: Bitmask for OpenEye fingerprint bond types
    :rtype: int
    :raises KeyError: If a member is not a key of bond_fp_typemap
    :raises ValueError: If the combined mask is still OEFPBondType_None
    """
    bond_mask = oegraphsim.OEFPBondType_None

    for member in bond_type.split("|"):
        # Normalize the member to the key format of bond_fp_typemap (lower case, no prefix)
        key = member.strip().lower().replace("oefpbondtype_", "")
        mask = bond_fp_typemap.get(key)

        if mask is None:
            # Report the member as given, using the real namespace name (OEFPBondType)
            raise KeyError(f'{member} is not a known OEFPBondType')

        bond_mask |= mask

    # Check validity
    if bond_mask == oegraphsim.OEFPBondType_None:
        raise ValueError("No bond fingerprint types configured")

    return bond_mask
71
+
72
+
73
def fingerprint_maker(
        fptype: str,
        num_bits: int,
        min_distance: int,
        max_distance: int,
        atom_type: str | int,
        bond_type: str | int
) -> Callable[[oechem.OEMolBase], oegraphsim.OEFingerPrint]:
    """
    Build a callable that computes one kind of OpenEye fingerprint for a molecule

    The path, circular and tree fingerprints use every parameter. The maccs and lingo fingerprints
    only take the molecule, so the remaining parameters do not affect them (the atom/bond type
    arguments are still converted, and therefore validated, when given as strings).

    :param fptype: Fingerprint type (case-insensitive): path / tree / circular / maccs / lingo
    :param num_bits: Number of bits in the fingerprint
    :param min_distance: Minimum distance/radius for path/circular/tree
    :param max_distance: Maximum distance/radius for path/circular/tree
    :param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
    :param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
    :return: Function that generates a fingerprint from a molecule
    :raises KeyError: If the fingerprint type is not one of the supported names
    """
    kind = fptype.lower()

    # String specifications are parsed into bitmasks; anything else is passed through untouched
    atom_mask = atom_type if not isinstance(atom_type, str) else get_atom_mask(atom_type)
    bond_mask = bond_type if not isinstance(bond_type, str) else get_bond_mask(bond_type)

    # One "fill this fingerprint from this molecule" routine per supported type. The oegraphsim
    # functions are looked up when the routine runs, not when this table is built.
    fillers = {
        "path": lambda fp, mol: oegraphsim.OEMakePathFP(
            fp, mol, num_bits, min_distance, max_distance, atom_mask, bond_mask
        ),
        "circular": lambda fp, mol: oegraphsim.OEMakeCircularFP(
            fp, mol, num_bits, min_distance, max_distance, atom_mask, bond_mask
        ),
        "tree": lambda fp, mol: oegraphsim.OEMakeTreeFP(
            fp, mol, num_bits, min_distance, max_distance, atom_mask, bond_mask
        ),
        "maccs": lambda fp, mol: oegraphsim.OEMakeMACCS166FP(fp, mol),
        "lingo": lambda fp, mol: oegraphsim.OEMakeLingoFP(fp, mol),
    }

    fill = fillers.get(kind)
    if fill is None:
        raise KeyError(f'Unknown fingerprint type {fptype} (valid: path / tree / circular / maccs / lingo)')

    def _make_fp(mol):
        fp = oegraphsim.OEFingerPrint()
        fill(fp, mol)
        return fp

    return _make_fp
128
+
129
+
130
+ ########################################################################################################################
131
+ # Small molecule 2D structure aligners
132
+ ########################################################################################################################
133
+
134
class Aligner(metaclass=ABCMeta):
    """
    Abstract base for 2D depiction aligners

    Calling an aligner first validates the molecule and only then attempts the alignment.
    """
    def __call__(self, mol_or_disp: oechem.OEMolBase | oedepict.OE2DMolDisplay) -> bool:
        """
        Align a molecule, or the molecule behind a display object, if it passes validation
        :param mol_or_disp: Molecule, or an object exposing GetMolecule()
        :return: Result of align() when validate() accepts the molecule, otherwise False
        """
        # Anything that is not a molecule is asked for its molecule
        if isinstance(mol_or_disp, oechem.OEMolBase):
            mol = mol_or_disp
        else:
            mol = mol_or_disp.GetMolecule()

        # Molecules rejected by the aligner are left untouched
        if not self.validate(mol):
            return False

        return self.align(mol)

    @abstractmethod
    def align(self, mol: oechem.OEMolBase) -> bool:
        """
        Perform the alignment
        :param mol: Molecule to align
        :return: True if the alignment was successful
        """
        raise NotImplementedError

    @abstractmethod
    def validate(self, mol: oechem.OEMolBase) -> bool:
        """
        Decide whether the molecule should be aligned by this aligner
        :param mol: Molecule to check
        :return: True if align() should be attempted
        """
        raise NotImplementedError
156
+
157
+
158
+ # FIXME: Bug reported to OpenEye on 11/30/2025 regarding using OESubSearch matches in OEPrepareMultiAlignedDepiction
159
+ # class OESubSearchAligner(Aligner):
160
+ # """
161
+ # 2D molecule substructure alignment
162
+ # """
163
+ # def __init__(self, ref: oechem.OESubSearch | oechem.OEMolBase, **_kwargs):
164
+ #
165
+ # # Reference molecule with 2D coordinates
166
+ # self.refmol = None
167
+ #
168
+ # # In the future, make these configurable
169
+ # self.alignment_options = oedepict.OEAlignmentOptions()
170
+ #
171
+ # if isinstance(ref, (oechem.OESubSearch, str)):
172
+ # self.ss = oechem.OESubSearch(ref)
173
+ #
174
+ # else:
175
+ # self.refmol = oechem.OEGraphMol(ref)
176
+ # self.ss = oechem.OESubSearch(self.refmol, oechem.OEExprOpts_DefaultAtoms, oechem.OEExprOpts_DefaultBonds)
177
+ #
178
+ # def validate(self, mol: oechem.OEMolBase) -> bool:
179
+ # """
180
+ # Validate that the molecule has a match to this substructure search
181
+ # :param mol: Molecule to search
182
+ # :return: True if there is a match to this substructure search
183
+ # """
184
+ # oechem.OEPrepareSearch(mol, self.ss)
185
+ # return self.ss.SingleMatch(mol)
186
+ #
187
+ # def align(self, mol: oechem.OEMolBase) -> bool:
188
+ # """
189
+ # Align to this substructure trying the following
190
+ #
191
+ # 1. If we had a reference molecule, then try to maximize the alignment to that reference molecule using the
192
+ # OpenEye multiple aligner (this works VERY WELL even if there are multiple matches to the reference)
193
+ #
194
+ # 2. Standard aligned depiction, which fails if there are multiple matches to the substructure
195
+ #
196
+ # :param mol: Molecule to align
197
+ # :return: True if the alignment was successful
198
+ # """
199
+ # ok = False
200
+ #
201
+ # if self.refmol is not None:
202
+ # oechem.OEPrepareSearch(mol, self.ss)
203
+ # ok = oedepict.OEPrepareMultiAlignedDepiction(
204
+ # mol,
205
+ # self.refmol,
206
+ # self.ss.Match(mol)
207
+ # )
208
+ #
209
+ # if not ok:
210
+ # alignres = oedepict.OEPrepareAlignedDepiction(
211
+ # mol,
212
+ # self.ss,
213
+ # self.alignment_options
214
+ # )
215
+ #
216
+ # ok = alignres.IsValid()
217
+ #
218
+ # return ok
219
+ #
220
+ #
221
+ # FIXME: Bug reported to OpenEye on 11/30/2025 regarding using OEMCSSearch matches in OEPrepareMultiAlignedDepiction
222
+ # class OEMCSSearchAligner(Aligner):
223
+ # """
224
+ # 2D molecule MCS alignment
225
+ # """
226
+ # def __init__(
227
+ # self,
228
+ # ref: oechem.OEMCSSearch | oechem.OEMolBase,
229
+ # *,
230
+ # func: Literal["atoms", "bonds", "atoms_and_cycles", "bonds_and_cycles"] = "bonds_cycles",
231
+ # min_atoms: int = 1,
232
+ # **_kwargs
233
+ # ):
234
+ #
235
+ # self.refmol = None
236
+ #
237
+ # # In the future, make these configurable
238
+ # self.alignment_options = oedepict.OEAlignmentOptions()
239
+ #
240
+ # if isinstance(ref, oechem.OEMCSSearch):
241
+ # self.mcss = oechem.OEMCSSearch(ref)
242
+ #
243
+ # else:
244
+ # self.refmol = ref.CreateCopy()
245
+ #
246
+ # # Currently just using default parameters
247
+ # self.mcss = oechem.OEMCSSearch(oechem.OEMCSType_Approximate)
248
+ # self.mcss.Init(self.refmol, oechem.OEExprOpts_DefaultAtoms, oechem.OEExprOpts_DefaultBonds)
249
+ #
250
+ # if func == "atoms":
251
+ # self.mcss.SetMCSFunc(oechem.OEMCSMaxAtoms())
252
+ # elif func == "bonds":
253
+ # self.mcss.SetMCSFunc(oechem.OEMCSMaxBonds())
254
+ # elif func == "atoms_and_cycles":
255
+ # self.mcss.SetMCSFunc(oechem.OEMCSMaxAtomsCompleteCycles())
256
+ # elif func == "bonds_and_cycles":
257
+ # self.mcss.SetMCSFunc(oechem.OEMCSMaxBondsCompleteCycles())
258
+ # else:
259
+ # raise ValueError(f'Unknown MCS evaluation function name: {func}')
260
+ #
261
+ # # Other options
262
+ # self.mcss.SetMinAtoms(min_atoms)
263
+ #
264
+ # def validate(self, mol: oechem.OEMolBase) -> bool:
265
+ # """
266
+ # Validate that a maximum common substructure exists in a query molecule (within a threshold)
267
+ # :param mol: Molecule to search
268
+ # :return: True if the molecule contains the maximum common substructure
269
+ # """
270
+ # return self.mcss.SingleMatch(mol)
271
+ #
272
+ # def align(self, mol: oechem.OEMolBase) -> bool:
273
+ # ok = False
274
+ #
275
+ # if self.refmol is not None:
276
+ #
277
+ # ok = oedepict.OEPrepareMultiAlignedDepiction(
278
+ # mol,
279
+ # self.refmol,
280
+ # self.mcss.Match(mol)
281
+ # )
282
+ #
283
+ # if not ok:
284
+ #
285
+ # alignres = oedepict.OEPrepareAlignedDepiction(
286
+ # mol,
287
+ # self.mcss,
288
+ # self.alignment_options
289
+ # )
290
+ #
291
+ # ok = alignres.IsValid()
292
+ #
293
+ # return ok
294
+
295
+
296
class OEFingerprintAligner(Aligner):
    """
    Aligner driven by fingerprint similarity to a reference molecule

    A molecule is aligned only if its Tanimoto similarity to the reference fingerprint reaches
    the configured threshold. The defaults use the default tree fingerprint atom and bond types.
    """

    def __init__(
            self,
            refmol: oechem.OEMolBase,
            *,
            threshold: float = 0.4,
            fptype: str = "tree",
            num_bits: int = 4096,
            min_distance: int = 0,
            max_distance: int = 4,
            atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
            bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond
    ):
        """
        Create the aligner
        :param refmol: Reference molecule (a copy is stored)
        :param threshold: Minimum Tanimoto similarity required for a molecule to be aligned
        :param fptype: Fingerprint type passed to fingerprint_maker
        :param num_bits: Number of bits in the fingerprint
        :param min_distance: Minimum distance/radius for the fingerprint
        :param max_distance: Maximum distance/radius for the fingerprint
        :param atom_type: Atom type string delimited by "|" OR int bitmask
        :param bond_type: Bond type string delimited by "|" OR int bitmask
        """
        # Similarity cutoff below which no alignment is attempted
        self.threshold = threshold

        # Callable that fingerprints a molecule with the requested settings
        self.make_fp = fingerprint_maker(
            fptype=fptype,
            num_bits=num_bits,
            min_distance=min_distance,
            max_distance=max_distance,
            atom_type=atom_type,
            bond_type=bond_type
        )

        # Private copy of the reference; its fingerprint data stays None for an invalid reference
        self.refmol = refmol.CreateCopy()
        self.reffp = None
        self.fptype = None

        if not self.refmol.IsValid():
            return

        self.reffp = self.make_fp(self.refmol)
        self.fptype = self.reffp.GetFPTypeBase()

    def validate(self, mol: oechem.OEMolBase) -> bool:
        """
        Check that the molecule is similar enough to the reference
        :param mol: Molecule to check
        :return: True if the Tanimoto similarity reaches the threshold; False without a reference fingerprint
        """
        if self.reffp is None:
            return False

        similarity = oegraphsim.OETanimoto(self.make_fp(mol), self.reffp)
        return similarity >= self.threshold

    def align(self, mol: oechem.OEMolBase) -> bool:
        """
        Align the molecule to the reference using their fingerprint overlaps
        :param mol: Molecule to align
        :return: Result of OEPrepareMultiAlignedDepiction; False without a reference fingerprint type
        """
        if self.fptype is None:
            return False

        return oedepict.OEPrepareMultiAlignedDepiction(
            mol,
            self.refmol,
            oegraphsim.OEGetFPOverlap(mol, self.refmol, self.fptype)
        )
350
+
351
+
352
# Available aligner classes keyed by method name
# The "substructure" and "mcss" entries stay commented out for as long as their classes
# (OESubSearchAligner / OEMCSSearchAligner) are commented out above
_ALIGNERS = {
    # "substructure": OESubSearchAligner,
    "fingerprint": OEFingerprintAligner,
    # "mcss": OEMCSSearchAligner
}
358
+
359
+
360
def create_aligner(
        ref: oechem.OEMolBase,
        method: Literal["fp", "fingerprint"] | None = None,
        **kwargs
) -> Aligner:
    """
    Create an aligner for the given reference molecule.

    Note: Only fingerprint alignment is currently supported. Substructure and MCS aligners
    are disabled due to OpenEye bugs reported on 11/30/2025.

    :param ref: Alignment reference molecule
    :param method: Alignment method (case-insensitive; only "fp" or "fingerprint" supported).
                   None selects the fingerprint aligner as well.
    :param kwargs: Keyword arguments for the OEFingerprintAligner
    :return: Configured aligner instance
    :raises ValueError: If a method other than "fp"/"fingerprint" is requested
    :raises TypeError: If the reference is not an oechem.OEMolBase
    """
    # The method is only validated: there is a single aligner to choose from, so the normalized
    # value does not need to be kept
    if method is not None and method.lower() not in ("fingerprint", "fp"):
        raise ValueError(f'Unknown depiction alignment method: {method}. Only "fingerprint"/"fp" is currently supported.')

    # Only fingerprint aligner is currently available
    if not isinstance(ref, oechem.OEMolBase):
        raise TypeError(f'Unsupported alignment reference type: {type(ref)}. Only oechem.OEMolBase is currently supported.')

    log.debug("Using fingerprint aligner for oechem.OEMolBase alignment reference")
    return OEFingerprintAligner(ref, **kwargs)