cnotebook 1.2.0__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1237 @@
1
+ import logging
2
+ import typing
3
+ import weakref
4
+ import polars as pl
5
+ import oepolars as oeplr
6
+ from openeye import oechem, oedepict, oegraphsim, oegrapheme
7
+ from .context import pass_cnotebook_context, get_series_context, create_local_context
8
+ from typing import Iterable, Literal
9
+ from .helpers import escape_brackets, create_structure_highlighter
10
+ from .align import fingerprint_maker
11
+ from .render import (
12
+ CNotebookContext, # noqa
13
+ oemol_to_disp,
14
+ oedisp_to_html,
15
+ render_invalid_molecule,
16
+ render_empty_molecule
17
+ )
18
+
19
+ # Only register iPython formatters if that is present
20
+ try:
21
+ # noinspection PyProtectedMember,PyPackageRequirements
22
+ from IPython import get_ipython
23
+ ipython_present = True
24
+ except ModuleNotFoundError:
25
+ ipython_present = False
26
+
27
+ if typing.TYPE_CHECKING:
28
+ from .context import CNotebookContext
29
+
30
+ log = logging.getLogger("cnotebook")
31
+
32
+ # Global storage for DataFrame column contexts
33
+ # Structure: {id(DataFrame): (weakref(DataFrame), {column_name: CNotebookContext})}
34
+ # We store a weak reference to the DataFrame to allow cleanup
35
+ _dataframe_column_contexts: dict[int, tuple[weakref.ref, dict[str, CNotebookContext]]] = {}
36
+
37
+
38
def _cleanup_dead_contexts() -> None:
    """Drop registry entries whose DataFrame weak reference no longer resolves."""
    # Collect the stale keys first so the registry is not resized while it is being iterated
    stale_keys = []
    for key, entry in _dataframe_column_contexts.items():
        frame_ref = entry[0]
        if frame_ref() is None:
            stale_keys.append(key)

    for key in stale_keys:
        _dataframe_column_contexts.pop(key)
43
+
44
+
45
def get_dataframe_column_context(df: pl.DataFrame, column: str) -> CNotebookContext | None:
    """
    Look up the rendering context registered for one column of a DataFrame.

    Stale registry entries are purged before the lookup.

    :param df: The DataFrame that owns the column.
    :param column: Name of the column.
    :returns: The registered CNotebookContext, or None when nothing is registered
        for this DataFrame/column pair.
    """
    _cleanup_dead_contexts()

    entry = _dataframe_column_contexts.get(id(df))
    if entry is None:
        return None

    frame_ref, contexts_by_column = entry

    # The registry is keyed by id(); only trust the entry if it still points at this exact object
    if frame_ref() is not df:
        return None

    return contexts_by_column.get(column)
61
+
62
+
63
def set_dataframe_column_context(df: pl.DataFrame, column: str, ctx: CNotebookContext) -> None:
    """
    Register a rendering context for one column of a DataFrame.

    Stale registry entries are purged first. The DataFrame itself is only held
    through a weak reference.

    :param df: The DataFrame that owns the column.
    :param column: Name of the column.
    :param ctx: The CNotebookContext to associate with the column.
    """
    _cleanup_dead_contexts()

    key = id(df)
    entry = _dataframe_column_contexts.get(key)
    if entry is None:
        # First context for this DataFrame: create its (weakref, per-column contexts) record
        entry = (weakref.ref(df), {})
        _dataframe_column_contexts[key] = entry

    entry[1][column] = ctx
76
+
77
+
78
def create_mol_formatter(*, ctx: CNotebookContext) -> typing.Callable[[oechem.OEMolBase], str]:
    """
    Closure that creates a function that renders an OEMol to HTML

    The same ``ctx`` is used for building the display, for the empty / invalid
    placeholders, and for converting the display to HTML.

    :param ctx: CNotebook rendering context
    :return: Function that renders molecules to HTML
    """
    def _oemol_to_html(mol: oechem.OEMolBase) -> str:
        # Anything that is not a molecule is shown as its plain string form
        if not isinstance(mol, oechem.OEMolBase):
            return str(mol)

        # Render valid molecules
        if mol.IsValid():
            # Create the display object
            disp = oemol_to_disp(mol, ctx=ctx)

            # Apply display callbacks
            if ctx.callbacks is not None:
                for callback in ctx.callbacks:
                    callback(disp)

            # Pass the context explicitly (as create_disp_formatter does) so the
            # HTML step uses this column's context rather than a default one
            return oedisp_to_html(disp, ctx=ctx)

        # Empty molecule
        if mol.NumAtoms() == 0:
            return render_empty_molecule(ctx=ctx)

        # Invalid molecule
        return render_invalid_molecule(ctx=ctx)

    return _oemol_to_html
111
+
112
+
113
@pass_cnotebook_context
def create_disp_formatter(
        *,
        callbacks: list[typing.Callable[[oedepict.OE2DMolDisplay], None]] | None = None,
        ctx: CNotebookContext
) -> typing.Callable[[oedepict.OE2DMolDisplay], str]:
    """
    Build a formatter that turns an OE2DMolDisplay into HTML.

    :param callbacks: Optional callbacks applied to a copy of each display before rendering
    :param ctx: Render context
    :return: Function that renders display objects to HTML
    """

    def _oedisp_to_html(disp: oedepict.OE2DMolDisplay) -> str:
        renderable = isinstance(disp, oedepict.OE2DMolDisplay) and disp.IsValid()
        if not renderable:
            # Non-display or invalid values are shown as their plain string form
            return str(disp)

        # Callbacks operate on a copy so the stored display is left untouched
        working_copy = oedepict.OE2DMolDisplay(disp)

        if callbacks is not None:
            for modify in callbacks:
                modify(working_copy)

        return oedisp_to_html(working_copy, ctx=ctx)

    return _oedisp_to_html
141
+
142
+
143
def escape_formatter(obj: typing.Any) -> str:
    """Convert ``obj`` to a string and pass it through ``escape_brackets``."""
    text = str(obj)
    return escape_brackets(text)
145
+
146
+
147
def render_polars_dataframe(
        df: pl.DataFrame,
        formatters: dict | None = None,
        col_space: dict[str, float | int] | None = None,
        ctx: CNotebookContext | None = None,
        **kwargs
) -> str:
    """
    Render a Polars DataFrame with molecules to HTML.

    This is a native Polars implementation that renders molecule and display
    columns directly without converting to pandas.

    The ``formatters`` and ``col_space`` dictionaries supplied by the caller are
    copied before use, so the molecule / display / escape formatters installed
    here never leak back into the caller's objects.

    :param df: Polars DataFrame to render
    :param formatters: Custom formatters for displaying columns
    :param col_space: Custom column spacing
    :param ctx: Local rendering context (optional); when given it takes precedence
        over any per-column context
    :param kwargs: Additional keyword arguments (currently unused, kept for API compatibility)
    :return: HTML of rendered DataFrame
    """
    # Work on private copies: both dictionaries are modified below
    formatters = dict(formatters) if formatters else {}
    col_space = dict(col_space) if col_space else {}

    columns = df.columns

    # Identify molecule and display columns
    molecule_columns: set[str] = set()
    display_columns: set[str] = set()

    # Capture metadata from ORIGINAL DataFrame and create formatters
    for col in columns:
        dtype = df.schema[col]

        if isinstance(dtype, oeplr.MoleculeType):
            molecule_columns.add(col)

            # First check for DataFrame-level column context (persists across column accesses)
            df_col_ctx = get_dataframe_column_context(df, col)

            if ctx is not None:
                # An explicitly passed context always wins
                series_ctx = ctx
            elif df_col_ctx is not None:
                series_ctx = df_col_ctx
            else:
                # Fall back to series metadata (might be empty due to Polars Series ephemeral nature)
                series = df.get_column(col)
                metadata = series.chem.metadata if hasattr(series, 'chem') else {}
                series_ctx = get_series_context(metadata)

            if col in formatters:
                log.warning(f'Overwriting existing formatter for {col} with a molecule formatter')

            formatters[col] = create_mol_formatter(ctx=series_ctx)

            # Record the column width
            if col in col_space:
                log.warning(f'Column spacing for {col} already defined, overwriting with molecule image width')

            col_space[col] = float(series_ctx.width)

        elif isinstance(dtype, oeplr.DisplayType):
            display_columns.add(col)

            # Get metadata from the original series via .chem.metadata
            series = df.get_column(col)
            metadata = series.chem.metadata if hasattr(series, 'chem') else {}

            # Get the cnotebook options for this column (use passed ctx if provided)
            series_ctx = ctx if ctx is not None else get_series_context(metadata)

            if col in formatters:
                log.warning(f'Overwriting existing formatter for {col} with a display formatter')

            formatters[col] = create_disp_formatter(ctx=series_ctx)

            # Column width is the widest display object in the column (0 when there is none)
            col_space[col] = max(
                (disp.GetWidth() for disp in series if isinstance(disp, oedepict.OE2DMolDisplay)),
                default=0
            )

    if molecule_columns:
        log.debug(f'Detected molecule columns: {", ".join(molecule_columns)}')

    if display_columns:
        log.debug(f'Detected display columns: {", ".join(display_columns)}')

    # All other columns get escape formatter unless the caller supplied one
    for col in columns:
        if col not in display_columns and col not in molecule_columns:
            formatters.setdefault(col, escape_formatter)

    # Deep copy molecule columns to avoid modifying originals during rendering
    copied_molecule_series: dict[str, pl.Series] = {}
    for col in molecule_columns:
        series = df.get_column(col)
        if hasattr(series, 'chem') and hasattr(series.chem, 'deepcopy'):
            # Use oepolars deepcopy to create copies of molecules
            copied_series = series.chem.deepcopy()

            # Preserve metadata from original
            if hasattr(series.chem, 'metadata'):
                original_metadata = series.chem.metadata
                if original_metadata and hasattr(copied_series, 'chem'):
                    copied_series.chem.metadata.update(original_metadata)

            copied_molecule_series[col] = copied_series

    # Resolve the source series of every column once, rather than once per cell
    cell_sources = {
        col: copied_molecule_series[col] if col in copied_molecule_series else df.get_column(col)
        for col in columns
    }

    # Build HTML table natively
    html_parts = ['<table border="1" class="dataframe">', '<thead><tr style="text-align: right;">']

    # Header
    for col in columns:
        width_style = ""
        if col in col_space:
            width_style = f' style="min-width: {col_space[col]}px;"'
        html_parts.append(f'<th{width_style}>{escape_brackets(str(col))}</th>')
    html_parts.append('</tr></thead>')

    # Body
    html_parts.append('<tbody>')
    for row_idx in range(len(df)):
        html_parts.append('<tr>')
        for col in columns:
            value = cell_sources[col][row_idx]

            # Apply formatter if available
            if col in formatters:
                cell_html = formatters[col](value)
            else:
                cell_html = escape_brackets(str(value))

            html_parts.append(f'<td>{cell_html}</td>')
        html_parts.append('</tr>')
    html_parts.append('</tbody>')

    html_parts.append('</table>')

    return ''.join(html_parts)
291
+
292
+
293
+ ########################################################################################################################
294
+ # Series accessor methods (monkey-patched onto oepolars)
295
+ ########################################################################################################################
296
+
297
+
298
def _series_highlight(
        self,
        pattern: Iterable[str] | str | oechem.OESubSearch | Iterable[oechem.OESubSearch],
        *,
        color: oechem.OEColor | oechem.OEColorIter | None = None,
        style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
        ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEQMol | Literal["first"] | oechem.OEMolBase | None = None,
        method: Literal["ss", "substructure", "mcss", "fp", "fingerprint"] | None = None
) -> None:
    """
    Register highlight callbacks for a molecule series, optionally aligning it.

    Accepted ``pattern`` values:
        - a SMARTS string
        - an oechem.OESubSearch, oechem.OEMCSSearch or oechem.OEQMol object
        - an iterable of any of the above

    :param pattern: Pattern(s) to highlight in the molecule.
    :param color: Highlight color(s), forwarded unchanged to the highlighter factory.
    :param style: Highlight style, forwarded unchanged to the highlighter factory.
    :param ref: Optional reference for alignment.
    :param method: Optional alignment method.
    :raises TypeError: If the series is not a molecule column, or if the pattern
        (or one of its elements) has an unsupported type.
    """
    if not isinstance(self._series.dtype, oeplr.MoleculeType):
        raise TypeError(
            "highlight only works on molecule columns (oepolars.MoleculeType). If this column has "
            "molecules, use series.chem.as_molecule() to convert to a molecule column first."
        )

    # Get / create a series context and save it (because we are modifying it locally)
    ctx = get_series_context(self.metadata, save=True)

    query_types = (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)

    # ********************************************************************************
    # Highlighting
    # ********************************************************************************

    # Treat a single query as a one-element sequence so both cases share one loop
    if isinstance(pattern, query_types):
        queries = (pattern,)
    elif isinstance(pattern, Iterable):
        queries = pattern
    else:
        raise TypeError(f'Do not know how to add molecule highlight for type {type(pattern).__name__}')

    for element in queries:
        # Elements are validated one at a time, in order
        if not isinstance(element, query_types):
            raise TypeError(f'Do not know how to add molecule highlight for type {type(element).__name__}')

        highlighter = create_structure_highlighter(
            query=element,
            color=color,
            style=style
        )
        ctx.add_callback(highlighter)

    # ********************************************************************************
    # Alignment
    # ********************************************************************************

    if ref is None:
        return

    # Only apply alignment if align_depictions method is available
    if hasattr(self, 'align_depictions'):
        self.align_depictions(ref=ref, method=method)
    else:
        log.warning("align_depictions not available; ref parameter ignored")
379
+
380
+
381
+ def _series_reset_depictions(self) -> None:
382
+ """
383
+ Reset depiction callbacks for a molecule series.
384
+
385
+ This clears any highlight callbacks that have been added to the series metadata.
386
+ """
387
+ # Clear the cnotebook context from metadata
388
+ _ = self.metadata.pop("cnotebook", None)
389
+
390
+
391
+ def _series_clear_formatting_rules(self) -> None:
392
+ """
393
+ Clear all formatting rule callbacks from a molecule series.
394
+
395
+ This removes any callbacks applied to the molecule prior to rendering,
396
+ such as highlighting. Unlike reset_depictions which removes the entire
397
+ rendering context, this method only clears the callbacks while preserving
398
+ other context settings like image dimensions and styling.
399
+ """
400
+ ctx = self.metadata.get("cnotebook", None)
401
+ if ctx is not None and isinstance(ctx, CNotebookContext):
402
+ ctx.reset_callbacks()
403
+
404
+
405
def _series_recalculate_depiction_coordinates(
        self,
        *,
        clear_coords: bool = True,
        add_depiction_hydrogens: bool = True,
        perceive_bond_stereo: bool = True,
        suppress_explicit_hydrogens: bool = True,
        orientation: int = oedepict.OEDepictOrientation_Default
) -> None:
    """
    Run ``oedepict.OEPrepareDepiction`` on every molecule of a molecule series.

    See the following link for more information:
    https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html

    :param clear_coords: Clear existing 2D coordinates
    :param add_depiction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
    :param perceive_bond_stereo: Perceive wedge/hash bond stereo
    :param suppress_explicit_hydrogens: Suppress explicit hydrogens
    :param orientation: Preferred 2D orientation
    :raises TypeError: If the series is not a molecule column
    """
    is_molecule_column = isinstance(self._series.dtype, oeplr.MoleculeType)
    if not is_molecule_column:
        raise TypeError(
            "recalculate_depiction_coordinates only works on molecule columns (oepolars.MoleculeType). If this "
            "column has molecules, use series.chem.as_molecule() to convert to a molecule column first."
        )

    # Translate the keyword arguments into an options object
    prepare_options = oedepict.OEPrepareDepictionOptions()
    prepare_options.SetClearCoords(clear_coords)
    prepare_options.SetAddDepictionHydrogens(add_depiction_hydrogens)
    prepare_options.SetPerceiveBondStereo(perceive_bond_stereo)
    prepare_options.SetSuppressHydrogens(suppress_explicit_hydrogens)
    prepare_options.SetDepictOrientation(orientation)

    # Entries that are not molecules are skipped
    for entry in self._series.to_list():
        if not isinstance(entry, oechem.OEMolBase):
            continue
        oedepict.OEPrepareDepiction(entry, prepare_options)
443
+
444
+
445
def _series_align_depictions(
        self,
        ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | oechem.OEQMol | Literal["first"],
        method: Literal["substructure", "ss", "mcss", "fp", "fingerprint"] | None = None,
        **kwargs
) -> None:
    """
    Align the 2D coordinates of molecules in a series.

    Alignment is best-effort: if building or applying the aligner fails, the
    failure is logged at debug level and the structures are left unaligned.

    :param ref: Alignment reference (molecule, "first", or search object)
    :param method: Alignment method
    :param kwargs: Keyword arguments for aligner
    :raises TypeError: If the series is not a molecule column
    """
    if not isinstance(self._series.dtype, oeplr.MoleculeType):
        raise TypeError(
            "align_depictions only works on molecule columns (oepolars.MoleculeType). If this "
            "column has molecules, use series.chem.as_molecule() to convert to a molecule column first."
        )

    # Get molecule list from series
    mols = self._series.to_list()

    # Handle "first" reference: use a copy of the first valid molecule in the series
    if isinstance(ref, str) and ref == "first":
        ref = next(
            (mol.CreateCopy() for mol in mols if mol is not None and mol.IsValid()),
            None
        )
        if ref is None:
            log.warning("No valid molecule found in series for depiction alignment")
            return

    # Make sure a molecule reference has 2D coordinates. Search objects
    # (OESubSearch / OEMCSSearch) are not molecules, so they are handed to the
    # aligner untouched instead of being passed to OEPrepareDepiction.
    if isinstance(ref, oechem.OEMolBase):
        oedepict.OEPrepareDepiction(ref, False)

    # Suppress alignment warnings (there are lots of needless warnings)
    level = oechem.OEThrow.GetLevel()
    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Error)

    # noinspection PyBroadException
    try:
        # Create the aligner
        from .align import create_aligner
        aligner = create_aligner(ref=ref, method=method, **kwargs)

        for mol in mols:
            if mol is not None:
                aligner(mol)

    except Exception:
        # We don't care if the aligners fail - it just results in unaligned structures (NBD),
        # but leave a trace for anyone debugging a missing alignment
        log.debug("Depiction alignment failed; structures left unaligned", exc_info=True)

    finally:
        # Always restore the OEThrow level, whatever happened above
        oechem.OEThrow.SetLevel(level)
500
+
501
+
502
+ # Monkey-patch onto oepolars SeriesChemNamespace
503
+ # Note: Series-level highlight is not registered because Polars Series are ephemeral and
504
+ # metadata doesn't persist across column accesses. Use df.chem.highlight() instead.
505
+ from oepolars.namespaces.series import SeriesChemNamespace
506
+ SeriesChemNamespace.reset_depictions = _series_reset_depictions
507
+ SeriesChemNamespace.clear_formatting_rules = _series_clear_formatting_rules
508
+ SeriesChemNamespace.recalculate_depiction_coordinates = _series_recalculate_depiction_coordinates
509
+ SeriesChemNamespace.align_depictions = _series_align_depictions
510
+
511
+
512
+ ########################################################################################################################
513
+ # DataFrame accessor methods (monkey-patched onto oepolars)
514
+ ########################################################################################################################
515
+
516
+ # Regular expression for splitting SMARTS patterns
517
+ import re
518
+ SMARTS_DELIMITER_RE = re.compile(r'\s*[|\r\n\t]+\s*')
519
+
520
+ # Store the fingerprint tag for fingerprint_similarity
521
+ _fingerprint_overlap_tag = oechem.OEGetTag("fingerprint_overlap")
522
+
523
+
524
class ColorBondByOverlapScore(oegrapheme.OEBondGlyphBase):
    """Bond glyph that draws a colored line under each bond carrying an overlap score.

    The score is read from generic data stored on the bond under ``tag`` and
    mapped to a color through the supplied color gradient.

    See: https://docs.eyesopen.com/toolkits/cookbook/python/depiction/simcalc.html
    """

    def __init__(self, cg: oechem.OELinearColorGradient, tag: int):
        """Create a bond coloring glyph.

        :param cg: Color gradient to map overlap scores to colors.
        :param tag: OEChem data tag containing overlap scores on bonds.
        """
        oegrapheme.OEBondGlyphBase.__init__(self)
        self.colorg, self.tag = cg, tag

    # noinspection PyPep8Naming
    def RenderGlyph(self, disp, bond):
        """Draw the glyph for ``bond``; return False when nothing was drawn."""
        bond_display = disp.GetBondDisplay(bond)

        # Skip bonds that are not displayed, not visible, or carry no score
        drawable = (
            bond_display is not None
            and bond_display.IsVisible()
            and bond.HasData(self.tag)
        )
        if not drawable:
            return False

        # Line thickness follows the display scale
        stroke_width = disp.GetScale() / 3.0
        overlap_color = self.colorg.GetColorAt(bond.GetData(self.tag))
        pen = oedepict.OEPen(overlap_color, overlap_color, oedepict.OEFill_Off, stroke_width)

        begin_display = disp.GetAtomDisplay(bond.GetBgn())
        end_display = disp.GetAtomDisplay(bond.GetEnd())

        # Draw on the layer below the molecule
        underlay = disp.GetLayer(oedepict.OELayerPosition_Below)
        underlay.DrawLine(begin_display.GetCoords(), end_display.GetCoords(), pen)

        return True

    # noinspection PyPep8Naming
    def ColorBondByOverlapScore(self):
        """Return a disowned new glyph with the same gradient and tag.

        NOTE(review): other OEGrapheme glyph examples name their copy hook
        ``CreateCopy`` - confirm whether this method name is intentional.
        """
        duplicate = ColorBondByOverlapScore(self.colorg, self.tag)
        return duplicate.__disown__()
568
+
569
+
570
+ def _dataframe_reset_depictions(self, *, molecule_columns: str | Iterable[str] | None = None) -> None:
571
+ """
572
+ Reset depiction callbacks for one or more molecule columns in the DataFrame.
573
+
574
+ :param molecule_columns: Optional molecule column(s) to reset. If None, resets all molecule columns.
575
+ """
576
+ columns = set()
577
+ if molecule_columns is None:
578
+ columns.update(self._df.columns)
579
+
580
+ elif isinstance(molecule_columns, str):
581
+ columns.add(molecule_columns)
582
+
583
+ else:
584
+ columns.update(molecule_columns)
585
+
586
+ # Filter invalid and non-molecule columns
587
+ for col in filter(
588
+ lambda c: c in self._df.columns and isinstance(self._df.schema[c], oeplr.MoleculeType),
589
+ columns
590
+ ):
591
+ self._df.get_column(col).chem.reset_depictions()
592
+
593
+
594
+ def _dataframe_clear_formatting_rules(self, molecule_columns: str | Iterable[str] | None = None) -> None:
595
+ """
596
+ Clear all formatting rule callbacks from one or more molecule columns.
597
+
598
+ This removes any callbacks applied to molecules prior to rendering,
599
+ such as highlighting. Unlike reset_depictions which removes the entire
600
+ rendering context, this method only clears the callbacks while preserving
601
+ other context settings like image dimensions and styling.
602
+
603
+ :param molecule_columns: Optional molecule column(s) to clear formatting rules from.
604
+ If None, clears formatting rules from all molecule columns.
605
+
606
+ Example::
607
+
608
+ # Clear formatting rules from all molecule columns
609
+ df.chem.clear_formatting_rules()
610
+
611
+ # Clear formatting rules from a specific column
612
+ df.chem.clear_formatting_rules("smiles")
613
+
614
+ # Clear formatting rules from multiple columns
615
+ df.chem.clear_formatting_rules(["mol1", "mol2"])
616
+ """
617
+ columns = set()
618
+ if molecule_columns is None:
619
+ columns.update(self._df.columns)
620
+
621
+ elif isinstance(molecule_columns, str):
622
+ columns.add(molecule_columns)
623
+
624
+ else:
625
+ columns.update(molecule_columns)
626
+
627
+ # Filter invalid and non-molecule columns and clear their formatting rules
628
+ for col in filter(
629
+ lambda c: c in self._df.columns and isinstance(self._df.schema[c], oeplr.MoleculeType),
630
+ columns
631
+ ):
632
+ # Clear DataFrame-level column context callbacks
633
+ ctx = get_dataframe_column_context(self._df, col)
634
+ if ctx is not None:
635
+ ctx.reset_callbacks()
636
+
637
+
638
def _dataframe_recalculate_depiction_coordinates(
        self,
        *,
        molecule_columns: str | Iterable[str] | None = None,
        clear_coords: bool = True,
        add_depiction_hydrogens: bool = True,
        perceive_bond_stereo: bool = True,
        suppress_explicit_hydrogens: bool = True,
        orientation: int = oedepict.OEDepictOrientation_Default
) -> None:
    """
    Recalculate the depictions of one or more molecule columns of a DataFrame.

    With ``molecule_columns=None`` (the default) every molecule column is
    processed. Requested columns that do not exist, or that are not molecule
    columns, are reported with a warning and skipped.

    See the following link for more information:
    https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html

    :param molecule_columns: Optional molecule column(s) to have depictions recalculated
    :param clear_coords: Clear existing 2D coordinates
    :param add_depiction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
    :param perceive_bond_stereo: Perceive wedge/hash bond stereo
    :param suppress_explicit_hydrogens: Suppress explicit hydrogens
    :param orientation: Preferred 2D orientation
    """
    frame = self._df

    # Normalize the selection into a set of column names
    if molecule_columns is None:
        targets = {
            name for name in frame.columns
            if isinstance(frame.schema[name], oeplr.MoleculeType)
        }
    elif isinstance(molecule_columns, str):
        targets = {molecule_columns}
    else:
        targets = set(molecule_columns)

    # The same options are forwarded to every column
    depiction_kwargs = dict(
        clear_coords=clear_coords,
        add_depiction_hydrogens=add_depiction_hydrogens,
        perceive_bond_stereo=perceive_bond_stereo,
        suppress_explicit_hydrogens=suppress_explicit_hydrogens,
        orientation=orientation
    )

    for name in targets:
        if name not in frame.columns:
            log.warning(f'{name} not found in DataFrame columns: ({", ".join(frame.columns)})')
            continue

        if not isinstance(frame.schema[name], oeplr.MoleculeType):
            log.warning(f'Column {name} does not have a MoleculeType')
            continue

        frame.get_column(name).chem.recalculate_depiction_coordinates(**depiction_kwargs)
693
+
694
+
695
def _dataframe_highlight(
        self,
        molecule_column: str,
        pattern: Iterable[str] | str | oechem.OESubSearch | Iterable[oechem.OESubSearch],
        *,
        color: oechem.OEColor | oechem.OEColorIter | None = None,
        style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
) -> None:
    """
    Register highlight callbacks for a molecule column of the DataFrame.

    The callbacks are attached to a DataFrame-level column context (created on
    first use), so they do not depend on the metadata of any particular Series
    object obtained from the DataFrame.

    Accepted ``pattern`` values:
        - a SMARTS string
        - an oechem.OESubSearch, oechem.OEMCSSearch or oechem.OEQMol object
        - an iterable of such values

    :param molecule_column: Name of the molecule column to highlight.
    :param pattern: Pattern(s) to highlight in the molecules.
    :param color: Highlight color(s), forwarded unchanged to the highlighter factory.
    :param style: Highlight style, forwarded unchanged to the highlighter factory.
    :raises ValueError: If the column does not exist.
    :raises TypeError: If the column is not a molecule column or the pattern type is unsupported.
    """
    frame = self._df

    # Check the column exists and is a molecule type
    if molecule_column not in frame.columns:
        raise ValueError(f'Column {molecule_column} not found in DataFrame columns: ({", ".join(frame.columns)})')

    if not isinstance(frame.schema[molecule_column], oeplr.MoleculeType):
        raise TypeError(
            f"highlight only works on molecule columns (oepolars.MoleculeType). Column '{molecule_column}' "
            f"has type {frame.schema[molecule_column]}."
        )

    # Get or create the context for this DataFrame column
    column_ctx = get_dataframe_column_context(frame, molecule_column)
    if column_ctx is None:
        column_ctx = create_local_context()
        set_dataframe_column_context(frame, molecule_column, column_ctx)

    # Treat a single query as a one-element sequence so both cases share one loop
    if isinstance(pattern, (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)):
        queries = (pattern,)
    elif isinstance(pattern, Iterable):
        queries = pattern
    else:
        raise TypeError(f'Unsupported type for pattern: {type(pattern).__name__}')

    for query in queries:
        highlighter = create_structure_highlighter(query, color=color, style=style)
        column_ctx.add_callback(highlighter)
748
+
749
+
750
def _dataframe_copy_molecules(
        self,
        source_column: str,
        dest_column: str,
) -> pl.DataFrame:
    """
    Add a new column holding copies of the molecules of an existing column.

    The copy is produced by the Series accessor's ``copy_molecules()`` and
    attached under ``dest_column`` via ``with_columns``; the result is returned
    as a DataFrame.

    :param source_column: Name of the source molecule column.
    :param dest_column: Name of the new column to create with copied molecules.
    :returns: New DataFrame with the molecule column added.
    :raises ValueError: If the source column does not exist.
    :raises TypeError: If the source column is not a molecule column.

    Example::

        # Create a copy of molecules for alignment
        df = df.chem.copy_molecules("Original", "Aligned")
        df.chem.highlight("Aligned", "c1ccccc1")
    """
    frame = self._df

    if source_column not in frame.columns:
        raise ValueError(f'Column {source_column} not found in DataFrame columns: ({", ".join(frame.columns)})')

    if not isinstance(frame.schema[source_column], oeplr.MoleculeType):
        raise TypeError(
            f"copy_molecules only works on molecule columns (oepolars.MoleculeType). Column '{source_column}' "
            f"has type {frame.schema[source_column]}."
        )

    # Copy through the series-level accessor, then attach the result under the new name
    source_series = frame.get_column(source_column)
    duplicated = source_series.chem.copy_molecules()
    renamed = duplicated.alias(dest_column)
    return frame.with_columns(renamed)
784
+
785
+
786
def _collect_highlight_searches(patterns) -> list:
    """
    Convert one cell of a pattern column into a list of valid search objects.

    :param patterns: A comma/whitespace delimited SMARTS string, an oechem.OESubSearch or oechem.OEMCSSearch
        object, an iterable of any of those, or None.
    :returns: List of valid search objects. SMARTS that fail to parse and invalid search objects are dropped;
        values of an unsupported type are reported with a log warning.
    """
    search_types = (oechem.OESubSearch, oechem.OEMCSSearch)
    searches = []

    def add_smarts(text: str) -> None:
        # One string may hold several delimited SMARTS patterns
        for smarts in re.split(SMARTS_DELIMITER_RE, text):
            ss = oechem.OESubSearch(smarts)
            if ss.IsValid():
                searches.append(ss)

    if isinstance(patterns, str):
        add_smarts(patterns)

    elif isinstance(patterns, search_types):
        if patterns.IsValid():
            searches.append(patterns)

    elif isinstance(patterns, Iterable):
        for p in patterns:
            if isinstance(p, str):
                add_smarts(p)
            elif isinstance(p, search_types):
                if p.IsValid():
                    searches.append(p)
            else:
                log.warning(f'Do not know how to highlight using: {type(p).__name__}')

    elif patterns is not None:
        log.warning(f'Do not know how to highlight using: {type(patterns).__name__}')

    return searches


def _dataframe_highlight_using_column(
        self,
        molecule_column: str,
        pattern_column: str,
        *,
        highlighted_column: str = "highlighted_substructures",
        color: oechem.OEColor | oechem.OEColorIter | None = None,
        style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
        ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | None = None,
        alignment_opts: oedepict.OEAlignmentOptions | None = None,
        prepare_opts: oedepict.OEPrepareDepictionOptions | None = None,
        inplace: bool = False
) -> pl.DataFrame:
    """
    Highlight molecules based on the value of another column. The column produced is a DisplayType column, so
    the results are not suitable for other molecular calculations.

    The other column can contain:
        - Comma or whitespace delimited string of SMARTS patterns
        - oechem.OESubSearch or oechem.OEMCSSearch object
        - Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects

    Rows whose molecule is missing or invalid get a null entry in the highlighted column.

    :param molecule_column: Name of the molecule column.
    :param pattern_column: Name of the pattern column.
    :param highlighted_column: Optional name of the column with highlighted structures.
    :param color: Highlight color(s). Can be a single oechem.OEColor or an oechem.OEColorIter
        (e.g., oechem.OEGetLightColors()). Defaults to oechem.OEGetLightColors(). With a non-overlay style
        and a color iterator, the first color of the iterator is used for every row.
    :param style: Highlight style. Can be an int (OEHighlightStyle constant) or a string
        ("overlay_default", "overlay_ball_and_stick"). Defaults to "overlay_default".
    :param ref: Accepted for API compatibility; this implementation does not read it.
    :param alignment_opts: Accepted for API compatibility; this implementation does not read it.
    :param prepare_opts: Accepted for API compatibility; this implementation does not read it.
    :param inplace: Accepted for API compatibility; a new DataFrame is always returned.
    :returns: New DataFrame with the highlighted column added.
    :raises KeyError: If ``molecule_column`` or ``pattern_column`` is not a column of the DataFrame.
    :raises TypeError: If ``molecule_column`` is not an oepolars.MoleculeType column.
    """
    df = self._df

    if molecule_column not in df.columns:
        raise KeyError(f'{molecule_column} not found in DataFrame columns: ({", ".join(df.columns)})')

    if not isinstance(df.schema[molecule_column], oeplr.MoleculeType):
        raise TypeError(
            f"highlight_using_column only works on molecule columns (oepolars.MoleculeType). If {molecule_column}"
            " has molecules, use df.chem.as_molecule() to convert to a molecule column first."
        )

    if pattern_column not in df.columns:
        raise KeyError(f'{pattern_column} not found in DataFrame columns: ({", ".join(df.columns)})')

    # Default color
    if color is None:
        color = oechem.OEGetLightColors()

    # Determine highlighting approach based on style
    use_overlay = isinstance(style, str) and style in ("overlay_default", "overlay_ball_and_stick")

    # Check if color is compatible with overlay
    if use_overlay and isinstance(color, oechem.OEColor):
        log.warning(
            "Overlay coloring is not compatible with a single oechem.OEColor. Falling back to standard highlighting")
        use_overlay = False
        style = oedepict.OEHighlightStyle_BallAndStick

    # Resolve the highlight style/color ONCE, before the row loop. A color iterator is assumed to be single-pass,
    # so consuming it per row would hand each row a different color (and eventually none at all).
    overlay_highlight = None
    highlight_color = None
    if use_overlay:
        overlay_highlight = oedepict.OEHighlightOverlayByBallAndStick(color)
    elif isinstance(color, oechem.OEColor):
        highlight_color = color
    else:
        # Traditional highlighting uses a single color: take the first one, or fall back to light blue
        highlight_color = oechem.OELightBlue
        for c in color:
            highlight_color = c
            break

    # Fetch both columns once instead of once per row
    series = df.get_column(molecule_column)
    pattern_series = df.get_column(pattern_column)

    # Get the rendering context for creating the displays
    metadata = series.chem.metadata if hasattr(series, 'chem') else {}
    ctx = get_series_context(metadata)

    # Create the display objects
    displays = []

    for row_idx in range(len(df)):
        mol = series[row_idx]
        patterns = pattern_series[row_idx]

        # Missing or invalid molecules produce a null display
        if not (isinstance(mol, oechem.OEMolBase) and mol.IsValid()):
            displays.append(None)
            continue

        # Create the display
        disp = oemol_to_disp(mol, ctx=ctx)

        # Parse the patterns for this row
        substructures = _collect_highlight_searches(patterns)

        if use_overlay:
            # Overlay highlighting
            for ss in substructures:
                oedepict.OEAddHighlightOverlay(disp, overlay_highlight, ss.Match(mol, True))
        else:
            # Traditional highlighting
            for ss in substructures:
                for match in ss.Match(mol, True):
                    oedepict.OEAddHighlighting(disp, highlight_color, style, match)

        displays.append(disp)

    # Create the new column with DisplayType (must instantiate the type)
    display_series = pl.Series(highlighted_column, displays, dtype=oeplr.DisplayType())

    # Add the column to the DataFrame
    return df.with_columns(display_series)
931
+
932
+
933
def _dataframe_fingerprint_similarity(
        self,
        molecule_column: str,
        ref: oechem.OEMolBase | None = None,
        *,
        tanimoto_column: str = "fingerprint_tanimoto",
        reference_similarity_column: str = "reference_similarity",
        target_similarity_column: str = "target_similarity",
        fptype: str = "tree",
        num_bits: int = 4096,
        min_distance: int = 0,
        max_distance: int = 4,
        atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
        bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond,
        inplace: bool = False
) -> pl.DataFrame:
    """
    Color molecules by fingerprint similarity.

    For every valid molecule a Tanimoto score against the reference is computed, and two depictions are created
    (reference and target) whose bonds are colored by how often they take part in a fingerprint overlap. Rows with
    a missing/invalid molecule or an invalid fingerprint get null values in all three new columns.

    If no valid reference molecule is available, or the reference fingerprint is invalid, a warning is logged and
    the original DataFrame is returned unchanged (without the new columns).

    :param molecule_column: Name of the molecule column
    :param ref: Reference molecule (if None, uses first valid molecule)
    :param tanimoto_column: Name of the tanimoto score column
    :param reference_similarity_column: Name of the reference display column
    :param target_similarity_column: Name of the target display column
    :param fptype: Fingerprint type
    :param num_bits: Number of bits in the fingerprint
    :param min_distance: Minimum distance/radius for path/circular/tree
    :param max_distance: Maximum distance/radius for path/circular/tree
    :param atom_type: Atom type bitmask
    :param bond_type: Bond type bitmask
    :param inplace: Not used (Polars DataFrames are immutable), kept for API compatibility
    :return: DataFrame with similarity columns
    :raises KeyError: If ``molecule_column`` is not a column of the DataFrame.
    :raises TypeError: If ``molecule_column`` is not an oepolars.MoleculeType column.
    """
    tag = _fingerprint_overlap_tag
    df = self._df

    if molecule_column not in df.columns:
        raise KeyError(f'Molecule column not found in DataFrame: {molecule_column}')

    if not isinstance(df.schema[molecule_column], oeplr.MoleculeType):
        raise TypeError(
            f"Column {molecule_column} does not have MoleculeType ({df.schema[molecule_column]})"
        )

    # Get the context for rendering
    series = df.get_column(molecule_column)
    metadata = series.chem.metadata if hasattr(series, 'chem') else {}
    ctx = get_series_context(metadata)

    # Get molecule list
    mols = series.to_list()

    # If we're using the first molecule as our reference
    if ref is None:
        for mol in mols:
            if mol is not None and mol.IsValid():
                ref = mol
                break
        else:
            # Loop finished without finding any valid molecule
            log.warning(f'No valid reference molecules to use for alignment in column {molecule_column}')
            return df

    # Check reference molecule
    if not ref.IsValid():
        log.warning("Reference molecule is not valid")
        return df

    # Fingerprint maker
    make_fp = fingerprint_maker(
        fptype=fptype,
        num_bits=num_bits,
        min_distance=min_distance,
        max_distance=max_distance,
        atom_type=atom_type,
        bond_type=bond_type
    )

    # Make the reference fingerprint
    ref_fp = make_fp(ref)

    if not ref_fp.IsValid():
        log.warning("Fingerprint from reference molecule is invalid")
        return df

    # Create the display objects and scores
    ref_displays = []
    targ_displays = []
    ref_molecules = []  # Cache to prevent GC
    targ_molecules = []  # Cache to prevent GC
    tanimotos = []

    for mol in mols:
        if mol is not None and mol.IsValid():

            # Copy the molecules, because we're modifying them
            targ_mol = oechem.OEMol(mol)
            ref_mol = oechem.OEMol(ref)

            # Cache molecules to prevent GC
            targ_molecules.append(targ_mol)
            ref_molecules.append(ref_mol)

            # Create the fingerprint
            targ_fp = make_fp(targ_mol)
            if targ_fp.IsValid():

                # Add the tanimoto
                tanimotos.append(oegraphsim.OETanimoto(ref_fp, targ_fp))

                # Per-bond overlap counters, indexed by bond index
                targ_bonds = oechem.OEUIntArray(targ_mol.GetMaxBondIdx())
                ref_bonds = oechem.OEUIntArray(ref_mol.GetMaxBondIdx())

                # Overlaps
                overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())

                for match in overlaps:
                    for bond in match.GetPatternBonds():
                        ref_bonds[bond.GetIdx()] += 1
                    for bond in match.GetTargetBonds():
                        targ_bonds[bond.GetIdx()] += 1

                for bond in targ_mol.GetBonds():
                    bond.SetData(tag, targ_bonds[bond.GetIdx()])

                for bond in ref_mol.GetBonds():
                    bond.SetData(tag, ref_bonds[bond.GetIdx()])

                # A molecule without bonds yields an empty counter array; default=0 keeps max() from raising
                # ValueError on it.
                # noinspection PyTypeChecker
                maxvalue = max(0, max(targ_bonds, default=0), max(ref_bonds, default=0))

                # Create the color gradient
                colorg = oechem.OELinearColorGradient()
                colorg.AddStop(oechem.OEColorStop(0.0, oechem.OEPinkTint))
                colorg.AddStop(oechem.OEColorStop(1.0, oechem.OEYellow))
                colorg.AddStop(oechem.OEColorStop(maxvalue, oechem.OEDarkGreen))

                # Function that will color the bonds
                bondglyph = ColorBondByOverlapScore(colorg, tag)

                # Align the molecules (the overlaps are requested again because the first result was already
                # iterated above)
                overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())
                oedepict.OEPrepareMultiAlignedDepiction(targ_mol, ref_mol, overlaps)

                # Create the displays
                ref_disp = oemol_to_disp(ref_mol, ctx=ctx)
                targ_disp = oemol_to_disp(targ_mol, ctx=ctx)

                # Color the displays
                oegrapheme.OEAddGlyph(ref_disp, bondglyph, oechem.IsTrueBond())
                oegrapheme.OEAddGlyph(targ_disp, bondglyph, oechem.IsTrueBond())

                ref_displays.append(ref_disp)
                targ_displays.append(targ_disp)

            # Fingerprint was invalid
            else:
                tanimotos.append(None)
                ref_displays.append(None)
                targ_displays.append(None)

        # Molecule was invalid
        else:
            tanimotos.append(None)
            ref_displays.append(None)
            targ_displays.append(None)

    # Create the columns
    tanimoto_series = pl.Series(tanimoto_column, tanimotos, dtype=pl.Float64)
    ref_series = pl.Series(reference_similarity_column, ref_displays, dtype=oeplr.DisplayType())
    targ_series = pl.Series(target_similarity_column, targ_displays, dtype=oeplr.DisplayType())

    # Store molecule references in metadata to prevent GC (same as pandas version)
    ref_series.chem.metadata["molecules"] = ref_molecules  # noqa
    targ_series.chem.metadata["molecules"] = targ_molecules  # noqa

    # Add the columns to the DataFrame
    result = df.with_columns([tanimoto_series, ref_series, targ_series])

    return result
1113
+
1114
+
1115
# Monkey-patch the cnotebook DataFrame helpers onto the oepolars DataFrameChemNamespace so that they become
# available through the DataFrame ``chem`` accessor (e.g. ``df.chem.copy_molecules(...)``)
from oepolars.namespaces.dataframe import DataFrameChemNamespace
setattr(DataFrameChemNamespace, "reset_depictions", _dataframe_reset_depictions)
setattr(DataFrameChemNamespace, "clear_formatting_rules", _dataframe_clear_formatting_rules)
setattr(DataFrameChemNamespace, "recalculate_depiction_coordinates", _dataframe_recalculate_depiction_coordinates)
setattr(DataFrameChemNamespace, "highlight", _dataframe_highlight)
setattr(DataFrameChemNamespace, "highlight_using_column", _dataframe_highlight_using_column)
setattr(DataFrameChemNamespace, "fingerprint_similarity", _dataframe_fingerprint_similarity)
setattr(DataFrameChemNamespace, "copy_molecules", _dataframe_copy_molecules)
1124
+
1125
+
1126
+ ########################################################################################################################
1127
+ # MolGrid accessor methods for Series and DataFrame
1128
+ ########################################################################################################################
1129
+
1130
+
1131
def _polars_series_molgrid(
        self,
        title: bool | str | None = True,
        tooltip_fields: list[str] | None = None,
        **kwargs
) -> "MolGrid":
    """Build an interactive molecule grid from the values of this Series.

    :param title: Title display mode. True uses molecule's title, a string
        specifies a field name, None/False hides titles.
    :param tooltip_fields: Fields for tooltip.
    :param kwargs: Additional arguments passed to MolGrid.
    :returns: MolGrid instance.
    """
    # Imported lazily, at call time, rather than at module import
    from cnotebook import MolGrid

    # Hand MolGrid its own Python list of the Series values
    molecules = list(self._series.to_list())

    return MolGrid(molecules, title=title, tooltip_fields=tooltip_fields, **kwargs)
1156
+
1157
+
1158
def _polars_dataframe_molgrid(
        self,
        mol_col: str,
        title: bool | str | None = True,
        tooltip_fields: list[str] | None = None,
        **kwargs
) -> "MolGrid":
    """Build an interactive molecule grid from one molecule column of this DataFrame.

    Every other column is copied into a pandas DataFrame that is handed to MolGrid alongside the molecules.

    :param mol_col: Column containing molecules.
    :param title: Title display mode. True uses molecule's title, a string
        specifies a field name, None/False hides titles.
    :param tooltip_fields: Columns for tooltip.
    :param kwargs: Additional arguments passed to MolGrid.
    :returns: MolGrid instance.
    """
    # Imported lazily, at call time, rather than at module import
    from cnotebook import MolGrid
    import pandas as pd

    frame = self._df
    molecules = list(frame[mol_col].to_list())

    # Copy every column except the molecule column as plain Python lists; building the pandas frame from
    # Python objects avoids pyarrow dependency issues
    companion_data = {
        name: frame[name].to_list()
        for name in frame.columns
        if name != mol_col
    }
    companion_frame = pd.DataFrame(companion_data)

    return MolGrid(
        molecules,
        dataframe=companion_frame,
        mol_col=mol_col,
        title=title,
        tooltip_fields=tooltip_fields,
        **kwargs
    )
1198
+
1199
+
1200
# Expose the grid builders as a ``molgrid`` method on both the Series and the DataFrame chem namespaces
setattr(SeriesChemNamespace, "molgrid", _polars_series_molgrid)
setattr(DataFrameChemNamespace, "molgrid", _polars_dataframe_molgrid)
1203
+
1204
+
1205
+ ########################################################################################################################
1206
+ # Register Polars formatters
1207
+ ########################################################################################################################
1208
+
1209
if ipython_present:

    def register_polars_formatters():
        """
        Register Polars DataFrame formatters for iPython/Jupyter display.

        This registers render_polars_dataframe as the HTML formatter for
        Polars DataFrames in iPython environments. If no iPython instance is
        active, nothing is registered and a debug message is logged.

        Note: Calls to this function are idempotent.
        """
        ipython_instance = get_ipython()

        if ipython_instance is None:
            log.debug("[cnotebook] iPython installed but not in use - cannot register polars extension")
            return

        html_formatter = ipython_instance.display_formatter.formatters['text/html']

        # lookup() expects an *instance*; passing the class resolves the formatter for ``type`` rather than
        # for pl.DataFrame. lookup_by_type() is the API that takes a class.
        try:
            current = html_formatter.lookup_by_type(pl.DataFrame)
        except KeyError:
            # Nothing registered for pl.DataFrame (or any of its base classes) yet
            current = None

        if current is not render_polars_dataframe:
            html_formatter.for_type(pl.DataFrame, render_polars_dataframe)

else:

    # iPython is not present, so we do not register a Polars formatter
    def register_polars_formatters():
        """No-op stand-in used when iPython is not installed."""
        pass