cnotebook 1.2.0__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cnotebook/pandas_ext.py CHANGED
@@ -3,7 +3,6 @@ import logging
3
3
  import typing
4
4
  import pandas as pd
5
5
  import oepandas as oepd
6
- from pandas.api.extensions import register_dataframe_accessor, register_series_accessor
7
6
  from typing import Iterable, Any, Literal, Hashable
8
7
  from openeye import oechem, oedepict, oegraphsim, oegrapheme
9
8
  from copy import copy as shallow_copy
@@ -11,7 +10,7 @@ from .context import pass_cnotebook_context, get_series_context
11
10
  from .helpers import escape_brackets, create_structure_highlighter
12
11
  from .align import create_aligner, fingerprint_maker
13
12
  from .render import (
14
- CNotebookContext,
13
+ CNotebookContext, # noqa
15
14
  oemol_to_disp,
16
15
  oedisp_to_html,
17
16
  render_invalid_molecule,
@@ -109,6 +108,7 @@ def render_dataframe(
109
108
  df: pd.DataFrame,
110
109
  formatters: dict | None = None,
111
110
  col_space: dict[str, float | int] | None = None,
111
+ ctx: CNotebookContext | None = None,
112
112
  **kwargs
113
113
  ) -> str:
114
114
  """
@@ -116,6 +116,7 @@ def render_dataframe(
116
116
  :param df: DataFrame to render
117
117
  :param formatters: Custom formatters for displaying columns
118
118
  :param col_space: Custom column spacing
119
+ :param ctx: Local rendering context (optional)
119
120
  :param kwargs: Additional keyword arguments for DataFrame.to_html
120
121
  :return: HTML of rendered DataFrame
121
122
  """
@@ -170,15 +171,15 @@ def render_dataframe(
170
171
  assert isinstance(arr, oepd.MoleculeArray)
171
172
 
172
173
  # Get the cnotebook options for this column
173
- ctx = get_series_context(arr.metadata)
174
+ series_ctx = ctx if ctx is not None else get_series_context(arr.metadata)
174
175
 
175
- formatters[col] = create_mol_formatter(ctx=ctx)
176
+ formatters[col] = create_mol_formatter(ctx=series_ctx)
176
177
 
177
178
  # Record the column width
178
179
  if col in col_space:
179
180
  log.warning(f'Column spacing for {col} already defined by overwriting with molecule image width')
180
181
 
181
- col_space[col] = float(ctx.width)
182
+ col_space[col] = float(series_ctx.width)
182
183
 
183
184
  # ---------------------------------------------------
184
185
  # Display columns
@@ -202,9 +203,9 @@ def render_dataframe(
202
203
  assert isinstance(arr, oepd.DisplayArray)
203
204
 
204
205
  # Get column metadata
205
- ctx = get_series_context(arr.metadata)
206
+ series_ctx = ctx if ctx is not None else get_series_context(arr.metadata)
206
207
 
207
- formatters[col] = create_disp_formatter(ctx=ctx)
208
+ formatters[col] = create_disp_formatter(ctx=series_ctx)
208
209
 
209
210
  if len(arr) > 0:
210
211
  col_space[col] = max(disp.GetWidth() for disp in arr if isinstance(disp, oedepict.OE2DMolDisplay))
@@ -257,435 +258,570 @@ else:
257
258
 
258
259
 
259
260
  ########################################################################################################################
260
- # Series accessors
261
+ # CNotebook Series accessor extensions for OEPandas .chem accessor
261
262
  ########################################################################################################################
262
263
 
263
- @register_series_accessor("highlight")
264
- class SeriesHighlightAccessor:
265
- def __init__(self, pandas_obj: pd.Series):
266
- if not isinstance(pandas_obj.dtype, oepd.MoleculeDtype):
267
- raise TypeError(
268
- "subsearch only works on molecule columns (oepandas.MoleculeDtype). If this column has "
269
- "molecules, use pd.Series.as_molecule to convert to a molecule column first."
264
+ def _series_highlight(
265
+ self,
266
+ pattern: Iterable[str] | str | oechem.OESubSearch | Iterable[oechem.OESubSearch],
267
+ *,
268
+ color: oechem.OEColor | oechem.OEColorIter | None = None,
269
+ style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
270
+ ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEQMol | Literal["first"] | oechem.OEMolBase | None = None,
271
+ method: Literal["ss", "substructure", "mcss", "fp", "fingerprint"] | None = None
272
+ ) -> None:
273
+ """
274
+ Highlight chemical features in a structure.
275
+
276
+ The pattern argument can be:
277
+ - SMARTS pattern
278
+ - oechem.OESubSearch or oechem.OEMCSSearch object
279
+ - Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
280
+
281
+ :param pattern: Pattern(s) to highlight in the molecule.
282
+ :param color: Highlight color(s). Can be a single oechem.OEColor or an oechem.OEColorIter
283
+ (e.g., oechem.OEGetLightColors()). Defaults to oechem.OEGetLightColors().
284
+ :param style: Highlight style. Can be an int (OEHighlightStyle constant) or a string
285
+ ("overlay_default", "overlay_ball_and_stick"). Defaults to "overlay_default".
286
+ :param ref: Optional reference for alignment.
287
+ :param method: Optional alignment method.
288
+ """
289
+ if not isinstance(self._obj.dtype, oepd.MoleculeDtype):
290
+ raise TypeError(
291
+ "highlight only works on molecule columns (oepandas.MoleculeDtype). If this column has "
292
+ "molecules, use series.chem.as_molecule() to convert to a molecule column first."
293
+ )
294
+
295
+ # Get the molecule array
296
+ arr = self._obj.array
297
+ assert isinstance(arr, oepd.MoleculeArray)
298
+
299
+ # Get / create a series context and save it (because we are modifying it locally)
300
+ ctx = get_series_context(arr.metadata, save=True)
301
+
302
+ # ********************************************************************************
303
+ # Highlighting
304
+ # ********************************************************************************
305
+
306
+ # Case: Pattern is a single SMARTS string or oechem.OESubSearch object
307
+ if isinstance(pattern, (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)):
308
+ ctx.add_callback(
309
+ create_structure_highlighter(
310
+ query=pattern,
311
+ color=color,
312
+ style=style
270
313
  )
314
+ )
271
315
 
272
- self._obj = pandas_obj
273
-
274
- def __call__(
275
- self,
276
- pattern: Iterable[str] | str | oechem.OESubSearch | Iterable[oechem.OESubSearch],
277
- *,
278
- color: oechem.OEColor = oechem.OEColor(oechem.OELightBlue),
279
- style: int = oedepict.OEHighlightStyle_Stick,
280
- ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEQMol | Literal["first"] | oechem.OEMolBase | None = None, # noqa
281
- method: Literal["ss", "substructure", "mcss", "fp", "fingerprint"] | None = None
282
- ) -> None:
283
- """
284
- Highlight chemical features in a structure
316
+ # Case: Pattern is an iterable
317
+ elif isinstance(pattern, Iterable):
318
+ for element in pattern:
319
+
320
+ # Element is a SMARTS string or oechem.OESubSearch object
321
+ if isinstance(element, (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)):
322
+ ctx.add_callback(
323
+ create_structure_highlighter(
324
+ query=element,
325
+ color=color,
326
+ style=style
327
+ )
328
+ )
285
329
 
286
- The pattern argument can be:
287
- - SMARTS pattern
288
- - oechem.OESubSearch or oechem.OEMCSSearch object
289
- - Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
330
+ # Unknown element
331
+ else:
332
+ raise TypeError(f'Do not know how to add molecule highlight for type {type(element).__name__}')
290
333
 
291
- :param pattern: Pattern(s) to highlight in the molecule
292
- :param color: Highlight color
293
- :param style: Highlight style
294
- :return: Callback to highlight the pattern(s) in the molecule
295
- """
296
- # Get the molecule array
297
- # Direct assignment to help IDE understand this is a MoleculeArray
334
+ # Case: Pattern is an unknown type
335
+ else:
336
+ raise TypeError(f'Do not know how to add molecule highlight for type {type(pattern).__name__}')
337
+
338
+ # ********************************************************************************
339
+ # Alignment
340
+ # ********************************************************************************
341
+
342
+ if ref is not None:
343
+ self._obj.chem.align_depictions(ref=ref, method=method)
344
+
345
+
346
+ def _series_recalculate_depiction_coordinates(
347
+ self,
348
+ *,
349
+ clear_coords: bool = True,
350
+ add_depiction_hydrogens: bool = True,
351
+ perceive_bond_stereo: bool = True,
352
+ suppress_explicit_hydrogens: bool = True,
353
+ orientation: int = oedepict.OEDepictOrientation_Default
354
+ ) -> None:
355
+ """
356
+ Recalculate the depictions for a molecule series.
357
+
358
+ See the following link for more information:
359
+ https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html
360
+
361
+ :param clear_coords: Clear existing 2D coordinates
362
+ :param add_depiction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
363
+ :param perceive_bond_stereo: Perceive wedge/hash bond stereo
364
+ :param suppress_explicit_hydrogens: Suppress explicit hydrogens
365
+ :param orientation: Preferred 2D orientation
366
+ """
367
+ if not isinstance(self._obj.dtype, oepd.MoleculeDtype):
368
+ raise TypeError(
369
+ "recalculate_depiction_coordinates only works on molecule columns (oepandas.MoleculeDtype). If this "
370
+ "column has molecules, use series.chem.as_molecule() to convert to a molecule column first."
371
+ )
372
+
373
+ # Create the depiction options
374
+ opts = oedepict.OEPrepareDepictionOptions()
375
+ opts.SetClearCoords(clear_coords)
376
+ opts.SetAddDepictionHydrogens(add_depiction_hydrogens)
377
+ opts.SetPerceiveBondStereo(perceive_bond_stereo)
378
+ opts.SetSuppressHydrogens(suppress_explicit_hydrogens)
379
+ opts.SetDepictOrientation(orientation)
380
+
381
+ for mol in self._obj.array:
382
+ if isinstance(mol, oechem.OEMolBase):
383
+ oedepict.OEPrepareDepiction(mol, opts)
384
+
385
+
386
+ def _series_reset_depictions(self) -> None:
387
+ """
388
+ Reset depiction callbacks for a molecule series
389
+ """
390
+ # Check if array has metadata attribute (should be true for oepandas arrays)
391
+ if hasattr(self._obj.array, "metadata"):
298
392
  arr = self._obj.array
299
393
  assert isinstance(arr, oepd.MoleculeArray)
394
+ _ = arr.metadata.pop("cnotebook", None)
300
395
 
301
- # Get / create a series context and save it (because we are modifying it locally)
302
- ctx = get_series_context(arr.metadata, save=True)
303
396
 
304
- # ********************************************************************************
305
- # Highlighting
306
- # ********************************************************************************
397
+ def _series_clear_formatting_rules(self) -> None:
398
+ """
399
+ Clear all formatting rule callbacks from a molecule series.
307
400
 
308
- # Case: Pattern is a single SMARTS string or oechem.OESubSearch object
309
- if isinstance(pattern, (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)):
310
- ctx.add_callback(
311
- create_structure_highlighter(
312
- query=pattern,
313
- color=color,
314
- style=style
315
- )
316
- )
401
+ This removes any callbacks applied to the molecule prior to rendering,
402
+ such as highlighting. Unlike reset_depictions which removes the entire
403
+ rendering context, this method only clears the callbacks while preserving
404
+ other context settings like image dimensions and styling.
405
+ """
406
+ if hasattr(self._obj.array, "metadata"):
407
+ arr = self._obj.array
408
+ assert isinstance(arr, oepd.MoleculeArray)
409
+ ctx = arr.metadata.get("cnotebook", None)
410
+ if ctx is not None and isinstance(ctx, CNotebookContext):
411
+ ctx.reset_callbacks()
317
412
 
318
- # Case: Pattern is an iterable
319
- elif isinstance(pattern, Iterable):
320
- for element in pattern:
321
-
322
- # Element is a SMARTS string or oechem.OESubSearch object
323
- if isinstance(element, (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)):
324
- ctx.add_callback(
325
- create_structure_highlighter(
326
- query=element,
327
- color=color,
328
- style=style
329
- )
330
- )
331
413
 
332
- # Unknown element
333
- else:
334
- raise TypeError(f'Do not know how to add molecule highlight for type {type(element).__name__}')
414
+ def _series_align_depictions(
415
+ self,
416
+ ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | oechem.OEQMol | Literal["first"],
417
+ method: Literal["substructure", "ss", "mcss", "fp", "fingerprint"] | None = None,
418
+ **kwargs
419
+ ) -> None:
420
+ """
421
+ Align the 2D coordinates of molecules
422
+ :param ref: Alignment reference
423
+ :param method: Alignment method
424
+ :param kwargs: Keyword arguments for aligner
425
+ :return: Aligned molecule depictions
426
+ """
427
+ if not isinstance(self._obj.dtype, oepd.MoleculeDtype):
428
+ raise TypeError(
429
+ "align_depictions only works on molecule columns (oepandas.MoleculeDtype). If this "
430
+ "column has molecules, use series.chem.as_molecule() to convert to a molecule column first."
431
+ )
335
432
 
336
- # Case: Pattern is an unknown type
433
+ # Get the rendering context for creating the displays
434
+ arr = self._obj.array
435
+ assert isinstance(arr, oepd.MoleculeArray)
436
+
437
+ if isinstance(ref, str) and ref == "first":
438
+ for mol in arr:
439
+ if mol is not None and mol.IsValid():
440
+ ref = mol.CreateCopy()
441
+ break
337
442
  else:
338
- raise TypeError(f'Do not know how to add molecule highlight for type {type(pattern).__name__}')
443
+ log.warning("No valid molecule found in series for depiction alignment")
444
+ return
339
445
 
340
- # ********************************************************************************
341
- # Alignment
342
- # ********************************************************************************
446
+ # Suppress alignment warnings (there are lots of needless warnings)
447
+ level = oechem.OEThrow.GetLevel()
448
+ oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Error)
343
449
 
344
- if ref is not None:
345
- self._obj.align_depictions(ref=ref, method=method)
450
+ # noinspection PyBroadException
451
+ try:
452
+ # Create the aligner
453
+ aligner = create_aligner(ref=ref, method=method)
346
454
 
455
+ for mol in arr:
456
+ _ = aligner(mol)
347
457
 
348
- @register_series_accessor("recalculate_depiction_coordinates")
349
- class SeriesRecalculateDepictionCoordinatesAccessor:
350
- def __init__(self, pandas_obj: pd.Series):
351
- if not isinstance(pandas_obj.dtype, oepd.MoleculeDtype):
352
- raise TypeError(
353
- "recalculate_depiction_coordinates only works on molecule columns (oepandas.MoleculeDtype). If this "
354
- "column has molecules, use pd.Series.as_molecule to convert to a molecule column first."
355
- )
458
+ except Exception as ex:
459
+ log.debug("Error aligning molecules: %s", ex)
356
460
 
357
- self._obj = pandas_obj
358
-
359
- def __call__(
360
- self,
361
- *,
362
- clear_coords: bool = True,
363
- add_depction_hydrogens: bool = True,
364
- perceive_bond_stereo: bool = True,
365
- suppress_explicit_hydrogens: bool = True,
366
- orientation: int = oedepict.OEDepictOrientation_Default
367
- ) -> None:
368
- """
369
- Recalculate the depictions for a molecule series.
461
+ # Restore OEThrow
462
+ finally:
463
+ oechem.OEThrow.SetLevel(level)
370
464
 
371
- See the following link for more information:
372
- https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html
373
465
 
374
- :param clear_coords: Clear existing 2D coordinates
375
- :param add_depction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
376
- :param perceive_bond_stereo: Perceive wedge/hash bond stereo
377
- :param suppress_explicit_hydrogens: Suppress explicit hydrogens
378
- :param orientation: Preferred 2D orientation
379
- """
380
- # Create the depiction options
381
- opts = oedepict.OEPrepareDepictionOptions()
382
- opts.SetClearCoords(clear_coords)
383
- opts.SetAddDepictionHydrogens(add_depction_hydrogens)
466
+ ########################################################################################################################
467
+ # CNotebook DataFrame accessor extensions for OEPandas .chem accessor
468
+ ########################################################################################################################
384
469
 
385
- for mol in self._obj.array:
386
- if isinstance(mol, oechem.OEMolBase):
387
- oedepict.OEPrepareDepiction(mol, opts)
470
+ def _dataframe_recalculate_depiction_coordinates(
471
+ self,
472
+ *,
473
+ molecule_columns: str | Iterable[str] | None = None,
474
+ clear_coords: bool = True,
475
+ add_depction_hydrogens: bool = True,
476
+ perceive_bond_stereo: bool = True,
477
+ suppress_explicit_hydrogens: bool = True,
478
+ orientation: int = oedepict.OEDepictOrientation_Default
479
+ ) -> None:
480
+ """
481
+ Recalculate the depictions for a one or more molecule series in a DataFrame. If molecule_columns is None,
482
+ which is the default, then all molecule columns will have their depictions recalculated
483
+
484
+ See the following link for more information:
485
+ https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html
486
+
487
+ :param molecule_columns: Optional molecule column(s) to have depictions recalculated
488
+ :param clear_coords: Clear existing 2D coordinates
489
+ :param add_depction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
490
+ :param perceive_bond_stereo: Perceive wedge/hash bond stereo
491
+ :param suppress_explicit_hydrogens: Suppress explicit hydrogens
492
+ :param orientation: Preferred 2D orientation
493
+ """
494
+ if molecule_columns is None:
495
+ molecule_columns = set()
388
496
 
497
+ for col in self._obj.columns:
498
+ if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
499
+ molecule_columns.add(col)
389
500
 
390
- @register_series_accessor("reset_depictions")
391
- class SeriesResetDepictionsAccessor:
392
- def __init__(self, pandas_obj: pd.Series):
393
- self._obj = pandas_obj
501
+ elif isinstance(molecule_columns, str):
502
+ molecule_columns = {molecule_columns}
394
503
 
395
- def __call__(self) -> None:
396
- """
397
- Reset depiction callbacks for a molecule series
398
- """
399
- # Check if array has metadata attribute (should be true for oepandas arrays)
400
- if hasattr(self._obj.array, "metadata"):
401
- # Direct assignment to help IDE understand this has metadata
402
- arr = self._obj.array
403
- assert isinstance(arr, oepd.MoleculeArray)
404
- _ = arr.metadata.pop("cnotebook", None)
405
-
406
-
407
- @register_series_accessor("align_depictions")
408
- class SeriesAlignDepictionsAccessor:
409
- def __init__(self, pandas_obj: pd.Series):
410
- if not isinstance(pandas_obj.dtype, oepd.MoleculeDtype):
411
- raise TypeError(
412
- "align_depictions only works on molecule columns (oepandas.MoleculeDtype). If this "
413
- "column has molecules, use pd.Series.as_molecule to convert to a molecule column first."
414
- )
504
+ else:
505
+ molecule_columns = set(molecule_columns)
415
506
 
416
- self._obj = pandas_obj
507
+ # Recalculate the column depictions
508
+ for col in molecule_columns:
417
509
 
418
- def __call__(
419
- self,
420
- ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | oechem.OEQMol | Literal["first"],
421
- method: Literal["substructure", "ss", "mcss", "fp", "fingerprint"] | None = None,
422
- **kwargs
423
- ) -> None:
424
- """
425
- Align the 2D coordinates of molecules
426
- :param align: Alignment reference
427
- :param kwargs: Keyword arguments for aligner
428
- :return: Aligned molecule depictions
429
- """
430
- # Get the rendering context for creating the displays
510
+ if col in self._obj.columns:
511
+ if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
512
+ self._obj[col].chem.recalculate_depiction_coordinates(
513
+ clear_coords=clear_coords,
514
+ add_depction_hydrogens=add_depction_hydrogens,
515
+ perceive_bond_stereo=perceive_bond_stereo,
516
+ suppress_explicit_hydrogens=suppress_explicit_hydrogens,
517
+ orientation=orientation
518
+ )
431
519
 
432
- # TODO: Maybe do this smarter so that you know if the context is column-level, which means you could copy that
433
- # context into the new DisplayArray that you'll create below? Or even link the contexts?
520
+ else:
521
+ log.warning(f'Column {col} does not have a MoleculeDtype')
434
522
 
435
- # Direct assignment to help IDE understand this is a MoleculeArray
436
- arr = self._obj.array
437
- assert isinstance(arr, oepd.MoleculeArray)
523
+ else:
524
+ log.warning(f'{col} not found in DataFrame columns: ({", ".join(self._obj.columns)})')
525
+ molecule_columns.remove(col)
438
526
 
439
- if isinstance(ref, str) and ref == "first":
440
- for mol in arr:
441
- if mol is not None and mol.IsValid():
442
- ref = mol.CreateCopy()
443
- break
444
- else:
445
- log.warning("No valid molecule found in series for depiction alignment")
446
- return
447
527
 
448
- # Suppress alignment warnings (there are lots of needless warnings)
449
- level = oechem.OEThrow.GetLevel()
450
- oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Error)
528
+ def _dataframe_reset_depictions(self, *, molecule_columns: str | Iterable[str] | None = None) -> None:
529
+ """
530
+ Reset depiction callbacks for one or more columns
531
+ """
532
+ columns = set()
533
+ if molecule_columns is None:
534
+ columns.update(self._obj.columns)
451
535
 
452
- # noinspection PyBroadException
453
- try:
454
- # Create the aligner
455
- aligner = create_aligner(ref=ref, method=method)
536
+ elif isinstance(molecule_columns, str):
537
+ columns.add(molecule_columns)
456
538
 
457
- for mol in arr:
458
- _ = aligner(mol)
539
+ else:
540
+ columns.update(molecule_columns)
459
541
 
460
- except Exception:
461
- # We don't care if the aligners fail - it just results in unaligned structures (NBD)
462
- pass
542
+ # Filter invalid and non-molecule columns
543
+ for col in filter(
544
+ lambda c: c in self._obj.columns and isinstance(self._obj[c].dtype, oepd.MoleculeDtype),
545
+ columns
546
+ ):
547
+ self._obj[col].chem.reset_depictions()
463
548
 
464
- # Restore OEThrow
465
- finally:
466
- oechem.OEThrow.SetLevel(level)
467
549
 
550
+ def _dataframe_clear_formatting_rules(self, molecule_columns: str | Iterable[str] | None = None) -> None:
551
+ """
552
+ Clear all formatting rule callbacks from one or more molecule columns.
468
553
 
469
- ########################################################################################################################
470
- # DataFrame accessors
471
- ########################################################################################################################
554
+ This removes any callbacks applied to molecules prior to rendering,
555
+ such as highlighting. Unlike reset_depictions which removes the entire
556
+ rendering context, this method only clears the callbacks while preserving
557
+ other context settings like image dimensions and styling.
472
558
 
473
- @register_dataframe_accessor("recalculate_depiction_coordinates")
474
- class SeriesRecalculateDepictionCoordinatesAccessor:
475
- def __init__(self, pandas_obj: pd.DataFrame):
476
- self._obj = pandas_obj
477
-
478
- def __call__(
479
- self,
480
- *,
481
- molecule_columns: str | Iterable[str] | None = None,
482
- clear_coords: bool = True,
483
- add_depction_hydrogens: bool = True,
484
- perceive_bond_stereo: bool = True,
485
- suppress_explicit_hydrogens: bool = True,
486
- orientation: int = oedepict.OEDepictOrientation_Default
487
- ) -> None:
488
- """
489
- Recalculate the depictions for a one or more molecule series in a DataFrame. If molecule_columns is None,
490
- which is the default, then all molecule columns will have their depictions recalculated
491
-
492
- See the following link for more information:
493
- https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html
494
-
495
- :param molecule_columns: Optional molecule column(s) to have depictions recalculated
496
- :param clear_coords: Clear existing 2D coordinates
497
- :param add_depction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
498
- :param perceive_bond_stereo: Perceive wedge/hash bond stereo
499
- :param suppress_explicit_hydrogens: Suppress explicit hydrogens
500
- :param orientation: Preferred 2D orientation
501
- """
502
- if molecule_columns is None:
503
- molecule_columns = set()
559
+ :param molecule_columns: Optional molecule column(s) to clear formatting rules from.
560
+ If None, clears formatting rules from all molecule columns.
504
561
 
505
- for col in self._obj.columns:
506
- if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
507
- molecule_columns.add(col)
562
+ Example::
508
563
 
509
- elif isinstance(molecule_columns, str):
510
- molecule_columns = {molecule_columns}
564
+ # Clear formatting rules from all molecule columns
565
+ df.chem.clear_formatting_rules()
511
566
 
512
- else:
513
- molecule_columns = set(molecule_columns)
514
-
515
- # Recalculate the column depictions
516
- for col in molecule_columns:
517
-
518
- if col in self._obj.columns:
519
- if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
520
- self._obj[col].recalculate_depiction_coordinates(
521
- clear_coords=clear_coords,
522
- add_depction_hydrogens=add_depction_hydrogens,
523
- perceive_bond_stereo=perceive_bond_stereo,
524
- suppress_explicit_hydrogens=suppress_explicit_hydrogens,
525
- orientation=orientation
526
- )
567
+ # Clear formatting rules from a specific column
568
+ df.chem.clear_formatting_rules("smiles")
527
569
 
528
- else:
529
- log.warning(f'Column {col} does not have a MoleculeDtype')
570
+ # Clear formatting rules from multiple columns
571
+ df.chem.clear_formatting_rules(["mol1", "mol2"])
572
+ """
573
+ columns = set()
574
+ if molecule_columns is None:
575
+ columns.update(self._obj.columns)
530
576
 
531
- else:
532
- log.warning(f'{col} not found in DataFrame columns: ({", ".join(self._obj.columns)})')
533
- molecule_columns.remove(col)
577
+ elif isinstance(molecule_columns, str):
578
+ columns.add(molecule_columns)
534
579
 
580
+ else:
581
+ columns.update(molecule_columns)
535
582
 
536
- @register_dataframe_accessor("reset_depictions")
537
- class DataFrameResetDepictionsAccessor:
538
- def __init__(self, pandas_obj: pd.DataFrame):
539
- self._obj = pandas_obj
583
+ # Filter invalid and non-molecule columns
584
+ for col in filter(
585
+ lambda c: c in self._obj.columns and isinstance(self._obj[c].dtype, oepd.MoleculeDtype),
586
+ columns
587
+ ):
588
+ self._obj[col].chem.clear_formatting_rules()
540
589
 
541
- def __call__(self, *, molecule_columns: str | Iterable[str] | None = None) -> None:
542
- """
543
- Reset depiction callbacks for one or more columns
544
- """
545
- columns = set()
546
- if molecule_columns is None:
547
- columns.update(self._obj.columns)
548
590
 
549
- elif isinstance(molecule_columns, str):
550
- columns.add(molecule_columns)
591
+ def _dataframe_highlight(
592
+ self,
593
+ molecule_column: str,
594
+ pattern: Iterable[str] | str | oechem.OESubSearch | Iterable[oechem.OESubSearch],
595
+ *,
596
+ color: oechem.OEColor | oechem.OEColorIter | None = None,
597
+ style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
598
+ ) -> None:
599
+ """
600
+ Highlight chemical features in molecules within a specified column.
551
601
 
552
- else:
553
- columns.update(molecule_columns)
554
-
555
- # Filter invalid and non-molecule columns
556
- for col in filter(
557
- lambda c: c in self._obj.columns and isinstance(self._obj[c].dtype, oepd.MoleculeDtype),
558
- columns
559
- ):
560
- self._obj[col].reset_depictions()
561
-
562
-
563
- @register_dataframe_accessor("highlight_using_column")
564
- class HighlightUsingColumnAccessor:
565
- def __init__(self, pandas_obj: pd.DataFrame):
566
- self._obj = pandas_obj
567
-
568
- def __call__(
569
- self,
570
- molecule_column: str,
571
- pattern_column: str,
572
- *,
573
- highlighted_column: str = "highlighted_substructures",
574
- ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | None = None,
575
- alignment_opts: oedepict.OEAlignmentOptions | None = None,
576
- prepare_opts: oedepict.OEPrepareDepictionOptions | None = None,
577
- inplace: bool = False
578
- ) -> pd.DataFrame:
579
- """
580
- Highlight molecules based on the value of another column. The column produced is a DisplayArray column, so
581
- the results are not suitable for other molecular calculations.
582
-
583
- The other column can contain:
584
- - Comma or whitespace delimited string of SMARTS patterns
585
- - oechem.OESubSearch or oechem.OEMCSSearch object
586
- - Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
587
-
588
- :param molecule_column: Name of the molecule column
589
- :param pattern_column: Name of the pattern column
590
- :param highlighted_column: Optional name of the column with highlighted structures
591
- :param ref: Optional reference for aligning depictions
592
- :param alignment_opts: Optional depiction alignment options (oedepict.OEAlignmentOptions)
593
- :param prepare_opts: Optional depiction preparation options (oedepict.OEPrepareDepictionOptions)
594
- :param inplace: Modify the DataFrame in place
595
- :return: Modified DataFrame
596
- """
597
- # Object we are operating on
598
- df = self._obj if inplace else self._obj.copy()
602
+ The pattern argument can be:
603
+ - SMARTS pattern
604
+ - oechem.OESubSearch or oechem.OEMCSSearch object
605
+ - Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
599
606
 
600
- if molecule_column not in df.columns:
601
- raise KeyError(f'{molecule_column} not found in DataFrame columns: ({", ".join(df.columns)}')
607
+ :param molecule_column: Name of the molecule column to highlight.
608
+ :param pattern: Pattern(s) to highlight in the molecules.
609
+ :param color: Highlight color(s). Can be a single oechem.OEColor or an oechem.OEColorIter
610
+ (e.g., oechem.OEGetLightColors()). Defaults to oechem.OEGetLightColors().
611
+ :param style: Highlight style. Can be an int (OEHighlightStyle constant) or a string
612
+ ("overlay_default", "overlay_ball_and_stick"). Defaults to "overlay_default".
602
613
 
603
- if not isinstance(df[molecule_column].dtype, oepd.MoleculeDtype):
604
- raise TypeError(
605
- f"highlight_using_column only works on molecule columns (oepandas.MoleculeDtype). If {molecule_column}"
606
- " has molecules, use pd.Series.as_molecule to convert to a molecule column first."
607
- )
614
+ Example::
608
615
 
609
- if pattern_column not in df.columns:
610
- raise KeyError(f'{pattern_column} not found in DataFrame columns: ({", ".join(df.columns)}')
616
+ # Highlight benzene rings in the 'smiles' column
617
+ df.chem.highlight("smiles", "c1ccccc1")
611
618
 
612
- # Create the display objects
613
- indexes = []
614
- displays = []
619
+ # Highlight multiple patterns
620
+ df.chem.highlight("smiles", ["c1ccccc1", "[OH]"])
621
+ """
622
+ if molecule_column not in self._obj.columns:
623
+ raise ValueError(f'Column {molecule_column} not found in DataFrame columns: ({", ".join(self._obj.columns)})')
615
624
 
616
- # Get the rendering context for creating the displays
617
- # TODO: Maybe do this smarter so that you know if the context is column-level, which means you could copy that
618
- # context into the new DisplayArray that you'll create below? Or even link the contexts?
619
- # Direct assignment to help IDE understand this is a MoleculeArray
620
- arr = df[molecule_column].array
621
- assert isinstance(arr, oepd.MoleculeArray)
622
- ctx = get_series_context(arr.metadata)
625
+ if not isinstance(self._obj[molecule_column].dtype, oepd.MoleculeDtype):
626
+ raise TypeError(
627
+ f"highlight only works on molecule columns (oepandas.MoleculeDtype). Column '{molecule_column}' "
628
+ f"has type {self._obj[molecule_column].dtype}."
629
+ )
623
630
 
624
- for idx, row in df.iterrows():
625
- indexes.append(idx)
631
+ # Delegate to the series-level highlight (which works in Pandas)
632
+ self._obj[molecule_column].chem.highlight(pattern, color=color, style=style)
626
633
 
627
- mol = row[molecule_column]
628
- if isinstance(mol, oechem.OEMolBase):
629
634
 
630
- # Create the display
631
- disp = oemol_to_disp(mol, ctx=ctx)
635
+ def _dataframe_copy_molecules(
636
+ self,
637
+ source_column: str,
638
+ dest_column: str,
639
+ ) -> pd.DataFrame:
640
+ """
641
+ Create a deep copy of molecules from one column to a new column.
632
642
 
633
- # Highlight
634
- substructures = []
635
- patterns = row[pattern_column]
643
+ This creates independent copies of all molecules, allowing modifications
644
+ (such as highlighting or alignment) to the new column without affecting
645
+ the original.
636
646
 
637
- # Parse different patterns
638
- if isinstance(patterns, str):
639
- for pattern in re.split(SMARTS_DELIMITER_RE, patterns):
640
- ss = oechem.OESubSearch(pattern)
641
- if ss.IsValid():
642
- substructures.append(ss)
647
+ :param source_column: Name of the source molecule column.
648
+ :param dest_column: Name of the new column to create with copied molecules.
649
+ :returns: DataFrame with the new molecule column added.
650
+
651
+ Example::
652
+
653
+ # Create a copy of molecules for alignment
654
+ df = df.chem.copy_molecules("Original", "Aligned")
655
+ df.chem.highlight("Aligned", "c1ccccc1")
656
+ """
657
+ if source_column not in self._obj.columns:
658
+ raise ValueError(f'Column {source_column} not found in DataFrame columns: ({", ".join(self._obj.columns)})')
643
659
 
644
- elif isinstance(patterns, oechem.OESubSearch):
645
- if patterns.IsValid():
646
- substructures.append(patterns)
660
+ if not isinstance(self._obj[source_column].dtype, oepd.MoleculeDtype):
661
+ raise TypeError(
662
+ f"copy_molecules only works on molecule columns (oepandas.MoleculeDtype). Column '{source_column}' "
663
+ f"has type {self._obj[source_column].dtype}."
664
+ )
647
665
 
648
- elif isinstance(patterns, Iterable):
666
+ # Use the series-level copy_molecules and assign to the new column
667
+ self._obj[dest_column] = self._obj[source_column].chem.copy_molecules()
668
+ return self._obj
649
669
 
650
- for p in patterns:
651
670
 
652
- if isinstance(p, str):
653
- for pattern in re.split(SMARTS_DELIMITER_RE, p):
654
- ss = oechem.OESubSearch(pattern)
655
- if ss.IsValid():
656
- substructures.append(ss)
671
+ def _dataframe_highlight_using_column(
672
+ self,
673
+ molecule_column: str,
674
+ pattern_column: str,
675
+ *,
676
+ highlighted_column: str = "highlighted_substructures",
677
+ color: oechem.OEColor | oechem.OEColorIter | None = None,
678
+ style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
679
+ inplace: bool = False
680
+ ) -> pd.DataFrame:
681
+ """
682
+ Highlight molecules based on the value of another column. The column produced is a DisplayArray column, so
683
+ the results are not suitable for other molecular calculations.
684
+
685
+ The other column can contain:
686
+ - Comma or whitespace delimited string of SMARTS patterns
687
+ - oechem.OESubSearch or oechem.OEMCSSearch object
688
+ - Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
689
+
690
+ :param molecule_column: Name of the molecule column.
691
+ :param pattern_column: Name of the pattern column.
692
+ :param highlighted_column: Optional name of the column with highlighted structures.
693
+ :param color: Highlight color(s). Can be a single oechem.OEColor or an oechem.OEColorIter
694
+ (e.g., oechem.OEGetLightColors()). Defaults to oechem.OEGetLightColors().
695
+ :param style: Highlight style. Can be an int (OEHighlightStyle constant) or a string
696
+ ("overlay_default", "overlay_ball_and_stick"). Defaults to "overlay_default".
697
+ :param inplace: Modify the DataFrame in place.
698
+ :returns: Modified DataFrame.
699
+ """
700
+ # Object we are operating on
701
+ df = self._obj if inplace else self._obj.copy()
657
702
 
658
- elif isinstance(p, oechem.OESubSearch):
659
- if p.IsValid():
660
- substructures.append(p)
703
+ if molecule_column not in df.columns:
704
+ raise KeyError(f'{molecule_column} not found in DataFrame columns: ({", ".join(df.columns)}')
661
705
 
662
- else:
663
- log.warning(f'Do not know how to highlight using: {type(p).__name__}')
706
+ if not isinstance(df[molecule_column].dtype, oepd.MoleculeDtype):
707
+ raise TypeError(
708
+ f"highlight_using_column only works on molecule columns (oepandas.MoleculeDtype). If {molecule_column}"
709
+ " has molecules, use df.chem.as_molecule() to convert to a molecule column first."
710
+ )
664
711
 
665
- else:
666
- log.warning(f'Do not know how to highlight using: {type(patterns).__name__}')
712
+ if pattern_column not in df.columns:
713
+ raise KeyError(f'{pattern_column} not found in DataFrame columns: ({", ".join(df.columns)}')
714
+
715
+ # Default color
716
+ if color is None:
717
+ color = oechem.OEGetLightColors()
718
+
719
+ # Determine highlighting approach based on style
720
+ use_overlay = isinstance(style, str) and style in ("overlay_default", "overlay_ball_and_stick")
721
+
722
+ # Check if color is compatible with overlay
723
+ if use_overlay and isinstance(color, oechem.OEColor):
724
+ log.warning(
725
+ "Overlay coloring is not compatible with a single oechem.OEColor. Falling back to standard highlighting")
726
+ use_overlay = False
727
+ style = oedepict.OEHighlightStyle_BallAndStick
728
+
729
+ # Create the display objects
730
+ indexes = []
731
+ displays = []
732
+
733
+ # Get the rendering context for creating the displays
734
+ arr = df[molecule_column].array
735
+ assert isinstance(arr, oepd.MoleculeArray)
736
+ ctx = get_series_context(arr.metadata)
737
+
738
+ for idx, row in df.iterrows():
739
+ indexes.append(idx)
740
+
741
+ mol = row[molecule_column]
742
+ if isinstance(mol, oechem.OEMolBase):
743
+
744
+ # Create the display
745
+ disp = oemol_to_disp(mol, ctx=ctx)
746
+
747
+ # Highlight
748
+ substructures = []
749
+ patterns = row[pattern_column]
750
+
751
+ # Parse different patterns
752
+ if isinstance(patterns, str):
753
+ for pattern in re.split(SMARTS_DELIMITER_RE, patterns):
754
+ ss = oechem.OESubSearch(pattern)
755
+ if ss.IsValid():
756
+ substructures.append(ss)
667
757
 
668
- # Apply substructure highlights
669
- highlight = oedepict.OEHighlightOverlayByBallAndStick(oechem.OEGetLightColors())
758
+ elif isinstance(patterns, oechem.OESubSearch):
759
+ if patterns.IsValid():
760
+ substructures.append(patterns)
670
761
 
762
+ elif isinstance(patterns, Iterable):
763
+
764
+ for p in patterns:
765
+
766
+ if isinstance(p, str):
767
+ for pattern in re.split(SMARTS_DELIMITER_RE, p):
768
+ ss = oechem.OESubSearch(pattern)
769
+ if ss.IsValid():
770
+ substructures.append(ss)
771
+
772
+ elif isinstance(p, oechem.OESubSearch):
773
+ if p.IsValid():
774
+ substructures.append(p)
775
+
776
+ else:
777
+ log.warning(f'Do not know how to highlight using: {type(p).__name__}')
778
+
779
+ else:
780
+ log.warning(f'Do not know how to highlight using: {type(patterns).__name__}')
781
+
782
+ # Overlay highlighting
783
+ if use_overlay:
784
+ highlight = oedepict.OEHighlightOverlayByBallAndStick(color)
671
785
  for ss in substructures:
672
786
  oedepict.OEAddHighlightOverlay(disp, highlight, ss.Match(mol, True))
673
787
 
674
- displays.append(disp)
675
-
676
788
  else:
677
- displays.append(None)
789
+ # Traditional highlighting
790
+ if isinstance(color, oechem.OEColor):
791
+ highlight_color = color
792
+ else:
793
+ highlight_color = oechem.OELightBlue
794
+ for c in color:
795
+ highlight_color = c
796
+ break
797
+ for ss in substructures:
798
+ for match in ss.Match(mol, True):
799
+ oedepict.OEAddHighlighting(disp, highlight_color, style, match)
678
800
 
679
- df[highlighted_column] = pd.Series(displays, index=indexes, dtype=oepd.DisplayDtype())
680
- return df
801
+ displays.append(disp)
802
+
803
+ else:
804
+ displays.append(None)
805
+
806
+ df[highlighted_column] = pd.Series(displays, index=indexes, dtype=oepd.DisplayDtype())
807
+ return df
681
808
 
682
809
 
683
810
  class ColorBondByOverlapScore(oegrapheme.OEBondGlyphBase):
811
+ """Bond glyph that colors bonds by fingerprint overlap score.
812
+
813
+ Used internally by fingerprint similarity visualization to highlight
814
+ bonds based on their contribution to molecular similarity.
815
+
816
+ See: https://docs.eyesopen.com/toolkits/cookbook/python/depiction/simcalc.html
684
817
  """
685
- Color molecule by bond overlap score:
686
- https://docs.eyesopen.com/toolkits/cookbook/python/depiction/simcalc.html
687
- """
688
- def __init__(self, cg, tag):
818
+
819
+ def __init__(self, cg: oechem.OELinearColorGradient, tag: int):
820
+ """Create a bond coloring glyph.
821
+
822
+ :param cg: Color gradient to map overlap scores to colors.
823
+ :param tag: OEChem data tag containing overlap scores on bonds.
824
+ """
689
825
  oegrapheme.OEBondGlyphBase.__init__(self)
690
826
  self.colorg = cg
691
827
  self.tag = tag
@@ -717,196 +853,306 @@ class ColorBondByOverlapScore(oegrapheme.OEBondGlyphBase):
717
853
  return ColorBondByOverlapScore(self.colorg, self.tag).__disown__()
718
854
 
719
855
 
720
- @register_dataframe_accessor("fingerprint_similarity")
721
- class FingerprintSimilaritySeriesAccessor:
722
- def __init__(self, pandas_obj: pd.DataFrame):
723
- self._obj = pandas_obj
724
- self._tag = oechem.OEGetTag("fingerprint_overlap")
725
-
726
- def __call__(
727
- self,
728
- molecule_column: str,
729
- ref: oechem.OEMolBase | None = None,
730
- *,
731
- tanimoto_column="fingerprint_tanimoto",
732
- reference_similarity_column="reference_similarity",
733
- target_similarity_column="target_similarity",
734
- fptype: str = "tree",
735
- num_bits: int = 4096,
736
- min_distance: int = 0,
737
- max_distance: int = 4,
738
- atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
739
- bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond,
740
- inplace: bool = False
741
- ) -> pd.DataFrame:
742
- """
743
- Color molecules by fingerprint similarity
744
- :param ref: Reference molecule
745
- :param fptype: Fingerprint type
746
- :param num_bits: Number of bits in the fingerprint
747
- :param min_distance: Minimum distance/radius for path/circular/tree
748
- :param max_distance: Maximum distance/radius for path/circular/tree
749
- :param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
750
- :param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
751
- :return:
752
- """
753
- # Preprocess
754
- df = self._obj if inplace else self._obj.copy()
856
+ # Store the fingerprint tag for fingerprint_similarity
857
+ _fingerprint_overlap_tag = oechem.OEGetTag("fingerprint_overlap")
755
858
 
756
- if molecule_column not in df.columns:
757
- raise KeyError(f'Molecule column not found in DataFrame: {molecule_column}')
758
859
 
759
- if not isinstance(df[molecule_column].dtype, oepd.MoleculeDtype):
760
- raise TypeError("Column {} does not have dtype oepd.MoleculeDtype ({})".format(
761
- molecule_column, str(df[molecule_column].dtype)))
860
+ def _dataframe_fingerprint_similarity(
861
+ self,
862
+ molecule_column: str,
863
+ ref: oechem.OEMolBase | None = None,
864
+ *,
865
+ tanimoto_column="fingerprint_tanimoto",
866
+ reference_similarity_column="reference_similarity",
867
+ target_similarity_column="target_similarity",
868
+ fptype: str = "tree",
869
+ num_bits: int = 4096,
870
+ min_distance: int = 0,
871
+ max_distance: int = 4,
872
+ atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
873
+ bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond,
874
+ inplace: bool = False
875
+ ) -> pd.DataFrame:
876
+ """
877
+ Color molecules by fingerprint similarity
878
+ :param molecule_column: Name of the molecule column
879
+ :param ref: Reference molecule
880
+ :param tanimoto_column: Name of the tanimoto column
881
+ :param reference_similarity_column: Name of the reference similarity column
882
+ :param target_similarity_column: Name of the target similarity column
883
+ :param fptype: Fingerprint type
884
+ :param num_bits: Number of bits in the fingerprint
885
+ :param min_distance: Minimum distance/radius for path/circular/tree
886
+ :param max_distance: Maximum distance/radius for path/circular/tree
887
+ :param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
888
+ :param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
889
+ :param inplace: Modify the DataFrame in place
890
+ :return: DataFrame with similarity columns
891
+ """
892
+ tag = _fingerprint_overlap_tag
762
893
 
763
- # Get the context
764
- # Direct assignment to help IDE understand this is a MoleculeArray
765
- arr = self._obj[molecule_column].array
766
- assert isinstance(arr, oepd.MoleculeArray)
767
- ctx = get_series_context(arr.metadata)
768
-
769
- # If we're using the first molecule as our reference
770
- if ref is None:
771
- for mol in arr: # type: oechem.OEMol
772
- if mol.IsValid():
773
- ref = mol
774
- break
775
- else:
776
- log.warning(f'No valid reference molecules to use for alignment in column {molecule_column}')
777
- return df
894
+ # Preprocess
895
+ df = self._obj if inplace else self._obj.copy()
778
896
 
779
- # Check reference molecule
780
- if not ref.IsValid():
781
- log.warning("Reference molecule is not valid")
782
- return df
897
+ if molecule_column not in df.columns:
898
+ raise KeyError(f'Molecule column not found in DataFrame: {molecule_column}')
783
899
 
784
- # Fingerprint maker
785
- make_fp = fingerprint_maker(
786
- fptype=fptype,
787
- num_bits=num_bits,
788
- min_distance=min_distance,
789
- max_distance=max_distance,
790
- atom_type=atom_type,
791
- bond_type=bond_type
792
- )
900
+ if not isinstance(df[molecule_column].dtype, oepd.MoleculeDtype):
901
+ raise TypeError("Column {} does not have dtype oepd.MoleculeDtype ({})".format(
902
+ molecule_column, str(df[molecule_column].dtype)))
793
903
 
794
- # Make the reference fingerprint
795
- ref_fp = make_fp(ref)
904
+ # Get the context
905
+ arr = self._obj[molecule_column].array
906
+ assert isinstance(arr, oepd.MoleculeArray)
907
+ ctx = get_series_context(arr.metadata)
796
908
 
797
- if not ref_fp.IsValid():
798
- log.warning("Fingerprint from reference molecule is invalid")
909
+ # If we're using the first molecule as our reference
910
+ if ref is None:
911
+ for mol in arr: # type: oechem.OEMol
912
+ if mol.IsValid():
913
+ ref = mol
914
+ break
915
+ else:
916
+ log.warning(f'No valid reference molecules to use for alignment in column {molecule_column}')
799
917
  return df
800
918
 
801
- # Create the display objects
802
- ref_displays = []
803
- targ_displays = []
919
+ # Check reference molecule
920
+ if not ref.IsValid():
921
+ log.warning("Reference molecule is not valid")
922
+ return df
804
923
 
805
- # FIXME: See now below regarding the fact we have to cache the reference and target molecule copies
806
- ref_molecules = []
807
- targ_molecules = []
924
+ # Fingerprint maker
925
+ make_fp = fingerprint_maker(
926
+ fptype=fptype,
927
+ num_bits=num_bits,
928
+ min_distance=min_distance,
929
+ max_distance=max_distance,
930
+ atom_type=atom_type,
931
+ bond_type=bond_type
932
+ )
933
+
934
+ # Make the reference fingerprint
935
+ ref_fp = make_fp(ref)
936
+
937
+ if not ref_fp.IsValid():
938
+ log.warning("Fingerprint from reference molecule is invalid")
939
+ return df
808
940
 
809
- tanimotos = []
810
- index = []
941
+ # Create the display objects
942
+ ref_displays = []
943
+ targ_displays = []
811
944
 
812
- for idx, mol in df[molecule_column].items(): # type: Hashable, oechem.OEMol
813
- index.append(idx)
814
- if mol is not None and mol.IsValid():
945
+ # FIXME: See now below regarding the fact we have to cache the reference and target molecule copies
946
+ ref_molecules = []
947
+ targ_molecules = []
815
948
 
816
- # Copy the molecules, because we're modifying them
817
- targ_mol = oechem.OEMol(mol)
818
- ref_mol = oechem.OEMol(ref)
949
+ tanimotos = []
950
+ index = []
819
951
 
820
- # FIXME: See now below regarding the fact we have to cache the reference and target molecule copies
821
- targ_molecules.append(targ_mol)
822
- ref_molecules.append(ref_mol)
952
+ for idx, mol in df[molecule_column].items(): # type: Hashable, oechem.OEMol
953
+ index.append(idx)
954
+ if mol is not None and mol.IsValid():
823
955
 
824
- # Create the fingerprint
825
- targ_fp = make_fp(targ_mol)
826
- if targ_fp.IsValid():
956
+ # Copy the molecules, because we're modifying them
957
+ targ_mol = oechem.OEMol(mol)
958
+ ref_mol = oechem.OEMol(ref)
827
959
 
828
- # Add the tanimoto
829
- tanimotos.append(oegraphsim.OETanimoto(ref_fp, targ_fp))
960
+ # FIXME: See now below regarding the fact we have to cache the reference and target molecule copies
961
+ targ_molecules.append(targ_mol)
962
+ ref_molecules.append(ref_mol)
830
963
 
831
- # Calculate the similarity
832
- targ_bonds = oechem.OEUIntArray(targ_mol.GetMaxBondIdx())
833
- ref_bonds = oechem.OEUIntArray(ref_mol.GetMaxBondIdx())
964
+ # Create the fingerprint
965
+ targ_fp = make_fp(targ_mol)
966
+ if targ_fp.IsValid():
834
967
 
835
- # Overlaps
836
- overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())
968
+ # Add the tanimoto
969
+ tanimotos.append(oegraphsim.OETanimoto(ref_fp, targ_fp))
837
970
 
838
- for match in overlaps:
839
- for bond in match.GetPatternBonds():
840
- ref_bonds[bond.GetIdx()] += 1
841
- for bond in match.GetTargetBonds():
842
- targ_bonds[bond.GetIdx()] += 1
971
+ # Calculate the similarity
972
+ targ_bonds = oechem.OEUIntArray(targ_mol.GetMaxBondIdx())
973
+ ref_bonds = oechem.OEUIntArray(ref_mol.GetMaxBondIdx())
843
974
 
844
- for bond in targ_mol.GetBonds():
845
- bond.SetData(self._tag, targ_bonds[bond.GetIdx()])
975
+ # Overlaps
976
+ overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())
846
977
 
847
- for bond in ref_mol.GetBonds():
848
- bond.SetData(self._tag, ref_bonds[bond.GetIdx()])
978
+ for match in overlaps:
979
+ for bond in match.GetPatternBonds():
980
+ ref_bonds[bond.GetIdx()] += 1
981
+ for bond in match.GetTargetBonds():
982
+ targ_bonds[bond.GetIdx()] += 1
849
983
 
850
- # noinspection PyTypeChecker
851
- maxvalue = max((0, max(targ_bonds), max(ref_bonds)))
984
+ for bond in targ_mol.GetBonds():
985
+ bond.SetData(tag, targ_bonds[bond.GetIdx()])
852
986
 
853
- # Create the color gradient
854
- colorg = oechem.OELinearColorGradient()
855
- colorg.AddStop(oechem.OEColorStop(0.0, oechem.OEPinkTint))
856
- colorg.AddStop(oechem.OEColorStop(1.0, oechem.OEYellow))
857
- colorg.AddStop(oechem.OEColorStop(maxvalue, oechem.OEDarkGreen))
987
+ for bond in ref_mol.GetBonds():
988
+ bond.SetData(tag, ref_bonds[bond.GetIdx()])
858
989
 
859
- # Function that will color the bonds
860
- bondglyph = ColorBondByOverlapScore(colorg, self._tag)
990
+ # noinspection PyTypeChecker
991
+ maxvalue = max((0, max(targ_bonds), max(ref_bonds)))
861
992
 
862
- # Align the molecules
863
- overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())
864
- oedepict.OEPrepareMultiAlignedDepiction(targ_mol, ref_mol, overlaps)
993
+ # Create the color gradient
994
+ colorg = oechem.OELinearColorGradient()
995
+ colorg.AddStop(oechem.OEColorStop(0.0, oechem.OEPinkTint))
996
+ colorg.AddStop(oechem.OEColorStop(1.0, oechem.OEYellow))
997
+ colorg.AddStop(oechem.OEColorStop(maxvalue, oechem.OEDarkGreen))
865
998
 
866
- # Create the displays
867
- ref_disp = oemol_to_disp(ref_mol, ctx=ctx)
868
- targ_disp = oemol_to_disp(targ_mol, ctx=ctx)
999
+ # Function that will color the bonds
1000
+ bondglyph = ColorBondByOverlapScore(colorg, tag)
869
1001
 
870
- # Color the displays
871
- oegrapheme.OEAddGlyph(ref_disp, bondglyph, oechem.IsTrueBond())
872
- oegrapheme.OEAddGlyph(targ_disp, bondglyph, oechem.IsTrueBond())
1002
+ # Align the molecules
1003
+ oedepict.OEPrepareDepiction(ref_mol, False)
1004
+ oedepict.OEPrepareDepiction(targ_mol, False)
873
1005
 
874
- ref_displays.append(ref_disp)
875
- targ_displays.append(targ_disp)
1006
+ overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())
1007
+ oedepict.OEPrepareMultiAlignedDepiction(targ_mol, ref_mol, overlaps)
876
1008
 
877
- # Fingerprint was invalid
878
- else:
879
- ref_displays.append(None)
880
- targ_displays.append(None)
1009
+ # Create the displays
1010
+ ref_disp = oemol_to_disp(ref_mol, ctx=ctx)
1011
+ targ_disp = oemol_to_disp(targ_mol, ctx=ctx)
1012
+
1013
+ # Color the displays
1014
+ oegrapheme.OEAddGlyph(ref_disp, bondglyph, oechem.IsTrueBond())
1015
+ oegrapheme.OEAddGlyph(targ_disp, bondglyph, oechem.IsTrueBond())
1016
+
1017
+ ref_displays.append(ref_disp)
1018
+ targ_displays.append(targ_disp)
881
1019
 
882
- # Molecule was invalid
1020
+ # Fingerprint was invalid
883
1021
  else:
884
1022
  ref_displays.append(None)
885
1023
  targ_displays.append(None)
886
1024
 
887
- # Add the columns
888
- df[tanimoto_column] = pd.Series(
889
- tanimotos,
890
- index=index,
891
- dtype=float
892
- )
1025
+ # Molecule was invalid
1026
+ else:
1027
+ ref_displays.append(None)
1028
+ targ_displays.append(None)
893
1029
 
894
- # FIXME: Submitted to OpenEye as Case #00037423
895
- # We need to keep the copies of the molecules that we made above, or they will be garbage collected
896
- # and the OE2DMolDisplay objects will segfault. We'll keep those in the metadata now for the arrays.
897
- ref_arr = oepd.DisplayArray(ref_displays, metadata={"molecules": ref_molecules})
898
- targ_arr = oepd.DisplayArray(targ_displays, metadata={"molecules": targ_molecules})
1030
+ # Add the columns
1031
+ df[tanimoto_column] = pd.Series(
1032
+ tanimotos,
1033
+ index=index,
1034
+ dtype=float
1035
+ )
899
1036
 
900
- df[reference_similarity_column] = pd.Series(
901
- ref_arr,
902
- index=shallow_copy(index),
903
- dtype=oepd.DisplayDtype()
904
- )
1037
+ # FIXME: Submitted to OpenEye as Case #00037423
1038
+ # We need to keep the copies of the molecules that we made above, or they will be garbage collected
1039
+ # and the OE2DMolDisplay objects will segfault. We'll keep those in the metadata now for the arrays.
1040
+ ref_arr = oepd.DisplayArray(ref_displays, metadata={"molecules": ref_molecules})
1041
+ targ_arr = oepd.DisplayArray(targ_displays, metadata={"molecules": targ_molecules})
905
1042
 
906
- df[target_similarity_column] = pd.Series(
907
- targ_arr,
908
- index=shallow_copy(index),
909
- dtype=oepd.DisplayDtype()
910
- )
1043
+ df[reference_similarity_column] = pd.Series(
1044
+ ref_arr,
1045
+ index=shallow_copy(index),
1046
+ dtype=oepd.DisplayDtype()
1047
+ )
911
1048
 
912
- return df
1049
+ df[target_similarity_column] = pd.Series(
1050
+ targ_arr,
1051
+ index=shallow_copy(index),
1052
+ dtype=oepd.DisplayDtype()
1053
+ )
1054
+
1055
+ return df
1056
+
1057
+
1058
+ ########################################################################################################################
1059
+ # Monkey-patch CNotebook methods onto OEPandas accessors
1060
+ ########################################################################################################################
1061
+
1062
+ # Import the OEPandas accessor classes
1063
+ from oepandas.pandas_extensions import OESeriesAccessor, OEDataFrameAccessor
1064
+
1065
+ # Add cnotebook methods to Series accessor
1066
+ OESeriesAccessor.highlight = _series_highlight
1067
+ OESeriesAccessor.recalculate_depiction_coordinates = _series_recalculate_depiction_coordinates
1068
+ OESeriesAccessor.reset_depictions = _series_reset_depictions
1069
+ OESeriesAccessor.clear_formatting_rules = _series_clear_formatting_rules
1070
+ OESeriesAccessor.align_depictions = _series_align_depictions
1071
+
1072
+ # Add cnotebook methods to DataFrame accessor
1073
+ OEDataFrameAccessor.recalculate_depiction_coordinates = _dataframe_recalculate_depiction_coordinates
1074
+ OEDataFrameAccessor.reset_depictions = _dataframe_reset_depictions
1075
+ OEDataFrameAccessor.clear_formatting_rules = _dataframe_clear_formatting_rules
1076
+ OEDataFrameAccessor.copy_molecules = _dataframe_copy_molecules
1077
+ OEDataFrameAccessor.highlight = _dataframe_highlight
1078
+ OEDataFrameAccessor.highlight_using_column = _dataframe_highlight_using_column
1079
+ OEDataFrameAccessor.fingerprint_similarity = _dataframe_fingerprint_similarity
1080
+
1081
+
1082
+ ########################################################################################################################
1083
+ # MolGrid accessor methods for Series and DataFrame
1084
+ ########################################################################################################################
1085
+
1086
+ def _series_molgrid(
1087
+ self,
1088
+ title: bool | str | None = True,
1089
+ tooltip_fields: list = None,
1090
+ **kwargs
1091
+ ):
1092
+ """Display molecules in an interactive grid.
1093
+
1094
+ :param title: Title display mode. True uses molecule's title, a string
1095
+ specifies a field name, None/False hides titles.
1096
+ :param tooltip_fields: Fields for tooltip.
1097
+ :param kwargs: Additional arguments passed to MolGrid.
1098
+ :returns: MolGrid instance.
1099
+ """
1100
+ from cnotebook import MolGrid
1101
+
1102
+ series = self._obj
1103
+ mols = list(series)
1104
+
1105
+ # Check if series is part of a DataFrame
1106
+ df = None
1107
+ # noinspection PyProtectedMember
1108
+ if hasattr(series, '_cacher') and series._cacher is not None:
1109
+ try:
1110
+ # noinspection PyProtectedMember
1111
+ df = series._cacher[1]()
1112
+ except (TypeError, KeyError):
1113
+ pass
1114
+
1115
+ return MolGrid(
1116
+ mols,
1117
+ dataframe=df,
1118
+ mol_col=series.name,
1119
+ title=title,
1120
+ tooltip_fields=tooltip_fields,
1121
+ **kwargs
1122
+ )
1123
+
1124
+
1125
+ def _dataframe_molgrid(
1126
+ self,
1127
+ mol_col: str,
1128
+ title: bool | str | None = True,
1129
+ tooltip_fields: list = None,
1130
+ **kwargs
1131
+ ):
1132
+ """Display molecules from a column in an interactive grid.
1133
+
1134
+ :param mol_col: Column containing molecules.
1135
+ :param title: Title display mode. True uses molecule's title, a string
1136
+ specifies a field name, None/False hides titles.
1137
+ :param tooltip_fields: Columns for tooltip.
1138
+ :param kwargs: Additional arguments passed to MolGrid.
1139
+ :returns: MolGrid instance.
1140
+ """
1141
+ from cnotebook import MolGrid
1142
+
1143
+ df = self._obj
1144
+ mols = list(df[mol_col])
1145
+
1146
+ return MolGrid(
1147
+ mols,
1148
+ dataframe=df,
1149
+ mol_col=mol_col,
1150
+ title=title,
1151
+ tooltip_fields=tooltip_fields,
1152
+ **kwargs
1153
+ )
1154
+
1155
+
1156
+ # Add molgrid methods to accessors
1157
+ OESeriesAccessor.molgrid = _series_molgrid
1158
+ OEDataFrameAccessor.molgrid = _dataframe_molgrid