cnotebook 1.2.0__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnotebook/__init__.py +365 -67
- cnotebook/align.py +231 -167
- cnotebook/context.py +50 -18
- cnotebook/grid/__init__.py +56 -0
- cnotebook/grid/grid.py +1655 -0
- cnotebook/helpers.py +147 -15
- cnotebook/ipython_ext.py +0 -3
- cnotebook/marimo_ext.py +67 -0
- cnotebook/pandas_ext.py +760 -514
- cnotebook/polars_ext.py +1237 -0
- cnotebook/render.py +0 -195
- cnotebook-2.1.1.dist-info/METADATA +338 -0
- cnotebook-2.1.1.dist-info/RECORD +16 -0
- {cnotebook-1.2.0.dist-info → cnotebook-2.1.1.dist-info}/WHEEL +1 -1
- cnotebook-1.2.0.dist-info/METADATA +0 -280
- cnotebook-1.2.0.dist-info/RECORD +0 -13
- {cnotebook-1.2.0.dist-info → cnotebook-2.1.1.dist-info}/licenses/LICENSE +0 -0
- {cnotebook-1.2.0.dist-info → cnotebook-2.1.1.dist-info}/top_level.txt +0 -0
cnotebook/pandas_ext.py
CHANGED
|
@@ -3,7 +3,6 @@ import logging
|
|
|
3
3
|
import typing
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import oepandas as oepd
|
|
6
|
-
from pandas.api.extensions import register_dataframe_accessor, register_series_accessor
|
|
7
6
|
from typing import Iterable, Any, Literal, Hashable
|
|
8
7
|
from openeye import oechem, oedepict, oegraphsim, oegrapheme
|
|
9
8
|
from copy import copy as shallow_copy
|
|
@@ -11,7 +10,7 @@ from .context import pass_cnotebook_context, get_series_context
|
|
|
11
10
|
from .helpers import escape_brackets, create_structure_highlighter
|
|
12
11
|
from .align import create_aligner, fingerprint_maker
|
|
13
12
|
from .render import (
|
|
14
|
-
CNotebookContext,
|
|
13
|
+
CNotebookContext, # noqa
|
|
15
14
|
oemol_to_disp,
|
|
16
15
|
oedisp_to_html,
|
|
17
16
|
render_invalid_molecule,
|
|
@@ -109,6 +108,7 @@ def render_dataframe(
|
|
|
109
108
|
df: pd.DataFrame,
|
|
110
109
|
formatters: dict | None = None,
|
|
111
110
|
col_space: dict[str, float | int] | None = None,
|
|
111
|
+
ctx: CNotebookContext | None = None,
|
|
112
112
|
**kwargs
|
|
113
113
|
) -> str:
|
|
114
114
|
"""
|
|
@@ -116,6 +116,7 @@ def render_dataframe(
|
|
|
116
116
|
:param df: DataFrame to render
|
|
117
117
|
:param formatters: Custom formatters for displaying columns
|
|
118
118
|
:param col_space: Custom column spacing
|
|
119
|
+
:param ctx: Local rendering context (optional)
|
|
119
120
|
:param kwargs: Additional keyword arguments for DataFrame.to_html
|
|
120
121
|
:return: HTML of rendered DataFrame
|
|
121
122
|
"""
|
|
@@ -170,15 +171,15 @@ def render_dataframe(
|
|
|
170
171
|
assert isinstance(arr, oepd.MoleculeArray)
|
|
171
172
|
|
|
172
173
|
# Get the cnotebook options for this column
|
|
173
|
-
|
|
174
|
+
series_ctx = ctx if ctx is not None else get_series_context(arr.metadata)
|
|
174
175
|
|
|
175
|
-
formatters[col] = create_mol_formatter(ctx=
|
|
176
|
+
formatters[col] = create_mol_formatter(ctx=series_ctx)
|
|
176
177
|
|
|
177
178
|
# Record the column width
|
|
178
179
|
if col in col_space:
|
|
179
180
|
log.warning(f'Column spacing for {col} already defined by overwriting with molecule image width')
|
|
180
181
|
|
|
181
|
-
col_space[col] = float(
|
|
182
|
+
col_space[col] = float(series_ctx.width)
|
|
182
183
|
|
|
183
184
|
# ---------------------------------------------------
|
|
184
185
|
# Display columns
|
|
@@ -202,9 +203,9 @@ def render_dataframe(
|
|
|
202
203
|
assert isinstance(arr, oepd.DisplayArray)
|
|
203
204
|
|
|
204
205
|
# Get column metadata
|
|
205
|
-
|
|
206
|
+
series_ctx = ctx if ctx is not None else get_series_context(arr.metadata)
|
|
206
207
|
|
|
207
|
-
formatters[col] = create_disp_formatter(ctx=
|
|
208
|
+
formatters[col] = create_disp_formatter(ctx=series_ctx)
|
|
208
209
|
|
|
209
210
|
if len(arr) > 0:
|
|
210
211
|
col_space[col] = max(disp.GetWidth() for disp in arr if isinstance(disp, oedepict.OE2DMolDisplay))
|
|
@@ -257,435 +258,570 @@ else:
|
|
|
257
258
|
|
|
258
259
|
|
|
259
260
|
########################################################################################################################
|
|
260
|
-
# Series
|
|
261
|
+
# CNotebook Series accessor extensions for OEPandas .chem accessor
|
|
261
262
|
########################################################################################################################
|
|
262
263
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
264
|
+
def _series_highlight(
|
|
265
|
+
self,
|
|
266
|
+
pattern: Iterable[str] | str | oechem.OESubSearch | Iterable[oechem.OESubSearch],
|
|
267
|
+
*,
|
|
268
|
+
color: oechem.OEColor | oechem.OEColorIter | None = None,
|
|
269
|
+
style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
|
|
270
|
+
ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEQMol | Literal["first"] | oechem.OEMolBase | None = None,
|
|
271
|
+
method: Literal["ss", "substructure", "mcss", "fp", "fingerprint"] | None = None
|
|
272
|
+
) -> None:
|
|
273
|
+
"""
|
|
274
|
+
Highlight chemical features in a structure.
|
|
275
|
+
|
|
276
|
+
The pattern argument can be:
|
|
277
|
+
- SMARTS pattern
|
|
278
|
+
- oechem.OESubSearch or oechem.OEMCSSearch object
|
|
279
|
+
- Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
|
|
280
|
+
|
|
281
|
+
:param pattern: Pattern(s) to highlight in the molecule.
|
|
282
|
+
:param color: Highlight color(s). Can be a single oechem.OEColor or an oechem.OEColorIter
|
|
283
|
+
(e.g., oechem.OEGetLightColors()). Defaults to oechem.OEGetLightColors().
|
|
284
|
+
:param style: Highlight style. Can be an int (OEHighlightStyle constant) or a string
|
|
285
|
+
("overlay_default", "overlay_ball_and_stick"). Defaults to "overlay_default".
|
|
286
|
+
:param ref: Optional reference for alignment.
|
|
287
|
+
:param method: Optional alignment method.
|
|
288
|
+
"""
|
|
289
|
+
if not isinstance(self._obj.dtype, oepd.MoleculeDtype):
|
|
290
|
+
raise TypeError(
|
|
291
|
+
"highlight only works on molecule columns (oepandas.MoleculeDtype). If this column has "
|
|
292
|
+
"molecules, use series.chem.as_molecule() to convert to a molecule column first."
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
# Get the molecule array
|
|
296
|
+
arr = self._obj.array
|
|
297
|
+
assert isinstance(arr, oepd.MoleculeArray)
|
|
298
|
+
|
|
299
|
+
# Get / create a series context and save it (because we are modifying it locally)
|
|
300
|
+
ctx = get_series_context(arr.metadata, save=True)
|
|
301
|
+
|
|
302
|
+
# ********************************************************************************
|
|
303
|
+
# Highlighting
|
|
304
|
+
# ********************************************************************************
|
|
305
|
+
|
|
306
|
+
# Case: Pattern is a single SMARTS string or oechem.OESubSearch object
|
|
307
|
+
if isinstance(pattern, (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)):
|
|
308
|
+
ctx.add_callback(
|
|
309
|
+
create_structure_highlighter(
|
|
310
|
+
query=pattern,
|
|
311
|
+
color=color,
|
|
312
|
+
style=style
|
|
270
313
|
)
|
|
314
|
+
)
|
|
271
315
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
316
|
+
# Case: Pattern is an iterable
|
|
317
|
+
elif isinstance(pattern, Iterable):
|
|
318
|
+
for element in pattern:
|
|
319
|
+
|
|
320
|
+
# Element is a SMARTS string or oechem.OESubSearch object
|
|
321
|
+
if isinstance(element, (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)):
|
|
322
|
+
ctx.add_callback(
|
|
323
|
+
create_structure_highlighter(
|
|
324
|
+
query=element,
|
|
325
|
+
color=color,
|
|
326
|
+
style=style
|
|
327
|
+
)
|
|
328
|
+
)
|
|
285
329
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
- Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
|
|
330
|
+
# Unknown element
|
|
331
|
+
else:
|
|
332
|
+
raise TypeError(f'Do not know how to add molecule highlight for type {type(element).__name__}')
|
|
290
333
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
334
|
+
# Case: Pattern is an unknown type
|
|
335
|
+
else:
|
|
336
|
+
raise TypeError(f'Do not know how to add molecule highlight for type {type(pattern).__name__}')
|
|
337
|
+
|
|
338
|
+
# ********************************************************************************
|
|
339
|
+
# Alignment
|
|
340
|
+
# ********************************************************************************
|
|
341
|
+
|
|
342
|
+
if ref is not None:
|
|
343
|
+
self._obj.chem.align_depictions(ref=ref, method=method)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _series_recalculate_depiction_coordinates(
|
|
347
|
+
self,
|
|
348
|
+
*,
|
|
349
|
+
clear_coords: bool = True,
|
|
350
|
+
add_depiction_hydrogens: bool = True,
|
|
351
|
+
perceive_bond_stereo: bool = True,
|
|
352
|
+
suppress_explicit_hydrogens: bool = True,
|
|
353
|
+
orientation: int = oedepict.OEDepictOrientation_Default
|
|
354
|
+
) -> None:
|
|
355
|
+
"""
|
|
356
|
+
Recalculate the depictions for a molecule series.
|
|
357
|
+
|
|
358
|
+
See the following link for more information:
|
|
359
|
+
https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html
|
|
360
|
+
|
|
361
|
+
:param clear_coords: Clear existing 2D coordinates
|
|
362
|
+
:param add_depiction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
|
|
363
|
+
:param perceive_bond_stereo: Perceive wedge/hash bond stereo
|
|
364
|
+
:param suppress_explicit_hydrogens: Suppress explicit hydrogens
|
|
365
|
+
:param orientation: Preferred 2D orientation
|
|
366
|
+
"""
|
|
367
|
+
if not isinstance(self._obj.dtype, oepd.MoleculeDtype):
|
|
368
|
+
raise TypeError(
|
|
369
|
+
"recalculate_depiction_coordinates only works on molecule columns (oepandas.MoleculeDtype). If this "
|
|
370
|
+
"column has molecules, use series.chem.as_molecule() to convert to a molecule column first."
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
# Create the depiction options
|
|
374
|
+
opts = oedepict.OEPrepareDepictionOptions()
|
|
375
|
+
opts.SetClearCoords(clear_coords)
|
|
376
|
+
opts.SetAddDepictionHydrogens(add_depiction_hydrogens)
|
|
377
|
+
opts.SetPerceiveBondStereo(perceive_bond_stereo)
|
|
378
|
+
opts.SetSuppressHydrogens(suppress_explicit_hydrogens)
|
|
379
|
+
opts.SetDepictOrientation(orientation)
|
|
380
|
+
|
|
381
|
+
for mol in self._obj.array:
|
|
382
|
+
if isinstance(mol, oechem.OEMolBase):
|
|
383
|
+
oedepict.OEPrepareDepiction(mol, opts)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _series_reset_depictions(self) -> None:
|
|
387
|
+
"""
|
|
388
|
+
Reset depiction callbacks for a molecule series
|
|
389
|
+
"""
|
|
390
|
+
# Check if array has metadata attribute (should be true for oepandas arrays)
|
|
391
|
+
if hasattr(self._obj.array, "metadata"):
|
|
298
392
|
arr = self._obj.array
|
|
299
393
|
assert isinstance(arr, oepd.MoleculeArray)
|
|
394
|
+
_ = arr.metadata.pop("cnotebook", None)
|
|
300
395
|
|
|
301
|
-
# Get / create a series context and save it (because we are modifying it locally)
|
|
302
|
-
ctx = get_series_context(arr.metadata, save=True)
|
|
303
396
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
397
|
+
def _series_clear_formatting_rules(self) -> None:
|
|
398
|
+
"""
|
|
399
|
+
Clear all formatting rule callbacks from a molecule series.
|
|
307
400
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
401
|
+
This removes any callbacks applied to the molecule prior to rendering,
|
|
402
|
+
such as highlighting. Unlike reset_depictions which removes the entire
|
|
403
|
+
rendering context, this method only clears the callbacks while preserving
|
|
404
|
+
other context settings like image dimensions and styling.
|
|
405
|
+
"""
|
|
406
|
+
if hasattr(self._obj.array, "metadata"):
|
|
407
|
+
arr = self._obj.array
|
|
408
|
+
assert isinstance(arr, oepd.MoleculeArray)
|
|
409
|
+
ctx = arr.metadata.get("cnotebook", None)
|
|
410
|
+
if ctx is not None and isinstance(ctx, CNotebookContext):
|
|
411
|
+
ctx.reset_callbacks()
|
|
317
412
|
|
|
318
|
-
# Case: Pattern is an iterable
|
|
319
|
-
elif isinstance(pattern, Iterable):
|
|
320
|
-
for element in pattern:
|
|
321
|
-
|
|
322
|
-
# Element is a SMARTS string or oechem.OESubSearch object
|
|
323
|
-
if isinstance(element, (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)):
|
|
324
|
-
ctx.add_callback(
|
|
325
|
-
create_structure_highlighter(
|
|
326
|
-
query=element,
|
|
327
|
-
color=color,
|
|
328
|
-
style=style
|
|
329
|
-
)
|
|
330
|
-
)
|
|
331
413
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
414
|
+
def _series_align_depictions(
|
|
415
|
+
self,
|
|
416
|
+
ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | oechem.OEQMol | Literal["first"],
|
|
417
|
+
method: Literal["substructure", "ss", "mcss", "fp", "fingerprint"] | None = None,
|
|
418
|
+
**kwargs
|
|
419
|
+
) -> None:
|
|
420
|
+
"""
|
|
421
|
+
Align the 2D coordinates of molecules
|
|
422
|
+
:param ref: Alignment reference
|
|
423
|
+
:param method: Alignment method
|
|
424
|
+
:param kwargs: Keyword arguments for aligner
|
|
425
|
+
:return: Aligned molecule depictions
|
|
426
|
+
"""
|
|
427
|
+
if not isinstance(self._obj.dtype, oepd.MoleculeDtype):
|
|
428
|
+
raise TypeError(
|
|
429
|
+
"align_depictions only works on molecule columns (oepandas.MoleculeDtype). If this "
|
|
430
|
+
"column has molecules, use series.chem.as_molecule() to convert to a molecule column first."
|
|
431
|
+
)
|
|
335
432
|
|
|
336
|
-
|
|
433
|
+
# Get the rendering context for creating the displays
|
|
434
|
+
arr = self._obj.array
|
|
435
|
+
assert isinstance(arr, oepd.MoleculeArray)
|
|
436
|
+
|
|
437
|
+
if isinstance(ref, str) and ref == "first":
|
|
438
|
+
for mol in arr:
|
|
439
|
+
if mol is not None and mol.IsValid():
|
|
440
|
+
ref = mol.CreateCopy()
|
|
441
|
+
break
|
|
337
442
|
else:
|
|
338
|
-
|
|
443
|
+
log.warning("No valid molecule found in series for depiction alignment")
|
|
444
|
+
return
|
|
339
445
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
446
|
+
# Suppress alignment warnings (there are lots of needless warnings)
|
|
447
|
+
level = oechem.OEThrow.GetLevel()
|
|
448
|
+
oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Error)
|
|
343
449
|
|
|
344
|
-
|
|
345
|
-
|
|
450
|
+
# noinspection PyBroadException
|
|
451
|
+
try:
|
|
452
|
+
# Create the aligner
|
|
453
|
+
aligner = create_aligner(ref=ref, method=method)
|
|
346
454
|
|
|
455
|
+
for mol in arr:
|
|
456
|
+
_ = aligner(mol)
|
|
347
457
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
def __init__(self, pandas_obj: pd.Series):
|
|
351
|
-
if not isinstance(pandas_obj.dtype, oepd.MoleculeDtype):
|
|
352
|
-
raise TypeError(
|
|
353
|
-
"recalculate_depiction_coordinates only works on molecule columns (oepandas.MoleculeDtype). If this "
|
|
354
|
-
"column has molecules, use pd.Series.as_molecule to convert to a molecule column first."
|
|
355
|
-
)
|
|
458
|
+
except Exception as ex:
|
|
459
|
+
log.debug("Error aligning molecules: %s", ex)
|
|
356
460
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
self,
|
|
361
|
-
*,
|
|
362
|
-
clear_coords: bool = True,
|
|
363
|
-
add_depction_hydrogens: bool = True,
|
|
364
|
-
perceive_bond_stereo: bool = True,
|
|
365
|
-
suppress_explicit_hydrogens: bool = True,
|
|
366
|
-
orientation: int = oedepict.OEDepictOrientation_Default
|
|
367
|
-
) -> None:
|
|
368
|
-
"""
|
|
369
|
-
Recalculate the depictions for a molecule series.
|
|
461
|
+
# Restore OEThrow
|
|
462
|
+
finally:
|
|
463
|
+
oechem.OEThrow.SetLevel(level)
|
|
370
464
|
|
|
371
|
-
See the following link for more information:
|
|
372
|
-
https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html
|
|
373
465
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
:param suppress_explicit_hydrogens: Suppress explicit hydrogens
|
|
378
|
-
:param orientation: Preferred 2D orientation
|
|
379
|
-
"""
|
|
380
|
-
# Create the depiction options
|
|
381
|
-
opts = oedepict.OEPrepareDepictionOptions()
|
|
382
|
-
opts.SetClearCoords(clear_coords)
|
|
383
|
-
opts.SetAddDepictionHydrogens(add_depction_hydrogens)
|
|
466
|
+
########################################################################################################################
|
|
467
|
+
# CNotebook DataFrame accessor extensions for OEPandas .chem accessor
|
|
468
|
+
########################################################################################################################
|
|
384
469
|
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
470
|
+
def _dataframe_recalculate_depiction_coordinates(
|
|
471
|
+
self,
|
|
472
|
+
*,
|
|
473
|
+
molecule_columns: str | Iterable[str] | None = None,
|
|
474
|
+
clear_coords: bool = True,
|
|
475
|
+
add_depction_hydrogens: bool = True,
|
|
476
|
+
perceive_bond_stereo: bool = True,
|
|
477
|
+
suppress_explicit_hydrogens: bool = True,
|
|
478
|
+
orientation: int = oedepict.OEDepictOrientation_Default
|
|
479
|
+
) -> None:
|
|
480
|
+
"""
|
|
481
|
+
Recalculate the depictions for a one or more molecule series in a DataFrame. If molecule_columns is None,
|
|
482
|
+
which is the default, then all molecule columns will have their depictions recalculated
|
|
483
|
+
|
|
484
|
+
See the following link for more information:
|
|
485
|
+
https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html
|
|
486
|
+
|
|
487
|
+
:param molecule_columns: Optional molecule column(s) to have depictions recalculated
|
|
488
|
+
:param clear_coords: Clear existing 2D coordinates
|
|
489
|
+
:param add_depction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
|
|
490
|
+
:param perceive_bond_stereo: Perceive wedge/hash bond stereo
|
|
491
|
+
:param suppress_explicit_hydrogens: Suppress explicit hydrogens
|
|
492
|
+
:param orientation: Preferred 2D orientation
|
|
493
|
+
"""
|
|
494
|
+
if molecule_columns is None:
|
|
495
|
+
molecule_columns = set()
|
|
388
496
|
|
|
497
|
+
for col in self._obj.columns:
|
|
498
|
+
if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
|
|
499
|
+
molecule_columns.add(col)
|
|
389
500
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
def __init__(self, pandas_obj: pd.Series):
|
|
393
|
-
self._obj = pandas_obj
|
|
501
|
+
elif isinstance(molecule_columns, str):
|
|
502
|
+
molecule_columns = {molecule_columns}
|
|
394
503
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
Reset depiction callbacks for a molecule series
|
|
398
|
-
"""
|
|
399
|
-
# Check if array has metadata attribute (should be true for oepandas arrays)
|
|
400
|
-
if hasattr(self._obj.array, "metadata"):
|
|
401
|
-
# Direct assignment to help IDE understand this has metadata
|
|
402
|
-
arr = self._obj.array
|
|
403
|
-
assert isinstance(arr, oepd.MoleculeArray)
|
|
404
|
-
_ = arr.metadata.pop("cnotebook", None)
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
@register_series_accessor("align_depictions")
|
|
408
|
-
class SeriesAlignDepictionsAccessor:
|
|
409
|
-
def __init__(self, pandas_obj: pd.Series):
|
|
410
|
-
if not isinstance(pandas_obj.dtype, oepd.MoleculeDtype):
|
|
411
|
-
raise TypeError(
|
|
412
|
-
"align_depictions only works on molecule columns (oepandas.MoleculeDtype). If this "
|
|
413
|
-
"column has molecules, use pd.Series.as_molecule to convert to a molecule column first."
|
|
414
|
-
)
|
|
504
|
+
else:
|
|
505
|
+
molecule_columns = set(molecule_columns)
|
|
415
506
|
|
|
416
|
-
|
|
507
|
+
# Recalculate the column depictions
|
|
508
|
+
for col in molecule_columns:
|
|
417
509
|
|
|
418
|
-
|
|
419
|
-
self,
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
:param kwargs: Keyword arguments for aligner
|
|
428
|
-
:return: Aligned molecule depictions
|
|
429
|
-
"""
|
|
430
|
-
# Get the rendering context for creating the displays
|
|
510
|
+
if col in self._obj.columns:
|
|
511
|
+
if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
|
|
512
|
+
self._obj[col].chem.recalculate_depiction_coordinates(
|
|
513
|
+
clear_coords=clear_coords,
|
|
514
|
+
add_depction_hydrogens=add_depction_hydrogens,
|
|
515
|
+
perceive_bond_stereo=perceive_bond_stereo,
|
|
516
|
+
suppress_explicit_hydrogens=suppress_explicit_hydrogens,
|
|
517
|
+
orientation=orientation
|
|
518
|
+
)
|
|
431
519
|
|
|
432
|
-
|
|
433
|
-
|
|
520
|
+
else:
|
|
521
|
+
log.warning(f'Column {col} does not have a MoleculeDtype')
|
|
434
522
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
523
|
+
else:
|
|
524
|
+
log.warning(f'{col} not found in DataFrame columns: ({", ".join(self._obj.columns)})')
|
|
525
|
+
molecule_columns.remove(col)
|
|
438
526
|
|
|
439
|
-
if isinstance(ref, str) and ref == "first":
|
|
440
|
-
for mol in arr:
|
|
441
|
-
if mol is not None and mol.IsValid():
|
|
442
|
-
ref = mol.CreateCopy()
|
|
443
|
-
break
|
|
444
|
-
else:
|
|
445
|
-
log.warning("No valid molecule found in series for depiction alignment")
|
|
446
|
-
return
|
|
447
527
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
528
|
+
def _dataframe_reset_depictions(self, *, molecule_columns: str | Iterable[str] | None = None) -> None:
|
|
529
|
+
"""
|
|
530
|
+
Reset depiction callbacks for one or more columns
|
|
531
|
+
"""
|
|
532
|
+
columns = set()
|
|
533
|
+
if molecule_columns is None:
|
|
534
|
+
columns.update(self._obj.columns)
|
|
451
535
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
# Create the aligner
|
|
455
|
-
aligner = create_aligner(ref=ref, method=method)
|
|
536
|
+
elif isinstance(molecule_columns, str):
|
|
537
|
+
columns.add(molecule_columns)
|
|
456
538
|
|
|
457
|
-
|
|
458
|
-
|
|
539
|
+
else:
|
|
540
|
+
columns.update(molecule_columns)
|
|
459
541
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
542
|
+
# Filter invalid and non-molecule columns
|
|
543
|
+
for col in filter(
|
|
544
|
+
lambda c: c in self._obj.columns and isinstance(self._obj[c].dtype, oepd.MoleculeDtype),
|
|
545
|
+
columns
|
|
546
|
+
):
|
|
547
|
+
self._obj[col].chem.reset_depictions()
|
|
463
548
|
|
|
464
|
-
# Restore OEThrow
|
|
465
|
-
finally:
|
|
466
|
-
oechem.OEThrow.SetLevel(level)
|
|
467
549
|
|
|
550
|
+
def _dataframe_clear_formatting_rules(self, molecule_columns: str | Iterable[str] | None = None) -> None:
|
|
551
|
+
"""
|
|
552
|
+
Clear all formatting rule callbacks from one or more molecule columns.
|
|
468
553
|
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
554
|
+
This removes any callbacks applied to molecules prior to rendering,
|
|
555
|
+
such as highlighting. Unlike reset_depictions which removes the entire
|
|
556
|
+
rendering context, this method only clears the callbacks while preserving
|
|
557
|
+
other context settings like image dimensions and styling.
|
|
472
558
|
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
def __init__(self, pandas_obj: pd.DataFrame):
|
|
476
|
-
self._obj = pandas_obj
|
|
477
|
-
|
|
478
|
-
def __call__(
|
|
479
|
-
self,
|
|
480
|
-
*,
|
|
481
|
-
molecule_columns: str | Iterable[str] | None = None,
|
|
482
|
-
clear_coords: bool = True,
|
|
483
|
-
add_depction_hydrogens: bool = True,
|
|
484
|
-
perceive_bond_stereo: bool = True,
|
|
485
|
-
suppress_explicit_hydrogens: bool = True,
|
|
486
|
-
orientation: int = oedepict.OEDepictOrientation_Default
|
|
487
|
-
) -> None:
|
|
488
|
-
"""
|
|
489
|
-
Recalculate the depictions for a one or more molecule series in a DataFrame. If molecule_columns is None,
|
|
490
|
-
which is the default, then all molecule columns will have their depictions recalculated
|
|
491
|
-
|
|
492
|
-
See the following link for more information:
|
|
493
|
-
https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html
|
|
494
|
-
|
|
495
|
-
:param molecule_columns: Optional molecule column(s) to have depictions recalculated
|
|
496
|
-
:param clear_coords: Clear existing 2D coordinates
|
|
497
|
-
:param add_depction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
|
|
498
|
-
:param perceive_bond_stereo: Perceive wedge/hash bond stereo
|
|
499
|
-
:param suppress_explicit_hydrogens: Suppress explicit hydrogens
|
|
500
|
-
:param orientation: Preferred 2D orientation
|
|
501
|
-
"""
|
|
502
|
-
if molecule_columns is None:
|
|
503
|
-
molecule_columns = set()
|
|
559
|
+
:param molecule_columns: Optional molecule column(s) to clear formatting rules from.
|
|
560
|
+
If None, clears formatting rules from all molecule columns.
|
|
504
561
|
|
|
505
|
-
|
|
506
|
-
if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
|
|
507
|
-
molecule_columns.add(col)
|
|
562
|
+
Example::
|
|
508
563
|
|
|
509
|
-
|
|
510
|
-
|
|
564
|
+
# Clear formatting rules from all molecule columns
|
|
565
|
+
df.chem.clear_formatting_rules()
|
|
511
566
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
# Recalculate the column depictions
|
|
516
|
-
for col in molecule_columns:
|
|
517
|
-
|
|
518
|
-
if col in self._obj.columns:
|
|
519
|
-
if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
|
|
520
|
-
self._obj[col].recalculate_depiction_coordinates(
|
|
521
|
-
clear_coords=clear_coords,
|
|
522
|
-
add_depction_hydrogens=add_depction_hydrogens,
|
|
523
|
-
perceive_bond_stereo=perceive_bond_stereo,
|
|
524
|
-
suppress_explicit_hydrogens=suppress_explicit_hydrogens,
|
|
525
|
-
orientation=orientation
|
|
526
|
-
)
|
|
567
|
+
# Clear formatting rules from a specific column
|
|
568
|
+
df.chem.clear_formatting_rules("smiles")
|
|
527
569
|
|
|
528
|
-
|
|
529
|
-
|
|
570
|
+
# Clear formatting rules from multiple columns
|
|
571
|
+
df.chem.clear_formatting_rules(["mol1", "mol2"])
|
|
572
|
+
"""
|
|
573
|
+
columns = set()
|
|
574
|
+
if molecule_columns is None:
|
|
575
|
+
columns.update(self._obj.columns)
|
|
530
576
|
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
molecule_columns.remove(col)
|
|
577
|
+
elif isinstance(molecule_columns, str):
|
|
578
|
+
columns.add(molecule_columns)
|
|
534
579
|
|
|
580
|
+
else:
|
|
581
|
+
columns.update(molecule_columns)
|
|
535
582
|
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
583
|
+
# Filter invalid and non-molecule columns
|
|
584
|
+
for col in filter(
|
|
585
|
+
lambda c: c in self._obj.columns and isinstance(self._obj[c].dtype, oepd.MoleculeDtype),
|
|
586
|
+
columns
|
|
587
|
+
):
|
|
588
|
+
self._obj[col].chem.clear_formatting_rules()
|
|
540
589
|
|
|
541
|
-
def __call__(self, *, molecule_columns: str | Iterable[str] | None = None) -> None:
|
|
542
|
-
"""
|
|
543
|
-
Reset depiction callbacks for one or more columns
|
|
544
|
-
"""
|
|
545
|
-
columns = set()
|
|
546
|
-
if molecule_columns is None:
|
|
547
|
-
columns.update(self._obj.columns)
|
|
548
590
|
|
|
549
|
-
|
|
550
|
-
|
|
591
|
+
def _dataframe_highlight(
|
|
592
|
+
self,
|
|
593
|
+
molecule_column: str,
|
|
594
|
+
pattern: Iterable[str] | str | oechem.OESubSearch | Iterable[oechem.OESubSearch],
|
|
595
|
+
*,
|
|
596
|
+
color: oechem.OEColor | oechem.OEColorIter | None = None,
|
|
597
|
+
style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
|
|
598
|
+
) -> None:
|
|
599
|
+
"""
|
|
600
|
+
Highlight chemical features in molecules within a specified column.
|
|
551
601
|
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
for col in filter(
|
|
557
|
-
lambda c: c in self._obj.columns and isinstance(self._obj[c].dtype, oepd.MoleculeDtype),
|
|
558
|
-
columns
|
|
559
|
-
):
|
|
560
|
-
self._obj[col].reset_depictions()
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
@register_dataframe_accessor("highlight_using_column")
|
|
564
|
-
class HighlightUsingColumnAccessor:
|
|
565
|
-
def __init__(self, pandas_obj: pd.DataFrame):
|
|
566
|
-
self._obj = pandas_obj
|
|
567
|
-
|
|
568
|
-
def __call__(
|
|
569
|
-
self,
|
|
570
|
-
molecule_column: str,
|
|
571
|
-
pattern_column: str,
|
|
572
|
-
*,
|
|
573
|
-
highlighted_column: str = "highlighted_substructures",
|
|
574
|
-
ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | None = None,
|
|
575
|
-
alignment_opts: oedepict.OEAlignmentOptions | None = None,
|
|
576
|
-
prepare_opts: oedepict.OEPrepareDepictionOptions | None = None,
|
|
577
|
-
inplace: bool = False
|
|
578
|
-
) -> pd.DataFrame:
|
|
579
|
-
"""
|
|
580
|
-
Highlight molecules based on the value of another column. The column produced is a DisplayArray column, so
|
|
581
|
-
the results are not suitable for other molecular calculations.
|
|
582
|
-
|
|
583
|
-
The other column can contain:
|
|
584
|
-
- Comma or whitespace delimited string of SMARTS patterns
|
|
585
|
-
- oechem.OESubSearch or oechem.OEMCSSearch object
|
|
586
|
-
- Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
|
|
587
|
-
|
|
588
|
-
:param molecule_column: Name of the molecule column
|
|
589
|
-
:param pattern_column: Name of the pattern column
|
|
590
|
-
:param highlighted_column: Optional name of the column with highlighted structures
|
|
591
|
-
:param ref: Optional reference for aligning depictions
|
|
592
|
-
:param alignment_opts: Optional depiction alignment options (oedepict.OEAlignmentOptions)
|
|
593
|
-
:param prepare_opts: Optional depiction preparation options (oedepict.OEPrepareDepictionOptions)
|
|
594
|
-
:param inplace: Modify the DataFrame in place
|
|
595
|
-
:return: Modified DataFrame
|
|
596
|
-
"""
|
|
597
|
-
# Object we are operating on
|
|
598
|
-
df = self._obj if inplace else self._obj.copy()
|
|
602
|
+
The pattern argument can be:
|
|
603
|
+
- SMARTS pattern
|
|
604
|
+
- oechem.OESubSearch or oechem.OEMCSSearch object
|
|
605
|
+
- Iterable of SMARTS patterns, oechem.OESubSearch, and/or oechem.OEMCSSearch objects
|
|
599
606
|
|
|
600
|
-
|
|
601
|
-
|
|
607
|
+
:param molecule_column: Name of the molecule column to highlight.
|
|
608
|
+
:param pattern: Pattern(s) to highlight in the molecules.
|
|
609
|
+
:param color: Highlight color(s). Can be a single oechem.OEColor or an oechem.OEColorIter
|
|
610
|
+
(e.g., oechem.OEGetLightColors()). Defaults to oechem.OEGetLightColors().
|
|
611
|
+
:param style: Highlight style. Can be an int (OEHighlightStyle constant) or a string
|
|
612
|
+
("overlay_default", "overlay_ball_and_stick"). Defaults to "overlay_default".
|
|
602
613
|
|
|
603
|
-
|
|
604
|
-
raise TypeError(
|
|
605
|
-
f"highlight_using_column only works on molecule columns (oepandas.MoleculeDtype). If {molecule_column}"
|
|
606
|
-
" has molecules, use pd.Series.as_molecule to convert to a molecule column first."
|
|
607
|
-
)
|
|
614
|
+
Example::
|
|
608
615
|
|
|
609
|
-
|
|
610
|
-
|
|
616
|
+
# Highlight benzene rings in the 'smiles' column
|
|
617
|
+
df.chem.highlight("smiles", "c1ccccc1")
|
|
611
618
|
|
|
612
|
-
#
|
|
613
|
-
|
|
614
|
-
|
|
619
|
+
# Highlight multiple patterns
|
|
620
|
+
df.chem.highlight("smiles", ["c1ccccc1", "[OH]"])
|
|
621
|
+
"""
|
|
622
|
+
if molecule_column not in self._obj.columns:
|
|
623
|
+
raise ValueError(f'Column {molecule_column} not found in DataFrame columns: ({", ".join(self._obj.columns)})')
|
|
615
624
|
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
assert isinstance(arr, oepd.MoleculeArray)
|
|
622
|
-
ctx = get_series_context(arr.metadata)
|
|
625
|
+
if not isinstance(self._obj[molecule_column].dtype, oepd.MoleculeDtype):
|
|
626
|
+
raise TypeError(
|
|
627
|
+
f"highlight only works on molecule columns (oepandas.MoleculeDtype). Column '{molecule_column}' "
|
|
628
|
+
f"has type {self._obj[molecule_column].dtype}."
|
|
629
|
+
)
|
|
623
630
|
|
|
624
|
-
|
|
625
|
-
|
|
631
|
+
# Delegate to the series-level highlight (which works in Pandas)
|
|
632
|
+
self._obj[molecule_column].chem.highlight(pattern, color=color, style=style)
|
|
626
633
|
|
|
627
|
-
mol = row[molecule_column]
|
|
628
|
-
if isinstance(mol, oechem.OEMolBase):
|
|
629
634
|
|
|
630
|
-
|
|
631
|
-
|
|
635
|
+
def _dataframe_copy_molecules(
        self,
        source_column: str,
        dest_column: str,
) -> pd.DataFrame:
    """
    Create a deep copy of molecules from one column to a new column.

    This creates independent copies of all molecules, allowing modifications
    (such as highlighting or alignment) to the new column without affecting
    the original. The copying itself is delegated to the series-level
    ``copy_molecules`` method.

    Note that the new column is assigned directly on the DataFrame wrapped by
    this accessor, and that same DataFrame object is returned (the frame itself
    is not copied).

    :param source_column: Name of the source molecule column.
    :param dest_column: Name of the new column to create with copied molecules.
    :returns: DataFrame with the new molecule column added.
    :raises ValueError: If ``source_column`` is not a column of the DataFrame.
    :raises TypeError: If ``source_column`` does not have dtype ``oepandas.MoleculeDtype``.

    Example::

        # Create a copy of molecules for alignment
        df = df.chem.copy_molecules("Original", "Aligned")
        df.chem.highlight("Aligned", "c1ccccc1")
    """
    frame = self._obj

    if source_column not in frame.columns:
        # Column labels are not guaranteed to be strings (e.g. integer labels), and str.join() raises
        # TypeError on non-string items, so stringify them before building the message.
        available = ", ".join(map(str, frame.columns))
        raise ValueError(f'Column {source_column} not found in DataFrame columns: ({available})')

    source = frame[source_column]
    if not isinstance(source.dtype, oepd.MoleculeDtype):
        raise TypeError(
            f"copy_molecules only works on molecule columns (oepandas.MoleculeDtype). Column '{source_column}' "
            f"has type {source.dtype}."
        )

    # Use the series-level copy_molecules and assign to the new column
    frame[dest_column] = source.chem.copy_molecules()
    return frame
|
|
649
669
|
|
|
650
|
-
for p in patterns:
|
|
651
670
|
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
671
|
+
def _parse_highlight_patterns(patterns: Any) -> list:
    """Convert one pattern-column value into a list of valid ``oechem.OESubSearch`` objects.

    Strings are split on ``SMARTS_DELIMITER_RE`` and each piece is compiled as a SMARTS query. An
    ``oechem.OESubSearch`` is used as-is. Any other iterable is walked and each element is handled the same
    way. Invalid queries are dropped silently; values of an unsupported type are logged and skipped.
    """
    if isinstance(patterns, (str, oechem.OESubSearch)):
        candidates = [patterns]
    elif isinstance(patterns, Iterable):
        candidates = patterns
    else:
        log.warning(f'Do not know how to highlight using: {type(patterns).__name__}')
        return []

    substructures = []
    for candidate in candidates:
        if isinstance(candidate, str):
            for smarts in re.split(SMARTS_DELIMITER_RE, candidate):
                # Splitting can yield empty tokens (e.g. leading/trailing delimiters); there is nothing to
                # compile for those, so skip them rather than handing an empty SMARTS to the toolkit.
                if not smarts:
                    continue
                ss = oechem.OESubSearch(smarts)
                if ss.IsValid():
                    substructures.append(ss)

        elif isinstance(candidate, oechem.OESubSearch):
            if candidate.IsValid():
                substructures.append(candidate)

        else:
            log.warning(f'Do not know how to highlight using: {type(candidate).__name__}')

    return substructures


def _dataframe_highlight_using_column(
        self,
        molecule_column: str,
        pattern_column: str,
        *,
        highlighted_column: str = "highlighted_substructures",
        color: oechem.OEColor | oechem.OEColorIter | None = None,
        style: int | Literal["overlay_default", "overlay_ball_and_stick"] = "overlay_default",
        inplace: bool = False
) -> pd.DataFrame:
    """
    Highlight molecules based on the value of another column. The column produced is a DisplayArray column, so
    the results are not suitable for other molecular calculations.

    The other column can contain:
        - Comma or whitespace delimited string of SMARTS patterns
        - oechem.OESubSearch object
        - Iterable of SMARTS pattern strings and/or oechem.OESubSearch objects

    Values of any other type (including oechem.OEMCSSearch, which this implementation does not handle) are
    logged with a warning and ignored. Rows whose molecule is not an ``oechem.OEMolBase`` get ``None`` in the
    highlighted column.

    :param molecule_column: Name of the molecule column.
    :param pattern_column: Name of the pattern column.
    :param highlighted_column: Optional name of the column with highlighted structures.
    :param color: Highlight color(s). Can be a single oechem.OEColor or an oechem.OEColorIter
        (e.g., oechem.OEGetLightColors()). Defaults to oechem.OEGetLightColors().
    :param style: Highlight style. Can be an int (OEHighlightStyle constant) or a string
        ("overlay_default", "overlay_ball_and_stick"). Defaults to "overlay_default".
    :param inplace: Modify the DataFrame in place.
    :returns: Modified DataFrame.
    :raises KeyError: If ``molecule_column`` or ``pattern_column`` is not a column of the DataFrame.
    :raises TypeError: If ``molecule_column`` does not have dtype ``oepandas.MoleculeDtype``.
    """
    source = self._obj

    # Validate before (possibly) copying so that a bad call does not pay for a DataFrame copy.
    # Column labels may be non-strings, hence the map(str, ...) when building the messages.
    if molecule_column not in source.columns:
        raise KeyError(
            f'{molecule_column} not found in DataFrame columns: ({", ".join(map(str, source.columns))})'
        )

    if not isinstance(source[molecule_column].dtype, oepd.MoleculeDtype):
        raise TypeError(
            f"highlight_using_column only works on molecule columns (oepandas.MoleculeDtype). If {molecule_column}"
            " has molecules, use df.chem.as_molecule() to convert to a molecule column first."
        )

    if pattern_column not in source.columns:
        raise KeyError(
            f'{pattern_column} not found in DataFrame columns: ({", ".join(map(str, source.columns))})'
        )

    # Object we are operating on
    df = source if inplace else source.copy()

    # Default color
    if color is None:
        color = oechem.OEGetLightColors()

    # Determine highlighting approach based on style
    use_overlay = isinstance(style, str) and style in ("overlay_default", "overlay_ball_and_stick")

    # Check if color is compatible with overlay
    if use_overlay and isinstance(color, oechem.OEColor):
        log.warning(
            "Overlay coloring is not compatible with a single oechem.OEColor. Falling back to standard highlighting")
        use_overlay = False
        style = oedepict.OEHighlightStyle_BallAndStick

    # Traditional highlighting uses exactly one color. Resolve it once, up front: pulling "the first color"
    # from the iterator inside the row loop is loop-invariant work and, if the iterator is one-shot, would
    # hand each row a different color. NOTE(review): confirm OEColorIter iteration is stateful.
    highlight_color = None
    if not use_overlay:
        if isinstance(color, oechem.OEColor):
            highlight_color = color
        else:
            highlight_color = next(iter(color), oechem.OELightBlue)

    # Get the rendering context for creating the displays
    arr = df[molecule_column].array
    assert isinstance(arr, oepd.MoleculeArray)
    ctx = get_series_context(arr.metadata)

    # Create the display objects, one per row (None where there is no molecule to draw)
    displays = []

    for mol, patterns in zip(df[molecule_column], df[pattern_column]):
        if not isinstance(mol, oechem.OEMolBase):
            displays.append(None)
            continue

        # Create the display
        disp = oemol_to_disp(mol, ctx=ctx)

        # Parse whatever the pattern column holds for this row
        substructures = _parse_highlight_patterns(patterns)

        if use_overlay:
            # Overlay highlighting
            highlight = oedepict.OEHighlightOverlayByBallAndStick(color)
            for ss in substructures:
                oedepict.OEAddHighlightOverlay(disp, highlight, ss.Match(mol, True))
        else:
            # Traditional highlighting
            for ss in substructures:
                for match in ss.Match(mol, True):
                    oedepict.OEAddHighlighting(disp, highlight_color, style, match)

        displays.append(disp)

    df[highlighted_column] = pd.Series(displays, index=df.index, dtype=oepd.DisplayDtype())
    return df
|
|
681
808
|
|
|
682
809
|
|
|
683
810
|
class ColorBondByOverlapScore(oegrapheme.OEBondGlyphBase):
|
|
811
|
+
"""Bond glyph that colors bonds by fingerprint overlap score.
|
|
812
|
+
|
|
813
|
+
Used internally by fingerprint similarity visualization to highlight
|
|
814
|
+
bonds based on their contribution to molecular similarity.
|
|
815
|
+
|
|
816
|
+
See: https://docs.eyesopen.com/toolkits/cookbook/python/depiction/simcalc.html
|
|
684
817
|
"""
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
818
|
+
|
|
819
|
+
def __init__(self, cg: oechem.OELinearColorGradient, tag: int):
|
|
820
|
+
"""Create a bond coloring glyph.
|
|
821
|
+
|
|
822
|
+
:param cg: Color gradient to map overlap scores to colors.
|
|
823
|
+
:param tag: OEChem data tag containing overlap scores on bonds.
|
|
824
|
+
"""
|
|
689
825
|
oegrapheme.OEBondGlyphBase.__init__(self)
|
|
690
826
|
self.colorg = cg
|
|
691
827
|
self.tag = tag
|
|
@@ -717,196 +853,306 @@ class ColorBondByOverlapScore(oegrapheme.OEBondGlyphBase):
|
|
|
717
853
|
return ColorBondByOverlapScore(self.colorg, self.tag).__disown__()
|
|
718
854
|
|
|
719
855
|
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
def __init__(self, pandas_obj: pd.DataFrame):
|
|
723
|
-
self._obj = pandas_obj
|
|
724
|
-
self._tag = oechem.OEGetTag("fingerprint_overlap")
|
|
725
|
-
|
|
726
|
-
def __call__(
|
|
727
|
-
self,
|
|
728
|
-
molecule_column: str,
|
|
729
|
-
ref: oechem.OEMolBase | None = None,
|
|
730
|
-
*,
|
|
731
|
-
tanimoto_column="fingerprint_tanimoto",
|
|
732
|
-
reference_similarity_column="reference_similarity",
|
|
733
|
-
target_similarity_column="target_similarity",
|
|
734
|
-
fptype: str = "tree",
|
|
735
|
-
num_bits: int = 4096,
|
|
736
|
-
min_distance: int = 0,
|
|
737
|
-
max_distance: int = 4,
|
|
738
|
-
atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
|
|
739
|
-
bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond,
|
|
740
|
-
inplace: bool = False
|
|
741
|
-
) -> pd.DataFrame:
|
|
742
|
-
"""
|
|
743
|
-
Color molecules by fingerprint similarity
|
|
744
|
-
:param ref: Reference molecule
|
|
745
|
-
:param fptype: Fingerprint type
|
|
746
|
-
:param num_bits: Number of bits in the fingerprint
|
|
747
|
-
:param min_distance: Minimum distance/radius for path/circular/tree
|
|
748
|
-
:param max_distance: Maximum distance/radius for path/circular/tree
|
|
749
|
-
:param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
|
|
750
|
-
:param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
|
|
751
|
-
:return:
|
|
752
|
-
"""
|
|
753
|
-
# Preprocess
|
|
754
|
-
df = self._obj if inplace else self._obj.copy()
|
|
856
|
+
# Store the fingerprint tag for fingerprint_similarity.
# Resolved once at import time; _dataframe_fingerprint_similarity reads it and uses it as the data tag
# under which per-bond overlap counts are stored on bonds (bond.SetData(tag, ...)).
_fingerprint_overlap_tag = oechem.OEGetTag("fingerprint_overlap")
|
|
755
858
|
|
|
756
|
-
if molecule_column not in df.columns:
|
|
757
|
-
raise KeyError(f'Molecule column not found in DataFrame: {molecule_column}')
|
|
758
859
|
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
860
|
+
def _dataframe_fingerprint_similarity(
        self,
        molecule_column: str,
        ref: oechem.OEMolBase | None = None,
        *,
        tanimoto_column="fingerprint_tanimoto",
        reference_similarity_column="reference_similarity",
        target_similarity_column="target_similarity",
        fptype: str = "tree",
        num_bits: int = 4096,
        min_distance: int = 0,
        max_distance: int = 4,
        atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
        bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond,
        inplace: bool = False
) -> pd.DataFrame:
    """
    Color molecules by fingerprint similarity

    For every row, the molecule is compared against the reference molecule. Three columns are added: the
    Tanimoto coefficient, a display of the reference colored by bond overlap, and a display of the target
    colored by bond overlap. Rows whose molecule is missing/invalid, or whose fingerprint is invalid, get
    NaN in the Tanimoto column and ``None`` in both display columns.

    If no usable reference molecule or reference fingerprint is available, a warning is logged and the
    DataFrame is returned without the three columns being added.

    :param molecule_column: Name of the molecule column
    :param ref: Reference molecule; if None, the first valid molecule in ``molecule_column`` is used
    :param tanimoto_column: Name of the tanimoto column
    :param reference_similarity_column: Name of the reference similarity column
    :param target_similarity_column: Name of the target similarity column
    :param fptype: Fingerprint type
    :param num_bits: Number of bits in the fingerprint
    :param min_distance: Minimum distance/radius for path/circular/tree
    :param max_distance: Maximum distance/radius for path/circular/tree
    :param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
    :param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
    :param inplace: Modify the DataFrame in place
    :return: DataFrame with similarity columns
    :raises KeyError: If ``molecule_column`` is not a column of the DataFrame
    :raises TypeError: If ``molecule_column`` does not have dtype ``oepandas.MoleculeDtype``
    """
    tag = _fingerprint_overlap_tag

    # Preprocess
    df = self._obj if inplace else self._obj.copy()

    if molecule_column not in df.columns:
        raise KeyError(f'Molecule column not found in DataFrame: {molecule_column}')

    if not isinstance(df[molecule_column].dtype, oepd.MoleculeDtype):
        raise TypeError("Column {} does not have dtype oepd.MoleculeDtype ({})".format(
            molecule_column, str(df[molecule_column].dtype)))

    # Get the context
    arr = self._obj[molecule_column].array
    assert isinstance(arr, oepd.MoleculeArray)
    ctx = get_series_context(arr.metadata)

    # If we're using the first molecule as our reference
    if ref is None:
        # Guard against missing entries the same way the main loop below does
        ref = next((mol for mol in arr if mol is not None and mol.IsValid()), None)
        if ref is None:
            log.warning(f'No valid reference molecules to use for alignment in column {molecule_column}')
            return df

    # Check reference molecule
    if not ref.IsValid():
        log.warning("Reference molecule is not valid")
        return df

    # Fingerprint maker
    make_fp = fingerprint_maker(
        fptype=fptype,
        num_bits=num_bits,
        min_distance=min_distance,
        max_distance=max_distance,
        atom_type=atom_type,
        bond_type=bond_type
    )

    # Make the reference fingerprint
    ref_fp = make_fp(ref)

    if not ref_fp.IsValid():
        log.warning("Fingerprint from reference molecule is invalid")
        return df

    # Per-row results. Each of these lists gets exactly one entry per row so that they always line up
    # with the DataFrame index when the columns are built at the end.
    ref_displays = []
    targ_displays = []
    tanimotos = []

    # FIXME: See note below regarding the fact we have to cache the reference and target molecule copies
    ref_molecules = []
    targ_molecules = []

    for mol in df[molecule_column]:  # type: oechem.OEMol
        # Values used when this row cannot be scored (missing/invalid molecule or invalid fingerprint)
        tanimoto = float("nan")
        ref_disp = None
        targ_disp = None

        if mol is not None and mol.IsValid():

            # Copy the molecules, because we're modifying them
            targ_mol = oechem.OEMol(mol)
            ref_mol = oechem.OEMol(ref)

            # FIXME: See note below regarding the fact we have to cache the reference and target molecule copies
            targ_molecules.append(targ_mol)
            ref_molecules.append(ref_mol)

            # Create the fingerprint
            targ_fp = make_fp(targ_mol)
            if targ_fp.IsValid():

                # Add the tanimoto
                tanimoto = oegraphsim.OETanimoto(ref_fp, targ_fp)

                # Calculate the similarity: per-bond counters, indexed by bond index
                targ_bonds = oechem.OEUIntArray(targ_mol.GetMaxBondIdx())
                ref_bonds = oechem.OEUIntArray(ref_mol.GetMaxBondIdx())

                # Overlaps
                overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())

                for match in overlaps:
                    for bond in match.GetPatternBonds():
                        ref_bonds[bond.GetIdx()] += 1
                    for bond in match.GetTargetBonds():
                        targ_bonds[bond.GetIdx()] += 1

                for bond in targ_mol.GetBonds():
                    bond.SetData(tag, targ_bonds[bond.GetIdx()])

                for bond in ref_mol.GetBonds():
                    bond.SetData(tag, ref_bonds[bond.GetIdx()])

                # A molecule without bonds yields an empty counter array, and max() of an empty
                # sequence raises ValueError, so fall back to 0 for that side.
                # noinspection PyTypeChecker
                maxvalue = max(0, max(targ_bonds, default=0), max(ref_bonds, default=0))

                # Create the color gradient
                colorg = oechem.OELinearColorGradient()
                colorg.AddStop(oechem.OEColorStop(0.0, oechem.OEPinkTint))
                colorg.AddStop(oechem.OEColorStop(1.0, oechem.OEYellow))
                colorg.AddStop(oechem.OEColorStop(maxvalue, oechem.OEDarkGreen))

                # Function that will color the bonds
                bondglyph = ColorBondByOverlapScore(colorg, tag)

                # Align the molecules
                oedepict.OEPrepareDepiction(ref_mol, False)
                oedepict.OEPrepareDepiction(targ_mol, False)

                # The overlaps are requested a second time here rather than reusing the object iterated above.
                # NOTE(review): presumably because the first result was consumed by the counting loop - confirm.
                overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())
                oedepict.OEPrepareMultiAlignedDepiction(targ_mol, ref_mol, overlaps)

                # Create the displays
                ref_disp = oemol_to_disp(ref_mol, ctx=ctx)
                targ_disp = oemol_to_disp(targ_mol, ctx=ctx)

                # Color the displays
                oegrapheme.OEAddGlyph(ref_disp, bondglyph, oechem.IsTrueBond())
                oegrapheme.OEAddGlyph(targ_disp, bondglyph, oechem.IsTrueBond())

        tanimotos.append(tanimoto)
        ref_displays.append(ref_disp)
        targ_displays.append(targ_disp)

    # Add the columns
    df[tanimoto_column] = pd.Series(
        tanimotos,
        index=df.index,
        dtype=float
    )

    # FIXME: Submitted to OpenEye as Case #00037423
    # We need to keep the copies of the molecules that we made above, or they will be garbage collected
    # and the OE2DMolDisplay objects will segfault. We'll keep those in the metadata now for the arrays.
    ref_arr = oepd.DisplayArray(ref_displays, metadata={"molecules": ref_molecules})
    targ_arr = oepd.DisplayArray(targ_displays, metadata={"molecules": targ_molecules})

    df[reference_similarity_column] = pd.Series(
        ref_arr,
        index=df.index,
        dtype=oepd.DisplayDtype()
    )

    df[target_similarity_column] = pd.Series(
        targ_arr,
        index=df.index,
        dtype=oepd.DisplayDtype()
    )

    return df
|
|
1056
|
+
|
|
1057
|
+
|
|
1058
|
+
########################################################################################################################
|
|
1059
|
+
# Monkey-patch CNotebook methods onto OEPandas accessors
|
|
1060
|
+
########################################################################################################################
|
|
1061
|
+
|
|
1062
|
+
# Import the OEPandas accessor classes
# NOTE: this import sits below the function definitions rather than at the top of the module; the
# assignments that follow attach those module-level functions to the imported classes.
from oepandas.pandas_extensions import OESeriesAccessor, OEDataFrameAccessor

# Add cnotebook methods to Series accessor
# Each function takes ``self`` as its first parameter, so once assigned as a class attribute it is
# callable as a method on accessor instances.
OESeriesAccessor.highlight = _series_highlight
OESeriesAccessor.recalculate_depiction_coordinates = _series_recalculate_depiction_coordinates
OESeriesAccessor.reset_depictions = _series_reset_depictions
OESeriesAccessor.clear_formatting_rules = _series_clear_formatting_rules
OESeriesAccessor.align_depictions = _series_align_depictions

# Add cnotebook methods to DataFrame accessor
# (e.g. the ``df.chem.highlight(...)`` / ``df.chem.copy_molecules(...)`` calls shown in the docstring examples)
OEDataFrameAccessor.recalculate_depiction_coordinates = _dataframe_recalculate_depiction_coordinates
OEDataFrameAccessor.reset_depictions = _dataframe_reset_depictions
OEDataFrameAccessor.clear_formatting_rules = _dataframe_clear_formatting_rules
OEDataFrameAccessor.copy_molecules = _dataframe_copy_molecules
OEDataFrameAccessor.highlight = _dataframe_highlight
OEDataFrameAccessor.highlight_using_column = _dataframe_highlight_using_column
OEDataFrameAccessor.fingerprint_similarity = _dataframe_fingerprint_similarity
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
########################################################################################################################
|
|
1083
|
+
# MolGrid accessor methods for Series and DataFrame
|
|
1084
|
+
########################################################################################################################
|
|
1085
|
+
|
|
1086
|
+
def _series_molgrid(
        self,
        title: bool | str | None = True,
        tooltip_fields: list = None,
        **kwargs
):
    """Build an interactive molecule grid from this Series.

    The Series values are passed to ``MolGrid`` as the molecule list and the Series name as the molecule
    column. When the Series exposes a parent DataFrame through its ``_cacher`` attribute, that DataFrame is
    passed along as well; otherwise ``dataframe`` is ``None``.

    :param title: Title display mode. True uses molecule's title, a string
        specifies a field name, None/False hides titles.
    :param tooltip_fields: Fields for tooltip.
    :param kwargs: Additional arguments passed to MolGrid.
    :returns: MolGrid instance.
    """
    # Imported lazily, at call time, from the top-level package
    from cnotebook import MolGrid

    series = self._obj
    molecules = list(series)

    # Try to recover the DataFrame this series was taken from.
    # NOTE(review): ``_cacher`` is a private pandas attribute; presumably (name, weakref-to-parent) - confirm.
    # noinspection PyProtectedMember
    cacher = getattr(series, '_cacher', None)

    parent = None
    if cacher is not None:
        try:
            parent = cacher[1]()
        except (TypeError, KeyError):
            # Unexpected shape for the cacher entry: fall back to having no parent DataFrame
            parent = None

    return MolGrid(
        molecules,
        dataframe=parent,
        mol_col=series.name,
        title=title,
        tooltip_fields=tooltip_fields,
        **kwargs
    )
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def _dataframe_molgrid(
        self,
        mol_col: str,
        title: bool | str | None = True,
        tooltip_fields: list = None,
        **kwargs
):
    """Build an interactive molecule grid from one column of this DataFrame.

    The values of ``mol_col`` are passed to ``MolGrid`` as the molecule list, together with the DataFrame
    itself and the column name.

    :param mol_col: Column containing molecules.
    :param title: Title display mode. True uses molecule's title, a string
        specifies a field name, None/False hides titles.
    :param tooltip_fields: Columns for tooltip.
    :param kwargs: Additional arguments passed to MolGrid.
    :returns: MolGrid instance.
    """
    # Imported lazily, at call time, from the top-level package
    from cnotebook import MolGrid

    frame = self._obj
    molecules = list(frame[mol_col])

    grid = MolGrid(
        molecules,
        dataframe=frame,
        mol_col=mol_col,
        title=title,
        tooltip_fields=tooltip_fields,
        **kwargs
    )
    return grid
|
|
1154
|
+
|
|
1155
|
+
|
|
1156
|
+
# Add molgrid methods to accessors
# Both functions take ``self`` (the accessor instance) and read the wrapped pandas object from ``self._obj``.
OESeriesAccessor.molgrid = _series_molgrid
OEDataFrameAccessor.molgrid = _dataframe_molgrid
|