cnotebook 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnotebook/__init__.py +104 -0
- cnotebook/align.py +390 -0
- cnotebook/context.py +491 -0
- cnotebook/helpers.py +69 -0
- cnotebook/ipython_ext.py +255 -0
- cnotebook/marimo_ext.py +34 -0
- cnotebook/pandas_ext.py +900 -0
- cnotebook/render.py +198 -0
- cnotebook-1.0.1.dist-info/METADATA +275 -0
- cnotebook-1.0.1.dist-info/RECORD +13 -0
- cnotebook-1.0.1.dist-info/WHEEL +5 -0
- cnotebook-1.0.1.dist-info/licenses/LICENSE +21 -0
- cnotebook-1.0.1.dist-info/top_level.txt +1 -0
cnotebook/pandas_ext.py
ADDED
|
@@ -0,0 +1,900 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import logging
|
|
3
|
+
import typing
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import oepandas as oepd
|
|
6
|
+
from pandas.api.extensions import register_dataframe_accessor, register_series_accessor
|
|
7
|
+
from typing import Iterable, Any, Literal, Hashable
|
|
8
|
+
from openeye import oechem, oedepict, oegraphsim, oegrapheme
|
|
9
|
+
from copy import copy as shallow_copy
|
|
10
|
+
from .context import pass_cnotebook_context, get_series_context
|
|
11
|
+
from .helpers import escape_brackets, create_structure_highlighter
|
|
12
|
+
from .align import create_aligner, fingerprint_maker
|
|
13
|
+
from .render import (
|
|
14
|
+
CNotebookContext,
|
|
15
|
+
oemol_to_disp,
|
|
16
|
+
oedisp_to_html,
|
|
17
|
+
render_invalid_molecule,
|
|
18
|
+
render_empty_molecule
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Only register iPython formatters if that is present
|
|
22
|
+
try:
|
|
23
|
+
# noinspection PyProtectedMember,PyPackageRequirements
|
|
24
|
+
from IPython import get_ipython
|
|
25
|
+
ipython_present = True
|
|
26
|
+
except ModuleNotFoundError:
|
|
27
|
+
ipython_present = False
|
|
28
|
+
|
|
29
|
+
if typing.TYPE_CHECKING:
|
|
30
|
+
from .context import CNotebookContext
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Separator between SMARTS patterns packed into a single string: a run of one or more '|', carriage return,
# newline or tab characters, together with any whitespace surrounding that run
SMARTS_DELIMITER_RE = re.compile(r'\s*[|\r\n\t]+\s*')

# Logger registered under the name "cnotebook"
log = logging.getLogger("cnotebook")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def create_mol_formatter(*, ctx: CNotebookContext) -> typing.Callable[[oechem.OEMolBase], str]:
    """
    Closure that creates a function that renders an OEMol to HTML

    The returned formatter distinguishes four cases:
        - Valid molecule: depicted with the given context, passed through the context's display callbacks and
          converted to HTML
        - Molecule that is not valid and has no atoms: rendered with render_empty_molecule
        - Any other molecule that is not valid: rendered with render_invalid_molecule
        - Object that is not an oechem.OEMolBase: its str() representation

    :param ctx: CNotebook rendering context
    :return: Function that renders molecules to HTML
    """
    def _oemol_to_html(mol: oechem.OEMolBase) -> str:
        # Anything that is not a molecule is shown as plain text
        if not isinstance(mol, oechem.OEMolBase):
            return str(mol)

        # Render valid molecules
        if mol.IsValid():
            # Create the display object
            disp = oemol_to_disp(mol, ctx=ctx)

            # Apply display callbacks
            if ctx.callbacks is not None:
                for callback in ctx.callbacks:
                    callback(disp)

            # Convert to HTML with the same context that was used to build the display, as
            # create_disp_formatter does
            return oedisp_to_html(disp, ctx=ctx)

        # Empty molecule
        if mol.NumAtoms() == 0:
            return render_empty_molecule(ctx=ctx)

        # Invalid molecule
        return render_invalid_molecule(ctx=ctx)

    return _oemol_to_html
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@pass_cnotebook_context
def create_disp_formatter(
        *,
        callbacks: list[typing.Callable[[oedepict.OE2DMolDisplay], None]] | None = None,
        ctx: CNotebookContext
) -> typing.Callable[[oedepict.OE2DMolDisplay], str]:
    """
    Build a formatter that converts an OE2DMolDisplay into HTML

    The formatter never touches the display it is given: the callbacks run on a copy, and that copy is what gets
    converted to HTML. Objects that are not valid OE2DMolDisplay instances are returned as str(obj).

    :param callbacks: List of callbacks to modify the rendering of the molecule
    :param ctx: Render context
    :return: Function that renders molecule displays to HTML
    """

    def _oedisp_to_html(disp: oedepict.OE2DMolDisplay) -> str:
        is_renderable = isinstance(disp, oedepict.OE2DMolDisplay) and disp.IsValid()

        if not is_renderable:
            return str(disp)

        # Work on a copy so the callbacks cannot alter the caller's display
        # TODO: Update with ctx
        working_copy = oedepict.OE2DMolDisplay(disp)

        for modify in (callbacks if callbacks is not None else ()):
            modify(working_copy)

        return oedisp_to_html(working_copy, ctx=ctx)

    return _oedisp_to_html
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def escape_formatter(obj: Any) -> str:
    """
    Format an arbitrary cell value for the HTML table

    :param obj: Value to format
    :return: str(obj) passed through escape_brackets
    """
    text = str(obj)
    return escape_brackets(text)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def render_dataframe(
        df: pd.DataFrame,
        formatters: dict | None = None,
        col_space: dict[str, float | int] | None = None,
        **kwargs
) -> str:
    """
    Render a DataFrame with molecules

    Columns with oepandas.MoleculeDtype and oepandas.DisplayDtype are rendered with formatters created from each
    column's cnotebook context. Every other column is rendered with escape_formatter unless the caller supplied
    a formatter for it.

    Neither the DataFrame nor the formatters / col_space dictionaries passed by the caller are modified.

    :param df: DataFrame to render
    :param formatters: Custom formatters for displaying columns. Entries for molecule and display columns are
        replaced. The table is written with escape=False, so the output of a custom formatter is inserted as-is.
    :param col_space: Custom column spacing. Entries for molecule and display columns are replaced.
    :param kwargs: Additional keyword arguments for DataFrame.to_html
    :return: HTML of rendered DataFrame
    """
    # Work on private copies so the dictionaries handed in by the caller are never mutated
    formatters = dict(formatters) if formatters else {}
    col_space = dict(col_space) if col_space else {}

    # Columns with MoleculeDtype
    molecule_columns = {col for col in df.columns if isinstance(df.dtypes[col], oepd.MoleculeDtype)}

    # We need to copy both the DataFrame and the molecules, because we modify them in-place to render them
    df = df.copy()

    for col in molecule_columns:
        # Direct assignment to help IDE understand this is a MoleculeArray
        arr = df[col].array
        assert isinstance(arr, oepd.MoleculeArray)
        df[col] = pd.Series(arr.deepcopy(), index=df[col].index, dtype=oepd.MoleculeDtype())

    # ---------------------------------------------------
    # Molecule columns
    # ---------------------------------------------------

    if molecule_columns:
        # Column labels are not necessarily strings, so convert them before joining
        log.debug('Detected molecule columns: %s', ", ".join(map(str, molecule_columns)))

    for col in molecule_columns:

        if col in formatters:
            log.warning(f'Overwriting existing formatter for {col} with a molecule formatter')

        # Direct assignment to help IDE understand this is a MoleculeArray
        arr = df[col].array
        assert isinstance(arr, oepd.MoleculeArray)

        # Get the cnotebook options for this column
        ctx = get_series_context(arr.metadata)

        formatters[col] = create_mol_formatter(ctx=ctx)

        # The column must be at least as wide as the molecule image
        if col in col_space:
            log.warning(f'Column spacing for {col} already defined but overwriting with molecule image width')

        col_space[col] = float(ctx.width)

    # ---------------------------------------------------
    # Display columns
    # ---------------------------------------------------

    display_columns = {col for col in df.columns if isinstance(df.dtypes[col], oepd.DisplayDtype)}

    if display_columns:
        log.debug('Detected display columns: %s', ", ".join(map(str, display_columns)))

    for col in display_columns:

        # Direct assignment to help IDE understand this is a DisplayArray
        arr = df[col].array
        assert isinstance(arr, oepd.DisplayArray)

        # Get column metadata
        ctx = get_series_context(arr.metadata)

        formatters[col] = create_disp_formatter(ctx=ctx)

        # Widest display in the column; default=0 covers empty columns and columns that hold no display objects
        widths = (disp.GetWidth() for disp in arr if isinstance(disp, oedepict.OE2DMolDisplay))
        col_space[col] = max(0, max(widths, default=0))

    # ---------------------------------------------------
    # All other columns
    # ---------------------------------------------------

    for col in df.columns:
        if col not in display_columns and col not in molecule_columns:
            # Keep a formatter supplied by the caller; otherwise escape the cell text
            formatters.setdefault(col, escape_formatter)

    return df.to_html(escape=False, formatters=formatters, col_space=col_space, **kwargs)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
########################################################################################################################
|
|
215
|
+
# Register Pandas formatters
|
|
216
|
+
########################################################################################################################
|
|
217
|
+
|
|
218
|
+
if ipython_present:

    def register_pandas_formatters():
        """
        Modify how the notebook is told how to display Pandas Dataframes - this actually is more flexible because it
        will still work with other custom changes to to_html().

        Registers render_dataframe as the text/html formatter for pd.DataFrame on the active iPython instance. If
        there is no active instance, only a debug message is logged.

        Note: Calls to this function are idempotent.
        """
        ipython_instance = get_ipython()

        if ipython_instance is None:
            log.debug("[cnotebook] iPython installed but not in use - cannot register pandas extension")
            return

        html_formatter = ipython_instance.display_formatter.formatters['text/html']

        # lookup_by_type() takes the type itself. lookup() expects an instance, so passing pd.DataFrame to it
        # would search for a formatter of type(pd.DataFrame) and never find the one registered here.
        try:
            registered = html_formatter.lookup_by_type(pd.DataFrame)
        except KeyError:
            registered = None

        if registered is not render_dataframe:
            html_formatter.for_type(pd.DataFrame, render_dataframe)

else:

    # iPython is not present, so we do not register a Pandas formatter
    def register_pandas_formatters():
        """
        Do nothing, because iPython is not installed
        """
        pass
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
########################################################################################################################
|
|
248
|
+
# Series accessors
|
|
249
|
+
########################################################################################################################
|
|
250
|
+
|
|
251
|
+
@register_series_accessor("highlight")
class SeriesHighlightAccessor:
    """
    Series accessor ("highlight") that registers structure highlighting callbacks on a molecule column
    """

    # Query types that are handed to create_structure_highlighter as-is
    _QUERY_TYPES = (str, oechem.OESubSearch, oechem.OEMCSSearch, oechem.OEQMol)

    def __init__(self, pandas_obj: pd.Series):
        """
        :param pandas_obj: Series the accessor is attached to
        :raises TypeError: If the series does not have oepandas.MoleculeDtype
        """
        if not isinstance(pandas_obj.dtype, oepd.MoleculeDtype):
            raise TypeError(
                "highlight only works on molecule columns (oepandas.MoleculeDtype). If this column has "
                "molecules, use pd.Series.as_molecule to convert to a molecule column first."
            )

        self._obj = pandas_obj

    def __call__(
            self,
            pattern: Iterable[str] | str | oechem.OESubSearch | Iterable[oechem.OESubSearch],
            *,
            color: oechem.OEColor = oechem.OEColor(oechem.OELightBlue),
            style: int = oedepict.OEHighlightStyle_Stick,
            ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEQMol | Literal["first"] | oechem.OEMolBase | None = None,  # noqa
            method: Literal["ss", "substructure", "mcss", "fp", "fingerprint"] | None = None
    ) -> None:
        """
        Highlight chemical features in a structure

        The pattern argument can be:
            - SMARTS pattern
            - oechem.OESubSearch, oechem.OEMCSSearch or oechem.OEQMol object
            - Iterable of SMARTS patterns, oechem.OESubSearch, oechem.OEMCSSearch and/or oechem.OEQMol objects

        One highlighting callback per pattern is added to the context of this series. The patterns are checked
        and the callbacks are built before any of them is added, so a failure leaves the context untouched.

        :param pattern: Pattern(s) to highlight in the molecule
        :param color: Highlight color
        :param style: Highlight style
        :param ref: Optional alignment reference; when given, the series' align_depictions accessor is called
        :param method: Alignment method passed to align_depictions together with ref
        :raises TypeError: If the pattern (or an element of it) has an unsupported type
        """
        # ********************************************************************************
        # Highlighting
        # ********************************************************************************

        # Normalize the pattern argument to a list of queries
        if isinstance(pattern, self._QUERY_TYPES):
            queries = [pattern]

        elif isinstance(pattern, Iterable):
            queries = list(pattern)

            for element in queries:
                if not isinstance(element, self._QUERY_TYPES):
                    raise TypeError(f'Do not know how to add molecule highlight for type {type(element).__name__}')

        else:
            raise TypeError(f'Do not know how to add molecule highlight for type {type(pattern).__name__}')

        # Build every highlighter up front, so nothing is registered if one of them cannot be created
        highlighters = [
            create_structure_highlighter(query=query, color=color, style=style)
            for query in queries
        ]

        # Direct assignment to help IDE understand this is a MoleculeArray
        arr = self._obj.array
        assert isinstance(arr, oepd.MoleculeArray)

        # Get / create a series context and save it (because we are modifying it locally)
        ctx = get_series_context(arr.metadata, save=True)

        for highlighter in highlighters:
            ctx.add_callback(highlighter)

        # ********************************************************************************
        # Alignment
        # ********************************************************************************

        if ref is not None:
            self._obj.align_depictions(ref=ref, method=method)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
@register_series_accessor("recalculate_depiction_coordinates")
class SeriesRecalculateDepictionCoordinatesAccessor:
    """
    Series accessor ("recalculate_depiction_coordinates") that prepares the molecules of a molecule column for
    depiction again
    """

    def __init__(self, pandas_obj: pd.Series):
        """
        :param pandas_obj: Series the accessor is attached to
        :raises TypeError: If the series does not have oepandas.MoleculeDtype
        """
        if not isinstance(pandas_obj.dtype, oepd.MoleculeDtype):
            raise TypeError(
                "recalculate_depiction_coordinates only works on molecule columns (oepandas.MoleculeDtype). If this "
                "column has molecules, use pd.Series.as_molecule to convert to a molecule column first."
            )

        self._obj = pandas_obj

    def __call__(
            self,
            *,
            clear_coords: bool = True,
            add_depction_hydrogens: bool = True,
            perceive_bond_stereo: bool = True,
            suppress_explicit_hydrogens: bool = True,
            orientation: int = oedepict.OEDepictOrientation_Default
    ) -> None:
        """
        Recalculate the depictions for a molecule series.

        The molecules in the series are modified in place. Elements that are not oechem.OEMolBase objects are
        skipped.

        See the following link for more information:
        https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html

        :param clear_coords: Clear existing 2D coordinates
        :param add_depction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
        :param perceive_bond_stereo: Perceive wedge/hash bond stereo
        :param suppress_explicit_hydrogens: Suppress explicit hydrogens
        :param orientation: Preferred 2D orientation
        """
        # Create the depiction options; every keyword argument is applied to them
        opts = oedepict.OEPrepareDepictionOptions()
        opts.SetClearCoords(clear_coords)
        opts.SetAddDepictionHydrogens(add_depction_hydrogens)
        opts.SetPerceiveBondStereo(perceive_bond_stereo)
        opts.SetSuppressHydrogens(suppress_explicit_hydrogens)
        opts.SetDepictOrientation(orientation)

        for mol in self._obj.array:
            if isinstance(mol, oechem.OEMolBase):
                oedepict.OEPrepareDepiction(mol, opts)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
@register_series_accessor("reset_depictions")
class SeriesResetDepictionsAccessor:
    """
    Series accessor ("reset_depictions") that removes the "cnotebook" entry from the metadata of the underlying
    array
    """

    def __init__(self, pandas_obj: pd.Series):
        """
        :param pandas_obj: Series the accessor is attached to
        """
        self._obj = pandas_obj

    def __call__(self) -> None:
        """
        Reset depiction callbacks for a molecule series

        Series whose array has no metadata are left untouched.
        """
        # Only arrays that carry metadata (should be true for oepandas arrays) have anything to reset. No
        # assertion on the concrete array type is made here: it would raise for other metadata-carrying arrays
        # and would be skipped entirely when Python runs with -O.
        metadata = getattr(self._obj.array, "metadata", None)

        if metadata is not None:
            metadata.pop("cnotebook", None)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
@register_series_accessor("align_depictions")
class SeriesAlignDepictionsAccessor:
    """
    Series accessor ("align_depictions") that aligns the 2D depictions of a molecule column to a reference
    """

    def __init__(self, pandas_obj: pd.Series):
        """
        :param pandas_obj: Series the accessor is attached to
        :raises TypeError: If the series does not have oepandas.MoleculeDtype
        """
        if not isinstance(pandas_obj.dtype, oepd.MoleculeDtype):
            raise TypeError(
                "align_depictions only works on molecule columns (oepandas.MoleculeDtype). If this "
                "column has molecules, use pd.Series.as_molecule to convert to a molecule column first."
            )

        self._obj = pandas_obj

    def __call__(
            self,
            ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | oechem.OEQMol | Literal["first"],
            method: Literal["substructure", "ss", "mcss", "fp", "fingerprint"] | None = None,
            **kwargs
    ) -> None:
        """
        Align the 2D coordinates of molecules

        Alignment is best effort: a molecule that cannot be aligned keeps its current depiction and does not
        prevent the remaining molecules from being aligned. Failures are logged at debug level.

        :param ref: Alignment reference, or "first" to use a copy of the first valid molecule in the series
        :param method: Alignment method passed to create_aligner
        :param kwargs: Keyword arguments for aligner (currently unused: they are not forwarded to create_aligner)
        """
        # TODO: Maybe do this smarter so that you know if the context is column-level, which means you could copy that
        #       context into the new DisplayArray that you'll create below? Or even link the contexts?

        # Direct assignment to help IDE understand this is a MoleculeArray
        arr = self._obj.array
        assert isinstance(arr, oepd.MoleculeArray)

        if isinstance(ref, str) and ref == "first":
            # The isinstance check skips missing values, which have no IsValid method
            first_valid = next(
                (mol for mol in arr if isinstance(mol, oechem.OEMolBase) and mol.IsValid()),
                None
            )

            if first_valid is None:
                log.warning("No valid molecule found in series for depiction alignment")
                return

            ref = first_valid.CreateCopy()

        # Suppress alignment warnings (there are lots of needless warnings)
        level = oechem.OEThrow.GetLevel()
        oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Error)

        try:
            # noinspection PyBroadException
            try:
                aligner = create_aligner(ref=ref, method=method)
            except Exception:
                # Without an aligner nothing can be aligned - the structures simply stay as they are (NBD)
                log.debug("Could not create depiction aligner", exc_info=True)
                return

            for mol in arr:
                # Missing values cannot be aligned
                if not isinstance(mol, oechem.OEMolBase):
                    continue

                # Handle failures per molecule, so one bad molecule does not leave the rest unaligned
                # noinspection PyBroadException
                try:
                    aligner(mol)
                except Exception:
                    log.debug("Depiction alignment failed for a molecule", exc_info=True)

        # Restore OEThrow
        finally:
            oechem.OEThrow.SetLevel(level)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
########################################################################################################################
|
|
458
|
+
# DataFrame accessors
|
|
459
|
+
########################################################################################################################
|
|
460
|
+
|
|
461
|
+
@register_dataframe_accessor("recalculate_depiction_coordinates")
class SeriesRecalculateDepictionCoordinatesAccessor:
    """
    DataFrame accessor ("recalculate_depiction_coordinates") that recalculates depictions column by column
    """

    def __init__(self, pandas_obj: pd.DataFrame):
        """
        :param pandas_obj: DataFrame the accessor is attached to
        """
        self._obj = pandas_obj

    def __call__(
            self,
            *,
            molecule_columns: str | Iterable[str] | None = None,
            clear_coords: bool = True,
            add_depction_hydrogens: bool = True,
            perceive_bond_stereo: bool = True,
            suppress_explicit_hydrogens: bool = True,
            orientation: int = oedepict.OEDepictOrientation_Default
    ) -> None:
        """
        Recalculate the depictions for a one or more molecule series in a DataFrame. If molecule_columns is None,
        which is the default, then all molecule columns will have their depictions recalculated

        Requested columns that do not exist, or that do not have a MoleculeDtype, are skipped with a warning.

        See the following link for more information:
        https://docs.eyesopen.com/toolkits/python/depicttk/OEDepictClasses/OEPrepareDepictionOptions.html

        :param molecule_columns: Optional molecule column(s) to have depictions recalculated
        :param clear_coords: Clear existing 2D coordinates
        :param add_depction_hydrogens: Add explicit depiction hydrogens for faithful stereo depiction, etc.
        :param perceive_bond_stereo: Perceive wedge/hash bond stereo
        :param suppress_explicit_hydrogens: Suppress explicit hydrogens
        :param orientation: Preferred 2D orientation
        """
        if molecule_columns is None:
            requested = [
                col for col in self._obj.columns
                if isinstance(self._obj.dtypes[col], oepd.MoleculeDtype)
            ]

        elif isinstance(molecule_columns, str):
            requested = [molecule_columns]

        else:
            # Drop duplicate names while keeping the order given by the caller
            requested = list(dict.fromkeys(molecule_columns))

        # Recalculate the column depictions. The collection is not modified while it is iterated: removing
        # entries from it inside the loop would raise a RuntimeError.
        for col in requested:

            if col not in self._obj.columns:
                # Column labels are not necessarily strings, so convert them before joining
                log.warning(f'{col} not found in DataFrame columns: ({", ".join(map(str, self._obj.columns))})')
                continue

            if not isinstance(self._obj.dtypes[col], oepd.MoleculeDtype):
                log.warning(f'Column {col} does not have a MoleculeDtype')
                continue

            self._obj[col].recalculate_depiction_coordinates(
                clear_coords=clear_coords,
                add_depction_hydrogens=add_depction_hydrogens,
                perceive_bond_stereo=perceive_bond_stereo,
                suppress_explicit_hydrogens=suppress_explicit_hydrogens,
                orientation=orientation
            )
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
@register_dataframe_accessor("reset_depictions")
class SeriesResetDepictionsAccessor:
    """
    DataFrame accessor ("reset_depictions") that calls the series-level reset_depictions accessor on molecule
    columns
    """

    def __init__(self, pandas_obj: pd.DataFrame):
        """
        :param pandas_obj: DataFrame the accessor is attached to
        """
        self._obj = pandas_obj

    def __call__(self, *, molecule_columns: str | Iterable[str] | None = None) -> None:
        """
        Reset depiction callbacks for one or more columns

        :param molecule_columns: Column name(s) to reset; None (the default) considers every column. Names that
            are not in the DataFrame and columns without a MoleculeDtype are silently ignored.
        """
        if molecule_columns is None:
            candidates = set(self._obj.columns)
        elif isinstance(molecule_columns, str):
            candidates = {molecule_columns}
        else:
            candidates = set(molecule_columns)

        for name in candidates:
            # Ignore names that are not columns of this DataFrame
            if name not in self._obj.columns:
                continue

            # Ignore columns that do not hold molecules
            if not isinstance(self._obj[name].dtype, oepd.MoleculeDtype):
                continue

            self._obj[name].reset_depictions()
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
@register_dataframe_accessor("highlight_using_column")
class HighlightUsingColumnAccessor:
    """
    DataFrame accessor ("highlight_using_column") that highlights each molecule with the patterns found in
    another column of the same row
    """

    def __init__(self, pandas_obj: pd.DataFrame):
        """
        :param pandas_obj: DataFrame the accessor is attached to
        """
        self._obj = pandas_obj

    @staticmethod
    def _parse_substructures(patterns: Any) -> list:
        """
        Convert the content of one pattern cell into a list of valid oechem.OESubSearch objects

        :param patterns: SMARTS string, oechem.OESubSearch, or iterable of these
        :return: Valid substructure searches (invalid patterns are dropped, unsupported types are logged)
        """
        if isinstance(patterns, (str, oechem.OESubSearch)):
            candidates = [patterns]

        elif isinstance(patterns, Iterable):
            candidates = patterns

        else:
            log.warning(f'Do not know how to highlight using: {type(patterns).__name__}')
            return []

        substructures = []

        for candidate in candidates:

            if isinstance(candidate, str):
                for smarts in SMARTS_DELIMITER_RE.split(candidate):
                    # Leading / trailing delimiters produce empty strings, which are not patterns
                    if not smarts.strip():
                        continue

                    ss = oechem.OESubSearch(smarts)
                    if ss.IsValid():
                        substructures.append(ss)

            elif isinstance(candidate, oechem.OESubSearch):
                if candidate.IsValid():
                    substructures.append(candidate)

            else:
                log.warning(f'Do not know how to highlight using: {type(candidate).__name__}')

        return substructures

    def __call__(
            self,
            molecule_column: str,
            pattern_column: str,
            *,
            highlighted_column: str = "highlighted_substructures",
            ref: oechem.OESubSearch | oechem.OEMCSSearch | oechem.OEMolBase | None = None,
            alignment_opts: oedepict.OEAlignmentOptions | None = None,
            prepare_opts: oedepict.OEPrepareDepictionOptions | None = None,
            inplace: bool = False
    ) -> pd.DataFrame:
        """
        Highlight molecules based on the value of another column. The column produced is a DisplayArray column, so
        the results are not suitable for other molecular calculations.

        The other column can contain:
            - String of SMARTS patterns delimited by "|", carriage return, newline or tab characters
            - oechem.OESubSearch object
            - Iterable of SMARTS pattern strings and/or oechem.OESubSearch objects

        Rows whose molecule is not an oechem.OEMolBase get a missing value in the highlighted column.

        :param molecule_column: Name of the molecule column
        :param pattern_column: Name of the pattern column
        :param highlighted_column: Optional name of the column with highlighted structures
        :param ref: Optional reference for aligning depictions (currently unused)
        :param alignment_opts: Optional depiction alignment options (oedepict.OEAlignmentOptions) (currently unused)
        :param prepare_opts: Optional depiction preparation options (oedepict.OEPrepareDepictionOptions)
            (currently unused)
        :param inplace: Modify the DataFrame in place
        :return: Modified DataFrame
        :raises KeyError: If molecule_column or pattern_column is not a column of the DataFrame
        :raises TypeError: If molecule_column does not have oepandas.MoleculeDtype
        """
        # Object we are operating on
        df = self._obj if inplace else self._obj.copy()

        # Column labels are not necessarily strings, so convert them before joining
        available = ", ".join(map(str, df.columns))

        if molecule_column not in df.columns:
            raise KeyError(f'{molecule_column} not found in DataFrame columns: ({available})')

        if not isinstance(df[molecule_column].dtype, oepd.MoleculeDtype):
            raise TypeError(
                f"highlight_using_column only works on molecule columns (oepandas.MoleculeDtype). If {molecule_column}"
                " has molecules, use pd.Series.as_molecule to convert to a molecule column first."
            )

        if pattern_column not in df.columns:
            raise KeyError(f'{pattern_column} not found in DataFrame columns: ({available})')

        # Get the rendering context for creating the displays
        # TODO: Maybe do this smarter so that you know if the context is column-level, which means you could copy that
        #       context into the new DisplayArray that you'll create below? Or even link the contexts?
        # Direct assignment to help IDE understand this is a MoleculeArray
        arr = df[molecule_column].array
        assert isinstance(arr, oepd.MoleculeArray)
        ctx = get_series_context(arr.metadata)

        # Create one display (or missing value) per row, walking the two columns in lockstep
        displays = []

        for mol, patterns in zip(arr, df[pattern_column]):

            if not isinstance(mol, oechem.OEMolBase):
                displays.append(None)
                continue

            # Create the display
            disp = oemol_to_disp(mol, ctx=ctx)

            # Apply substructure highlights
            highlight = oedepict.OEHighlightOverlayByBallAndStick(oechem.OEGetLightColors())

            for ss in self._parse_substructures(patterns):
                oedepict.OEAddHighlightOverlay(disp, highlight, ss.Match(mol, True))

            displays.append(disp)

        df[highlighted_column] = pd.Series(displays, index=df.index, dtype=oepd.DisplayDtype())
        return df
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
class ColorBondByOverlapScore(oegrapheme.OEBondGlyphBase):
    """
    Bond glyph that draws a colored line under each bond, with the color taken from a value stored on the bond:
    https://docs.eyesopen.com/toolkits/cookbook/python/depiction/simcalc.html
    """
    def __init__(self, cg, tag):
        """
        :param cg: Color gradient; its GetColorAt method maps the bond data to a color
        :param tag: Tag under which the value is stored on each bond
        """
        oegrapheme.OEBondGlyphBase.__init__(self)
        self.colorg = cg
        self.tag = tag

    # noinspection PyPep8Naming
    def RenderGlyph(self, disp, bond):
        """
        Draw the glyph for one bond

        :param disp: Molecule display to draw on
        :param bond: Bond to draw the glyph for
        :return: True if a line was drawn, False if the bond is not displayed, not visible, or has no data for
            the tag
        """
        bond_display = disp.GetBondDisplay(bond)
        not_drawable = bond_display is None or not bond_display.IsVisible()

        # Nothing to draw for hidden bonds or bonds without a stored value
        if not_drawable or not bond.HasData(self.tag):
            return False

        # Pen: line width follows the display scale, color follows the value stored on the bond
        stroke_width = disp.GetScale() / 3.0
        overlap_color = self.colorg.GetColorAt(bond.GetData(self.tag))
        stroke = oedepict.OEPen(overlap_color, overlap_color, oedepict.OEFill_Off, stroke_width)

        begin_display = disp.GetAtomDisplay(bond.GetBgn())
        end_display = disp.GetAtomDisplay(bond.GetEnd())

        # Draw on the layer below the molecule, from the begin atom to the end atom
        below = disp.GetLayer(oedepict.OELayerPosition_Below)
        below.DrawLine(begin_display.GetCoords(), end_display.GetCoords(), stroke)

        return True

    # NOTE(review): the toolkit's glyph examples usually name the copy method CreateCopy - confirm whether this
    # method name is intended
    # noinspection PyPep8Naming
    def ColorBondByOverlapScore(self):
        """
        :return: New glyph with the same color gradient and tag, disowned by Python
        """
        duplicate = ColorBondByOverlapScore(self.colorg, self.tag)
        return duplicate.__disown__()
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
@register_dataframe_accessor("fingerprint_similarity")
class FingerprintSimilaritySeriesAccessor:
    """
    DataFrame accessor, registered as ``df.fingerprint_similarity``, that scores every molecule in a column
    against a reference molecule and depicts the fingerprint overlap on both molecules.
    """
    def __init__(self, pandas_obj: pd.DataFrame):
        self._obj = pandas_obj

        # Tag under which the per-bond overlap counts are stored on the molecule copies
        self._tag = oechem.OEGetTag("fingerprint_overlap")

    def __call__(
            self,
            molecule_column: str,
            ref: oechem.OEMolBase | None = None,
            *,
            tanimoto_column="fingerprint_tanimoto",
            reference_similarity_column="reference_similarity",
            target_similarity_column="target_similarity",
            fptype: str = "tree",
            num_bits: int = 4096,
            min_distance: int = 0,
            max_distance: int = 4,
            atom_type: str | int = oegraphsim.OEFPAtomType_DefaultTreeAtom,
            bond_type: str | int = oegraphsim.OEFPBondType_DefaultTreeBond,
            inplace: bool = False
    ) -> pd.DataFrame:
        """
        Color molecules by fingerprint similarity
        :param molecule_column: Name of the molecule column (must have dtype oepd.MoleculeDtype)
        :param ref: Reference molecule (the first valid molecule in the column is used if None)
        :param tanimoto_column: Name of the column that receives the Tanimoto to the reference
        :param reference_similarity_column: Name of the column that receives the colored reference depictions
        :param target_similarity_column: Name of the column that receives the colored target depictions
        :param fptype: Fingerprint type
        :param num_bits: Number of bits in the fingerprint
        :param min_distance: Minimum distance/radius for path/circular/tree
        :param max_distance: Maximum distance/radius for path/circular/tree
        :param atom_type: Atom type string delimited by "|" OR int bitmask from the oegraphsim.OEFPAtomType_ namespace
        :param bond_type: Bond type string delimited by "|" OR int bitmask from the oegraphsim.OEFPBondType_ namespace
        :param inplace: Modify the DataFrame in place instead of working on a copy
        :return: DataFrame with the three columns added. Rows with an invalid molecule or fingerprint get NaN
                 and empty displays. If no usable reference molecule or reference fingerprint is available,
                 the DataFrame is returned without the new columns.
        :raises KeyError: If the molecule column is not in the DataFrame
        :raises TypeError: If the molecule column does not have dtype oepd.MoleculeDtype
        """
        # Preprocess
        df = self._obj if inplace else self._obj.copy()

        if molecule_column not in df.columns:
            raise KeyError(f'Molecule column not found in DataFrame: {molecule_column}')

        if not isinstance(df[molecule_column].dtype, oepd.MoleculeDtype):
            raise TypeError("Column {} does not have dtype oepd.MoleculeDtype ({})".format(
                molecule_column, str(df[molecule_column].dtype)))

        # Get the context
        # Direct assignment to help IDE understand this is a MoleculeArray
        arr = self._obj[molecule_column].array
        assert isinstance(arr, oepd.MoleculeArray)
        ctx = get_series_context(arr.metadata)

        # If we're using the first molecule as our reference
        if ref is None:
            for mol in arr:  # type: oechem.OEMol
                # Missing molecules are skipped, consistent with the check in the main loop below
                if mol is not None and mol.IsValid():
                    ref = mol
                    break
            else:
                log.warning(f'No valid reference molecules to use for alignment in column {molecule_column}')
                return df

        # Check reference molecule
        if not ref.IsValid():
            log.warning("Reference molecule is not valid")
            return df

        # Fingerprint maker
        make_fp = fingerprint_maker(
            fptype=fptype,
            num_bits=num_bits,
            min_distance=min_distance,
            max_distance=max_distance,
            atom_type=atom_type,
            bond_type=bond_type
        )

        # Make the reference fingerprint
        ref_fp = make_fp(ref)

        if not ref_fp.IsValid():
            log.warning("Fingerprint from reference molecule is invalid")
            return df

        # Create the display objects
        ref_displays = []
        targ_displays = []

        # FIXME: See note below regarding the fact we have to cache the reference and target molecule copies
        ref_molecules = []
        targ_molecules = []

        # Every list below gets exactly one entry per row, so that it lines up with the index
        tanimotos = []
        index = []
        missing = float("nan")

        for idx, mol in df[molecule_column].items():  # type: Hashable, oechem.OEMol
            index.append(idx)

            # Molecule was invalid
            if mol is None or not mol.IsValid():
                tanimotos.append(missing)
                ref_displays.append(None)
                targ_displays.append(None)
                continue

            # Copy the molecules, because we're modifying them
            targ_mol = oechem.OEMol(mol)
            ref_mol = oechem.OEMol(ref)

            # FIXME: See note below regarding the fact we have to cache the reference and target molecule copies
            targ_molecules.append(targ_mol)
            ref_molecules.append(ref_mol)

            # Create the fingerprint
            targ_fp = make_fp(targ_mol)

            # Fingerprint was invalid
            if not targ_fp.IsValid():
                tanimotos.append(missing)
                ref_displays.append(None)
                targ_displays.append(None)
                continue

            # Add the tanimoto
            tanimotos.append(oegraphsim.OETanimoto(ref_fp, targ_fp))

            # Per-bond counters of how many overlapping fingerprint patterns cover each bond
            targ_bonds = oechem.OEUIntArray(targ_mol.GetMaxBondIdx())
            ref_bonds = oechem.OEUIntArray(ref_mol.GetMaxBondIdx())

            # Overlaps
            overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())

            for match in overlaps:
                for bond in match.GetPatternBonds():
                    ref_bonds[bond.GetIdx()] += 1
                for bond in match.GetTargetBonds():
                    targ_bonds[bond.GetIdx()] += 1

            for bond in targ_mol.GetBonds():
                bond.SetData(self._tag, targ_bonds[bond.GetIdx()])

            for bond in ref_mol.GetBonds():
                bond.SetData(self._tag, ref_bonds[bond.GetIdx()])

            # default=0 keeps max() from raising when a counter array is empty
            # noinspection PyTypeChecker
            maxvalue = max(0, max(targ_bonds, default=0), max(ref_bonds, default=0))

            # Create the color gradient
            colorg = oechem.OELinearColorGradient()
            colorg.AddStop(oechem.OEColorStop(0.0, oechem.OEPinkTint))
            colorg.AddStop(oechem.OEColorStop(1.0, oechem.OEYellow))
            colorg.AddStop(oechem.OEColorStop(maxvalue, oechem.OEDarkGreen))

            # Function that will color the bonds
            bondglyph = ColorBondByOverlapScore(colorg, self._tag)

            # Align the molecules (the overlaps are requested again after being iterated above)
            overlaps = oegraphsim.OEGetFPOverlap(ref_mol, targ_mol, ref_fp.GetFPTypeBase())
            oedepict.OEPrepareMultiAlignedDepiction(targ_mol, ref_mol, overlaps)

            # Create the displays
            ref_disp = oemol_to_disp(ref_mol, ctx=ctx)
            targ_disp = oemol_to_disp(targ_mol, ctx=ctx)

            # Color the displays
            oegrapheme.OEAddGlyph(ref_disp, bondglyph, oechem.IsTrueBond())
            oegrapheme.OEAddGlyph(targ_disp, bondglyph, oechem.IsTrueBond())

            ref_displays.append(ref_disp)
            targ_displays.append(targ_disp)

        # Add the columns
        df[tanimoto_column] = pd.Series(
            tanimotos,
            index=index,
            dtype=float
        )

        # FIXME: Submitted to OpenEye as Case #00037423
        # We need to keep the copies of the molecules that we made above, or they will be garbage collected
        # and the OE2DMolDisplay objects will segfault. We'll keep those in the metadata now for the arrays.
        ref_arr = oepd.DisplayArray(ref_displays, metadata={"molecules": ref_molecules})
        targ_arr = oepd.DisplayArray(targ_displays, metadata={"molecules": targ_molecules})

        df[reference_similarity_column] = pd.Series(
            ref_arr,
            index=shallow_copy(index),
            dtype=oepd.DisplayDtype()
        )

        df[target_similarity_column] = pd.Series(
            targ_arr,
            index=shallow_copy(index),
            dtype=oepd.DisplayDtype()
        )

        return df
|