datamapplot 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ from datamapplot.create_plots import create_plot, create_interactive_plot
2
+ from datamapplot.plot_rendering import render_plot
3
+ from datamapplot.interactive_rendering import render_html
4
+ from importlib.metadata import version, PackageNotFoundError
5
+
6
# Look up the version of the installed "datamapplot" distribution from its
# package metadata. ``importlib.metadata.version`` raises
# ``PackageNotFoundError`` when no such distribution is installed, in which
# case a fixed development placeholder string is used instead.
try:
    __version__ = version("datamapplot")
except PackageNotFoundError:
    __version__ = "0.3-dev"

# Public names re-exported at package level (all four are imported above).
__all__ = ["create_plot", "create_interactive_plot", "render_plot", "render_html"]
@@ -0,0 +1,80 @@
1
+ import numpy as np
2
+ import numba
3
+
4
+ from scipy.interpolate import splprep, splev
5
+
6
+
7
@numba.njit()
def circumradius(points):
    """Return the circumradius of the triangle given by three 2D points.

    The first point is used as the origin; the circumcentre is computed
    relative to it from the two edge vectors leaving that point, and the
    radius is the length of that offset.

    Parameters
    ----------
    points: ndarray of floats
        The triangle vertices; rows 0, 1 and 2 and columns 0 and 1 are read.

    Returns
    -------
    radius: float
        The distance from the first vertex to the circumcentre.
    """
    # Edge vectors from vertex 0 to vertices 1 and 2.
    edges = points[1:] - points[0]
    bx = edges[0, 0]
    by = edges[0, 1]
    cx = edges[1, 0]
    cy = edges[1, 1]

    # Twice the signed cross product of the two edges; this is zero when the
    # three points are collinear, in which case the division below fails.
    denom = 2 * (bx * cy - by * cx)
    b_sq = bx * bx + by * by
    c_sq = cx * cx + cy * cy

    centre_x = (cy * b_sq - by * c_sq) / denom
    centre_y = (bx * c_sq - cx * b_sq) / denom
    return np.sqrt(centre_x * centre_x + centre_y * centre_y)
16
+
17
+
18
def create_boundary_polygons(points, simplices, alpha=0.1):
    """Trace the alpha-shape boundary of a triangulation as polygons.

    Triangles whose circumradius is below ``alpha`` are kept. An edge is on
    the boundary when it has been seen in exactly one kept triangle. The
    boundary edges are then chained end-to-end into vertex sequences, one
    sequence per connected chain of edges.

    Parameters
    ----------
    points: ndarray of floats of shape (n_points, 2)
        The coordinates of the triangulated points.

    simplices: iterable of triples of int
        The triangles, each given as three indices into ``points``.

    alpha: float (optional, default=0.1)
        Only triangles with a circumradius strictly less than this value
        contribute edges.

    Returns
    -------
    polygons: list of ndarray of float32
        One array per traced chain, of shape ``(len(chain) + 1, 2)``. The
        rows are the coordinates of the chain's vertices in order, followed
        by the first vertex repeated as a final row. An empty list is
        returned when no triangle passes the ``alpha`` test.
    """
    seen_edges = set()
    boundary = set()
    for simplex in simplices:
        if circumradius(points[simplex]) < alpha:
            for a, b in (
                (simplex[0], simplex[1]),
                (simplex[0], simplex[2]),
                (simplex[1], simplex[2]),
            ):
                # Store every edge with its smaller index first so the same
                # edge coming from two triangles compares equal.
                edge = (a, b) if a < b else (b, a)
                if edge not in seen_edges:
                    seen_edges.add(edge)
                    boundary.add(edge)
                else:
                    # Shared by more than one triangle: an interior edge.
                    boundary.discard(edge)

    # Nothing survived the alpha filter (or there were no simplices at all);
    # popping from the empty set below would raise a KeyError.
    if not boundary:
        return []

    polygons = []
    remaining = set(boundary)
    sequence = list(remaining.pop())
    while remaining:
        tail = sequence[-1]
        for link in remaining:
            if link[0] == tail:
                sequence.append(link[1])
                remaining.remove(link)
                break
            elif link[1] == tail:
                sequence.append(link[0])
                remaining.remove(link)
                break
        else:
            # No remaining edge continues the current chain: it is finished,
            # so start a new chain from an arbitrary remaining edge.
            polygons.append(sequence)
            sequence = list(remaining.pop())

    polygons.append(sequence)

    result = []
    for sequence in polygons:
        coords = np.empty((len(sequence) + 1, 2), dtype=np.float32)
        coords[:-1] = points[sequence]
        coords[-1] = points[sequence[0]]
        result.append(coords)

    return result
70
+
71
+
72
def smooth_polygon(p, point_multipler=4, spline_coeff=0.0001):
    """Smooth a polygon by fitting a periodic spline and resampling it.

    Parameters
    ----------
    p: ndarray of floats of shape (n_rows, 2)
        The polygon vertices. The final row is dropped before fitting;
        presumably this is a polygon as returned by
        ``create_boundary_polygons`` -- confirm against callers.

    point_multipler: int (optional, default=4)
        The resampled curve has ``len(p) * point_multipler`` points.

    spline_coeff: float (optional, default=0.0001)
        The smoothing condition ``s`` passed to ``scipy.interpolate.splprep``.

    Returns
    -------
    smoothed: ndarray of floats of shape (len(p) * point_multipler, 2)
        Points sampled evenly in the spline parameter along the fitted curve.
    """
    ring = p[:-1]

    # Parameterise the spline by cumulative straight-line distance along the
    # ring, starting from zero at the first vertex.
    steps = ring[:-1] - ring[1:]
    segment_lengths = np.sqrt(np.sum(steps**2, axis=1))
    arc_length = np.concatenate(([0], segment_lengths.cumsum()))

    tck, _ = splprep(ring.T, u=arc_length, s=spline_coeff, per=True)

    n_samples = len(p) * point_multipler
    sample_at = np.linspace(arc_length[0], arc_length[-1], n_samples)
    xs, ys = splev(sample_at, tck)

    return np.vstack([xs, ys]).T
@@ -0,0 +1,579 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import textwrap
4
+
5
+ from matplotlib import pyplot as plt
6
+ from matplotlib.colors import to_rgb
7
+
8
+ from datamapplot.palette_handling import (
9
+ palette_from_datamap,
10
+ palette_from_cmap_and_datamap,
11
+ deep_palette,
12
+ pastel_palette,
13
+ )
14
+ from datamapplot.plot_rendering import render_plot
15
+ from datamapplot.medoids import medoid
16
+ from datamapplot.interactive_rendering import (
17
+ render_html,
18
+ label_text_and_polygon_dataframes,
19
+ InteractiveFigure,
20
+ )
21
+
22
+
23
def create_plot(
    data_map_coords,
    labels=None,
    *,
    title=None,
    sub_title=None,
    noise_label="Unlabelled",
    noise_color="#999999",
    color_label_text=True,
    color_label_arrows=False,
    label_wrap_width=16,
    label_color_map=None,
    figsize=(12, 12),
    dynamic_label_size=False,
    dpi=plt.rcParams["figure.dpi"],
    force_matplotlib=False,
    darkmode=False,
    highlight_labels=None,
    palette_hue_shift=0.0,
    palette_hue_radius_dependence=1.0,
    use_medoids=False,
    cmap=None,
    marker_color_array=None,
    **render_plot_kwds,
):
    """Create a static plot from ``data_map_coords`` with text labels provided by ``labels``.
    This is the primary function for DataMapPlot and provides the easiest interface to the
    static plotting functionality. This function provides a number of options, but also
    passes any further keyword options through to the lower level ``render_plot`` function
    so be sure to check the documentation for ``render_plot`` to discover further keyword
    arguments that can be used here as well.

    Parameters
    ----------
    data_map_coords: ndarray of floats of shape (n_samples, 2)
        The 2D coordinates for the data map. Usually this is produced via a
        dimension reduction technique such as UMAP, t-SNE, PacMAP, PyMDE etc.

    labels: ndarray of strings (object) of shape (n_samples,) or None (optional, default=None)
        A string label for each data point in the data map. There should ideally be
        only up to 64 unique labels. Noise or unlabelled points should have the
        same label as ``noise_label``, which is "Unlabelled" by default. If ``None``
        then every point is treated as noise and no text labels are produced.

    title: str or None (optional, default=None)
        A title for the plot. If ``None`` then no title is used for the plot.
        The title should be succinct; three to seven words.

    sub_title: str or None (optional, default=None)
        A sub-title for the plot. If ``None`` then no sub-title is used for the plot.
        The sub-title can be significantly longer than the title and provide more information
        about the plot and data sources.

    noise_label: str (optional, default="Unlabelled")
        The string used in the ``labels`` array to identify the unlabelled or noise points
        in the dataset.

    noise_color: str (optional, default="#999999")
        The colour to use for unlabelled or noise points in the data map. This should usually
        be a muted or neutral colour to distinguish background points from the labelled clusters.

    color_label_text: bool (optional, default=True)
        Whether to use colours for the text labels generated in the plot. If ``False`` then
        the text labels will default to either black or white depending on ``darkmode``.

    color_label_arrows: bool (optional, default=False)
        Whether to use colours for the arrows between the text labels and clusters. If ``False``
        then the arrows will default to either black or white depending on ``darkmode``.

    label_wrap_width: int (optional, default=16)
        The number of characters to apply text-wrapping at when creating text labels for
        display in the plot. Note that long words will not be broken, so you can choose
        relatively small values if you want tight text-wrapping.

    label_color_map: dict or None (optional, default=None)
        A colour mapping to use to colour points/clusters in the data map. The mapping should
        be keyed by the unique cluster labels in ``labels`` and take values that are hex-string
        representations of colours. If ``None`` then a colour mapping will be auto-generated.

    figsize: (int, int) (optional, default=(12,12))
        How big to make the figure in inches (actual pixel size will depend on ``dpi``).

    dynamic_label_size: bool (optional, default=False)
        Whether to dynamically resize the text labels based on the relative sizes of the
        clusters. This can be useful to help highlight larger clusters.

    dpi: int (optional, default=plt.rcParams["figure.dpi"])
        The dots-per-inch setting used when rendering the plot.

    force_matplotlib: bool (optional, default=False)
        Force using matplotlib instead of datashader for rendering the scatterplot of the
        data map. This can be useful if you wish to have a different marker_type, or variably
        sized markers based on a marker_size_array, neither of which are supported by the
        datashader based renderer.

    darkmode: bool (optional, default=False)
        Whether to render the plot in darkmode (with a dark background) or not.

    highlight_labels: list of str or None (optional, default=None)
        A list of unique labels that should have their text highlighted in the resulting plot.
        Arguments supported by ``render_plot`` can allow for control over how highlighted labels
        are rendered. By default they are simply rendered in bold text.

    palette_hue_shift: float (optional, default=0.0)
        A setting, in degrees clockwise, to shift the hue channel when generating a colour
        palette and color_mapping for the labels.

    palette_hue_radius_dependence: float (optional, default=1.0)
        A setting that determines how dependent on the radius the hue channel is. Larger
        values will result in more hue variation where there are more outlying points.

    use_medoids: bool (optional, default=False)
        Whether to use medoids instead of centroids to determine the "location" of the cluster,
        both for the label indicator line, and for palette colouring. Note that medoids are
        more computationally expensive, especially for large plots, so use with some caution.

    cmap: matplotlib cmap or None (optional, default=None)
        A linear matplotlib cmap colour map to use as the base for a generated colour mapping.
        This *should* be a matplotlib cmap that is smooth and linear, and cyclic
        (see the colorcet package for some good options). If not a cyclic cmap it will be
        "made" cyclic by reflecting it. If ``None`` then a custom method will be used instead.

    marker_color_array: iterable of colours or None (optional, default=None)
        If provided, these colours are used for the individual points instead of the
        colours derived from the labels. Label colours are unaffected.

    **render_plot_kwds
        All other keyword arguments are passed through the ``render_plot`` which provides
        significant further control over the aesthetics of the plot. If ``point_size`` or
        ``alpha`` are given here they override the heuristically chosen values.

    Returns
    -------

    fig: matplotlib.Figure
        The figure that the resulting plot is rendered to.

    ax: matplotlib.Axes
        The axes contained within the figure that the plot is rendered to.

    """
    if labels is None:
        label_locations = np.zeros((0, 2), dtype=np.float32)
        label_text = []
        # Use the caller's noise label (not a hard-coded string) so that the
        # noise checks below recognise every point as noise.
        cluster_label_vector = np.full(
            data_map_coords.shape[0], noise_label, dtype=object
        )
        unique_non_noise_labels = []
    else:
        cluster_label_vector = np.asarray(labels)
        unique_non_noise_labels = [
            label for label in np.unique(cluster_label_vector) if label != noise_label
        ]
        if use_medoids:
            label_locations = np.asarray(
                [
                    medoid(data_map_coords[cluster_label_vector == i])
                    for i in unique_non_noise_labels
                ]
            )
        else:
            label_locations = np.asarray(
                [
                    data_map_coords[cluster_label_vector == i].mean(axis=0)
                    for i in unique_non_noise_labels
                ]
            )
        label_text = [
            textwrap.fill(x, width=label_wrap_width, break_long_words=False)
            for x in unique_non_noise_labels
        ]
        if highlight_labels is not None:
            # Wrap the highlight labels the same way so they still match the
            # wrapped label text.
            highlight_labels = [
                textwrap.fill(x, width=label_wrap_width, break_long_words=False)
                for x in highlight_labels
            ]

    # If we don't have a color map, generate one
    if label_color_map is None:
        if cmap is None:
            palette = palette_from_datamap(
                data_map_coords,
                label_locations,
                hue_shift=palette_hue_shift,
                radius_weight_power=palette_hue_radius_dependence,
            )
        else:
            palette = palette_from_cmap_and_datamap(
                cmap,
                data_map_coords,
                label_locations,
                radius_weight_power=palette_hue_radius_dependence,
            )
        label_to_index_map = {
            name: index for index, name in enumerate(unique_non_noise_labels)
        }
        color_list = [
            palette[label_to_index_map[x]] if x in label_to_index_map else noise_color
            for x in cluster_label_vector
        ]
        label_color_map = {
            x: (
                palette[label_to_index_map[x]]
                if x in label_to_index_map
                else noise_color
            )
            for x in np.unique(cluster_label_vector)
        }
    else:
        color_list = [
            label_color_map[x] if x != noise_label else noise_color
            for x in cluster_label_vector
        ]

    if marker_color_array is not None:
        color_list = list(marker_color_array)

    label_colors = [label_color_map[x] for x in unique_non_noise_labels]

    if color_label_text and len(label_colors) > 0:
        # Darken and reduce chroma of label colors to get text labels
        if darkmode:
            label_text_colors = pastel_palette(label_colors)
        else:
            label_text_colors = deep_palette(label_colors)
    else:
        label_text_colors = None

    label_arrow_colors = label_colors if color_label_arrows else None

    if dynamic_label_size:
        font_scale_factor = np.sqrt(figsize[0] * figsize[1])
        cluster_sizes = np.sqrt(pd.Series(cluster_label_vector).value_counts())
        label_size_adjustments = cluster_sizes - cluster_sizes.min()
        largest_adjustment = label_size_adjustments.max()
        # When every label has the same count the range is zero; skip the
        # normalisation rather than dividing by zero and producing NaNs.
        if largest_adjustment > 0:
            label_size_adjustments /= largest_adjustment
        label_size_adjustments *= (
            render_plot_kwds.get("label_font_size", font_scale_factor) + 2
        )
        label_size_adjustments = dict(label_size_adjustments - 2)
        label_size_adjustments = [
            label_size_adjustments[x] for x in unique_non_noise_labels
        ]
    else:
        label_size_adjustments = [0.0] * len(unique_non_noise_labels)

    # Heuristics for point size and alpha values
    n_points = data_map_coords.shape[0]
    if n_points < 100_000 or force_matplotlib:
        magic_number = np.clip(128 * 4 ** (-np.log10(n_points)), 0.05, 64)
        point_scale_factor = np.sqrt(figsize[0] * figsize[1])
        point_size = magic_number * (point_scale_factor / 2)
        alpha = np.clip(magic_number, 0.05, 1)
    else:
        point_size = int(np.sqrt(figsize[0] * figsize[1]) * dpi) // 2048
        alpha = 1.0

    # Explicit values from the caller win over the heuristics; they are popped
    # so they are not passed to render_plot twice.
    point_size = render_plot_kwds.pop("point_size", point_size)
    alpha = render_plot_kwds.pop("alpha", alpha)

    fig, ax = render_plot(
        data_map_coords,
        color_list,
        label_text,
        label_locations,
        title=title,
        sub_title=sub_title,
        point_size=point_size,
        alpha=alpha,
        label_text_colors=label_text_colors,
        label_arrow_colors=label_arrow_colors,
        highlight_colors=[label_color_map[x] for x in unique_non_noise_labels],
        figsize=figsize,
        noise_color=noise_color,
        label_size_adjustments=label_size_adjustments,
        dpi=dpi,
        force_matplotlib=force_matplotlib,
        darkmode=darkmode,
        highlight_labels=highlight_labels,
        **render_plot_kwds,
    )

    return fig, ax
303
+
304
+
305
def _rgb_255(color):
    """Convert a matplotlib colour specification to an (r, g, b) tuple of 0-255 ints."""
    return tuple(int(channel * 255) for channel in to_rgb(color))


def _text_palette(colors, darkmode):
    """Return the label-text colours for ``colors`` as an array of 0-255 RGB rows."""
    tone_palette = pastel_palette if darkmode else deep_palette
    return np.asarray([_rgb_255(color) for color in tone_palette(colors)])


def create_interactive_plot(
    data_map_coords,
    *label_layers,
    hover_text=None,
    inline_data=True,
    noise_label="Unlabelled",
    noise_color="#999999",
    color_label_text=True,
    label_wrap_width=16,
    label_color_map=None,
    width="100%",
    height=800,
    darkmode=False,
    palette_hue_shift=0.0,
    palette_hue_radius_dependence=1.0,
    cmap=None,
    marker_size_array=None,
    marker_color_array=None,
    use_medoids=False,
    cluster_boundary_polygons=False,
    color_cluster_boundaries=True,
    polygon_alpha=0.1,
    **render_html_kwds,
):
    """Create an interactive plot from ``data_map_coords`` with text labels provided by
    one or more ``label_layers``. Any further keyword options are passed through to the
    lower level ``render_html`` function.

    Parameters
    ----------
    data_map_coords: ndarray of floats of shape (n_samples, 2)
        The 2D coordinates for the data map. Usually this is produced via a
        dimension reduction technique such as UMAP, t-SNE, PacMAP, PyMDE etc.

    *label_layers: np.ndarray
        All remaining positional arguments are assumed to be labels, each at
        a different level of resolution. Ideally these should be ordered such that
        the most fine-grained resolution is first, and the coarsest resolution is last.
        The individual labels-layers should be formatted the same as for `create_plot`.

    hover_text: list or np.ndarray or None (optional, default=None)
        An iterable (usually a list or numpy array) of text strings, one for each
        data point in `data_map_coords` that can be used in a tooltip when hovering
        over points.

    inline_data: bool (optional, default=True)
        Whether to include data inline in the HTML file (compressed and base64 encoded)
        or whether to write data to separate files that will then be referenced by the
        HTML file -- in the latter case you will need to ensure all the files are
        co-located and served over an http server or similar. Inline is the best
        default choice for easy portability and simplicity, but can result in very
        large file sizes.

    noise_label: str (optional, default="Unlabelled")
        The string used in the ``labels`` array to identify the unlabelled or noise points
        in the dataset.

    noise_color: str (optional, default="#999999")
        The colour to use for unlabelled or noise points in the data map. This should usually
        be a muted or neutral colour to distinguish background points from the labelled clusters.

    color_label_text: bool (optional, default=True)
        Whether to use colours for the text labels generated in the plot. If ``False`` then
        the text labels will default to either black or white depending on ``darkmode``.

    label_wrap_width: int (optional, default=16)
        The number of characters to apply text-wrapping at when creating text labels for
        display in the plot. Note that long words will not be broken, so you can choose
        relatively small values if you want tight text-wrapping.

    label_color_map: dict or None (optional, default=None)
        A colour mapping to use to colour points/clusters in the data map. The mapping should
        be keyed by the unique cluster labels in ``labels`` and take values that are hex-string
        representations of colours. If ``None`` then a colour mapping will be auto-generated.

    width: int or str (optional, default="100%")
        The width of the plot when rendered in a notebook. This should be a valid HTML iframe
        width specification -- either an integer number of pixels, or a string that can be
        properly interpreted in HTML.

    height: int or str (optional, default=800)
        The height of the plot when rendered in a notebook. This should be a valid HTML iframe
        height specification -- either an integer number of pixels, or a string that can be
        properly interpreted in HTML.

    darkmode: bool (optional, default=False)
        Whether to render the plot in darkmode (with a dark background) or not.

    palette_hue_shift: float (optional, default=0.0)
        A setting, in degrees clockwise, to shift the hue channel when generating a colour
        palette and color_mapping for the labels.

    palette_hue_radius_dependence: float (optional, default=1.0)
        A setting that determines how dependent on the radius the hue channel is. Larger
        values will result in more hue variation where there are more outlying points.

    cmap: matplotlib cmap or None (optional, default=None)
        A linear matplotlib cmap colour map to use as the base for a generated colour mapping.
        This *should* be a matplotlib cmap that is smooth and linear, and cyclic
        (see the colorcet package for some good options). If not a cyclic cmap it will be
        "made" cyclic by reflecting it. If ``None`` then a custom method will be used instead.

    marker_size_array: np.ndarray or None (optional, default=None)
        An array of sizes for each of the points in the data map scatterplot.

    marker_color_array: iterable of colours or None (optional, default=None)
        If provided, these colours (one per point, in any form matplotlib can convert to
        RGB) are used for the points instead of the colours derived from the label layers.

    use_medoids: bool (optional, default=False)
        Whether to use medoids instead of centroids to determine the "location" of the cluster,
        both for the label indicator line, and for palette colouring. Note that medoids are
        more computationally expensive, especially for large plots, so use with some caution.

    cluster_boundary_polygons: bool (optional, default=False)
        Whether to draw alpha-shape generated boundary lines around clusters. This can be useful
        in highlighting clusters at different resolutions when using many different label_layers.

    color_cluster_boundaries: bool (optional, default=True)
        If either this or ``color_label_text`` is true the colour columns of the label data
        are filled from the label text palette; otherwise a single near-black (or near-white
        in ``darkmode``) colour is used.

    polygon_alpha: float (optional, default=0.1)
        The alpha value to use when generating alpha-shape based boundaries around clusters.

    **render_html_kwds:
        All other keyword arguments will be passed through the `render_html` function. Please
        see the docstring of that function for further options that can control the
        aesthetic results.

    Returns
    -------

    figure: InteractiveFigure
        The rendered HTML wrapped in an ``InteractiveFigure`` constructed with the given
        ``width`` and ``height``.

    """
    if len(label_layers) == 0:
        # No labels at all: use a single empty label at the centre of the map.
        label_dataframe = pd.DataFrame(
            {
                "x": [data_map_coords.T[0].mean()],
                "y": [data_map_coords.T[1].mean()],
                "label": [""],
                "size": [np.power(data_map_coords.shape[0], 0.25)],
            }
        )
    else:
        label_dataframe = pd.concat(
            [
                label_text_and_polygon_dataframes(
                    labels,
                    data_map_coords,
                    noise_label=noise_label,
                    use_medoids=use_medoids,
                    cluster_polygons=cluster_boundary_polygons,
                    alpha=polygon_alpha,
                )
                for labels in label_layers
            ]
        )

    if label_color_map is None:
        label_locations = label_dataframe[["x", "y"]].values
        if cmap is None:
            palette = palette_from_datamap(
                data_map_coords,
                label_locations,
                hue_shift=palette_hue_shift,
                radius_weight_power=palette_hue_radius_dependence,
            )
        else:
            palette = palette_from_cmap_and_datamap(
                cmap,
                data_map_coords,
                label_locations,
                radius_weight_power=palette_hue_radius_dependence,
            )
        text_palette = _text_palette(palette, darkmode)
        color_map = {
            label: _rgb_255(color)
            for label, color in zip(label_dataframe.label, palette)
        }
    else:
        color_map = {
            label: _rgb_255(color) for label, color in label_color_map.items()
        }
        text_palette = _text_palette(
            [label_color_map[label] for label in label_dataframe.label], darkmode
        )

    if color_label_text or color_cluster_boundaries:
        label_dataframe["r"] = text_palette.T[0]
        label_dataframe["g"] = text_palette.T[1]
        label_dataframe["b"] = text_palette.T[2]
    else:
        neutral = 240 if darkmode else 15
        label_dataframe["r"] = neutral
        label_dataframe["g"] = neutral
        label_dataframe["b"] = neutral
    label_dataframe["a"] = 64

    label_dataframe["label"] = label_dataframe.label.map(
        lambda x: textwrap.fill(x, width=label_wrap_width, break_long_words=False)
    )

    point_dataframe = pd.DataFrame(
        {
            "x": data_map_coords.T[0],
            "y": data_map_coords.T[1],
        }
    )
    if hover_text is not None:
        point_dataframe["hover_text"] = hover_text

    if marker_size_array is not None:
        point_dataframe["size"] = marker_size_array

    if marker_color_array is None:
        # Start with every point in the noise colour, then paint over it layer
        # by layer. Layers are visited in reverse so the first layer is
        # applied last and wins wherever layers overlap.
        color_vector = np.asarray(
            [_rgb_255(noise_color)] * data_map_coords.shape[0],
            dtype=np.uint8,
        )
        for labels in reversed(label_layers):
            label_map = {n: i for i, n in enumerate(np.unique(labels))}
            if noise_label not in label_map:
                label_map[noise_label] = -1
            label_unmap = {i: n for n, i in label_map.items()}
            cluster_label_vector = np.asarray(pd.Series(labels).map(label_map))
            noise_index = label_map[noise_label]
            for label in label_unmap:
                if label != noise_index:
                    color_vector[cluster_label_vector == label] = color_map[
                        label_unmap[label]
                    ]
    else:
        color_vector = np.asarray(
            [_rgb_255(color) for color in marker_color_array],
            dtype=np.uint8,
        )

    point_dataframe["r"] = color_vector.T[0].astype(np.uint8)
    point_dataframe["g"] = color_vector.T[1].astype(np.uint8)
    point_dataframe["b"] = color_vector.T[2].astype(np.uint8)
    point_dataframe["a"] = np.uint8(180)

    html_str = render_html(
        point_dataframe,
        label_dataframe,
        inline_data=inline_data,
        color_label_text=color_label_text,
        darkmode=darkmode,
        **render_html_kwds,
    )

    return InteractiveFigure(html_str, width=width, height=height)