datamapplot 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamapplot/__init__.py +11 -0
- datamapplot/alpha_shapes.py +80 -0
- datamapplot/create_plots.py +579 -0
- datamapplot/interactive_rendering.py +949 -0
- datamapplot/medoids.py +103 -0
- datamapplot/overlap_computations.py +160 -0
- datamapplot/palette_handling.py +276 -0
- datamapplot/plot_rendering.py +611 -0
- datamapplot/text_placement.py +322 -0
- datamapplot-0.2.2.dist-info/LICENSE +21 -0
- datamapplot-0.2.2.dist-info/METADATA +187 -0
- datamapplot-0.2.2.dist-info/RECORD +14 -0
- datamapplot-0.2.2.dist-info/WHEEL +5 -0
- datamapplot-0.2.2.dist-info/top_level.txt +1 -0
datamapplot/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from datamapplot.create_plots import create_plot, create_interactive_plot
|
|
2
|
+
from datamapplot.plot_rendering import render_plot
|
|
3
|
+
from datamapplot.interactive_rendering import render_html
|
|
4
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
5
|
+
|
|
6
|
+
# Report the version of the installed distribution. When the package metadata
# cannot be found (PackageNotFoundError), fall back to a development marker.
try:
    __version__ = version("datamapplot")
except PackageNotFoundError:
    __version__ = "0.3-dev"

# Public API re-exported at package level.
__all__ = [
    "create_plot",
    "create_interactive_plot",
    "render_plot",
    "render_html",
]
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import numba
|
|
3
|
+
|
|
4
|
+
from scipy.interpolate import splprep, splev
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@numba.njit()
def circumradius(points):
    """Return the circumradius of a 2D triangle.

    Parameters
    ----------
    points: ndarray of floats of shape (3, 2)
        The three vertices of the triangle, one vertex per row.

    Returns
    -------
    radius: float
        The radius of the circle passing through the three vertices.
        Degenerate triangles (collinear or repeated vertices) have no finite
        circumcircle, so ``inf`` is returned for them rather than dividing
        by zero.
    """
    # Work relative to the first vertex: the circumcentre of the translated
    # triangle is (ux, uy), and its distance from the origin is the radius.
    bc = points[1:] - points[0]
    d = 2 * (bc[0, 0] * bc[1, 1] - bc[0, 1] * bc[1, 0])
    if d == 0:
        # Zero (doubled) signed area: the vertices are collinear.
        return np.inf
    b_norm = bc[0, 0] * bc[0, 0] + bc[0, 1] * bc[0, 1]
    c_norm = bc[1, 0] * bc[1, 0] + bc[1, 1] * bc[1, 1]
    ux = (bc[1, 1] * b_norm - bc[0, 1] * c_norm) / d
    uy = (bc[0, 0] * c_norm - bc[1, 0] * b_norm) / d
    return np.sqrt(ux * ux + uy * uy)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def create_boundary_polygons(points, simplices, alpha=0.1):
    """Trace the boundary polygons of an alpha shape built from a triangulation.

    Triangles whose circumradius is below ``alpha`` are kept. An edge that
    belongs to exactly one kept triangle is a boundary edge; boundary edges are
    then chained end-to-end into vertex sequences.

    Parameters
    ----------
    points: ndarray of floats of shape (n_points, 2)
        The coordinates of the triangulated points.

    simplices: iterable of integer index triples
        Each entry holds the three indices (into ``points``) of one triangle.

    alpha: float (optional, default=0.1)
        Circumradius threshold; only triangles with a smaller circumradius
        contribute to the shape.

    Returns
    -------
    polygons: list of ndarray of float32
        One array per traced vertex sequence. Each array has one more row than
        its sequence: the rows are the sequence's coordinates followed by a
        final row repeating the sequence's first vertex. The list is empty when
        no triangle passes the ``alpha`` filter.
    """
    all_edges = set()
    boundary = set()
    for simplex in simplices:
        if circumradius(points[simplex]) < alpha:
            for a, b in (
                (simplex[0], simplex[1]),
                (simplex[0], simplex[2]),
                (simplex[1], simplex[2]),
            ):
                # Store every edge with its smaller index first so that the
                # same edge seen from two triangles compares equal.
                edge = (a, b) if a < b else (b, a)
                if edge not in all_edges:
                    all_edges.add(edge)
                    boundary.add(edge)
                else:
                    # Seen before, so the edge is interior. ``discard`` keeps
                    # this safe if malformed input repeats an edge again.
                    boundary.discard(edge)

    if not boundary:
        # Nothing survived the alpha filter; there is no outline to trace.
        return []

    polygons = []
    search_set = boundary.copy()
    sequence = list(search_set.pop())
    while len(search_set) > 0:
        to_find = sequence[-1]
        # Look for any remaining edge touching the current end of the chain.
        # The set is only mutated immediately before ``break``, so iterating
        # over it here is safe.
        for link in search_set:
            if link[0] == to_find:
                sequence.append(link[1])
                search_set.remove(link)
                break
            elif link[1] == to_find:
                sequence.append(link[0])
                search_set.remove(link)
                break
        else:
            # No edge continues this chain: finish it and start a new one.
            polygons.append(sequence)
            sequence = list(search_set.pop())

    polygons.append(sequence)

    result = []
    for sequence in polygons:
        polygon = np.empty((len(sequence) + 1, 2), dtype=np.float32)
        polygon[:-1] = points[sequence]
        # Extra trailing row repeating the start vertex.
        polygon[-1] = points[sequence[0]]
        result.append(polygon)

    return result
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def smooth_polygon(p, point_multipler=4, spline_coeff=0.0001):
    """Resample a polygon outline along a smoothed periodic spline.

    Parameters
    ----------
    p: ndarray of floats of shape (n_rows, 2)
        The polygon vertices, one per row. The final row is not used in the
        fit; the remaining rows ``p[:-1]`` are fitted as a periodic curve.

    point_multipler: int (optional, default=4)
        The output contains ``len(p) * point_multipler`` points.

    spline_coeff: float (optional, default=0.0001)
        The smoothing factor passed to ``splprep`` as ``s``.

    Returns
    -------
    smoothed: ndarray of floats of shape (len(p) * point_multipler, 2)
        Points sampled at evenly spaced positions along the fitted spline.
    """
    outline = p[:-1]

    # Parameterise the curve by cumulative distance travelled along the outline.
    steps = outline[:-1] - outline[1:]
    step_lengths = np.sqrt((steps**2).sum(axis=1))
    travelled = np.concatenate(([0], np.cumsum(step_lengths)))

    tck, _ = splprep(outline.T, u=travelled, s=spline_coeff, per=True)

    n_samples = len(p) * point_multipler
    sample_at = np.linspace(travelled[0], travelled[-1], n_samples)
    xs, ys = splev(sample_at, tck)

    return np.vstack((xs, ys)).T
|
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import textwrap
|
|
4
|
+
|
|
5
|
+
from matplotlib import pyplot as plt
|
|
6
|
+
from matplotlib.colors import to_rgb
|
|
7
|
+
|
|
8
|
+
from datamapplot.palette_handling import (
|
|
9
|
+
palette_from_datamap,
|
|
10
|
+
palette_from_cmap_and_datamap,
|
|
11
|
+
deep_palette,
|
|
12
|
+
pastel_palette,
|
|
13
|
+
)
|
|
14
|
+
from datamapplot.plot_rendering import render_plot
|
|
15
|
+
from datamapplot.medoids import medoid
|
|
16
|
+
from datamapplot.interactive_rendering import (
|
|
17
|
+
render_html,
|
|
18
|
+
label_text_and_polygon_dataframes,
|
|
19
|
+
InteractiveFigure,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def create_plot(
    data_map_coords,
    labels=None,
    *,
    title=None,
    sub_title=None,
    noise_label="Unlabelled",
    noise_color="#999999",
    color_label_text=True,
    color_label_arrows=False,
    label_wrap_width=16,
    label_color_map=None,
    figsize=(12, 12),
    dynamic_label_size=False,
    dpi=plt.rcParams["figure.dpi"],
    force_matplotlib=False,
    darkmode=False,
    highlight_labels=None,
    palette_hue_shift=0.0,
    palette_hue_radius_dependence=1.0,
    use_medoids=False,
    cmap=None,
    marker_color_array=None,
    **render_plot_kwds,
):
    """Create a static plot from ``data_map_coords`` with text labels provided by ``labels``.
    This is the primary function for DataMapPlot and provides the easiest interface to the
    static plotting functionality. This function provides a number of options, but also
    passes any further keyword options through to the lower level ``render_plot`` function
    so be sure to check the documentation for ``render_plot`` to discover further keyword
    arguments that can be used here as well.

    Parameters
    ----------
    data_map_coords: ndarray of floats of shape (n_samples, 2)
        The 2D coordinates for the data map. Usually this is produced via a
        dimension reduction technique such as UMAP, t-SNE, PacMAP, PyMDE etc.

    labels: ndarray of strings (object) of shape (n_samples,) or None (optional, default=None)
        A string label for each data point in the data map. There should ideally be
        only up to 64 unique labels. Noise or unlabelled points should have the
        same label as ``noise_label``, which is "Unlabelled" by default. If ``None``
        then every point is treated as noise and no text labels are drawn.

    title: str or None (optional, default=None)
        A title for the plot. If ``None`` then no title is used for the plot.
        The title should be succinct; three to seven words.

    sub_title: str or None (optional, default=None)
        A sub-title for the plot. If ``None`` then no sub-title is used for the plot.
        The sub-title can be significantly longer than the title and provide more information
        about the plot and data sources.

    noise_label: str (optional, default="Unlabelled")
        The string used in the ``labels`` array to identify the unlabelled or noise points
        in the dataset.

    noise_color: str (optional, default="#999999")
        The colour to use for unlabelled or noise points in the data map. This should usually
        be a muted or neutral colour to distinguish background points from the labelled clusters.

    color_label_text: bool (optional, default=True)
        Whether to use colours for the text labels generated in the plot. If ``False`` then
        the text labels will default to either black or white depending on ``darkmode``.

    color_label_arrows: bool (optional, default=False)
        Whether to use colours for the arrows between the text labels and clusters. If ``False``
        then the arrows will default to either black or white depending on ``darkmode``.

    label_wrap_width: int (optional, default=16)
        The number of characters to apply text-wrapping at when creating text labels for
        display in the plot. Note that long words will not be broken, so you can choose
        relatively small values if you want tight text-wrapping.

    label_color_map: dict or None (optional, default=None)
        A colour mapping to use to colour points/clusters in the data map. The mapping should
        be keyed by the unique cluster labels in ``labels`` and take values that are hex-string
        representations of colours. If ``None`` then a colour mapping will be auto-generated.

    figsize: (int, int) (optional, default=(12,12))
        How big to make the figure in inches (actual pixel size will depend on ``dpi``).

    dynamic_label_size: bool (optional, default=False)
        Whether to dynamically resize the text labels based on the relative sizes of the
        clusters. This can be useful to help highlight larger clusters.

    dpi: int (optional, default=plt.rcParams["figure.dpi"])
        The dots-per-inch setting used when rendering the plot.

    force_matplotlib: bool (optional, default=False)
        Force using matplotlib instead of datashader for rendering the scatterplot of the
        data map. This can be useful if you wish to have a different marker_type, or variably
        sized markers based on a marker_size_array, neither of which are supported by the
        datashader based renderer.

    darkmode: bool (optional, default=False)
        Whether to render the plot in darkmode (with a dark background) or not.

    highlight_labels: list of str or None (optional, default=None)
        A list of unique labels that should have their text highlighted in the resulting plot.
        Arguments supported by ``render_plot`` can allow for control over how highlighted labels
        are rendered. By default they are simply rendered in bold text.

    palette_hue_shift: float (optional, default=0.0)
        A setting, in degrees clockwise, to shift the hue channel when generating a colour
        palette and color_mapping for the labels.

    palette_hue_radius_dependence: float (optional, default=1.0)
        A setting that determines how dependent on the radius the hue channel is. Larger
        values will result in more hue variation where there are more outlying points.

    use_medoids: bool (optional, default=False)
        Whether to use medoids instead of centroids to determine the "location" of the cluster,
        both for the label indicator line, and for palette colouring. Note that medoids are
        more computationally expensive, especially for large plots, so use with some caution.

    cmap: matplotlib cmap or None (optional, default=None)
        A linear matplotlib cmap colour map to use as the base for a generated colour mapping.
        This *should* be a matplotlib cmap that is smooth and linear, and cyclic
        (see the colorcet package for some good options). If not a cyclic cmap it will be
        "made" cyclic by reflecting it. If ``None`` then a custom method will be used instead.

    marker_color_array: iterable of colours or None (optional, default=None)
        One colour per data point. If provided these colours are used for the points in
        place of the colours derived from the labels; label text and arrow colours are
        still derived from the label colour mapping.

    **render_plot_kwds
        All other keyword arguments are passed through the ``render_plot`` which provides
        significant further control over the aesthetics of the plot.

    Returns
    -------

    fig: matplotlib.Figure
        The figure that the resulting plot is rendered to.

    ax: matplotlib.Axes
        The axes contained within the figure that the plot is rendered to.

    """
    if labels is None:
        label_locations = np.zeros((0, 2), dtype=np.float32)
        label_text = []
        # Fill with the caller's noise label (not a hard-coded string) so the
        # noise handling below recognises every point as noise.
        cluster_label_vector = np.full(
            data_map_coords.shape[0], noise_label, dtype=object
        )
        unique_non_noise_labels = []
    else:
        cluster_label_vector = np.asarray(labels)
        unique_non_noise_labels = [
            label for label in np.unique(cluster_label_vector) if label != noise_label
        ]
        if use_medoids:
            label_locations = np.asarray(
                [
                    medoid(data_map_coords[cluster_label_vector == i])
                    for i in unique_non_noise_labels
                ]
            )
        else:
            label_locations = np.asarray(
                [
                    data_map_coords[cluster_label_vector == i].mean(axis=0)
                    for i in unique_non_noise_labels
                ]
            )
        label_text = [
            textwrap.fill(x, width=label_wrap_width, break_long_words=False)
            for x in unique_non_noise_labels
        ]
        if highlight_labels is not None:
            # Wrap the highlight labels identically so they still match the
            # wrapped label text.
            highlight_labels = [
                textwrap.fill(x, width=label_wrap_width, break_long_words=False)
                for x in highlight_labels
            ]

    # If we don't have a color map, generate one
    if label_color_map is None:
        if cmap is None:
            palette = palette_from_datamap(
                data_map_coords,
                label_locations,
                hue_shift=palette_hue_shift,
                radius_weight_power=palette_hue_radius_dependence,
            )
        else:
            palette = palette_from_cmap_and_datamap(
                cmap,
                data_map_coords,
                label_locations,
                radius_weight_power=palette_hue_radius_dependence,
            )
        label_to_index_map = {
            name: index for index, name in enumerate(unique_non_noise_labels)
        }
        color_list = [
            palette[label_to_index_map[x]] if x in label_to_index_map else noise_color
            for x in cluster_label_vector
        ]
        label_color_map = {
            x: (
                palette[label_to_index_map[x]]
                if x in label_to_index_map
                else noise_color
            )
            for x in np.unique(cluster_label_vector)
        }
    else:
        color_list = [
            label_color_map[x] if x != noise_label else noise_color
            for x in cluster_label_vector
        ]

    # Explicit per-point colours override the label-derived ones.
    if marker_color_array is not None:
        color_list = list(marker_color_array)

    label_colors = [label_color_map[x] for x in unique_non_noise_labels]

    if color_label_text and len(label_colors) > 0:
        # Darken and reduce chroma of label colors to get text labels
        if darkmode:
            label_text_colors = pastel_palette(label_colors)
        else:
            label_text_colors = deep_palette(label_colors)
    else:
        label_text_colors = None

    if color_label_arrows:
        label_arrow_colors = label_colors
    else:
        label_arrow_colors = None

    no_size_adjustments = [0.0] * len(unique_non_noise_labels)
    if dynamic_label_size:
        font_scale_factor = np.sqrt(figsize[0] * figsize[1])
        cluster_sizes = np.sqrt(pd.Series(cluster_label_vector).value_counts())
        label_size_adjustments = cluster_sizes - cluster_sizes.min()
        size_spread = label_size_adjustments.max()
        if size_spread > 0:
            label_size_adjustments /= size_spread
            label_size_adjustments *= (
                render_plot_kwds.get("label_font_size", font_scale_factor) + 2
            )
            label_size_adjustments = dict(label_size_adjustments - 2)
            label_size_adjustments = [
                label_size_adjustments[x] for x in unique_non_noise_labels
            ]
        else:
            # Every label has the same count (or there is no data), so there
            # is nothing to scale by; dividing by the zero spread would only
            # produce NaN font adjustments.
            label_size_adjustments = no_size_adjustments
    else:
        label_size_adjustments = no_size_adjustments

    # Heuristics for point size and alpha values
    n_points = data_map_coords.shape[0]
    if n_points < 100_000 or force_matplotlib:
        magic_number = np.clip(128 * 4 ** (-np.log10(n_points)), 0.05, 64)
        point_scale_factor = np.sqrt(figsize[0] * figsize[1])
        point_size = magic_number * (point_scale_factor / 2)
        alpha = np.clip(magic_number, 0.05, 1)
    else:
        point_size = int(np.sqrt(figsize[0] * figsize[1]) * dpi) // 2048
        alpha = 1.0

    # Caller-supplied values win over the heuristics; pop them so they are not
    # passed to render_plot twice.
    point_size = render_plot_kwds.pop("point_size", point_size)
    alpha = render_plot_kwds.pop("alpha", alpha)

    fig, ax = render_plot(
        data_map_coords,
        color_list,
        label_text,
        label_locations,
        title=title,
        sub_title=sub_title,
        point_size=point_size,
        alpha=alpha,
        label_text_colors=label_text_colors,
        label_arrow_colors=label_arrow_colors,
        highlight_colors=list(label_colors),
        figsize=figsize,
        noise_color=noise_color,
        label_size_adjustments=label_size_adjustments,
        dpi=dpi,
        force_matplotlib=force_matplotlib,
        darkmode=darkmode,
        highlight_labels=highlight_labels,
        **render_plot_kwds,
    )

    return fig, ax
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _rgb255_tuples(colors):
    """Convert matplotlib colour specifications to a list of 0-255 integer RGB tuples."""
    return [tuple(int(c * 255) for c in to_rgb(color)) for color in colors]


def create_interactive_plot(
    data_map_coords,
    *label_layers,
    hover_text=None,
    inline_data=True,
    noise_label="Unlabelled",
    noise_color="#999999",
    color_label_text=True,
    label_wrap_width=16,
    label_color_map=None,
    width="100%",
    height=800,
    darkmode=False,
    palette_hue_shift=0.0,
    palette_hue_radius_dependence=1.0,
    cmap=None,
    marker_size_array=None,
    marker_color_array=None,
    use_medoids=False,
    cluster_boundary_polygons=False,
    color_cluster_boundaries=True,
    polygon_alpha=0.1,
    **render_html_kwds,
):
    """Create an interactive HTML plot from ``data_map_coords`` with one or more layers of labels.

    Parameters
    ----------
    data_map_coords: ndarray of floats of shape (n_samples, 2)
        The 2D coordinates for the data map. Usually this is produced via a
        dimension reduction technique such as UMAP, t-SNE, PacMAP, PyMDE etc.

    *label_layers: np.ndarray
        All remaining positional arguments are assumed to be labels, each at
        a different level of resolution. Ideally these should be ordered such that
        the most fine-grained resolution is first, and the coarsest resolution is last.
        The individual labels-layers should be formatted the same as for `create_plot`.

    hover_text: list or np.ndarray or None (optional, default=None)
        An iterable (usually a list or numpy array) of text strings, one for each
        data point in `data_map_coords` that can be used in a tooltip when hovering
        over points.

    inline_data: bool (optional, default=True)
        Whether to include data inline in the HTML file (compressed and base64 encoded)
        or whether to write data to separate files that will then be referenced by the
        HTML file -- in the latter case you will need to ensure all the files are
        co-located and served over an http server or similar. Inline is the best
        default choice for easy portability and simplicity, but can result in very
        large file sizes.

    noise_label: str (optional, default="Unlabelled")
        The string used in the ``labels`` array to identify the unlabelled or noise points
        in the dataset.

    noise_color: str (optional, default="#999999")
        The colour to use for unlabelled or noise points in the data map. This should usually
        be a muted or neutral colour to distinguish background points from the labelled clusters.

    color_label_text: bool (optional, default=True)
        Whether to use colours for the text labels generated in the plot. If ``False`` then
        the text labels will default to either black or white depending on ``darkmode``.

    label_wrap_width: int (optional, default=16)
        The number of characters to apply text-wrapping at when creating text labels for
        display in the plot. Note that long words will not be broken, so you can choose
        relatively small values if you want tight text-wrapping.

    label_color_map: dict or None (optional, default=None)
        A colour mapping to use to colour points/clusters in the data map. The mapping should
        be keyed by the unique cluster labels in ``labels`` and take values that are hex-string
        representations of colours. If ``None`` then a colour mapping will be auto-generated.

    width: int or str (optional, default="100%")
        The width of the plot when rendered in a notebook. This should be a valid HTML iframe
        width specification -- either an integer number of pixels, or a string that can be
        properly interpreted in HTML.

    height: int or str (optional, default=800)
        The height of the plot when rendered in a notebook. This should be a valid HTML iframe
        height specification -- either an integer number of pixels, or a string that can be
        properly interpreted in HTML.

    darkmode: bool (optional, default=False)
        Whether to render the plot in darkmode (with a dark background) or not.

    palette_hue_shift: float (optional, default=0.0)
        A setting, in degrees clockwise, to shift the hue channel when generating a colour
        palette and color_mapping for the labels.

    palette_hue_radius_dependence: float (optional, default=1.0)
        A setting that determines how dependent on the radius the hue channel is. Larger
        values will result in more hue variation where there are more outlying points.

    cmap: matplotlib cmap or None (optional, default=None)
        A linear matplotlib cmap colour map to use as the base for a generated colour mapping.
        This *should* be a matplotlib cmap that is smooth and linear, and cyclic
        (see the colorcet package for some good options). If not a cyclic cmap it will be
        "made" cyclic by reflecting it. If ``None`` then a custom method will be used instead.

    marker_size_array: np.ndarray or None (optional, default=None)
        An array of sizes for each of the points in the data map scatterplot.

    marker_color_array: iterable of colours or None (optional, default=None)
        One colour per data point, each in a form accepted by
        ``matplotlib.colors.to_rgb``. If provided these colours are used for the points
        in place of the colours derived from the label layers.

    use_medoids: bool (optional, default=False)
        Whether to use medoids instead of centroids to determine the "location" of the cluster,
        both for the label indicator line, and for palette colouring. Note that medoids are
        more computationally expensive, especially for large plots, so use with some caution.

    cluster_boundary_polygons: bool (optional, default=False)
        Whether to draw alpha-shape generated boundary lines around clusters. This can be useful
        in highlighting clusters at different resolutions when using many different label_layers.

    color_cluster_boundaries: bool (optional, default=True)
        If either this or ``color_label_text`` is true, the per-label colour columns of the
        label data are filled from the label palette; otherwise a single neutral
        near-black (or near-white in ``darkmode``) colour is used for every label.

    polygon_alpha: float (optional, default=0.1)
        The alpha value to use when generating alpha-shape based boundaries around clusters.

    **render_html_kwds:
        All other keyword arguments will be passed through the `render_html` function. Please
        see the docstring of that function for further options that can control the
        aesthetic results.

    Returns
    -------
    figure: InteractiveFigure
        An ``InteractiveFigure`` wrapping the HTML produced by ``render_html``, constructed
        with the given ``width`` and ``height``.

    """
    if len(label_layers) == 0:
        # No labels at all: use a single empty-text label at the centre of the map.
        label_dataframe = pd.DataFrame(
            {
                "x": [data_map_coords.T[0].mean()],
                "y": [data_map_coords.T[1].mean()],
                "label": [""],
                "size": [np.power(data_map_coords.shape[0], 0.25)],
            }
        )
    else:
        label_dataframe = pd.concat(
            [
                label_text_and_polygon_dataframes(
                    labels,
                    data_map_coords,
                    noise_label=noise_label,
                    use_medoids=use_medoids,
                    cluster_polygons=cluster_boundary_polygons,
                    alpha=polygon_alpha,
                )
                for labels in label_layers
            ]
        )

    if label_color_map is None:
        if cmap is None:
            palette = palette_from_datamap(
                data_map_coords,
                label_dataframe[["x", "y"]].values,
                hue_shift=palette_hue_shift,
                radius_weight_power=palette_hue_radius_dependence,
            )
        else:
            palette = palette_from_cmap_and_datamap(
                cmap,
                data_map_coords,
                label_dataframe[["x", "y"]].values,
                radius_weight_power=palette_hue_radius_dependence,
            )
        base_label_colors = palette
        color_map = dict(zip(label_dataframe.label, _rgb255_tuples(palette)))
    else:
        color_map = {
            label: tuple(int(c * 255) for c in to_rgb(color))
            for label, color in label_color_map.items()
        }
        # Labels missing from the user's mapping (notably the synthetic ""
        # label used when there are no label layers) fall back to the noise
        # colour instead of raising KeyError.
        base_label_colors = [
            label_color_map.get(label, noise_color) for label in label_dataframe.label
        ]

    # Text colours: pastel variants of the label colours in darkmode, deep
    # variants otherwise.
    adjust_palette = pastel_palette if darkmode else deep_palette
    # reshape keeps the (n_labels, 3) layout even if the palette comes back empty.
    text_palette = np.asarray(
        _rgb255_tuples(adjust_palette(base_label_colors))
    ).reshape(-1, 3)

    if color_label_text or color_cluster_boundaries:
        label_dataframe["r"] = text_palette.T[0]
        label_dataframe["g"] = text_palette.T[1]
        label_dataframe["b"] = text_palette.T[2]
    else:
        neutral = 240 if darkmode else 15
        label_dataframe["r"] = neutral
        label_dataframe["g"] = neutral
        label_dataframe["b"] = neutral
    label_dataframe["a"] = 64

    label_dataframe["label"] = label_dataframe.label.map(
        lambda x: textwrap.fill(x, width=label_wrap_width, break_long_words=False)
    )

    point_dataframe = pd.DataFrame(
        {
            "x": data_map_coords.T[0],
            "y": data_map_coords.T[1],
        }
    )
    if hover_text is not None:
        point_dataframe["hover_text"] = hover_text

    if marker_size_array is not None:
        point_dataframe["size"] = marker_size_array

    if marker_color_array is None:
        # Start with every point in the noise colour, then paint each layer
        # over it. Layers are applied in reverse so the first layer is painted
        # last and takes precedence where several layers label the same point.
        color_vector = np.tile(
            np.asarray(_rgb255_tuples([noise_color]), dtype=np.uint8),
            (data_map_coords.shape[0], 1),
        )
        for labels in reversed(label_layers):
            layer_labels = np.asarray(labels)
            for label in np.unique(layer_labels):
                if label == noise_label:
                    continue
                color_vector[layer_labels == label] = color_map[label]
    else:
        color_vector = np.asarray(
            _rgb255_tuples(marker_color_array),
            dtype=np.uint8,
        )

    point_dataframe["r"] = color_vector.T[0].astype(np.uint8)
    point_dataframe["g"] = color_vector.T[1].astype(np.uint8)
    point_dataframe["b"] = color_vector.T[2].astype(np.uint8)
    point_dataframe["a"] = np.uint8(180)

    html_str = render_html(
        point_dataframe,
        label_dataframe,
        inline_data=inline_data,
        color_label_text=color_label_text,
        darkmode=darkmode,
        **render_html_kwds,
    )

    return InteractiveFigure(html_str, width=width, height=height)
|