sclab 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sclab/__init__.py +7 -0
- sclab/_io.py +32 -0
- sclab/_sclab.py +80 -0
- sclab/dataset/__init__.py +8 -0
- sclab/dataset/_dataset.py +398 -0
- sclab/dataset/_exceptions.py +2 -0
- sclab/dataset/plotter/__init__.py +7 -0
- sclab/dataset/plotter/_controls.py +594 -0
- sclab/dataset/plotter/_plotter.py +1017 -0
- sclab/dataset/plotter/_utils.py +437 -0
- sclab/dataset/processor/__init__.py +7 -0
- sclab/dataset/processor/_processor.py +1063 -0
- sclab/dataset/processor/step/__init__.py +7 -0
- sclab/dataset/processor/step/_basic_processor_step.py +109 -0
- sclab/dataset/processor/step/_processor_step_base.py +120 -0
- sclab/event/__init__.py +7 -0
- sclab/event/_broker.py +201 -0
- sclab/event/_client.py +81 -0
- sclab/event/_utils.py +14 -0
- sclab/examples/__init__.py +5 -0
- sclab/examples/processor_steps/__init__.py +15 -0
- sclab/examples/processor_steps/_cluster.py +37 -0
- sclab/examples/processor_steps/_neighbors.py +72 -0
- sclab/examples/processor_steps/_pca.py +124 -0
- sclab/examples/processor_steps/_preprocess.py +186 -0
- sclab/examples/processor_steps/_qc.py +93 -0
- sclab/examples/processor_steps/_umap.py +48 -0
- sclab-0.1.7.dist-info/METADATA +139 -0
- sclab-0.1.7.dist-info/RECORD +30 -0
- sclab-0.1.7.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
from functools import lru_cache
|
|
2
|
+
from itertools import product
|
|
3
|
+
from typing import Literal, NamedTuple
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import plotly.graph_objects as go
|
|
8
|
+
from numpy import float64
|
|
9
|
+
from numpy.typing import NDArray
|
|
10
|
+
from scipy.integrate import trapezoid
|
|
11
|
+
from scipy.interpolate import BSpline, make_smoothing_spline
|
|
12
|
+
from sklearn.neighbors import KernelDensity
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def make_periodic_smoothing_spline(
|
|
16
|
+
x: NDArray[float64],
|
|
17
|
+
y: NDArray[float64],
|
|
18
|
+
t_range: tuple[float, float],
|
|
19
|
+
w: NDArray[float64] | None = None,
|
|
20
|
+
lam: float | None = None,
|
|
21
|
+
n_reps: int = 5,
|
|
22
|
+
) -> BSpline:
|
|
23
|
+
assert n_reps % 2 == 1
|
|
24
|
+
|
|
25
|
+
o = np.argsort(x)
|
|
26
|
+
x, y = x[o], y[o]
|
|
27
|
+
|
|
28
|
+
tmin, tmax = t_range
|
|
29
|
+
tspan = tmax - tmin
|
|
30
|
+
|
|
31
|
+
mask = np.logical_and((x >= tmin), (x < tmax))
|
|
32
|
+
x, y = x[mask], y[mask]
|
|
33
|
+
n = x.size
|
|
34
|
+
|
|
35
|
+
xx = np.concatenate([x + i * tspan for i in range(n_reps)])
|
|
36
|
+
yy = np.tile(y, n_reps)
|
|
37
|
+
ww = np.tile(w, n_reps) if w is not None else None
|
|
38
|
+
bspl = make_smoothing_spline(xx, yy, ww, lam)
|
|
39
|
+
t, c, k = bspl.tck
|
|
40
|
+
|
|
41
|
+
N = n_reps // 2
|
|
42
|
+
t = t - tspan * N
|
|
43
|
+
t = t[n * N : -n * N + 1]
|
|
44
|
+
c = c[n * N : -n * N + 1]
|
|
45
|
+
|
|
46
|
+
return BSpline(t, c, k)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class DensityResult(NamedTuple):
    """Bundle of everything produced by one density estimation run."""

    # fitted estimator
    kde: KernelDensity
    # number of grid points along each dimension
    grid_size: tuple
    # per-dimension bounds used to scale the data and the grid
    bounds: tuple
    # grid point coordinates, one row per grid point
    grid: NDArray
    # density evaluated at each row of ``grid``
    density: NDArray
    # factor the density values were divided by
    scale: float
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _density_nd(
    data: NDArray,
    bandwidth: float | Literal["scott", "silverman"] | None = None,
    bandwidth_factor: float = 1,
    algorithm: Literal["kd_tree", "ball_tree", "auto"] = "auto",
    kernel: str = "gaussian",
    metric: str = "euclidean",
    grid_size: tuple | None = None,
    max_grid_size: int = 2**5 + 1,
    periodic: bool = False,
    bounds: tuple[tuple[float, float]] | None = None,
    normalize: bool = False,
) -> DensityResult:
    """Estimate the density of ``data`` on a regular grid with a KDE.

    The data is rescaled so that ``bounds`` maps onto the unit interval in
    every dimension, a ``KernelDensity`` estimator is fitted on the rescaled
    data, and the estimate is evaluated on a regular grid covering the unit
    cube. The grid is mapped back to the original coordinates before it is
    returned.

    Parameters
    ----------
    data : NDArray
        Samples, shape ``(nsamples, ndims)``. A 1-D array is treated as a
        single column.
    bandwidth : float | {"scott", "silverman"} | None
        Passed to ``KernelDensity``. When ``None``, ``bandwidth_factor`` is
        used as the bandwidth.
    bandwidth_factor : float
        Bandwidth used when ``bandwidth`` is ``None``; ignored otherwise.
    algorithm, kernel, metric
        Passed to ``KernelDensity`` unchanged.
    grid_size : tuple | None
        Number of grid points per dimension. Defaults to ``max_grid_size``
        points along every dimension of the data.
    max_grid_size : int
        Per-dimension grid size used when ``grid_size`` is ``None``.
    periodic : bool
        When true, the data is replicated at offsets of -1, 0 and +1 period
        along every dimension (``3**ndims`` copies) before fitting.
        Requires ``bounds``.
    bounds : tuple[tuple[float, float]] | None
        One ``(low, high)`` pair per dimension. Defaults to the data range
        extended by 10% of its span on each side.
    normalize : bool
        When true, the density is divided by a numerical estimate of its
        integral over the grid.

    Returns
    -------
    DensityResult
        ``(kde, grid_size, bounds, grid, density, scale)``.
    """
    if data.ndim == 1:
        data = data.reshape(-1, 1)

    nsamples, ndims = data.shape
    if bounds is None:
        assert not periodic, "bounds must be specified if periodic=True"
        lower, upper = data.min(axis=0), data.max(axis=0)
        span = upper - lower
        margins = span / 10
        bounds = tuple(zip(lower - margins, upper + margins))
    assert len(bounds) == ndims, "must provide bounds for each dimension"
    bounds = np.array(bounds)

    if periodic:
        # one shifted copy of the data for every combination of -1/0/+1
        # periods along each dimension
        offsets = np.array(list(product([-1, 0, 1], repeat=ndims)))
        offsets = offsets * np.diff(bounds).T
        dat = np.empty((nsamples * 3**ndims, ndims))
        for i, offset in enumerate(offsets):
            dat[i * nsamples : (i + 1) * nsamples] = data + offset[None, :]
    else:
        dat = data
    # rescale so that the bounds map onto [0, 1] in every dimension
    dat = (dat - bounds.min(axis=1)) / (bounds.max(axis=1) - bounds.min(axis=1))

    if bandwidth is None:
        bandwidth = bandwidth_factor

    kde = KernelDensity(
        bandwidth=bandwidth,
        algorithm=algorithm,
        kernel=kernel,
        metric=metric,
    )
    kde.fit(dat)

    if grid_size is None:
        # one axis per data dimension; a fixed 2-tuple here would produce a
        # grid whose dimensionality does not match non-2D data
        grid_size = (max_grid_size,) * ndims

    grid = np.meshgrid(*[np.linspace(0, 1, n) for n in grid_size], indexing="ij")
    grid = np.vstack([x.ravel() for x in grid]).T
    d = np.exp(kde.score_samples(grid))

    # NOTE(review): the 1-D branch integrates over the unit-scaled grid while
    # the N-D branch uses cell sizes in original units — confirm intended.
    if normalize and ndims == 1:
        scale = trapezoid(d, grid.reshape(-1))
    elif normalize:
        # perform simple Riemann sum for higher dimensions
        deltas = np.diff(bounds).T / (np.array(grid_size) - 1)
        tmp = d.reshape(grid_size).copy()
        for i, s in enumerate(grid_size):
            # take left corners for the sum
            tmp = tmp.take(np.arange(s - 1), axis=i)
        scale = tmp.sum() * np.prod(deltas)
    else:
        scale = 1

    d /= scale

    # map the grid from the unit cube back to the original coordinates
    grid = (grid * (bounds.max(axis=1) - bounds.min(axis=1))) + bounds.min(axis=1)

    return DensityResult(kde, grid_size, bounds, grid, d, scale)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@lru_cache
def _make_density_heatmap(
    data: tuple[tuple[float, float]],
    bandwidth_factor: float,
    grid_resolution: int,
    line_smoothing: float,
    contours: int,
    color: str = "orange",
):
    """Build a contour trace of the estimated 2-D density of ``data``.

    Results are memoised with ``lru_cache``, which is why ``data`` is taken
    as a (hashable) tuple of coordinate pairs rather than an array.

    Parameters
    ----------
    data : tuple[tuple[float, float]]
        Point coordinates, one pair per point.
    bandwidth_factor : float
        Forwarded to ``_density_nd``.
    grid_resolution : int
        The density grid has ``2**grid_resolution + 1`` points per axis.
    line_smoothing : float
        Forwarded to ``go.Contour``.
    contours : int
        Number of steps the range between the lowest and highest density
        value is divided into.
    color : str
        Upper end of the white-to-color scale.

    Returns
    -------
    go.Contour
    """
    estimate = _density_nd(
        np.array(data),
        bandwidth_factor=bandwidth_factor,
        max_grid_size=2**grid_resolution + 1,
    )
    shape = estimate.grid_size
    n_x, n_y = shape
    coords: NDArray = estimate.grid.reshape(shape + (2,))
    density: NDArray = estimate.density.reshape(shape)

    # axis tick positions are read off the first row/column of the grid
    xs = coords[:n_x, 0, 0]
    ys = coords[0, :n_y, 1]
    # transposed so that rows follow y and columns follow x
    zs = density.clip(min=0).T

    level_start = zs.min() + 1e-9
    level_end = zs.max() + 1e-9
    levels = {
        "start": level_start,
        "end": level_end,
        "size": (level_end - level_start) / contours,
    }

    return go.Contour(
        z=zs,
        x=xs,
        y=ys,
        showscale=False,
        colorscale=["white", color],
        zmin=0,
        line_smoothing=line_smoothing,
        contours=levels,
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _get_color_sequence():
    """Get a list of color names that are distinguishable by redmean distance.

    Colors are collected from a fixed list of plotly qualitative palettes, in
    order. A color is kept only if its redmean distance to every banned color
    and to every previously kept color exceeds a fixed threshold.

    Returns
    -------
    list[str]
        Kept colors as labels produced by ``plotly.colors.label_rgb``.
    """

    import plotly.colors as pc
    from scipy.spatial.distance import pdist, squareform

    palette_names = [
        "D3",
        "Plotly",
        "G10",
        "T10",
        "Alphabet",
        "Dark24",
        "Light24",
        "Set1",
        "Pastel1",
        "Dark2",
        "Set2",
        "Pastel2",
        "Set3",
        "Antique",
        "Bold",
        "Pastel",
        "Prism",
        "Safe",
        "Vivid",
    ]
    color_sequence = []
    for scale in palette_names:
        color_sequence.extend(getattr(pc.qualitative, scale))

    banned = [
        (211, 211, 211),  # lightgray - used for missing values
    ]
    # palettes mix hex strings and "rgb(...)" labels; bring both to RGB triples
    X = np.array(
        [
            pc.hex_to_rgb(c) if c.startswith("#") else pc.unlabel_rgb(c)
            for c in color_sequence
        ],
        dtype=int,
    )
    color_sequence = np.array([pc.label_rgb(c) for c in X])

    def redmean(c1, c2):
        # https://en.wikipedia.org/wiki/Color_difference#sRGB
        r1, g1, b1 = c1
        r2, g2, b2 = c2
        rm = (r1 + r2) / 2
        dr, dg, db = r1 - r2, g1 - g2, b1 - b2
        return np.sqrt(
            (2 + rm / 256) * dr**2 + 4 * dg**2 + (2 + (255 - rm) / 256) * db**2
        )

    D = squareform(pdist(X, redmean))
    # a color must never be rejected for being close to itself
    np.fill_diagonal(D, np.inf)

    mindist = 65

    # start from the colors that are far enough from every banned color ...
    mask = (np.array([[redmean(b, c) for b in banned] for c in X]) > mindist).all(
        axis=1
    )
    # ... then greedily drop any color too close to one kept before it
    for i, d in enumerate(D):
        mask[i] *= (d[: i + 1][mask[: i + 1]] > mindist).all()

    return color_sequence[mask].tolist()
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# Built once, when this module is imported, by _get_color_sequence().
COLOR_DISCRETE_SEQUENCE = _get_color_sequence()
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def Rx(degs: float):
    """
    Build the matrix that rotates 3D row vectors about the x-axis.

    A positive angle turns points counter-clockwise as seen from the positive
    x-axis. The matrix is meant to be applied on the right of the data, i.e.
    to row vectors or to an ``(n, 3)`` array of points.

    Parameters
    ----------
    degs : float
        Rotation angle, in degrees.

    Returns
    -------
    NDArray
        3x3 right-operating rotation matrix.

    Examples
    --------
    >>> rotated_X = X @ Rx(45)
    """
    theta = degs * np.pi / 180
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # rows below are the columns of the usual left-operating matrix
    return np.array(
        [
            [1.0, 0.0, 0.0],
            [0.0, cos_t, sin_t],
            [0.0, -sin_t, cos_t],
        ]
    )
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def Ry(degs: float):
    """
    Build the matrix that rotates 3D row vectors about the y-axis.

    A positive angle turns points counter-clockwise as seen from the positive
    y-axis. The matrix is meant to be applied on the right of the data, i.e.
    to row vectors or to an ``(n, 3)`` array of points.

    Parameters
    ----------
    degs : float
        Rotation angle, in degrees.

    Returns
    -------
    NDArray
        3x3 right-operating rotation matrix.

    Examples
    --------
    >>> rotated_X = X @ Ry(45)
    """
    theta = degs * np.pi / 180
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # rows below are the columns of the usual left-operating matrix
    return np.array(
        [
            [cos_t, 0.0, -sin_t],
            [0.0, 1.0, 0.0],
            [sin_t, 0.0, cos_t],
        ]
    )
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def Rz(degs: float):
    """
    Build the matrix that rotates 3D row vectors about the z-axis.

    A positive angle turns points counter-clockwise as seen from the positive
    z-axis. The matrix is meant to be applied on the right of the data, i.e.
    to row vectors or to an ``(n, 3)`` array of points.

    Parameters
    ----------
    degs : float
        Rotation angle, in degrees.

    Returns
    -------
    NDArray
        3x3 right-operating rotation matrix.

    Examples
    --------
    >>> rotated_X = X @ Rz(45)
    """
    theta = degs * np.pi / 180
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # rows below are the columns of the usual left-operating matrix
    return np.array(
        [
            [cos_t, sin_t, 0.0],
            [-sin_t, cos_t, 0.0],
            [0.0, 0.0, 1.0],
        ]
    )
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def Rxyz(alpha, beta, gamma):
    """Combine rotations about the three axes into a single 3x3 matrix.

    The result is the matrix product ``Rz(gamma) @ Ry(beta) @ Rx(alpha)``,
    with all angles given in degrees.
    """
    about_z = Rz(gamma)
    about_y = Ry(beta)
    about_x = Rx(alpha)
    # same left-to-right association as a chained ``a @ b @ c``
    return (about_z @ about_y) @ about_x
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def rotate_xyz(
    X: pd.DataFrame,
    alpha: float,
    beta: float,
    gamma: float,
):
    """
    Apply the combined rotation ``Rxyz(alpha, beta, gamma)`` to the data in X.

    Parameters
    ----------
    X : pd.DataFrame
        3D data to be rotated, one point per row
    alpha : float
        rotation angle about the x axis, in degrees
    beta : float
        rotation angle about the y axis, in degrees
    gamma : float
        rotation angle about the z axis, in degrees

    Returns
    -------
    pd.DataFrame
        rotated data, carrying the column labels of the input
    """
    rotation = Rxyz(alpha, beta, gamma)
    rotated = X @ rotation
    # give the product the same column labels as the input
    rotated.columns = X.columns
    return rotated
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def rotate_multiple_steps(
    X: pd.DataFrame,
    steps: str,
):
    """
    Rotate the data in X according to a sequence of steps.

    Steps are applied from left to right. Processing stops silently at the
    first step whose axis is not x, y or z or whose angle is not a number;
    the rotations applied up to that point are kept. Empty and
    single-character steps are skipped.

    Parameters
    ----------
    X : pd.DataFrame
        3D data to be rotated
    steps : str
        string of "axis:angle" pairs separated by "," or ";", where axis is
        in {"x", "y", "z"} (case-insensitive) and angle is in degrees. "="
        may be used instead of ":", and spaces are ignored.

    Returns
    -------
    pd.DataFrame
        rotated data, carrying the column labels of the input
    """
    colnames = X.columns

    # remove spaces
    spec = steps.replace(" ", "")

    # replace all separators with newlines
    for sep in ",;":
        spec = spec.replace(sep, "\n")

    # remove key:value assignment tokens
    for token in ":=":
        spec = spec.replace(token, "")

    for step in spec.split("\n"):
        # strip before the length check so that tokens made only of other
        # whitespace (tabs, carriage returns) are skipped instead of
        # failing on step[0]
        step = step.strip()
        if len(step) < 2:
            continue

        axis = step[0].lower()
        if axis not in {"x", "y", "z"}:
            break

        try:
            angle = float(step[1:])
        except ValueError:
            break

        if axis == "x":
            X = X @ Rx(angle)
        elif axis == "y":
            X = X @ Ry(angle)
        else:
            X = X @ Rz(angle)

    X.columns = colnames
    return X
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def rotate_and_project_traces(
    X: pd.DataFrame,
    figure_data: list[go.Scatter | go.Scattergl | go.Scatter3d],
    alpha: float,
    beta: float,
    gamma: float,
):
    """
    Rotate the points in X and write the first two rotated coordinates into
    the ``x``/``y`` of each trace, in place.

    The rows belonging to a trace are looked up in the rotated data by the
    trace's ``hovertext``. Traces whose ``hovertext`` is neither a numpy
    array nor a list are left untouched.

    Parameters
    ----------
    X : pd.DataFrame
        3D data, indexed by the labels found in the traces' ``hovertext``
    figure_data : list
        traces to update
    alpha, beta, gamma : float
        angles in degrees, passed to ``Rxyz``
    """
    rotation = Rxyz(alpha, beta, gamma)
    rotated = (rotation @ X.T).T

    for trace in figure_data:
        marker_ids = trace.hovertext
        # NOTE(review): tuples are skipped by this check — confirm that is
        # intended for the values plotly returns for ``hovertext``.
        if isinstance(marker_ids, (np.ndarray, list)):
            xy = rotated.loc[marker_ids].values[:, :2].T
            trace.x, trace.y = xy
|