sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sclab/__init__.py +3 -1
- sclab/_io.py +83 -12
- sclab/_methods_registry.py +65 -0
- sclab/_sclab.py +241 -21
- sclab/dataset/_dataset.py +4 -6
- sclab/dataset/processor/_processor.py +41 -19
- sclab/dataset/processor/_results_panel.py +94 -0
- sclab/dataset/processor/step/_processor_step_base.py +12 -6
- sclab/examples/processor_steps/__init__.py +8 -0
- sclab/examples/processor_steps/_cluster.py +2 -2
- sclab/examples/processor_steps/_differential_expression.py +329 -0
- sclab/examples/processor_steps/_doublet_detection.py +68 -0
- sclab/examples/processor_steps/_gene_expression.py +125 -0
- sclab/examples/processor_steps/_integration.py +116 -0
- sclab/examples/processor_steps/_neighbors.py +26 -6
- sclab/examples/processor_steps/_pca.py +13 -8
- sclab/examples/processor_steps/_preprocess.py +52 -25
- sclab/examples/processor_steps/_qc.py +24 -8
- sclab/examples/processor_steps/_umap.py +2 -2
- sclab/gui/__init__.py +0 -0
- sclab/gui/components/__init__.py +7 -0
- sclab/gui/components/_guided_pseudotime.py +482 -0
- sclab/gui/components/_transfer_metadata.py +186 -0
- sclab/methods/__init__.py +50 -0
- sclab/preprocess/__init__.py +26 -0
- sclab/preprocess/_cca.py +176 -0
- sclab/preprocess/_cca_integrate.py +109 -0
- sclab/preprocess/_filter_obs.py +42 -0
- sclab/preprocess/_harmony.py +421 -0
- sclab/preprocess/_harmony_integrate.py +53 -0
- sclab/preprocess/_normalize_weighted.py +65 -0
- sclab/preprocess/_pca.py +51 -0
- sclab/preprocess/_preprocess.py +155 -0
- sclab/preprocess/_qc.py +38 -0
- sclab/preprocess/_rpca.py +116 -0
- sclab/preprocess/_subset.py +208 -0
- sclab/preprocess/_transfer_metadata.py +196 -0
- sclab/preprocess/_transform.py +82 -0
- sclab/preprocess/_utils.py +96 -0
- sclab/scanpy/__init__.py +0 -0
- sclab/scanpy/_compat.py +92 -0
- sclab/scanpy/_settings.py +526 -0
- sclab/scanpy/logging.py +290 -0
- sclab/scanpy/plotting/__init__.py +0 -0
- sclab/scanpy/plotting/_rcmod.py +73 -0
- sclab/scanpy/plotting/palettes.py +221 -0
- sclab/scanpy/readwrite.py +1108 -0
- sclab/tools/__init__.py +0 -0
- sclab/tools/cellflow/__init__.py +0 -0
- sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
- sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
- sclab/tools/cellflow/pseudotime/__init__.py +0 -0
- sclab/tools/cellflow/pseudotime/_pseudotime.py +336 -0
- sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
- sclab/tools/cellflow/utils/__init__.py +0 -0
- sclab/tools/cellflow/utils/density_nd.py +215 -0
- sclab/tools/cellflow/utils/interpolate.py +334 -0
- sclab/tools/cellflow/utils/periodic_genes.py +106 -0
- sclab/tools/cellflow/utils/smoothen.py +124 -0
- sclab/tools/cellflow/utils/times.py +55 -0
- sclab/tools/differential_expression/__init__.py +7 -0
- sclab/tools/differential_expression/_pseudobulk_edger.py +309 -0
- sclab/tools/differential_expression/_pseudobulk_helpers.py +290 -0
- sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
- sclab/tools/doublet_detection/__init__.py +5 -0
- sclab/tools/doublet_detection/_scrublet.py +64 -0
- sclab/tools/embedding/__init__.py +0 -0
- sclab/tools/imputation/__init__.py +0 -0
- sclab/tools/imputation/_alra.py +135 -0
- sclab/tools/labeling/__init__.py +6 -0
- sclab/tools/labeling/sctype.py +233 -0
- sclab/tools/utils/__init__.py +5 -0
- sclab/tools/utils/_aggregate_and_filter.py +290 -0
- sclab/utils/__init__.py +5 -0
- sclab/utils/_write_excel.py +510 -0
- {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/METADATA +29 -12
- sclab-0.3.4.dist-info/RECORD +93 -0
- {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/WHEEL +1 -1
- sclab-0.3.4.dist-info/licenses/LICENSE +29 -0
- sclab-0.1.7.dist-info/RECORD +0 -30
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Callable
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from numpy import asarray, ascontiguousarray, floating, prod
|
|
6
|
+
from numpy import empty as np_empty
|
|
7
|
+
from numpy import float64 as np_float64
|
|
8
|
+
from numpy.typing import NDArray
|
|
9
|
+
from scipy.fft import fft, fftfreq
|
|
10
|
+
from scipy.interpolate import BSpline, _fitpack_impl, make_smoothing_spline
|
|
11
|
+
from tqdm.auto import tqdm
|
|
12
|
+
|
|
13
|
+
from .smoothen import choose_grid_size, count_data_in_intervals, smoothen_data
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from scipy.interpolate._dierckx import evaluate_spline
|
|
17
|
+
except ImportError:
|
|
18
|
+
from scipy.interpolate._bspl import evaluate_spline
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
PIX2 = 2 * np.pi
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def fit_smoothing_spline(
|
|
26
|
+
x: NDArray[floating],
|
|
27
|
+
y: NDArray[floating],
|
|
28
|
+
t_range: tuple[float, float],
|
|
29
|
+
w: NDArray[floating] | None = None,
|
|
30
|
+
lam: float | None = None,
|
|
31
|
+
periodic: bool = False,
|
|
32
|
+
n_reps: int = 3,
|
|
33
|
+
) -> BSpline:
|
|
34
|
+
if periodic:
|
|
35
|
+
assert n_reps % 2 == 1
|
|
36
|
+
|
|
37
|
+
o = np.argsort(x)
|
|
38
|
+
x, y = x[o], y[o]
|
|
39
|
+
if w is not None:
|
|
40
|
+
w = w[o]
|
|
41
|
+
|
|
42
|
+
tmin, tmax = t_range
|
|
43
|
+
tspan = tmax - tmin
|
|
44
|
+
|
|
45
|
+
if periodic:
|
|
46
|
+
mask = np.logical_and((x >= tmin), (x < tmax))
|
|
47
|
+
else:
|
|
48
|
+
mask = np.logical_and((x >= tmin), (x <= tmax))
|
|
49
|
+
|
|
50
|
+
x, y = x[mask], y[mask]
|
|
51
|
+
if w is not None:
|
|
52
|
+
w = w[mask]
|
|
53
|
+
n = x.size
|
|
54
|
+
|
|
55
|
+
if periodic:
|
|
56
|
+
xx = np.concatenate([x + i * tspan for i in range(n_reps)])
|
|
57
|
+
yy = np.tile(y, n_reps)
|
|
58
|
+
ww = np.tile(w, n_reps) if w is not None else None
|
|
59
|
+
else:
|
|
60
|
+
xx = x
|
|
61
|
+
yy = y
|
|
62
|
+
ww = w
|
|
63
|
+
|
|
64
|
+
bspl = make_smoothing_spline(xx, yy, ww, lam)
|
|
65
|
+
t, c, k = bspl.tck
|
|
66
|
+
|
|
67
|
+
if periodic:
|
|
68
|
+
N = n_reps // 2
|
|
69
|
+
t = t - tspan * N
|
|
70
|
+
t = t[n * N : -n * N + 1]
|
|
71
|
+
c = c[n * N : -n * N + 1]
|
|
72
|
+
|
|
73
|
+
return BSpline(t, c, k)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class NDFourier:
|
|
77
|
+
def __init__(
|
|
78
|
+
self,
|
|
79
|
+
xh: NDArray[floating] | None = None,
|
|
80
|
+
freq: NDArray[floating] | None = None,
|
|
81
|
+
t_range: tuple[float, float] | None = None,
|
|
82
|
+
grid_size: int | None = None,
|
|
83
|
+
periodic: bool = True,
|
|
84
|
+
largest_harmonic: int = 5,
|
|
85
|
+
d: int = 0,
|
|
86
|
+
zero_weight: float = 1.0,
|
|
87
|
+
smoothing_fn: Callable = np.average,
|
|
88
|
+
) -> None:
|
|
89
|
+
assert periodic
|
|
90
|
+
assert t_range is not None
|
|
91
|
+
assert t_range[0] == 0
|
|
92
|
+
|
|
93
|
+
self.tmin, self.tmax = self.t_range = t_range
|
|
94
|
+
self.tscale = PIX2 / self.tmax
|
|
95
|
+
|
|
96
|
+
if xh is not None:
|
|
97
|
+
assert freq is not None
|
|
98
|
+
self.n = grid_size + 1
|
|
99
|
+
self.xh = xh.reshape((xh.shape[0], -1, 1)).copy()
|
|
100
|
+
self.freq = freq.reshape((freq.shape[0], -1, 1)).copy()
|
|
101
|
+
self.scaled_freq = 1j * self.freq * self.tscale
|
|
102
|
+
|
|
103
|
+
self.grid_size = grid_size
|
|
104
|
+
self.periodic = periodic
|
|
105
|
+
self.largest_harmonic = largest_harmonic
|
|
106
|
+
self.d = d
|
|
107
|
+
self.zero_weight = zero_weight
|
|
108
|
+
self.smoothing_fn = smoothing_fn
|
|
109
|
+
|
|
110
|
+
def fit(
|
|
111
|
+
self,
|
|
112
|
+
t: NDArray[floating],
|
|
113
|
+
X: NDArray[floating],
|
|
114
|
+
) -> "NDFourier":
|
|
115
|
+
if self.grid_size is None:
|
|
116
|
+
self.grid_size = choose_grid_size(t, self.t_range)
|
|
117
|
+
|
|
118
|
+
t_grid = np.linspace(*self.t_range, self.grid_size + 1)
|
|
119
|
+
self.X_smooth = smoothen_data(
|
|
120
|
+
t,
|
|
121
|
+
X,
|
|
122
|
+
t_range=self.t_range,
|
|
123
|
+
t_grid=t_grid,
|
|
124
|
+
periodic=self.periodic,
|
|
125
|
+
zero_weight=self.zero_weight,
|
|
126
|
+
fn=self.smoothing_fn,
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
self.n = n = self.X_smooth.shape[0]
|
|
130
|
+
self.X_smooth = self.X_smooth.reshape((n, -1))
|
|
131
|
+
|
|
132
|
+
xh: NDArray[floating] = fft(self.X_smooth, axis=0)
|
|
133
|
+
freq: NDArray[floating] = fftfreq(n, d=1 / n)
|
|
134
|
+
|
|
135
|
+
mask = np.abs(freq) <= self.largest_harmonic
|
|
136
|
+
xh = xh[mask]
|
|
137
|
+
freq = freq[mask]
|
|
138
|
+
|
|
139
|
+
self.xh = xh.reshape((xh.shape[0], -1, 1))
|
|
140
|
+
self.freq = freq.reshape((freq.shape[0], -1, 1))
|
|
141
|
+
self.scaled_freq = 1j * self.freq * self.tscale
|
|
142
|
+
|
|
143
|
+
return self
|
|
144
|
+
|
|
145
|
+
def derivative(self, d=1) -> "NDFourier":
|
|
146
|
+
return NDFourier(
|
|
147
|
+
self.xh,
|
|
148
|
+
self.freq,
|
|
149
|
+
self.t_range,
|
|
150
|
+
self.grid_size,
|
|
151
|
+
self.periodic,
|
|
152
|
+
self.largest_harmonic,
|
|
153
|
+
d + self.d,
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
def __getitem__(self, key) -> "NDFourier":
|
|
157
|
+
return NDFourier(
|
|
158
|
+
self.xh[:, key],
|
|
159
|
+
self.freq,
|
|
160
|
+
self.t_range,
|
|
161
|
+
self.grid_size,
|
|
162
|
+
self.periodic,
|
|
163
|
+
self.largest_harmonic,
|
|
164
|
+
self.d,
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
def __call__(self, x: NDArray[floating], d=0) -> NDArray[floating]:
|
|
168
|
+
x = asarray(x)
|
|
169
|
+
x_shape = x.shape
|
|
170
|
+
|
|
171
|
+
x = ascontiguousarray(x.ravel(), dtype=np_float64)
|
|
172
|
+
|
|
173
|
+
d = d + self.d
|
|
174
|
+
out: NDArray[floating] = np.real(
|
|
175
|
+
(self.xh * self.scaled_freq**d * np.exp(self.scaled_freq * x)).sum(axis=0)
|
|
176
|
+
/ self.n
|
|
177
|
+
)
|
|
178
|
+
out = out.T
|
|
179
|
+
out = out.reshape(x_shape + (self.xh.shape[1],))
|
|
180
|
+
|
|
181
|
+
return out
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class NDBSpline:
|
|
185
|
+
def __init__(
|
|
186
|
+
self,
|
|
187
|
+
t: NDArray[floating] | None = None,
|
|
188
|
+
C: NDArray[floating] | None = None,
|
|
189
|
+
k: int | None = None,
|
|
190
|
+
t_range: tuple[float, float] | None = None,
|
|
191
|
+
grid_size: int | None = None,
|
|
192
|
+
periodic: bool = False,
|
|
193
|
+
roughness: float | None = None,
|
|
194
|
+
zero_weight: float = 1.0,
|
|
195
|
+
window_width: float | None = None,
|
|
196
|
+
use_grid: bool = True,
|
|
197
|
+
weight_grid: bool = False,
|
|
198
|
+
smoothing_fn: Callable = np.average,
|
|
199
|
+
) -> None:
|
|
200
|
+
if periodic:
|
|
201
|
+
assert t_range is not None
|
|
202
|
+
assert t_range[0] == 0
|
|
203
|
+
|
|
204
|
+
if t is not None or C is not None or k is not None:
|
|
205
|
+
assert t is not None
|
|
206
|
+
assert C is not None
|
|
207
|
+
assert k is not None
|
|
208
|
+
self.t = t.copy()
|
|
209
|
+
self.C = C.reshape((C.shape[0], -1)).copy()
|
|
210
|
+
self.k = k
|
|
211
|
+
|
|
212
|
+
if t_range is not None:
|
|
213
|
+
self.tmin, self.tmax = self.t_range = t_range
|
|
214
|
+
|
|
215
|
+
self.grid_size = grid_size
|
|
216
|
+
self.periodic = periodic
|
|
217
|
+
self.window_width = window_width
|
|
218
|
+
self.use_grid = use_grid
|
|
219
|
+
self.weight_grid = weight_grid
|
|
220
|
+
self.roughness = roughness
|
|
221
|
+
self.zero_weight = zero_weight
|
|
222
|
+
self.smoothing_fn = smoothing_fn
|
|
223
|
+
|
|
224
|
+
def fit(
|
|
225
|
+
self,
|
|
226
|
+
t: NDArray[floating],
|
|
227
|
+
X: NDArray[floating],
|
|
228
|
+
progress: bool = False,
|
|
229
|
+
) -> "NDBSpline":
|
|
230
|
+
X = X.reshape((X.shape[0], -1))
|
|
231
|
+
if self.t_range is None:
|
|
232
|
+
self.tmin, self.tmax = self.t_range = t.min(), t.max()
|
|
233
|
+
|
|
234
|
+
if self.grid_size is None:
|
|
235
|
+
self.grid_size = choose_grid_size(t, self.t_range)
|
|
236
|
+
|
|
237
|
+
if self.roughness is None:
|
|
238
|
+
self.roughness = 1
|
|
239
|
+
|
|
240
|
+
if self.use_grid:
|
|
241
|
+
t_grid: NDArray[floating] = np.linspace(*self.t_range, self.grid_size + 1)
|
|
242
|
+
self.lam = 1 / self.grid_size / 10**self.roughness
|
|
243
|
+
else:
|
|
244
|
+
t_grid = None
|
|
245
|
+
self.lam = 1 / 10**self.roughness
|
|
246
|
+
self.X_smooth = smoothen_data(
|
|
247
|
+
t,
|
|
248
|
+
X,
|
|
249
|
+
t_range=self.t_range,
|
|
250
|
+
t_grid=t_grid,
|
|
251
|
+
periodic=self.periodic,
|
|
252
|
+
window_width=self.window_width,
|
|
253
|
+
zero_weight=self.zero_weight,
|
|
254
|
+
progress=progress,
|
|
255
|
+
fn=self.smoothing_fn,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
if t_grid is not None and self.weight_grid:
|
|
259
|
+
w = np.zeros(self.X_smooth.shape[0], dtype=float)
|
|
260
|
+
n = count_data_in_intervals(t, t_grid) + 1
|
|
261
|
+
if self.periodic:
|
|
262
|
+
n = np.append(n, n[0])
|
|
263
|
+
else:
|
|
264
|
+
n = np.append(n, n[-1])
|
|
265
|
+
w[n > 1] = 1 / np.log1p(n[n > 1])
|
|
266
|
+
else:
|
|
267
|
+
w = None
|
|
268
|
+
|
|
269
|
+
iterator = self.X_smooth.T
|
|
270
|
+
if progress:
|
|
271
|
+
iterator = tqdm(
|
|
272
|
+
iterator,
|
|
273
|
+
bar_format="{desc} {percentage:3.0f}%|{bar}|",
|
|
274
|
+
desc="Fitting bsplines",
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
fit_t_range = (0, 1)
|
|
278
|
+
fit_t_grid = np.linspace(0, 1, self.grid_size + 1)
|
|
279
|
+
fit_t = (t - self.tmin) / (self.tmax - self.tmin)
|
|
280
|
+
C = []
|
|
281
|
+
for x in iterator:
|
|
282
|
+
f = fit_smoothing_spline(
|
|
283
|
+
fit_t_grid if self.use_grid else fit_t,
|
|
284
|
+
x,
|
|
285
|
+
t_range=fit_t_range,
|
|
286
|
+
w=w,
|
|
287
|
+
lam=self.lam,
|
|
288
|
+
periodic=self.periodic,
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
C.append(f.c)
|
|
292
|
+
|
|
293
|
+
self.t = f.t.copy()
|
|
294
|
+
self.t *= self.tmax - self.tmin
|
|
295
|
+
self.t += self.tmin
|
|
296
|
+
self.C = np.array(C).T.copy()
|
|
297
|
+
self.k = 3
|
|
298
|
+
|
|
299
|
+
return self
|
|
300
|
+
|
|
301
|
+
def derivative(self, d: int = 1) -> "NDBSpline":
|
|
302
|
+
# pad the c array if needed
|
|
303
|
+
ct = len(self.t) - len(self.C)
|
|
304
|
+
if ct > 0:
|
|
305
|
+
self.C = np.r_[self.C, np.zeros((ct, self.C.shape[1]))]
|
|
306
|
+
t, C, k = _fitpack_impl.splder((self.t, self.C, self.k), d)
|
|
307
|
+
return NDBSpline(t, C, k, self.t_range, self.grid_size, self.periodic)
|
|
308
|
+
|
|
309
|
+
def __getitem__(self, key) -> "NDBSpline":
|
|
310
|
+
t = self.t
|
|
311
|
+
C = self.C[:, key]
|
|
312
|
+
k = self.k
|
|
313
|
+
return NDBSpline(t, C, k, self.t_range, self.grid_size, self.periodic)
|
|
314
|
+
|
|
315
|
+
def __call__(self, x: NDArray[floating], d: int = 0) -> NDArray[floating]:
|
|
316
|
+
x = asarray(x)
|
|
317
|
+
x_shape = x.shape
|
|
318
|
+
|
|
319
|
+
x = ascontiguousarray(x.ravel(), dtype=np_float64)
|
|
320
|
+
if self.periodic:
|
|
321
|
+
n = self.t.size - self.k - 1
|
|
322
|
+
x = self.t[self.k] + (x - self.t[self.k]) % (self.t[n] - self.t[self.k])
|
|
323
|
+
|
|
324
|
+
out = np_empty((len(x), prod(self.C.shape[1:])), dtype=self.C.dtype)
|
|
325
|
+
|
|
326
|
+
if not self.t.flags.c_contiguous:
|
|
327
|
+
self.t = self.t.copy()
|
|
328
|
+
if not self.C.flags.c_contiguous:
|
|
329
|
+
self.C = self.C.copy()
|
|
330
|
+
|
|
331
|
+
evaluate_spline(self.t, self.C, self.k, x, d, False, out)
|
|
332
|
+
out = out.reshape(x_shape + (self.C.shape[1],))
|
|
333
|
+
|
|
334
|
+
return out
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from anndata import AnnData
|
|
4
|
+
from numpy.typing import NDArray
|
|
5
|
+
from scipy.signal import get_window, periodogram
|
|
6
|
+
from scipy.sparse import spmatrix
|
|
7
|
+
|
|
8
|
+
from sclab.tools.utils import aggregate_and_filter
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def periodic_genes(
    adata: AnnData,
    time_key: str,
    tmin: float,
    tmax: float,
    period: float,
    n: int,
    min_pct_power_below: float = 0.75,
    layer: str | None = None,
):
    """Flag genes whose binned expression profile is dominated by low frequencies.

    Cells are grouped into ``n`` time bins centred on
    ``tmin, tmin + w, ...`` with ``w = (tmax - tmin) / n``. Counts are
    aggregated per bin and log1p-transformed, then the periodogram of every
    gene profile is computed.

    Results are written to ``adata`` in place:

    - ``adata.varm["profile"]``: per-gene binned log1p profile
    - ``adata.varm["periodogram"]``: per-gene power spectrum
    - ``adata.var["pct_power_below"]``: fraction of power at or below
      ``1 / period``
    - ``adata.var["periodic"]``: whether that fraction exceeds
      ``min_pct_power_below``

    Parameters
    ----------
    adata
        Annotated data; must contain raw integer counts in the chosen layer.
    time_key
        Column of ``adata.obs`` holding the time of each cell.
    tmin, tmax
        Range of the periodic time axis.
    period
        Period of interest; ``1 / period`` is the frequency cut-off.
    n
        Number of time bins.
    min_pct_power_below
        Threshold on the low-frequency power fraction.
    layer
        Layer to read counts from; ``None`` or ``"X"`` uses ``adata.X``.
    """
    times = adata.obs[time_key].values.copy()
    if layer is None or layer == "X":
        X = adata.X
    else:
        X = adata.layers[layer]

    _assert_integer_counts(X)

    tmp_adata = AnnData(X, obs=adata.obs[[time_key]], var=adata.var[[]])

    tspan = tmax - tmin
    w = tspan / n
    # Bin edges sit half a bin width before each bin centre, so the first
    # bin straddles tmin.
    bins = np.arange(-w / 2 + tmin, tmax, w)
    labels = list(map(lambda x: f"{x:.2f}", bins[:-1] + w / 2))

    # Times past the last edge belong to the first bin of the next cycle:
    # shift them back by one full span (not by tmax, which is only correct
    # when tmin == 0). A value exactly on the last edge is left alone
    # because pd.cut's right-closed bins already place it in the last bin.
    wrap = times > bins.max()
    times[wrap] = times[wrap] - tspan
    tmp_adata.obs["timepoint"] = pd.cut(times, bins=bins, labels=labels)
    aggregated = aggregate_and_filter(
        tmp_adata,
        "timepoint",
        replicas_per_group=1,
        make_stats=False,
        make_dummies=False,
    )
    log_cnts = np.log1p(aggregated.X)
    profiles = pd.DataFrame(log_cnts, index=labels, columns=aggregated.var_names)
    ps = power_spectrum_df(profiles)
    pp = pct_power_below(ps, 1 / period)

    adata.varm["profile"] = profiles.T
    adata.varm["periodogram"] = ps.T
    adata.var["pct_power_below"] = pp
    adata.var["periodic"] = pp > min_pct_power_below
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _assert_integer_counts(X: spmatrix | NDArray):
|
|
56
|
+
message = "Periodic genes requires raw integer counts. E.g. `layer = 'counts'`."
|
|
57
|
+
if isinstance(X, spmatrix):
|
|
58
|
+
assert all(X.data % 1 == 0), message
|
|
59
|
+
else:
|
|
60
|
+
assert all(X % 1 == 0), message
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def infer_dt_from_index(idx: pd.Index) -> float:
    """Return the median spacing between consecutive index entries.

    For a ``DatetimeIndex`` the spacing is expressed in seconds; for any
    other index the values are converted to ``float`` and the spacing is in
    the index's own units.
    """
    if isinstance(idx, pd.DatetimeIndex):
        # Dividing the timedelta64 differences by one second gives float
        # seconds whatever the index resolution (s, ms, us or ns) is, unlike
        # dividing the raw int64 view by 1e9, which assumes nanoseconds.
        steps = np.diff(idx.values) / np.timedelta64(1, "s")
    else:
        steps = np.diff(idx.values.astype(float))
    return float(np.median(steps))
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def power_spectrum_df(X: pd.DataFrame, window: str = "hann", detrend: str = "constant"):
    """Compute a one-sided periodogram for every column of ``X``.

    ``X`` has one row per time point and one column per variable. The
    sampling interval is inferred from ``X.index`` (1.0 is used when there is
    a single row). Returns a DataFrame indexed by ``"frequency"`` with the
    same columns as ``X``.
    """
    # Subtract each column's mean before estimating the spectrum.
    centered = X - X.mean()

    if X.index.size > 1:
        dt = infer_dt_from_index(X.index)
    else:
        dt = 1.0
    fs = 1.0 / dt

    win = get_window(window, X.shape[0], fftbins=True)

    # One periodogram per column; every call returns the same frequency
    # axis, so the last one is used for the index.
    spectra = {}
    for column in centered.columns:
        f, power = periodogram(
            centered[column].values,
            fs=fs,
            window=win,
            detrend=detrend,
            scaling="spectrum",
            return_onesided=True,
        )
        spectra[column] = power

    return pd.DataFrame(spectra, index=pd.Index(f, name="frequency"))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def pct_power_below(ps: pd.DataFrame, max_freq: float) -> pd.Series:
    """Return, per column, the fraction of spectral power at or below ``max_freq``.

    ``ps`` is a spectrum table indexed by frequency (rows) with one column
    per variable. With fewer than two frequency rows the spacing cannot be
    computed and a Series of NaN is returned. The result is always named
    ``"pct_power_at_low_freq"``.
    """
    label = "pct_power_at_low_freq"

    if len(ps.index) < 2:
        return pd.Series(dict.fromkeys(ps.columns, np.nan), name=label)

    # Rectangle rule: integral = sum of values * frequency spacing.
    df = ps.index[1] - ps.index[0]
    low_band = ps.loc[ps.index <= max_freq]
    low_power: pd.Series = low_band.sum() * df
    total_power: pd.Series = ps.sum() * df

    fraction = low_power / total_power
    fraction.name = label
    return fraction
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Callable
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from numpy import bool_, floating, integer
|
|
6
|
+
from numpy.typing import NDArray
|
|
7
|
+
from tqdm.auto import tqdm
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
PIX2 = 2 * np.pi
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def count_empty_intervals(t: NDArray[floating], t_grid: NDArray[floating]) -> int:
    """Return how many consecutive ``t_grid`` intervals contain no value of ``t``."""
    occupancy = count_data_in_intervals(t, t_grid)
    is_empty = occupancy == 0
    return np.sum(is_empty)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def count_data_in_intervals(
    t: NDArray[floating], t_grid: NDArray[floating]
) -> NDArray[integer]:
    """Count the values of ``t`` lying in each interval of ``t_grid``.

    Interval ``i`` is ``[t_grid[i], t_grid[i + 1]]`` with both ends
    inclusive, so a value equal to an interior grid point is counted in both
    neighbouring intervals. Returns one count per interval.
    """
    # Column vector of samples against row vectors of edges: the comparison
    # broadcasts to a (samples, intervals) table.
    samples = t.reshape(-1, 1)
    left_edges = t_grid[:-1]
    right_edges = t_grid[1:]
    inside = (samples >= left_edges) & (samples <= right_edges)
    return inside.sum(axis=0)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def choose_grid_size(t: NDArray[floating], t_range: tuple[float, float]) -> int:
    """Pick the largest power-of-two grid size that leaves no interval empty.

    Candidate sizes run from ``2**10`` down to ``2``. The first one for
    which every interval of the regular grid over ``t_range`` contains at
    least one value of ``t`` is returned.

    Raises
    ------
    ValueError
        If none of the candidate sizes works.
    """
    for exponent in range(10, 0, -1):
        candidate = 2**exponent
        t_grid: NDArray[floating] = np.linspace(*t_range, candidate + 1)
        if count_empty_intervals(t, t_grid) == 0:
            return candidate

    raise ValueError("Could not find a suitable grid size")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def smoothen_data(
|
|
41
|
+
t: NDArray[floating],
|
|
42
|
+
X: NDArray[floating],
|
|
43
|
+
t_range: tuple[float, float] | None = None,
|
|
44
|
+
t_grid: NDArray[floating] | None = None,
|
|
45
|
+
fn: Callable[[NDArray[floating]], NDArray[floating]] = np.average,
|
|
46
|
+
window_width: float | None = None,
|
|
47
|
+
weights: NDArray[floating] | None = None,
|
|
48
|
+
zero_weight: float = 1,
|
|
49
|
+
periodic: bool = False,
|
|
50
|
+
quiet: bool = False,
|
|
51
|
+
progress: bool = False,
|
|
52
|
+
) -> NDArray[floating]:
|
|
53
|
+
if t_grid is None:
|
|
54
|
+
# no grid provided. We will have one output point for each input point
|
|
55
|
+
t_grid = t
|
|
56
|
+
is_grid = False
|
|
57
|
+
else:
|
|
58
|
+
# grid is provided
|
|
59
|
+
is_grid = True
|
|
60
|
+
empty_intervals = count_empty_intervals(t, t_grid)
|
|
61
|
+
if empty_intervals > 0 and not quiet:
|
|
62
|
+
logger.warning(f"Provided grid has {empty_intervals} empty intervals")
|
|
63
|
+
|
|
64
|
+
if t_range is not None:
|
|
65
|
+
# we used a specific t values range
|
|
66
|
+
tmin, tmax = t_range
|
|
67
|
+
else:
|
|
68
|
+
tmin, tmax = t_grid.min(), t_grid.max()
|
|
69
|
+
|
|
70
|
+
# full time window size
|
|
71
|
+
tspan = tmax - tmin
|
|
72
|
+
|
|
73
|
+
if window_width is None and not is_grid:
|
|
74
|
+
window_width = tspan * 0.05
|
|
75
|
+
elif window_width is None and is_grid:
|
|
76
|
+
window_width = tspan / (t_grid.size - 1) * 2
|
|
77
|
+
|
|
78
|
+
# initialize the output matrix with NaNs
|
|
79
|
+
X_smooth: NDArray[floating] = np.full((t_grid.size,) + X.shape[1:], np.nan)
|
|
80
|
+
|
|
81
|
+
generator = enumerate(t_grid)
|
|
82
|
+
if progress:
|
|
83
|
+
generator = tqdm(
|
|
84
|
+
generator,
|
|
85
|
+
total=t_grid.size,
|
|
86
|
+
bar_format="{desc} {percentage:3.0f}%|{bar}|",
|
|
87
|
+
desc="Smoothing data",
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
X = X.astype(float)
|
|
91
|
+
eps = np.finfo(float).eps
|
|
92
|
+
for i, m in generator:
|
|
93
|
+
low = m - window_width / 2
|
|
94
|
+
hig = m + window_width / 2
|
|
95
|
+
|
|
96
|
+
mask: NDArray[bool_] = (t >= low) & (t <= hig)
|
|
97
|
+
if periodic:
|
|
98
|
+
# include points beyond the periodic boundaries
|
|
99
|
+
mask = (
|
|
100
|
+
mask
|
|
101
|
+
| (t >= low + tspan) & (t <= hig + tspan)
|
|
102
|
+
| (t >= low - tspan) & (t <= hig - tspan)
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
if mask.sum() == 0:
|
|
106
|
+
continue
|
|
107
|
+
|
|
108
|
+
x = X[mask] + eps
|
|
109
|
+
if fn == np.average and weights is not None:
|
|
110
|
+
w = weights[mask]
|
|
111
|
+
X_smooth[i] = np.average(x, axis=0, weights=w)
|
|
112
|
+
|
|
113
|
+
elif fn == np.average and zero_weight == 1:
|
|
114
|
+
X_smooth[i] = np.mean(x, axis=0)
|
|
115
|
+
|
|
116
|
+
elif fn == np.average and zero_weight != 1:
|
|
117
|
+
w = np.ones_like(x)
|
|
118
|
+
w[x == eps] = zero_weight + eps
|
|
119
|
+
X_smooth[i] = fn(x, axis=0, weights=w)
|
|
120
|
+
|
|
121
|
+
else:
|
|
122
|
+
X_smooth[i] = fn(x, axis=0)
|
|
123
|
+
|
|
124
|
+
return X_smooth - eps
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from numpy import floating
|
|
5
|
+
from numpy.typing import NDArray
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def guess_trange(
    times: NDArray[floating], verbose: bool = False
) -> tuple[float, float]:
    """Guess a "round" ``(tmin, tmax)`` range that encloses ``times``.

    The data span is rounded up to a multiple of one hundredth of its
    enclosing power of ten. The minimum is then floored and the maximum
    ceiled to a multiple of the same kind of step, derived from the rounded
    span.

    Parameters
    ----------
    times
        Observed time values.
    verbose
        Print the intermediate and final values.

    Returns
    -------
    tuple[float, float]
        The guessed lower and upper bounds. When all values are identical
        the span is zero and the common value is returned for both bounds.
    """
    tmin, tmax = times.min(), times.max()
    tspan = tmax - tmin

    if tspan == 0:
        # log10(0) is -inf, which would turn every later step into NaN;
        # a degenerate range is the only sensible answer here.
        return float(tmin), float(tmax)

    scale = 10.0 ** np.ceil(np.log10(tspan)) / 100
    tspan = np.ceil(tspan / scale) * scale

    scale = 10.0 ** np.ceil(np.log10(tspan)) / 100
    g_tmin = np.floor(tmin / scale) * scale
    g_tmax = np.ceil(tmax / scale) * scale

    # Normalise negative zero so it prints and compares as plain 0.0.
    g_tmin = 0.0 if g_tmin == -0.0 else g_tmin
    g_tmax = 0.0 if g_tmax == -0.0 else g_tmax

    if verbose:
        print(
            f"tspan: {tspan:10.4f} min-max: {tmin:10.4f} - {tmax:10.4f} | {g_tmin:>8} - {g_tmax:>8}"
        )

    return g_tmin, g_tmax
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_guess_trange(N: int = 1000, verbose: bool = False) -> None:
    """Randomised self-check of ``guess_trange``.

    For a set of ranges at different scales and offsets, ``N`` uniform
    samples are drawn 500 times. The guessed range counts as a hit when both
    bounds are within 1% of the true span. With ``verbose`` the hit rate of
    every configuration is printed; otherwise it must exceed 95%.
    """

    def _test1(trange: tuple[float, float]) -> bool:
        # One trial: sample, guess, and compare both bounds to the truth.
        true_min, true_max = trange
        true_span = true_max - true_min
        g_tmin, g_tmax = guess_trange(np.random.uniform(*trange, N))
        low_error = np.abs(g_tmin - true_min) / true_span
        high_error = np.abs(g_tmax - true_max) / true_span
        return low_error <= 0.01 and high_error <= 0.01

    magnitudes = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    multipliers = [1, 2, 3, 5, 7]
    unit_ranges = [(-2, -1), (-1 / 2, 1 / 2), (1, 2)]

    for (s1, s2), (lw, hg) in itertools.product(
        itertools.product(magnitudes, multipliers), unit_ranges
    ):
        scale = s1 * s2
        trange = lw * scale, hg * scale
        acc1 = np.mean([_test1(trange) for _ in range(500)])
        report = (
            f"scale: {scale: 9.3f} | lw-hg: {lw: 5.1f} - {hg: 5.1f} | {acc1: 8.2%}"
        )
        if verbose:
            print(report)
        else:
            assert acc1 > 0.95, report
|