sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. sclab/__init__.py +3 -1
  2. sclab/_io.py +83 -12
  3. sclab/_methods_registry.py +65 -0
  4. sclab/_sclab.py +241 -21
  5. sclab/dataset/_dataset.py +4 -6
  6. sclab/dataset/processor/_processor.py +41 -19
  7. sclab/dataset/processor/_results_panel.py +94 -0
  8. sclab/dataset/processor/step/_processor_step_base.py +12 -6
  9. sclab/examples/processor_steps/__init__.py +8 -0
  10. sclab/examples/processor_steps/_cluster.py +2 -2
  11. sclab/examples/processor_steps/_differential_expression.py +329 -0
  12. sclab/examples/processor_steps/_doublet_detection.py +68 -0
  13. sclab/examples/processor_steps/_gene_expression.py +125 -0
  14. sclab/examples/processor_steps/_integration.py +116 -0
  15. sclab/examples/processor_steps/_neighbors.py +26 -6
  16. sclab/examples/processor_steps/_pca.py +13 -8
  17. sclab/examples/processor_steps/_preprocess.py +52 -25
  18. sclab/examples/processor_steps/_qc.py +24 -8
  19. sclab/examples/processor_steps/_umap.py +2 -2
  20. sclab/gui/__init__.py +0 -0
  21. sclab/gui/components/__init__.py +7 -0
  22. sclab/gui/components/_guided_pseudotime.py +482 -0
  23. sclab/gui/components/_transfer_metadata.py +186 -0
  24. sclab/methods/__init__.py +50 -0
  25. sclab/preprocess/__init__.py +26 -0
  26. sclab/preprocess/_cca.py +176 -0
  27. sclab/preprocess/_cca_integrate.py +109 -0
  28. sclab/preprocess/_filter_obs.py +42 -0
  29. sclab/preprocess/_harmony.py +421 -0
  30. sclab/preprocess/_harmony_integrate.py +53 -0
  31. sclab/preprocess/_normalize_weighted.py +65 -0
  32. sclab/preprocess/_pca.py +51 -0
  33. sclab/preprocess/_preprocess.py +155 -0
  34. sclab/preprocess/_qc.py +38 -0
  35. sclab/preprocess/_rpca.py +116 -0
  36. sclab/preprocess/_subset.py +208 -0
  37. sclab/preprocess/_transfer_metadata.py +196 -0
  38. sclab/preprocess/_transform.py +82 -0
  39. sclab/preprocess/_utils.py +96 -0
  40. sclab/scanpy/__init__.py +0 -0
  41. sclab/scanpy/_compat.py +92 -0
  42. sclab/scanpy/_settings.py +526 -0
  43. sclab/scanpy/logging.py +290 -0
  44. sclab/scanpy/plotting/__init__.py +0 -0
  45. sclab/scanpy/plotting/_rcmod.py +73 -0
  46. sclab/scanpy/plotting/palettes.py +221 -0
  47. sclab/scanpy/readwrite.py +1108 -0
  48. sclab/tools/__init__.py +0 -0
  49. sclab/tools/cellflow/__init__.py +0 -0
  50. sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
  51. sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
  52. sclab/tools/cellflow/pseudotime/__init__.py +0 -0
  53. sclab/tools/cellflow/pseudotime/_pseudotime.py +336 -0
  54. sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
  55. sclab/tools/cellflow/utils/__init__.py +0 -0
  56. sclab/tools/cellflow/utils/density_nd.py +215 -0
  57. sclab/tools/cellflow/utils/interpolate.py +334 -0
  58. sclab/tools/cellflow/utils/periodic_genes.py +106 -0
  59. sclab/tools/cellflow/utils/smoothen.py +124 -0
  60. sclab/tools/cellflow/utils/times.py +55 -0
  61. sclab/tools/differential_expression/__init__.py +7 -0
  62. sclab/tools/differential_expression/_pseudobulk_edger.py +309 -0
  63. sclab/tools/differential_expression/_pseudobulk_helpers.py +290 -0
  64. sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
  65. sclab/tools/doublet_detection/__init__.py +5 -0
  66. sclab/tools/doublet_detection/_scrublet.py +64 -0
  67. sclab/tools/embedding/__init__.py +0 -0
  68. sclab/tools/imputation/__init__.py +0 -0
  69. sclab/tools/imputation/_alra.py +135 -0
  70. sclab/tools/labeling/__init__.py +6 -0
  71. sclab/tools/labeling/sctype.py +233 -0
  72. sclab/tools/utils/__init__.py +5 -0
  73. sclab/tools/utils/_aggregate_and_filter.py +290 -0
  74. sclab/utils/__init__.py +5 -0
  75. sclab/utils/_write_excel.py +510 -0
  76. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/METADATA +29 -12
  77. sclab-0.3.4.dist-info/RECORD +93 -0
  78. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/WHEEL +1 -1
  79. sclab-0.3.4.dist-info/licenses/LICENSE +29 -0
  80. sclab-0.1.7.dist-info/RECORD +0 -30
@@ -0,0 +1,334 @@
1
+ import logging
2
+ from typing import Callable
3
+
4
+ import numpy as np
5
+ from numpy import asarray, ascontiguousarray, floating, prod
6
+ from numpy import empty as np_empty
7
+ from numpy import float64 as np_float64
8
+ from numpy.typing import NDArray
9
+ from scipy.fft import fft, fftfreq
10
+ from scipy.interpolate import BSpline, _fitpack_impl, make_smoothing_spline
11
+ from tqdm.auto import tqdm
12
+
13
+ from .smoothen import choose_grid_size, count_data_in_intervals, smoothen_data
14
+
15
+ try:
16
+ from scipy.interpolate._dierckx import evaluate_spline
17
+ except ImportError:
18
+ from scipy.interpolate._bspl import evaluate_spline
19
+
20
+
21
+ logger = logging.getLogger(__name__)
22
+ PIX2 = 2 * np.pi
23
+
24
+
25
+ def fit_smoothing_spline(
26
+ x: NDArray[floating],
27
+ y: NDArray[floating],
28
+ t_range: tuple[float, float],
29
+ w: NDArray[floating] | None = None,
30
+ lam: float | None = None,
31
+ periodic: bool = False,
32
+ n_reps: int = 3,
33
+ ) -> BSpline:
34
+ if periodic:
35
+ assert n_reps % 2 == 1
36
+
37
+ o = np.argsort(x)
38
+ x, y = x[o], y[o]
39
+ if w is not None:
40
+ w = w[o]
41
+
42
+ tmin, tmax = t_range
43
+ tspan = tmax - tmin
44
+
45
+ if periodic:
46
+ mask = np.logical_and((x >= tmin), (x < tmax))
47
+ else:
48
+ mask = np.logical_and((x >= tmin), (x <= tmax))
49
+
50
+ x, y = x[mask], y[mask]
51
+ if w is not None:
52
+ w = w[mask]
53
+ n = x.size
54
+
55
+ if periodic:
56
+ xx = np.concatenate([x + i * tspan for i in range(n_reps)])
57
+ yy = np.tile(y, n_reps)
58
+ ww = np.tile(w, n_reps) if w is not None else None
59
+ else:
60
+ xx = x
61
+ yy = y
62
+ ww = w
63
+
64
+ bspl = make_smoothing_spline(xx, yy, ww, lam)
65
+ t, c, k = bspl.tck
66
+
67
+ if periodic:
68
+ N = n_reps // 2
69
+ t = t - tspan * N
70
+ t = t[n * N : -n * N + 1]
71
+ c = c[n * N : -n * N + 1]
72
+
73
+ return BSpline(t, c, k)
74
+
75
+
76
+ class NDFourier:
77
+ def __init__(
78
+ self,
79
+ xh: NDArray[floating] | None = None,
80
+ freq: NDArray[floating] | None = None,
81
+ t_range: tuple[float, float] | None = None,
82
+ grid_size: int | None = None,
83
+ periodic: bool = True,
84
+ largest_harmonic: int = 5,
85
+ d: int = 0,
86
+ zero_weight: float = 1.0,
87
+ smoothing_fn: Callable = np.average,
88
+ ) -> None:
89
+ assert periodic
90
+ assert t_range is not None
91
+ assert t_range[0] == 0
92
+
93
+ self.tmin, self.tmax = self.t_range = t_range
94
+ self.tscale = PIX2 / self.tmax
95
+
96
+ if xh is not None:
97
+ assert freq is not None
98
+ self.n = grid_size + 1
99
+ self.xh = xh.reshape((xh.shape[0], -1, 1)).copy()
100
+ self.freq = freq.reshape((freq.shape[0], -1, 1)).copy()
101
+ self.scaled_freq = 1j * self.freq * self.tscale
102
+
103
+ self.grid_size = grid_size
104
+ self.periodic = periodic
105
+ self.largest_harmonic = largest_harmonic
106
+ self.d = d
107
+ self.zero_weight = zero_weight
108
+ self.smoothing_fn = smoothing_fn
109
+
110
+ def fit(
111
+ self,
112
+ t: NDArray[floating],
113
+ X: NDArray[floating],
114
+ ) -> "NDFourier":
115
+ if self.grid_size is None:
116
+ self.grid_size = choose_grid_size(t, self.t_range)
117
+
118
+ t_grid = np.linspace(*self.t_range, self.grid_size + 1)
119
+ self.X_smooth = smoothen_data(
120
+ t,
121
+ X,
122
+ t_range=self.t_range,
123
+ t_grid=t_grid,
124
+ periodic=self.periodic,
125
+ zero_weight=self.zero_weight,
126
+ fn=self.smoothing_fn,
127
+ )
128
+
129
+ self.n = n = self.X_smooth.shape[0]
130
+ self.X_smooth = self.X_smooth.reshape((n, -1))
131
+
132
+ xh: NDArray[floating] = fft(self.X_smooth, axis=0)
133
+ freq: NDArray[floating] = fftfreq(n, d=1 / n)
134
+
135
+ mask = np.abs(freq) <= self.largest_harmonic
136
+ xh = xh[mask]
137
+ freq = freq[mask]
138
+
139
+ self.xh = xh.reshape((xh.shape[0], -1, 1))
140
+ self.freq = freq.reshape((freq.shape[0], -1, 1))
141
+ self.scaled_freq = 1j * self.freq * self.tscale
142
+
143
+ return self
144
+
145
+ def derivative(self, d=1) -> "NDFourier":
146
+ return NDFourier(
147
+ self.xh,
148
+ self.freq,
149
+ self.t_range,
150
+ self.grid_size,
151
+ self.periodic,
152
+ self.largest_harmonic,
153
+ d + self.d,
154
+ )
155
+
156
+ def __getitem__(self, key) -> "NDFourier":
157
+ return NDFourier(
158
+ self.xh[:, key],
159
+ self.freq,
160
+ self.t_range,
161
+ self.grid_size,
162
+ self.periodic,
163
+ self.largest_harmonic,
164
+ self.d,
165
+ )
166
+
167
+ def __call__(self, x: NDArray[floating], d=0) -> NDArray[floating]:
168
+ x = asarray(x)
169
+ x_shape = x.shape
170
+
171
+ x = ascontiguousarray(x.ravel(), dtype=np_float64)
172
+
173
+ d = d + self.d
174
+ out: NDArray[floating] = np.real(
175
+ (self.xh * self.scaled_freq**d * np.exp(self.scaled_freq * x)).sum(axis=0)
176
+ / self.n
177
+ )
178
+ out = out.T
179
+ out = out.reshape(x_shape + (self.xh.shape[1],))
180
+
181
+ return out
182
+
183
+
184
+ class NDBSpline:
185
+ def __init__(
186
+ self,
187
+ t: NDArray[floating] | None = None,
188
+ C: NDArray[floating] | None = None,
189
+ k: int | None = None,
190
+ t_range: tuple[float, float] | None = None,
191
+ grid_size: int | None = None,
192
+ periodic: bool = False,
193
+ roughness: float | None = None,
194
+ zero_weight: float = 1.0,
195
+ window_width: float | None = None,
196
+ use_grid: bool = True,
197
+ weight_grid: bool = False,
198
+ smoothing_fn: Callable = np.average,
199
+ ) -> None:
200
+ if periodic:
201
+ assert t_range is not None
202
+ assert t_range[0] == 0
203
+
204
+ if t is not None or C is not None or k is not None:
205
+ assert t is not None
206
+ assert C is not None
207
+ assert k is not None
208
+ self.t = t.copy()
209
+ self.C = C.reshape((C.shape[0], -1)).copy()
210
+ self.k = k
211
+
212
+ if t_range is not None:
213
+ self.tmin, self.tmax = self.t_range = t_range
214
+
215
+ self.grid_size = grid_size
216
+ self.periodic = periodic
217
+ self.window_width = window_width
218
+ self.use_grid = use_grid
219
+ self.weight_grid = weight_grid
220
+ self.roughness = roughness
221
+ self.zero_weight = zero_weight
222
+ self.smoothing_fn = smoothing_fn
223
+
224
+ def fit(
225
+ self,
226
+ t: NDArray[floating],
227
+ X: NDArray[floating],
228
+ progress: bool = False,
229
+ ) -> "NDBSpline":
230
+ X = X.reshape((X.shape[0], -1))
231
+ if self.t_range is None:
232
+ self.tmin, self.tmax = self.t_range = t.min(), t.max()
233
+
234
+ if self.grid_size is None:
235
+ self.grid_size = choose_grid_size(t, self.t_range)
236
+
237
+ if self.roughness is None:
238
+ self.roughness = 1
239
+
240
+ if self.use_grid:
241
+ t_grid: NDArray[floating] = np.linspace(*self.t_range, self.grid_size + 1)
242
+ self.lam = 1 / self.grid_size / 10**self.roughness
243
+ else:
244
+ t_grid = None
245
+ self.lam = 1 / 10**self.roughness
246
+ self.X_smooth = smoothen_data(
247
+ t,
248
+ X,
249
+ t_range=self.t_range,
250
+ t_grid=t_grid,
251
+ periodic=self.periodic,
252
+ window_width=self.window_width,
253
+ zero_weight=self.zero_weight,
254
+ progress=progress,
255
+ fn=self.smoothing_fn,
256
+ )
257
+
258
+ if t_grid is not None and self.weight_grid:
259
+ w = np.zeros(self.X_smooth.shape[0], dtype=float)
260
+ n = count_data_in_intervals(t, t_grid) + 1
261
+ if self.periodic:
262
+ n = np.append(n, n[0])
263
+ else:
264
+ n = np.append(n, n[-1])
265
+ w[n > 1] = 1 / np.log1p(n[n > 1])
266
+ else:
267
+ w = None
268
+
269
+ iterator = self.X_smooth.T
270
+ if progress:
271
+ iterator = tqdm(
272
+ iterator,
273
+ bar_format="{desc} {percentage:3.0f}%|{bar}|",
274
+ desc="Fitting bsplines",
275
+ )
276
+
277
+ fit_t_range = (0, 1)
278
+ fit_t_grid = np.linspace(0, 1, self.grid_size + 1)
279
+ fit_t = (t - self.tmin) / (self.tmax - self.tmin)
280
+ C = []
281
+ for x in iterator:
282
+ f = fit_smoothing_spline(
283
+ fit_t_grid if self.use_grid else fit_t,
284
+ x,
285
+ t_range=fit_t_range,
286
+ w=w,
287
+ lam=self.lam,
288
+ periodic=self.periodic,
289
+ )
290
+
291
+ C.append(f.c)
292
+
293
+ self.t = f.t.copy()
294
+ self.t *= self.tmax - self.tmin
295
+ self.t += self.tmin
296
+ self.C = np.array(C).T.copy()
297
+ self.k = 3
298
+
299
+ return self
300
+
301
+ def derivative(self, d: int = 1) -> "NDBSpline":
302
+ # pad the c array if needed
303
+ ct = len(self.t) - len(self.C)
304
+ if ct > 0:
305
+ self.C = np.r_[self.C, np.zeros((ct, self.C.shape[1]))]
306
+ t, C, k = _fitpack_impl.splder((self.t, self.C, self.k), d)
307
+ return NDBSpline(t, C, k, self.t_range, self.grid_size, self.periodic)
308
+
309
+ def __getitem__(self, key) -> "NDBSpline":
310
+ t = self.t
311
+ C = self.C[:, key]
312
+ k = self.k
313
+ return NDBSpline(t, C, k, self.t_range, self.grid_size, self.periodic)
314
+
315
+ def __call__(self, x: NDArray[floating], d: int = 0) -> NDArray[floating]:
316
+ x = asarray(x)
317
+ x_shape = x.shape
318
+
319
+ x = ascontiguousarray(x.ravel(), dtype=np_float64)
320
+ if self.periodic:
321
+ n = self.t.size - self.k - 1
322
+ x = self.t[self.k] + (x - self.t[self.k]) % (self.t[n] - self.t[self.k])
323
+
324
+ out = np_empty((len(x), prod(self.C.shape[1:])), dtype=self.C.dtype)
325
+
326
+ if not self.t.flags.c_contiguous:
327
+ self.t = self.t.copy()
328
+ if not self.C.flags.c_contiguous:
329
+ self.C = self.C.copy()
330
+
331
+ evaluate_spline(self.t, self.C, self.k, x, d, False, out)
332
+ out = out.reshape(x_shape + (self.C.shape[1],))
333
+
334
+ return out
@@ -0,0 +1,106 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from anndata import AnnData
4
+ from numpy.typing import NDArray
5
+ from scipy.signal import get_window, periodogram
6
+ from scipy.sparse import spmatrix
7
+
8
+ from sclab.tools.utils import aggregate_and_filter
9
+
10
+
11
def periodic_genes(
    adata: AnnData,
    time_key: str,
    tmin: float,
    tmax: float,
    period: float,
    n: int,
    min_pct_power_below: float = 0.75,
    layer: str | None = None,
):
    """Flag genes whose binned expression profile is dominated by low frequencies.

    Cells are assigned to ``n`` time bins of equal width centred on
    ``tmin, tmin + w, ...`` (``w = (tmax - tmin) / n``), with cells at the end
    of the range wrapped around into the first bin. Counts are aggregated per
    bin with ``aggregate_and_filter``, log1p-transformed, and a periodogram is
    computed per gene. Results are written to ``adata`` in place:

    - ``adata.varm["profile"]``: binned log1p profile per gene,
    - ``adata.varm["periodogram"]``: power spectrum per gene,
    - ``adata.var["pct_power_below"]``: fraction of power at frequencies
      ``<= 1 / period``,
    - ``adata.var["periodic"]``: whether that fraction exceeds
      ``min_pct_power_below``.

    Parameters
    ----------
    adata:
        Annotated data; must provide integer counts in ``X`` or ``layer``.
    time_key:
        Column of ``adata.obs`` holding the time of each cell.
    tmin, tmax:
        Range of the (periodic) time variable.
    period:
        Period whose frequency ``1 / period`` is the low-frequency cut-off.
    n:
        Number of time bins.
    min_pct_power_below:
        Threshold on the low-frequency power fraction.
    layer:
        Layer to read counts from; ``None`` or ``"X"`` selects ``adata.X``.
    """
    times = adata.obs[time_key].values.copy()
    if layer is None or layer == "X":
        X = adata.X
    else:
        X = adata.layers[layer]

    _assert_integer_counts(X)

    tmp_adata = AnnData(X, obs=adata.obs[[time_key]], var=adata.var[[]])

    w = (tmax - tmin) / n
    # Bin edges are shifted by half a bin so that bins are centred on
    # tmin, tmin + w, ...; the labels are those centres.
    bins = np.arange(-w / 2 + tmin, tmax, w)
    labels = [f"{center:.2f}" for center in bins[:-1] + w / 2]

    # Times past the last edge belong to the first bin of the next cycle.
    # Wrap them by one full span (tmax - tmin); subtracting tmax alone is
    # only correct when tmin == 0.
    wrap = times >= bins.max()
    times[wrap] = times[wrap] - (tmax - tmin)
    tmp_adata.obs["timepoint"] = pd.cut(times, bins=bins, labels=labels)
    aggregated = aggregate_and_filter(
        tmp_adata,
        "timepoint",
        replicas_per_group=1,
        make_stats=False,
        make_dummies=False,
    )
    log_cnts = np.log1p(aggregated.X)
    # NOTE(review): assumes `aggregated` has one row per label, in label
    # order — confirm against aggregate_and_filter.
    profiles = pd.DataFrame(log_cnts, index=labels, columns=aggregated.var_names)
    ps = power_spectrum_df(profiles)
    pp = pct_power_below(ps, 1 / period)

    adata.varm["profile"] = profiles.T
    adata.varm["periodogram"] = ps.T
    adata.var["pct_power_below"] = pp
    adata.var["periodic"] = pp > min_pct_power_below
53
+
54
+
55
+ def _assert_integer_counts(X: spmatrix | NDArray):
56
+ message = "Periodic genes requires raw integer counts. E.g. `layer = 'counts'`."
57
+ if isinstance(X, spmatrix):
58
+ assert all(X.data % 1 == 0), message
59
+ else:
60
+ assert all(X % 1 == 0), message
61
+
62
+
63
def infer_dt_from_index(idx: pd.Index) -> float:
    """Return the median spacing between consecutive index entries.

    For a ``DatetimeIndex`` the spacing is returned in seconds; for any other
    index the values are converted to ``float`` and the spacing is in the
    index's own units.
    """
    if isinstance(idx, pd.DatetimeIndex):
        # Convert the differences to nanoseconds explicitly so the result is
        # correct whatever resolution (s, ms, us, ns) the index is stored in.
        steps_ns = np.diff(idx.values).astype("timedelta64[ns]").astype(np.int64)
        return float(np.median(steps_ns) / 1e9)  # seconds
    return float(np.median(np.diff(idx.values.astype(float))))
70
+
71
+
72
def power_spectrum_df(X: pd.DataFrame, window: str = "hann", detrend: str = "constant"):
    """Compute a one-sided periodogram for every column of ``X``.

    ``X`` has one row per time point and one column per variable. The
    sampling step is inferred from the index (1.0 when there is a single
    row). Returns a DataFrame indexed by ``frequency`` with the same columns
    as ``X``, holding the ``scaling="spectrum"`` output of
    ``scipy.signal.periodogram``.
    """
    # Remove each column's mean up front so the zero-frequency term does not
    # dominate later fraction-of-power computations.
    centered = X - X.mean()

    if X.index.size > 1:
        dt = infer_dt_from_index(X.index)
    else:
        dt = 1.0
    fs = 1.0 / dt

    win = get_window(window, X.shape[0], fftbins=True)

    spectra = {}
    for name in centered.columns:
        f, power = periodogram(
            centered[name].values,
            fs=fs,
            window=win,
            detrend=detrend,
            scaling="spectrum",
            return_onesided=True,
        )
        spectra[name] = power

    # The frequency axis is identical for every column; use the last one.
    return pd.DataFrame(spectra, index=pd.Index(f, name="frequency"))
93
+
94
+
95
def pct_power_below(ps: pd.DataFrame, max_freq: float) -> pd.Series:
    """Return, per column, the fraction of spectral power at or below ``max_freq``.

    ``ps`` is a spectrum indexed by frequency (as produced by
    ``power_spectrum_df``). Both the low-frequency and the total power are
    computed with the rectangle rule (sum times frequency spacing). When
    ``ps`` has fewer than two frequencies the spacing is undefined and a
    series of NaN is returned. The result is named
    ``"pct_power_at_low_freq"``.
    """
    frequencies = ps.index
    if len(frequencies) < 2:
        undefined = {column: np.nan for column in ps.columns}
        return pd.Series(undefined, name="pct_power_at_low_freq")

    spacing = frequencies[1] - frequencies[0]
    low_band = ps.loc[frequencies <= max_freq]

    low_power: pd.Series = low_band.sum() * spacing
    total_power: pd.Series = ps.sum() * spacing

    fraction = low_power / total_power
    fraction.name = "pct_power_at_low_freq"
    return fraction
@@ -0,0 +1,124 @@
1
+ import logging
2
+ from typing import Callable
3
+
4
+ import numpy as np
5
+ from numpy import bool_, floating, integer
6
+ from numpy.typing import NDArray
7
+ from tqdm.auto import tqdm
8
+
9
+ logger = logging.getLogger(__name__)
10
+ PIX2 = 2 * np.pi
11
+
12
+
13
def count_empty_intervals(t: NDArray[floating], t_grid: NDArray[floating]) -> int:
    """Return how many intervals of ``t_grid`` contain no value of ``t``."""
    occupancy = count_data_in_intervals(t, t_grid)
    return (occupancy == 0).sum()
17
+
18
+
19
def count_data_in_intervals(
    t: NDArray[floating], t_grid: NDArray[floating]
) -> NDArray[integer]:
    """Count the values of ``t`` falling in each interval between grid points.

    Interval ``i`` is ``[t_grid[i], t_grid[i + 1]]`` with both ends included,
    so a value lying exactly on an interior grid point is counted in both
    neighbouring intervals. Returns one count per interval.
    """
    lower_edges = t_grid[:-1]
    upper_edges = t_grid[1:]
    # Column vector against row of edges: one row per value, one column per
    # interval.
    column = t.reshape(-1, 1)
    inside = (lower_edges <= column) & (column <= upper_edges)
    return inside.sum(axis=0)
24
+
25
+
26
def choose_grid_size(t: NDArray[floating], t_range: tuple[float, float]) -> int:
    """Return the largest power-of-two grid size that leaves no interval empty.

    Candidate sizes ``1024, 512, ..., 2`` are tried in turn; for each one
    ``t_range`` is split into that many equal intervals and the first size
    for which every interval contains at least one value of ``t`` is
    returned.

    Raises
    ------
    ValueError
        If every candidate size leaves at least one interval empty.
    """
    for exponent in range(10, 0, -1):
        candidate = 2**exponent
        edges: NDArray[floating] = np.linspace(*t_range, candidate + 1)
        if count_empty_intervals(t, edges) == 0:
            return candidate

    raise ValueError("Could not find a suitable grid size")
38
+
39
+
40
+ def smoothen_data(
41
+ t: NDArray[floating],
42
+ X: NDArray[floating],
43
+ t_range: tuple[float, float] | None = None,
44
+ t_grid: NDArray[floating] | None = None,
45
+ fn: Callable[[NDArray[floating]], NDArray[floating]] = np.average,
46
+ window_width: float | None = None,
47
+ weights: NDArray[floating] | None = None,
48
+ zero_weight: float = 1,
49
+ periodic: bool = False,
50
+ quiet: bool = False,
51
+ progress: bool = False,
52
+ ) -> NDArray[floating]:
53
+ if t_grid is None:
54
+ # no grid provided. We will have one output point for each input point
55
+ t_grid = t
56
+ is_grid = False
57
+ else:
58
+ # grid is provided
59
+ is_grid = True
60
+ empty_intervals = count_empty_intervals(t, t_grid)
61
+ if empty_intervals > 0 and not quiet:
62
+ logger.warning(f"Provided grid has {empty_intervals} empty intervals")
63
+
64
+ if t_range is not None:
65
+ # we used a specific t values range
66
+ tmin, tmax = t_range
67
+ else:
68
+ tmin, tmax = t_grid.min(), t_grid.max()
69
+
70
+ # full time window size
71
+ tspan = tmax - tmin
72
+
73
+ if window_width is None and not is_grid:
74
+ window_width = tspan * 0.05
75
+ elif window_width is None and is_grid:
76
+ window_width = tspan / (t_grid.size - 1) * 2
77
+
78
+ # initialize the output matrix with NaNs
79
+ X_smooth: NDArray[floating] = np.full((t_grid.size,) + X.shape[1:], np.nan)
80
+
81
+ generator = enumerate(t_grid)
82
+ if progress:
83
+ generator = tqdm(
84
+ generator,
85
+ total=t_grid.size,
86
+ bar_format="{desc} {percentage:3.0f}%|{bar}|",
87
+ desc="Smoothing data",
88
+ )
89
+
90
+ X = X.astype(float)
91
+ eps = np.finfo(float).eps
92
+ for i, m in generator:
93
+ low = m - window_width / 2
94
+ hig = m + window_width / 2
95
+
96
+ mask: NDArray[bool_] = (t >= low) & (t <= hig)
97
+ if periodic:
98
+ # include points beyond the periodic boundaries
99
+ mask = (
100
+ mask
101
+ | (t >= low + tspan) & (t <= hig + tspan)
102
+ | (t >= low - tspan) & (t <= hig - tspan)
103
+ )
104
+
105
+ if mask.sum() == 0:
106
+ continue
107
+
108
+ x = X[mask] + eps
109
+ if fn == np.average and weights is not None:
110
+ w = weights[mask]
111
+ X_smooth[i] = np.average(x, axis=0, weights=w)
112
+
113
+ elif fn == np.average and zero_weight == 1:
114
+ X_smooth[i] = np.mean(x, axis=0)
115
+
116
+ elif fn == np.average and zero_weight != 1:
117
+ w = np.ones_like(x)
118
+ w[x == eps] = zero_weight + eps
119
+ X_smooth[i] = fn(x, axis=0, weights=w)
120
+
121
+ else:
122
+ X_smooth[i] = fn(x, axis=0)
123
+
124
+ return X_smooth - eps
@@ -0,0 +1,55 @@
1
+ import itertools
2
+
3
+ import numpy as np
4
+ from numpy import floating
5
+ from numpy.typing import NDArray
6
+
7
+
8
def guess_trange(
    times: NDArray[floating], verbose: bool = False
) -> tuple[float, float]:
    """Guess a "round" range enclosing ``times``.

    The observed span is rounded up to a multiple of one hundredth of its
    order of magnitude; the minimum is then rounded down and the maximum
    rounded up to a multiple of the step derived from that rounded span.
    Negative zero is normalised to ``0.0``. With ``verbose`` the
    intermediate values are printed.
    """

    def _step(span):
        # One hundredth of the smallest power of ten that is >= span.
        return 10.0 ** np.ceil(np.log10(span)) / 100

    tmin = times.min()
    tmax = times.max()

    observed_span = tmax - tmin
    first_step = _step(observed_span)
    tspan = np.ceil(observed_span / first_step) * first_step

    scale = _step(tspan)
    g_tmin = np.floor(tmin / scale) * scale
    g_tmax = np.ceil(tmax / scale) * scale

    # -0.0 == 0.0, so this also turns a signed zero into a plain 0.0.
    if g_tmin == -0.0:
        g_tmin = 0.0
    if g_tmax == -0.0:
        g_tmax = 0.0

    if verbose:
        print(
            f"tspan: {tspan:10.4f} min-max: {tmin:10.4f} - {tmax:10.4f} | {g_tmin:>8} - {g_tmax:>8}"
        )

    return g_tmin, g_tmax
30
+
31
+
32
def test_guess_trange(N: int = 1000, verbose: bool = False) -> None:
    """Randomised self-check of ``guess_trange``.

    For a set of ranges at several scales and offsets, ``N`` uniform samples
    are drawn 500 times and the guessed range is compared with the true one.
    A draw counts as a success when both guessed ends are within 1% of the
    true span of the true ends. With ``verbose`` the success rate of each
    configuration is printed; otherwise it is asserted to exceed 95%.
    """

    def _test1(trange: tuple[float, float]) -> bool:
        true_min, true_max = trange
        true_span = true_max - true_min
        g_tmin, g_tmax = guess_trange(np.random.uniform(*trange, N))
        err_min = np.abs(g_tmin - true_min) / true_span
        err_max = np.abs(g_tmax - true_max) / true_span
        return err_min <= 0.01 and err_max <= 0.01

    magnitudes = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    multipliers = [1, 2, 3, 5, 7]
    unit_ranges = [(-2, -1), (-1 / 2, 1 / 2), (1, 2)]

    for s1, s2 in itertools.product(magnitudes, multipliers):
        scale = s1 * s2
        for lw, hg in unit_ranges:
            trange = lw * scale, hg * scale
            acc1 = np.mean([_test1(trange) for _ in range(500)])
            report = (
                f"scale: {scale: 9.3f} | lw-hg: {lw: 5.1f} - {hg: 5.1f} | {acc1: 8.2%}"
            )
            if verbose:
                print(report)
            else:
                assert acc1 > 0.95, report
@@ -0,0 +1,7 @@
1
+ from ._pseudobulk_edger import pseudobulk_edger
2
+ from ._pseudobulk_limma import pseudobulk_limma
3
+
4
+ __all__ = [
5
+ "pseudobulk_edger",
6
+ "pseudobulk_limma",
7
+ ]