sclab 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sclab/__init__.py +1 -1
- sclab/examples/processor_steps/_integration.py +4 -1
- sclab/examples/processor_steps/_preprocess.py +34 -18
- sclab/preprocess/__init__.py +8 -1
- sclab/preprocess/_cca.py +26 -4
- sclab/preprocess/_cca_integrate.py +4 -4
- sclab/preprocess/_normalize_weighted.py +5 -1
- sclab/preprocess/_pca.py +51 -0
- sclab/preprocess/_preprocess.py +155 -0
- sclab/preprocess/_qc.py +38 -0
- sclab/preprocess/_rpca.py +116 -0
- sclab/preprocess/_transfer_metadata.py +90 -31
- sclab/tools/cellflow/pseudotime/_pseudotime.py +5 -1
- sclab/tools/cellflow/utils/periodic_genes.py +106 -0
- sclab/tools/differential_expression/__init__.py +2 -0
- sclab/tools/differential_expression/_pseudobulk_edger.py +28 -23
- sclab/tools/differential_expression/_pseudobulk_helpers.py +15 -2
- sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
- sclab/tools/embedding/__init__.py +0 -0
- sclab/tools/imputation/__init__.py +0 -0
- sclab/tools/imputation/_alra.py +135 -0
- sclab/tools/utils/__init__.py +5 -0
- sclab/tools/utils/_aggregate_and_filter.py +290 -0
- {sclab-0.3.2.dist-info → sclab-0.3.4.dist-info}/METADATA +6 -5
- {sclab-0.3.2.dist-info → sclab-0.3.4.dist-info}/RECORD +27 -16
- {sclab-0.3.2.dist-info → sclab-0.3.4.dist-info}/WHEEL +0 -0
- {sclab-0.3.2.dist-info → sclab-0.3.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from collections import Counter
|
|
2
2
|
from functools import partial
|
|
3
|
-
from typing import Literal
|
|
3
|
+
from typing import Callable, Literal
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
@@ -23,45 +23,79 @@ def transfer_metadata(
|
|
|
23
23
|
min_neighs: int = 5,
|
|
24
24
|
weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
|
|
25
25
|
):
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
26
|
+
new_values, new_values_err = _propagate_metadata(
|
|
27
|
+
adata,
|
|
28
|
+
column=column,
|
|
29
|
+
periodic=periodic,
|
|
30
|
+
vmin=vmin,
|
|
31
|
+
vmax=vmax,
|
|
32
|
+
min_neighs=min_neighs,
|
|
33
|
+
weight_by=weight_by,
|
|
34
|
+
mask=adata.obs[group_key] != source_group,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
adata.obs[f"transferred_{new_values.name}"] = new_values
|
|
38
|
+
adata.obs[f"transferred_{new_values_err.name}"] = new_values_err
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def propagate_metadata(
    adata: AnnData,
    column: str,
    periodic: bool = False,
    vmin: float = 0,
    vmax: float = 1,
    min_neighs: int = 5,
    weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
):
    """
    Fill the missing entries of ``adata.obs[column]`` in place.

    Candidate values and their companion error/proportion series are obtained
    from ``_propagate_metadata``. Only the rows where ``column`` is currently
    NA are written; rows that already hold a value are left untouched.

    Parameters
    ----------
    adata : AnnData
        Object whose ``obs`` table is modified in place.
    column : str
        Name of the ``obs`` column to complete.
    periodic : bool, optional
        Forwarded to ``_propagate_metadata``. Defaults to False.
    vmin : float, optional
        Forwarded to ``_propagate_metadata``. Defaults to 0.
    vmax : float, optional
        Forwarded to ``_propagate_metadata``. Defaults to 1.
    min_neighs : int, optional
        Forwarded to ``_propagate_metadata``. Defaults to 5.
    weight_by : {"connectivity", "distance", "constant"}, optional
        Forwarded to ``_propagate_metadata``. Defaults to "connectivity".

    Returns
    -------
    None
        The result is stored in ``adata.obs[column]`` and in the ``obs``
        column named after the companion series returned by
        ``_propagate_metadata``.
    """
    propagation_options = dict(
        column=column,
        periodic=periodic,
        vmin=vmin,
        vmax=vmax,
        min_neighs=min_neighs,
        weight_by=weight_by,
    )
    filled, filled_err = _propagate_metadata(adata, **propagation_options)

    # Restrict the write-back to rows that had no value to begin with.
    missing = adata.obs[column].isna()
    err_column = filled_err.name
    adata.obs.loc[missing, column] = filled.loc[missing]
    adata.obs.loc[missing, err_column] = filled_err.loc[missing]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _propagate_metadata(
|
|
66
|
+
adata: AnnData,
|
|
67
|
+
column: str,
|
|
68
|
+
periodic: bool = False,
|
|
69
|
+
vmin: float = 0,
|
|
70
|
+
vmax: float = 1,
|
|
71
|
+
min_neighs: int = 5,
|
|
72
|
+
weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
|
|
73
|
+
mask: np.ndarray | pd.Series | None = None,
|
|
74
|
+
) -> tuple[pd.Series, pd.Series]:
|
|
75
|
+
D, W = _get_neighbors_and_weights(adata, weight_by=weight_by)
|
|
44
76
|
|
|
77
|
+
assign_value_fn: Callable
|
|
45
78
|
series = adata.obs[column]
|
|
46
79
|
if isinstance(series.dtype, pd.CategoricalDtype) or is_bool_dtype(series.dtype):
|
|
47
80
|
assign_value_fn = _assign_categorical
|
|
48
|
-
new_column = f"transferred_{column}"
|
|
49
|
-
new_column_err = f"transferred_{column}_proportion"
|
|
50
81
|
elif is_numeric_dtype(series.dtype) and periodic:
|
|
51
82
|
assign_value_fn = partial(_assign_numerical_periodic, vmin=vmin, vmax=vmax)
|
|
52
|
-
new_column = f"transferred_{column}"
|
|
53
|
-
new_column_err = f"transferred_{column}_error"
|
|
54
83
|
elif is_numeric_dtype(series.dtype):
|
|
55
84
|
assign_value_fn = _assign_numerical
|
|
56
|
-
new_column = f"transferred_{column}"
|
|
57
|
-
new_column_err = f"transferred_{column}_error"
|
|
58
85
|
else:
|
|
59
86
|
raise ValueError(f"Unsupported dtype {series.dtype} for column {column}")
|
|
60
87
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
88
|
+
if isinstance(series.dtype, pd.CategoricalDtype) or is_bool_dtype(series.dtype):
|
|
89
|
+
column_err = f"{column}_proportion"
|
|
90
|
+
else:
|
|
91
|
+
column_err = f"{column}_error"
|
|
92
|
+
|
|
93
|
+
meta_values: pd.Series = series.copy()
|
|
94
|
+
if mask is not None:
|
|
95
|
+
meta_values[mask] = pd.NA
|
|
96
|
+
|
|
97
|
+
new_values = pd.Series(index=series.index, dtype=series.dtype, name=column)
|
|
98
|
+
new_values_err = pd.Series(index=series.index, dtype=float, name=column_err)
|
|
65
99
|
|
|
66
100
|
for i, (d, w) in tqdm(enumerate(zip(D, W)), total=D.shape[0]):
|
|
67
101
|
if not pd.isna(meta_values.iloc[i]):
|
|
@@ -86,8 +120,33 @@ def transfer_metadata(
|
|
|
86
120
|
new_values.iloc[i] = assigned_value
|
|
87
121
|
new_values_err.iloc[i] = assigned_value_err
|
|
88
122
|
|
|
89
|
-
|
|
90
|
-
|
|
123
|
+
new_values = pd.concat([new_values, meta_values], axis=1).bfill(axis=1).iloc[:, 0]
|
|
124
|
+
|
|
125
|
+
return new_values, new_values_err
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _get_neighbors_and_weights(
|
|
129
|
+
adata: AnnData,
|
|
130
|
+
weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
|
|
131
|
+
):
|
|
132
|
+
D: csr_matrix = adata.obsp["distances"].copy()
|
|
133
|
+
C: csr_matrix = adata.obsp["connectivities"].copy()
|
|
134
|
+
D = D.tocsr()
|
|
135
|
+
W: csr_matrix
|
|
136
|
+
|
|
137
|
+
match weight_by:
|
|
138
|
+
case "connectivity":
|
|
139
|
+
W = C.tocsr().copy()
|
|
140
|
+
case "distance":
|
|
141
|
+
W = D.tocsr().copy()
|
|
142
|
+
W.data = 1.0 / W.data
|
|
143
|
+
case "constant":
|
|
144
|
+
W = D.tocsr().copy()
|
|
145
|
+
W.data[:] = 1.0
|
|
146
|
+
case _:
|
|
147
|
+
raise ValueError(f"Unsupported weight_by {weight_by}")
|
|
148
|
+
|
|
149
|
+
return D, W
|
|
91
150
|
|
|
92
151
|
|
|
93
152
|
def _assign_categorical(values: pd.Series, weights: NDArray):
|
|
@@ -280,6 +280,7 @@ def estimate_periodic_pseudotime_start(
|
|
|
280
280
|
time_key: str = "pseudotime",
|
|
281
281
|
bandwidth: float = 1 / 64,
|
|
282
282
|
show_plot: bool = False,
|
|
283
|
+
nth_root: int = 1,
|
|
283
284
|
):
|
|
284
285
|
# TODO: Test implementation
|
|
285
286
|
pseudotime = adata.obs[time_key].values.copy()
|
|
@@ -316,7 +317,10 @@ def estimate_periodic_pseudotime_start(
|
|
|
316
317
|
roots = (x[idx] + x[1:][idx]) / 2
|
|
317
318
|
heights = yp[idx]
|
|
318
319
|
|
|
319
|
-
|
|
320
|
+
roots = roots[heights.argsort()]
|
|
321
|
+
heights = heights[heights.argsort()]
|
|
322
|
+
|
|
323
|
+
max_peak_x = roots[nth_root - 1]
|
|
320
324
|
|
|
321
325
|
if show_plot:
|
|
322
326
|
plt.hist(
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from anndata import AnnData
|
|
4
|
+
from numpy.typing import NDArray
|
|
5
|
+
from scipy.signal import get_window, periodogram
|
|
6
|
+
from scipy.sparse import spmatrix
|
|
7
|
+
|
|
8
|
+
from sclab.tools.utils import aggregate_and_filter
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def periodic_genes(
    adata: AnnData,
    time_key: str,
    tmin: float,
    tmax: float,
    period: float,
    n: int,
    min_pct_power_below: float = 0.75,
    layer: str | None = None,
):
    """
    Flag variables whose binned expression profile is dominated by low frequencies.

    Observations are grouped into time bins of width ``(tmax - tmin) / n``
    centred on ``tmin, tmin + w, ...``. Counts are aggregated per bin with
    ``aggregate_and_filter`` and log1p-transformed. The share of spectral
    power at frequencies ``<= 1 / period`` is then computed per variable.

    Parameters
    ----------
    adata : AnnData
        Annotated data; modified in place (see Returns).
    time_key : str
        Column of ``adata.obs`` holding the time of each observation.
    tmin : float
        Lower end of the time range.
    tmax : float
        Upper end of the time range. Times in the last half bin are wrapped
        around to the first bin.
    period : float
        Power at frequencies up to ``1 / period`` counts as "low".
    n : int
        Number of time bins.
    min_pct_power_below : float, optional
        Threshold on the low-frequency power fraction above which a variable
        is marked periodic. Defaults to 0.75.
    layer : str, optional
        Layer to read counts from; ``None`` or ``"X"`` uses ``adata.X``.
        Defaults to None.

    Returns
    -------
    None
        Results are written to ``adata.varm["profile"]``,
        ``adata.varm["periodogram"]``, ``adata.var["pct_power_below"]`` and
        ``adata.var["periodic"]``.

    Raises
    ------
    AssertionError
        If the selected matrix contains non-integer values.
    """
    times = adata.obs[time_key].values.copy()
    X = adata.X if layer is None or layer == "X" else adata.layers[layer]

    _assert_integer_counts(X)

    tmp_adata = AnnData(X, obs=adata.obs[[time_key]], var=adata.var[[]])

    w = (tmax - tmin) / n
    bins = np.arange(-w / 2 + tmin, tmax, w)
    # NOTE(review): labels are rounded to two decimals and are later parsed
    # back to infer the sampling step - very narrow bins may lose precision.
    labels = [f"{center:.2f}" for center in bins[:-1] + w / 2]

    # Wrap the trailing half bin onto the first bin. The shift is the length
    # of the time range, so the wrapped values land in the first bin even
    # when tmin is not zero.
    wrap = times >= bins.max()
    times[wrap] = times[wrap] - (tmax - tmin)
    tmp_adata.obs["timepoint"] = pd.cut(times, bins=bins, labels=labels)
    aggregated = aggregate_and_filter(
        tmp_adata,
        "timepoint",
        replicas_per_group=1,
        make_stats=False,
        make_dummies=False,
    )
    log_cnts = np.log1p(aggregated.X)
    profiles = pd.DataFrame(log_cnts, index=labels, columns=aggregated.var_names)
    ps = power_spectrum_df(profiles)
    pp = pct_power_below(ps, 1 / period)

    adata.varm["profile"] = profiles.T
    adata.varm["periodogram"] = ps.T
    adata.var["pct_power_below"] = pp
    adata.var["periodic"] = pp > min_pct_power_below
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _assert_integer_counts(X: spmatrix | NDArray):
|
|
56
|
+
message = "Periodic genes requires raw integer counts. E.g. `layer = 'counts'`."
|
|
57
|
+
if isinstance(X, spmatrix):
|
|
58
|
+
assert all(X.data % 1 == 0), message
|
|
59
|
+
else:
|
|
60
|
+
assert all(X % 1 == 0), message
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def infer_dt_from_index(idx: pd.Index) -> float:
    """
    Return the median spacing between consecutive index entries.

    Parameters
    ----------
    idx : pd.Index
        A ``DatetimeIndex``, or any index whose values can be cast to float.

    Returns
    -------
    float
        Median step. For a ``DatetimeIndex`` the step is expressed in
        seconds; otherwise it is in the units of the index values.
    """
    if isinstance(idx, pd.DatetimeIndex):
        # Timedelta arithmetic gives seconds whatever resolution the index
        # is stored at, unlike dividing the raw integer view by 1e9.
        steps = (idx[1:] - idx[:-1]).total_seconds()
    else:
        steps = np.diff(idx.values.astype(float))
    return float(np.median(np.asarray(steps, dtype=float)))
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def power_spectrum_df(X: pd.DataFrame, window: str = "hann", detrend: str = "constant"):
    """
    Compute a one-sided periodogram for every column of ``X``.

    Parameters
    ----------
    X : pd.DataFrame
        Rows are time points, columns are variables. The sampling step is
        inferred from the index with ``infer_dt_from_index`` when there is
        more than one row, and taken as 1.0 otherwise.
    window : str, optional
        Window name passed to ``scipy.signal.get_window``. Defaults to "hann".
    detrend : str, optional
        Detrending mode passed to ``scipy.signal.periodogram``. Defaults to
        "constant".

    Returns
    -------
    pd.DataFrame
        One row per frequency (index named "frequency") and one column per
        column of ``X``. Computed with ``scaling="spectrum"``, so values are
        in (data units)^2.

    Raises
    ------
    ValueError
        If ``X`` has no columns.
    """
    if X.shape[1] == 0:
        raise ValueError("power_spectrum_df requires at least one column")

    Xd = X - X.mean()  # remove DC so percent computations are stable
    dt = infer_dt_from_index(X.index) if X.index.size > 1 else 1.0
    fs = 1.0 / dt
    win = get_window(window, X.shape[0], fftbins=True)

    # One call along the time axis handles all columns at once, and is also
    # safe when column labels are duplicated.
    f, Pxx = periodogram(
        Xd.to_numpy(),
        fs=fs,
        window=win,
        detrend=detrend,
        scaling="spectrum",
        return_onesided=True,
        axis=0,
    )
    return pd.DataFrame(Pxx, index=pd.Index(f, name="frequency"), columns=Xd.columns)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def pct_power_below(ps: pd.DataFrame, max_freq: float) -> pd.Series:
    """
    Return, per column, the fraction of total power at frequencies ``<= max_freq``.

    Parameters
    ----------
    ps : pd.DataFrame
        Spectrum with frequencies on the index and one column per variable.
    max_freq : float
        Inclusive upper frequency bound of the "low" band.

    Returns
    -------
    pd.Series
        Named "pct_power_at_low_freq" and indexed by the columns of ``ps``.
        Values are ratios (low-band sum over total sum, each scaled by the
        frequency spacing). All values are NaN when ``ps`` has fewer than two
        frequency rows.
    """
    result_name = "pct_power_at_low_freq"
    freqs = ps.index

    # A frequency spacing cannot be derived from fewer than two rows.
    if len(freqs) < 2:
        return pd.Series(dict.fromkeys(ps.columns, np.nan), name=result_name)

    # Rectangle rule: sum of the spectrum times the frequency spacing.
    spacing = freqs[1] - freqs[0]
    low_band = ps.loc[freqs <= max_freq]
    low_power: pd.Series = low_band.sum() * spacing
    total_power: pd.Series = ps.sum() * spacing

    fraction = low_power / total_power
    fraction.name = result_name
    return fraction
|
|
@@ -12,9 +12,9 @@ def pseudobulk_edger(
|
|
|
12
12
|
cell_identity_key: str | None = None,
|
|
13
13
|
batch_key: str | None = None,
|
|
14
14
|
layer: str | None = None,
|
|
15
|
-
replicas_per_group: int =
|
|
15
|
+
replicas_per_group: int = 5,
|
|
16
16
|
min_cells_per_group: int = 30,
|
|
17
|
-
bootstrap_sampling: bool =
|
|
17
|
+
bootstrap_sampling: bool = False,
|
|
18
18
|
use_cells: dict[str, list[str]] | None = None,
|
|
19
19
|
aggregate: bool = True,
|
|
20
20
|
verbosity: int = 0,
|
|
@@ -134,7 +134,7 @@ def pseudobulk_edger(
|
|
|
134
134
|
|
|
135
135
|
try:
|
|
136
136
|
R(f"""
|
|
137
|
-
outs <-
|
|
137
|
+
outs <- fit_edger_model(aggr_adata, "{gk}", "{cell_identity_key}", "{batch_key}", verbosity = {verbosity})
|
|
138
138
|
fit <- outs$fit
|
|
139
139
|
y <- outs$y
|
|
140
140
|
""")
|
|
@@ -214,51 +214,58 @@ suppressPackageStartupMessages({
|
|
|
214
214
|
library(MAST)
|
|
215
215
|
})
|
|
216
216
|
|
|
217
|
-
|
|
217
|
+
fit_edger_model <- function(adata_, group_key, cell_identity_key = "None", batch_key = "None", verbosity = 0){
|
|
218
218
|
|
|
219
219
|
if (verbosity > 0){
|
|
220
220
|
cat("Group key:", group_key, "\n")
|
|
221
221
|
cat("Cell identity key:", cell_identity_key, "\n")
|
|
222
222
|
}
|
|
223
223
|
|
|
224
|
-
# create an edgeR object with counts and grouping factor
|
|
225
|
-
y <- DGEList(assay(adata_, "X"), group = colData(adata_)[[group_key]])
|
|
226
|
-
# filter out genes with low counts
|
|
227
|
-
if (verbosity > 1){
|
|
228
|
-
cat("Dimensions before subsetting:", dim(y), "\n")
|
|
229
|
-
}
|
|
230
|
-
keep <- filterByExpr(y)
|
|
231
|
-
y <- y[keep, , keep.lib.sizes=FALSE]
|
|
232
|
-
if (verbosity > 1){
|
|
233
|
-
cat("Dimensions after subsetting:", dim(y), "\n")
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
# normalize
|
|
237
|
-
y <- calcNormFactors(y)
|
|
238
224
|
# create a vector that is concatentation of condition and cell type that we will later use with contrasts
|
|
239
225
|
if (cell_identity_key == "None"){
|
|
240
226
|
group <- colData(adata_)[[group_key]]
|
|
241
227
|
} else {
|
|
242
228
|
group <- paste0(colData(adata_)[[group_key]], "_", colData(adata_)[[cell_identity_key]])
|
|
243
229
|
}
|
|
230
|
+
|
|
244
231
|
if (verbosity > 1){
|
|
245
232
|
cat("Group(s):", group, "\n")
|
|
246
233
|
}
|
|
247
234
|
|
|
248
|
-
|
|
235
|
+
group <- factor(group)
|
|
236
|
+
replica <- factor(colData(adata_)$replica)
|
|
249
237
|
|
|
250
238
|
# create a design matrix
|
|
251
239
|
if (batch_key == "None"){
|
|
252
240
|
design <- model.matrix(~ 0 + group + replica)
|
|
253
241
|
} else {
|
|
254
|
-
batch
|
|
242
|
+
batch <- factor(colData(adata_)[[batch_key]])
|
|
255
243
|
design <- model.matrix(~ 0 + group + replica + batch)
|
|
256
244
|
}
|
|
245
|
+
colnames(design) <- make.names(colnames(design))
|
|
246
|
+
|
|
247
|
+
# create an edgeR object with counts and grouping factor
|
|
248
|
+
y <- DGEList(assay(adata_, "X"), group = group)
|
|
249
|
+
|
|
250
|
+
# filter out genes with low counts
|
|
251
|
+
if (verbosity > 1){
|
|
252
|
+
cat("Dimensions before subsetting:", dim(y), "\n")
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
keep <- filterByExpr(y, design = design)
|
|
256
|
+
y <- y[keep, , keep.lib.sizes=FALSE]
|
|
257
|
+
if (verbosity > 1){
|
|
258
|
+
cat("Dimensions after subsetting:", dim(y), "\n")
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
# normalize
|
|
262
|
+
y <- calcNormFactors(y)
|
|
257
263
|
|
|
258
264
|
# estimate dispersion
|
|
259
265
|
y <- estimateDisp(y, design = design)
|
|
260
266
|
# fit the model
|
|
261
267
|
fit <- glmQLFit(y, design)
|
|
268
|
+
|
|
262
269
|
return(list("fit"=fit, "design"=design, "y"=y))
|
|
263
270
|
}
|
|
264
271
|
"""
|
|
@@ -282,9 +289,7 @@ def _try_imports():
|
|
|
282
289
|
except ModuleNotFoundError:
|
|
283
290
|
message = (
|
|
284
291
|
"edger_pseudobulk requires rpy2 and anndata2ri to be installed.\n"
|
|
285
|
-
"
|
|
286
|
-
"$ pip install rpy2 sclab-tools[r]\n"
|
|
287
|
-
"or\n"
|
|
292
|
+
"please install with one of the following:\n"
|
|
288
293
|
"$ pip install rpy2 anndata2ri\n"
|
|
289
294
|
"or\n"
|
|
290
295
|
"$ conda install -c conda-forge rpy2 anndata2ri\n"
|
|
@@ -18,6 +18,8 @@ def aggregate_and_filter(
|
|
|
18
18
|
min_cells_per_group: int = 30,
|
|
19
19
|
bootstrap_sampling: bool = False,
|
|
20
20
|
use_cells: dict[str, list[str]] | None = None,
|
|
21
|
+
make_stats: bool = True,
|
|
22
|
+
make_dummies: bool = True,
|
|
21
23
|
) -> AnnData:
|
|
22
24
|
"""
|
|
23
25
|
Aggregate and filter cells in an AnnData object into cell populations.
|
|
@@ -40,6 +42,10 @@ def aggregate_and_filter(
|
|
|
40
42
|
Whether to use bootstrap sampling to create replicas. Defaults to False.
|
|
41
43
|
use_cells : dict[str, list[str]], optional
|
|
42
44
|
If not None, only use the specified cells. Defaults to None.
|
|
45
|
+
make_stats : bool, optional
|
|
46
|
+
Whether to create expression statistics for each group. Defaults to True.
|
|
47
|
+
make_dummies : bool, optional
|
|
48
|
+
Whether to make categorical columns into dummies. Defaults to True.
|
|
43
49
|
|
|
44
50
|
Returns
|
|
45
51
|
-------
|
|
@@ -61,7 +67,12 @@ def aggregate_and_filter(
|
|
|
61
67
|
_prepare_categorical_column(adata, cell_identity_key)
|
|
62
68
|
cell_identity_dtype = adata.obs[cell_identity_key].dtype
|
|
63
69
|
|
|
64
|
-
|
|
70
|
+
if make_stats:
|
|
71
|
+
var_dataframe = _create_var_dataframe(
|
|
72
|
+
adata, layer, grouping_keys, groups_to_drop
|
|
73
|
+
)
|
|
74
|
+
else:
|
|
75
|
+
var_dataframe = pd.DataFrame(index=adata.var_names)
|
|
65
76
|
|
|
66
77
|
data = {}
|
|
67
78
|
meta = {}
|
|
@@ -100,6 +111,7 @@ def aggregate_and_filter(
|
|
|
100
111
|
data = pd.DataFrame(data).T
|
|
101
112
|
meta = pd.DataFrame(meta).T
|
|
102
113
|
meta["replica"] = meta["replica"].astype("category")
|
|
114
|
+
meta["replica_size"] = meta["replica_size"].astype(int)
|
|
103
115
|
meta[group_key] = meta[group_key].astype(group_dtype)
|
|
104
116
|
if cell_identity_key is not None:
|
|
105
117
|
meta[cell_identity_key] = meta[cell_identity_key].astype(cell_identity_dtype)
|
|
@@ -110,7 +122,8 @@ def aggregate_and_filter(
|
|
|
110
122
|
var=var_dataframe,
|
|
111
123
|
)
|
|
112
124
|
|
|
113
|
-
|
|
125
|
+
if make_dummies:
|
|
126
|
+
_join_dummies(aggr_adata, group_key)
|
|
114
127
|
|
|
115
128
|
return aggr_adata
|
|
116
129
|
|