sclab 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sclab might be problematic. Click here for more details.
- sclab/__init__.py +1 -1
- sclab/_sclab.py +10 -3
- sclab/dataset/_dataset.py +1 -1
- sclab/examples/processor_steps/__init__.py +2 -0
- sclab/examples/processor_steps/_doublet_detection.py +68 -0
- sclab/examples/processor_steps/_integration.py +37 -4
- sclab/examples/processor_steps/_neighbors.py +24 -4
- sclab/examples/processor_steps/_pca.py +5 -5
- sclab/examples/processor_steps/_preprocess.py +14 -1
- sclab/examples/processor_steps/_qc.py +22 -6
- sclab/gui/__init__.py +0 -0
- sclab/gui/components/__init__.py +5 -0
- sclab/gui/components/_guided_pseudotime.py +482 -0
- sclab/methods/__init__.py +25 -1
- sclab/preprocess/__init__.py +18 -0
- sclab/preprocess/_cca.py +154 -0
- sclab/preprocess/_cca_integrate.py +77 -0
- sclab/preprocess/_filter_obs.py +42 -0
- sclab/preprocess/_harmony.py +421 -0
- sclab/preprocess/_harmony_integrate.py +50 -0
- sclab/preprocess/_normalize_weighted.py +61 -0
- sclab/preprocess/_subset.py +208 -0
- sclab/preprocess/_transfer_metadata.py +137 -0
- sclab/preprocess/_transform.py +82 -0
- sclab/preprocess/_utils.py +96 -0
- sclab/tools/__init__.py +0 -0
- sclab/tools/cellflow/__init__.py +0 -0
- sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
- sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
- sclab/tools/cellflow/pseudotime/__init__.py +0 -0
- sclab/tools/cellflow/pseudotime/_pseudotime.py +332 -0
- sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
- sclab/tools/cellflow/utils/__init__.py +0 -0
- sclab/tools/cellflow/utils/density_nd.py +136 -0
- sclab/tools/cellflow/utils/interpolate.py +334 -0
- sclab/tools/cellflow/utils/smoothen.py +124 -0
- sclab/tools/cellflow/utils/times.py +55 -0
- sclab/tools/differential_expression/__init__.py +5 -0
- sclab/tools/differential_expression/_pseudobulk_edger.py +304 -0
- sclab/tools/differential_expression/_pseudobulk_helpers.py +277 -0
- sclab/tools/doublet_detection/__init__.py +5 -0
- sclab/tools/doublet_detection/_scrublet.py +64 -0
- sclab/tools/labeling/__init__.py +6 -0
- sclab/tools/labeling/sctype.py +233 -0
- sclab/utils/__init__.py +5 -0
- sclab/utils/_write_excel.py +510 -0
- {sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/METADATA +7 -2
- sclab-0.3.0.dist-info/RECORD +81 -0
- sclab-0.2.4.dist-info/RECORD +0 -45
- {sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/WHEEL +0 -0
- {sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/licenses/LICENSE +0 -0
sclab/__init__.py
CHANGED
sclab/_sclab.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import inspect
|
|
2
2
|
from io import BytesIO
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
import tempfile
|
|
4
5
|
|
|
5
6
|
from anndata import AnnData
|
|
6
7
|
from IPython.display import display
|
|
@@ -256,14 +257,20 @@ class DataLoader(VBox):
|
|
|
256
257
|
|
|
257
258
|
match path.suffix:
|
|
258
259
|
case ".h5":
|
|
259
|
-
|
|
260
|
+
with tempfile.NamedTemporaryFile(suffix=".h5") as tmp:
|
|
261
|
+
tmp.write(contents.getbuffer())
|
|
262
|
+
tmp.flush()
|
|
263
|
+
adata = read_10x_h5(tmp.name)
|
|
260
264
|
case ".h5ad":
|
|
261
|
-
|
|
265
|
+
with tempfile.NamedTemporaryFile(suffix=".h5ad") as tmp:
|
|
266
|
+
tmp.write(contents.getbuffer())
|
|
267
|
+
tmp.flush()
|
|
268
|
+
adata = read_h5ad(tmp.name)
|
|
262
269
|
case _:
|
|
263
270
|
self.upload_info.clear_output()
|
|
264
271
|
with self.upload_info:
|
|
265
272
|
print(f"`{filename}` is not valid")
|
|
266
|
-
print("Please upload a 10x h5 or h5ad file")
|
|
273
|
+
print("Please upload a 10x .h5 or .h5ad file")
|
|
267
274
|
return
|
|
268
275
|
|
|
269
276
|
if var_names in adata.var:
|
sclab/dataset/_dataset.py
CHANGED
|
@@ -365,7 +365,7 @@ class SCLabDataset(EventClient):
|
|
|
365
365
|
if not index.isin(self.metadata.index).all():
|
|
366
366
|
raise InvalidRowSubset("index contains invalid values")
|
|
367
367
|
|
|
368
|
-
self.adata
|
|
368
|
+
self.adata._inplace_subset_obs(index)
|
|
369
369
|
|
|
370
370
|
self.broker.publish("dset_total_rows_change", self.metadata)
|
|
371
371
|
|
|
sclab/examples/processor_steps/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from ._cluster import Cluster
|
|
2
2
|
from ._differential_expression import DifferentialExpression
|
|
3
|
+
from ._doublet_detection import DoubletDetection
|
|
3
4
|
from ._gene_expression import GeneExpression
|
|
4
5
|
from ._integration import Integration
|
|
5
6
|
from ._neighbors import Neighbors
|
|
@@ -16,6 +17,7 @@ __all__ = [
|
|
|
16
17
|
"Neighbors",
|
|
17
18
|
"UMAP",
|
|
18
19
|
"Cluster",
|
|
20
|
+
"DoubletDetection",
|
|
19
21
|
"GeneExpression",
|
|
20
22
|
"DifferentialExpression",
|
|
21
23
|
]
|
|
sclab/examples/processor_steps/_doublet_detection.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from ipywidgets import Dropdown
|
|
2
|
+
|
|
3
|
+
from sclab.dataset.processor import Processor
|
|
4
|
+
from sclab.dataset.processor.step import ProcessorStepBase
|
|
5
|
+
from sclab.tools.doublet_detection import scrublet
|
|
6
|
+
|
|
7
|
+
# from sclab.tools.doublet_detection import doubletdetection
|
|
8
|
+
# from sclab.tools.doublet_detection import scdblfinder
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DoubletDetection(ProcessorStepBase):
|
|
12
|
+
parent: Processor
|
|
13
|
+
name: str = "doublet_detection"
|
|
14
|
+
description: str = "Doublet Detection"
|
|
15
|
+
|
|
16
|
+
def __init__(self, parent: Processor) -> None:
|
|
17
|
+
variable_controls = dict(
|
|
18
|
+
layer=Dropdown(
|
|
19
|
+
options=tuple(parent.dataset.adata.layers.keys()),
|
|
20
|
+
value=None,
|
|
21
|
+
description="Layer",
|
|
22
|
+
),
|
|
23
|
+
flavor=Dropdown(
|
|
24
|
+
options=[
|
|
25
|
+
"scrublet",
|
|
26
|
+
# "doubletdetection",
|
|
27
|
+
# "scDblFinder",
|
|
28
|
+
],
|
|
29
|
+
description="Flavor",
|
|
30
|
+
),
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
super().__init__(
|
|
34
|
+
parent=parent,
|
|
35
|
+
fixed_params={},
|
|
36
|
+
variable_controls=variable_controls,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
def function(self, layer: str, flavor: str):
|
|
40
|
+
adata = self.parent.dataset.adata
|
|
41
|
+
|
|
42
|
+
kvargs = {"adata": adata, "layer": layer, "key_added": flavor}
|
|
43
|
+
|
|
44
|
+
self.broker.std_output.clear_output(wait=False)
|
|
45
|
+
with self.broker.std_output:
|
|
46
|
+
match flavor:
|
|
47
|
+
# case "scDblFinder":
|
|
48
|
+
# scdblfinder(**kvargs, clusters_col="leiden")
|
|
49
|
+
|
|
50
|
+
# case "doubletdetection":
|
|
51
|
+
# doubletdetection(
|
|
52
|
+
# **kvargs,
|
|
53
|
+
# pseudocount=1,
|
|
54
|
+
# clustering_algorithm="leiden",
|
|
55
|
+
# clustering_kwargs=dict(resolution=5.0),
|
|
56
|
+
# )
|
|
57
|
+
|
|
58
|
+
case "scrublet":
|
|
59
|
+
scrublet(**kvargs)
|
|
60
|
+
|
|
61
|
+
case _:
|
|
62
|
+
raise ValueError(f"Unknown flavor: {flavor}")
|
|
63
|
+
|
|
64
|
+
self.broker.publish(
|
|
65
|
+
"dset_metadata_change",
|
|
66
|
+
self.parent.dataset.metadata,
|
|
67
|
+
f"{flavor}_label",
|
|
68
|
+
)
|
|
sclab/examples/processor_steps/_integration.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from ipywidgets import Dropdown
|
|
1
|
+
from ipywidgets import Dropdown, IntText
|
|
2
2
|
|
|
3
3
|
from sclab.dataset.processor import Processor
|
|
4
4
|
from sclab.dataset.processor.step import ProcessorStepBase
|
|
@@ -40,20 +40,49 @@ class Integration(ProcessorStepBase):
|
|
|
40
40
|
value="batch" if "batch" in cat_options else None,
|
|
41
41
|
description="GroupBy",
|
|
42
42
|
),
|
|
43
|
+
reference_batch=Dropdown(
|
|
44
|
+
description="Reference Batch",
|
|
45
|
+
),
|
|
43
46
|
flavor=Dropdown(
|
|
44
47
|
options=["harmony", "scanorama"],
|
|
45
48
|
value="harmony",
|
|
46
49
|
description="Flavor",
|
|
47
50
|
),
|
|
51
|
+
max_iters=IntText(
|
|
52
|
+
value=20,
|
|
53
|
+
description="Max iters",
|
|
54
|
+
),
|
|
48
55
|
)
|
|
49
56
|
|
|
57
|
+
def update_reference_batch(*args, **kwargs):
|
|
58
|
+
group_by = variable_controls["group_by"].value
|
|
59
|
+
options = {
|
|
60
|
+
"": None,
|
|
61
|
+
**{
|
|
62
|
+
c: c
|
|
63
|
+
for c in self.parent.dataset.adata.obs[group_by]
|
|
64
|
+
.sort_values()
|
|
65
|
+
.unique()
|
|
66
|
+
},
|
|
67
|
+
}
|
|
68
|
+
variable_controls["reference_batch"].options = options
|
|
69
|
+
|
|
70
|
+
variable_controls["group_by"].observe(update_reference_batch, names="value")
|
|
71
|
+
|
|
50
72
|
super().__init__(
|
|
51
73
|
parent=parent,
|
|
52
74
|
fixed_params={},
|
|
53
75
|
variable_controls=variable_controls,
|
|
54
76
|
)
|
|
55
77
|
|
|
56
|
-
def function(
|
|
78
|
+
def function(
|
|
79
|
+
self,
|
|
80
|
+
use_rep: str,
|
|
81
|
+
group_by: str,
|
|
82
|
+
flavor: str,
|
|
83
|
+
reference_batch: str | None,
|
|
84
|
+
max_iters: int,
|
|
85
|
+
):
|
|
57
86
|
adata = self.parent.dataset.adata
|
|
58
87
|
|
|
59
88
|
key_added = f"{use_rep}_{flavor}"
|
|
@@ -68,9 +97,13 @@ class Integration(ProcessorStepBase):
|
|
|
68
97
|
with self.broker.std_output:
|
|
69
98
|
match flavor:
|
|
70
99
|
case "harmony":
|
|
71
|
-
from
|
|
100
|
+
from sclab.preprocess import harmony_integrate
|
|
72
101
|
|
|
73
|
-
harmony_integrate(
|
|
102
|
+
harmony_integrate(
|
|
103
|
+
**kvargs,
|
|
104
|
+
reference_batch=reference_batch,
|
|
105
|
+
max_iter_harmony=max_iters,
|
|
106
|
+
)
|
|
74
107
|
|
|
75
108
|
case "scanorama":
|
|
76
109
|
from scanpy.external.pp import scanorama_integrate
|
|
sclab/examples/processor_steps/_neighbors.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from ipywidgets import Dropdown, IntText
|
|
1
|
+
from ipywidgets import Dropdown, IntRangeSlider, IntText
|
|
2
2
|
|
|
3
3
|
from sclab.dataset.processor import Processor
|
|
4
4
|
from sclab.dataset.processor.step import ProcessorStepBase
|
|
@@ -22,7 +22,12 @@ class Neighbors(ProcessorStepBase):
|
|
|
22
22
|
description="Use rep.",
|
|
23
23
|
),
|
|
24
24
|
n_neighbors=IntText(value=20, description="N neighbors"),
|
|
25
|
-
|
|
25
|
+
dims=IntRangeSlider(
|
|
26
|
+
min=1,
|
|
27
|
+
max=30,
|
|
28
|
+
value=(1, 10),
|
|
29
|
+
description="Use dims",
|
|
30
|
+
),
|
|
26
31
|
metric=Dropdown(
|
|
27
32
|
options=["euclidean", "cosine"],
|
|
28
33
|
value="euclidean",
|
|
@@ -31,6 +36,14 @@ class Neighbors(ProcessorStepBase):
|
|
|
31
36
|
**parent.make_groupbybatch_checkbox(),
|
|
32
37
|
)
|
|
33
38
|
|
|
39
|
+
def update_dims_range(*args, **kwargs):
|
|
40
|
+
adata = self.parent.dataset.adata
|
|
41
|
+
use_rep = variable_controls["use_rep"].value
|
|
42
|
+
max_dim = adata.obsm[use_rep].shape[1]
|
|
43
|
+
variable_controls["dims"].max = max_dim
|
|
44
|
+
|
|
45
|
+
variable_controls["use_rep"].observe(update_dims_range, names="value")
|
|
46
|
+
|
|
34
47
|
super().__init__(
|
|
35
48
|
parent=parent,
|
|
36
49
|
fixed_params={},
|
|
@@ -41,13 +54,20 @@ class Neighbors(ProcessorStepBase):
|
|
|
41
54
|
self,
|
|
42
55
|
n_neighbors: int = 20,
|
|
43
56
|
use_rep: str = "X_pca",
|
|
44
|
-
|
|
57
|
+
dims: tuple[int, int] = (1, 10),
|
|
45
58
|
metric: str = "euclidean",
|
|
46
59
|
group_by_batch: bool = False,
|
|
47
60
|
):
|
|
48
61
|
import scanpy as sc
|
|
49
62
|
|
|
50
63
|
adata = self.parent.dataset.adata
|
|
64
|
+
min_dim, max_dim = dims
|
|
65
|
+
min_dim = min_dim - 1
|
|
66
|
+
|
|
67
|
+
if min_dim > 0:
|
|
68
|
+
adata.obsm[use_rep + "_trimmed"] = adata.obsm[use_rep][:, min_dim:max_dim]
|
|
69
|
+
use_rep = use_rep + "_trimmed"
|
|
70
|
+
n_dims = max_dim - min_dim
|
|
51
71
|
|
|
52
72
|
if group_by_batch and self.parent.batch_key:
|
|
53
73
|
group_by = self.parent.batch_key
|
|
@@ -58,7 +78,7 @@ class Neighbors(ProcessorStepBase):
|
|
|
58
78
|
n_pcs=n_dims,
|
|
59
79
|
use_annoy=False,
|
|
60
80
|
metric=metric,
|
|
61
|
-
|
|
81
|
+
neighbors_within_batch=n_neighbors,
|
|
62
82
|
)
|
|
63
83
|
else:
|
|
64
84
|
sc.pp.neighbors(
|
|
sclab/examples/processor_steps/_pca.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import numpy as np
|
|
2
1
|
import pandas as pd
|
|
3
2
|
import plotly.express as px
|
|
4
3
|
from ipywidgets import Button, Dropdown, IntText
|
|
@@ -64,7 +63,9 @@ class PCA(ProcessorStepBase):
|
|
|
64
63
|
counts_layer = self.parent.dataset.counts_layer
|
|
65
64
|
|
|
66
65
|
if reference_batch:
|
|
67
|
-
|
|
66
|
+
batch_key = self.parent.batch_key
|
|
67
|
+
|
|
68
|
+
obs_mask = adata.obs[batch_key] == reference_batch
|
|
68
69
|
adata_ref = adata[obs_mask].copy()
|
|
69
70
|
if mask_var == "highly_variable":
|
|
70
71
|
sc.pp.highly_variable_genes(
|
|
@@ -85,13 +86,12 @@ class PCA(ProcessorStepBase):
|
|
|
85
86
|
uns_pca = adata_ref.uns["pca"]
|
|
86
87
|
uns_pca["reference_batch"] = reference_batch
|
|
87
88
|
PCs = adata_ref.varm["PCs"]
|
|
88
|
-
X_pca
|
|
89
|
-
X_pca = X_pca - X_pca.mean(axis=0, keepdims=True)
|
|
90
|
-
adata.obsm["X_pca"] = X_pca
|
|
89
|
+
adata.obsm["X_pca"] = adata.X.dot(PCs)
|
|
91
90
|
adata.uns["pca"] = uns_pca
|
|
92
91
|
adata.varm["PCs"] = PCs
|
|
93
92
|
else:
|
|
94
93
|
sc.pp.pca(adata, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack")
|
|
94
|
+
adata.obsm["X_pca"] = adata.X.dot(adata.varm["PCs"])
|
|
95
95
|
|
|
96
96
|
self.plot_variance_ratio_button.disabled = False
|
|
97
97
|
self.broker.publish(
|
|
sclab/examples/processor_steps/_preprocess.py
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
1
3
|
import numpy as np
|
|
4
|
+
from anndata import ImplicitModificationWarning
|
|
2
5
|
from ipywidgets import Checkbox, Dropdown
|
|
3
6
|
from tqdm.auto import tqdm
|
|
4
7
|
|
|
@@ -156,7 +159,17 @@ class Preprocess(ProcessorStepBase):
|
|
|
156
159
|
|
|
157
160
|
if scale:
|
|
158
161
|
new_layer += "_scale"
|
|
159
|
-
|
|
162
|
+
if group_by is not None:
|
|
163
|
+
for _, idx in adata.obs.groupby(group_by, observed=True).groups.items():
|
|
164
|
+
with warnings.catch_warnings():
|
|
165
|
+
warnings.filterwarnings(
|
|
166
|
+
"ignore",
|
|
167
|
+
category=ImplicitModificationWarning,
|
|
168
|
+
message="Modifying `X` on a view results in data being overridden",
|
|
169
|
+
)
|
|
170
|
+
adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
|
|
171
|
+
else:
|
|
172
|
+
sc.pp.scale(adata, zero_center=False)
|
|
160
173
|
|
|
161
174
|
adata.layers[new_layer] = adata.X.copy()
|
|
162
175
|
|
|
sclab/examples/processor_steps/_qc.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import numpy as np
|
|
1
2
|
from ipywidgets import Dropdown, IntText
|
|
2
3
|
|
|
3
4
|
from sclab.dataset.processor import Processor
|
|
@@ -21,6 +22,7 @@ class QC(ProcessorStepBase):
|
|
|
21
22
|
value="counts",
|
|
22
23
|
description="Layer",
|
|
23
24
|
),
|
|
25
|
+
min_counts=IntText(value=50, description="Min. Counts"),
|
|
24
26
|
min_genes=IntText(value=5, description="Min. Genes"),
|
|
25
27
|
min_cells=IntText(value=0, description="Min. Cells"),
|
|
26
28
|
max_rank=IntText(value=0, description="Max. Rank"),
|
|
@@ -45,8 +47,10 @@ class QC(ProcessorStepBase):
|
|
|
45
47
|
def compute_qc_metrics(
|
|
46
48
|
self,
|
|
47
49
|
layer: str | None = None,
|
|
50
|
+
min_counts: int = 50,
|
|
48
51
|
min_genes: int = 5,
|
|
49
52
|
min_cells: int = 5,
|
|
53
|
+
max_rank: int = 0,
|
|
50
54
|
):
|
|
51
55
|
import scanpy as sc
|
|
52
56
|
|
|
@@ -58,6 +62,11 @@ class QC(ProcessorStepBase):
|
|
|
58
62
|
|
|
59
63
|
adata.layers["qc_tmp_current_X"] = adata.X
|
|
60
64
|
adata.X = adata.layers[layer].copy()
|
|
65
|
+
rowsums = np.asarray(adata.X.sum(axis=1)).squeeze()
|
|
66
|
+
|
|
67
|
+
obs_idx = adata.obs_names[rowsums >= min_counts]
|
|
68
|
+
adata._inplace_subset_obs(obs_idx)
|
|
69
|
+
|
|
61
70
|
sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
|
|
62
71
|
|
|
63
72
|
sc.pp.filter_cells(adata, min_genes=min_genes)
|
|
@@ -68,19 +77,26 @@ class QC(ProcessorStepBase):
|
|
|
68
77
|
# Restore original X
|
|
69
78
|
adata.X = adata.layers.pop("qc_tmp_current_X")
|
|
70
79
|
|
|
80
|
+
if max_rank > 0:
|
|
81
|
+
series = self.parent.dataset.adata.obs["barcode_rank"]
|
|
82
|
+
index = series.loc[series < max_rank].index
|
|
83
|
+
self.parent.dataset.filter_rows(index)
|
|
84
|
+
|
|
71
85
|
def function(
|
|
72
86
|
self,
|
|
73
87
|
layer: str | None = None,
|
|
88
|
+
min_counts: int = 50,
|
|
74
89
|
min_genes: int = 5,
|
|
75
90
|
min_cells: int = 5,
|
|
76
91
|
max_rank: int = 0,
|
|
77
92
|
):
|
|
78
|
-
self.compute_qc_metrics(
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
93
|
+
self.compute_qc_metrics(
|
|
94
|
+
layer,
|
|
95
|
+
min_counts,
|
|
96
|
+
min_genes,
|
|
97
|
+
min_cells,
|
|
98
|
+
max_rank,
|
|
99
|
+
)
|
|
84
100
|
|
|
85
101
|
self.broker.publish("dset_metadata_change", self.parent.dataset.metadata)
|
|
86
102
|
self.broker.publish(
|
sclab/gui/__init__.py
ADDED
|
File without changes
|