sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sclab/__init__.py +3 -1
- sclab/_io.py +83 -12
- sclab/_methods_registry.py +65 -0
- sclab/_sclab.py +241 -21
- sclab/dataset/_dataset.py +4 -6
- sclab/dataset/processor/_processor.py +41 -19
- sclab/dataset/processor/_results_panel.py +94 -0
- sclab/dataset/processor/step/_processor_step_base.py +12 -6
- sclab/examples/processor_steps/__init__.py +8 -0
- sclab/examples/processor_steps/_cluster.py +2 -2
- sclab/examples/processor_steps/_differential_expression.py +329 -0
- sclab/examples/processor_steps/_doublet_detection.py +68 -0
- sclab/examples/processor_steps/_gene_expression.py +125 -0
- sclab/examples/processor_steps/_integration.py +116 -0
- sclab/examples/processor_steps/_neighbors.py +26 -6
- sclab/examples/processor_steps/_pca.py +13 -8
- sclab/examples/processor_steps/_preprocess.py +52 -25
- sclab/examples/processor_steps/_qc.py +24 -8
- sclab/examples/processor_steps/_umap.py +2 -2
- sclab/gui/__init__.py +0 -0
- sclab/gui/components/__init__.py +7 -0
- sclab/gui/components/_guided_pseudotime.py +482 -0
- sclab/gui/components/_transfer_metadata.py +186 -0
- sclab/methods/__init__.py +50 -0
- sclab/preprocess/__init__.py +26 -0
- sclab/preprocess/_cca.py +176 -0
- sclab/preprocess/_cca_integrate.py +109 -0
- sclab/preprocess/_filter_obs.py +42 -0
- sclab/preprocess/_harmony.py +421 -0
- sclab/preprocess/_harmony_integrate.py +53 -0
- sclab/preprocess/_normalize_weighted.py +65 -0
- sclab/preprocess/_pca.py +51 -0
- sclab/preprocess/_preprocess.py +155 -0
- sclab/preprocess/_qc.py +38 -0
- sclab/preprocess/_rpca.py +116 -0
- sclab/preprocess/_subset.py +208 -0
- sclab/preprocess/_transfer_metadata.py +196 -0
- sclab/preprocess/_transform.py +82 -0
- sclab/preprocess/_utils.py +96 -0
- sclab/scanpy/__init__.py +0 -0
- sclab/scanpy/_compat.py +92 -0
- sclab/scanpy/_settings.py +526 -0
- sclab/scanpy/logging.py +290 -0
- sclab/scanpy/plotting/__init__.py +0 -0
- sclab/scanpy/plotting/_rcmod.py +73 -0
- sclab/scanpy/plotting/palettes.py +221 -0
- sclab/scanpy/readwrite.py +1108 -0
- sclab/tools/__init__.py +0 -0
- sclab/tools/cellflow/__init__.py +0 -0
- sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
- sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
- sclab/tools/cellflow/pseudotime/__init__.py +0 -0
- sclab/tools/cellflow/pseudotime/_pseudotime.py +336 -0
- sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
- sclab/tools/cellflow/utils/__init__.py +0 -0
- sclab/tools/cellflow/utils/density_nd.py +215 -0
- sclab/tools/cellflow/utils/interpolate.py +334 -0
- sclab/tools/cellflow/utils/periodic_genes.py +106 -0
- sclab/tools/cellflow/utils/smoothen.py +124 -0
- sclab/tools/cellflow/utils/times.py +55 -0
- sclab/tools/differential_expression/__init__.py +7 -0
- sclab/tools/differential_expression/_pseudobulk_edger.py +309 -0
- sclab/tools/differential_expression/_pseudobulk_helpers.py +290 -0
- sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
- sclab/tools/doublet_detection/__init__.py +5 -0
- sclab/tools/doublet_detection/_scrublet.py +64 -0
- sclab/tools/embedding/__init__.py +0 -0
- sclab/tools/imputation/__init__.py +0 -0
- sclab/tools/imputation/_alra.py +135 -0
- sclab/tools/labeling/__init__.py +6 -0
- sclab/tools/labeling/sctype.py +233 -0
- sclab/tools/utils/__init__.py +5 -0
- sclab/tools/utils/_aggregate_and_filter.py +290 -0
- sclab/utils/__init__.py +5 -0
- sclab/utils/_write_excel.py +510 -0
- {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/METADATA +29 -12
- sclab-0.3.4.dist-info/RECORD +93 -0
- {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/WHEEL +1 -1
- sclab-0.3.4.dist-info/licenses/LICENSE +29 -0
- sclab-0.1.7.dist-info/RECORD +0 -30
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import plotly.colors as pc
|
|
4
|
+
import plotly.express as px
|
|
5
|
+
from ipywidgets import Combobox, Dropdown
|
|
6
|
+
|
|
7
|
+
from sclab.dataset.processor import Processor
|
|
8
|
+
from sclab.dataset.processor.step import ProcessorStepBase
|
|
9
|
+
|
|
10
|
+
# Public attribute names of plotly's sequential colour-scale module, skipping
# private names and the "swatch" helper functions. Used as the option list of
# the "Col. scale" dropdown below.
colorscales = [
    scale_name
    for scale_name in dir(pc.sequential)
    if not ("swatch" in scale_name or scale_name.startswith("_"))
]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class GeneExpression(ProcessorStepBase):
    """Processor step that plots the expression of one gene.

    The step exposes four controls (gene, layer, horizontal-axis key and
    colour scale).  Whenever any of them changes, or the run button is
    pressed, a plot request is published on the broker under the
    ``"dplt_plot_figure_request"`` topic.
    """

    parent: Processor
    name: str = "gene_expression"
    description: str = "Gene Expression"

    run_button_description = "Plot Expression"

    def __init__(self, parent: Processor) -> None:
        """Build the controls from the parent's dataset and register observers.

        Args:
            parent: Processor whose ``dataset`` provides the metadata table
                and the AnnData object the controls are populated from.
        """
        # Every metadata column may be used as the horizontal axis; the empty
        # label maps to None, meaning "no horizontal axis selected".
        metadata = parent.dataset.metadata
        axis_key_options = {"": None, **{c: c for c in metadata.columns}}

        # Each option starts with the gene id and is followed by
        # " - <annotation>" for every annotation-like column of adata.var.
        gene_input_options = parent.dataset.adata.var_names
        genes_df = parent.dataset.adata.var
        info_cols = ["name", "symbol", "description"]
        for col in genes_df.columns:
            # Compare case-insensitively on the column name so that e.g.
            # "Symbol" or "Description" are picked up as well.
            if not any(keyword in str(col).lower() for keyword in info_cols):
                continue

            column = genes_df[col]
            # Blank out missing values only; a plain substring replace of
            # "nan" would also mangle legitimate text containing those letters.
            new_info = column.astype(str).where(column.notna(), "")
            gene_input_options = gene_input_options + " - " + new_info

        variable_controls = dict(
            gene_input=Combobox(
                placeholder="Type gene name",
                options=gene_input_options.to_list(),
                description="Gene",
                ensure_option=True,
            ),
            layer=Dropdown(
                options=tuple(parent.dataset.adata.layers.keys()),
                value=None,
                description="Layer",
            ),
            time_key=Dropdown(
                options=axis_key_options,
                value=None,
                description="Horiz. axis",
            ),
            colorscale=Dropdown(
                options=colorscales, value="Oryel", description="Col. scale"
            ),
        )

        super().__init__(
            parent=parent,
            fixed_params={},
            variable_controls=variable_controls,
        )

        # Re-plot as soon as any control changes.
        for control_name in ("gene_input", "layer", "time_key", "colorscale"):
            self.variable_controls[control_name].observe(
                self.send_plot, names="value"
            )

    def function(self, *pargs, **kwargs):
        """Run-button entry point: plot with the current control values."""
        self.send_plot({})

    def send_plot(self, change: dict):
        """Publish a plot of the selected gene's expression.

        Args:
            change: Widget change notification; its content is not used, the
                current control values are read directly instead.

        Nothing is plotted (and the step output is cleared) while no gene or
        no layer is selected.  Without a horizontal-axis key the metadata,
        extended by an expression column, is published so the gene can be used
        for colouring; with a key a scatter figure of expression against that
        key is built and published.
        """
        adata = self.parent.dataset.adata
        metadata = self.parent.dataset.metadata
        selected_cells = self.parent.dataset.selected_rows

        gene_input: str = self.variable_controls["gene_input"].value
        layer: str = self.variable_controls["layer"].value
        time_key: str = self.variable_controls["time_key"].value
        colorscale: str = self.variable_controls["colorscale"].value

        if gene_input is None or gene_input == "":
            self.update_output("")
            return

        if layer is None or layer == "":
            self.update_output("")
            return

        # Options have the form "<gene id> - <annotation> ..."; the id is the
        # first whitespace-separated token.
        gene_id = gene_input.split(" ")[0]

        if layer == "X":
            X = adata[:, gene_id].X
        else:
            X = adata[:, gene_id].layers[layer]

        # Collapse the single-gene slice into a flat per-cell vector.
        E = np.asarray(X.sum(axis=1)).flatten()

        self.update_output(f"Showing gene: {gene_id}")

        df = pd.DataFrame({gene_id: E}, index=adata.obs.index)
        metadata = metadata.join(df)
        if selected_cells.size > 0:
            metadata = metadata.loc[selected_cells]

        if time_key is None:
            self.broker.publish(
                "dplt_plot_figure_request",
                metadata=metadata,
                colorby=gene_id,
                color_continuous_scale=colorscale,
            )
            return

        fig = px.scatter(
            metadata,
            x=time_key,
            y=gene_id,
            color=gene_id,
            color_continuous_scale=colorscale,
            # Use the index of the (possibly subset) plotted frame so the
            # hover labels always have one entry per plotted row.
            hover_name=metadata.index,
            title=f"Gene: {gene_id}, Layer: {layer}",
        )
        self.broker.publish("dplt_plot_figure_request", figure=fig)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
from ipywidgets import Dropdown, IntText
|
|
2
|
+
|
|
3
|
+
from sclab.dataset.processor import Processor
|
|
4
|
+
from sclab.dataset.processor.step import ProcessorStepBase
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Integration(ProcessorStepBase):
|
|
8
|
+
parent: Processor
|
|
9
|
+
name: str = "integration"
|
|
10
|
+
description: str = "Integration"
|
|
11
|
+
|
|
12
|
+
def __init__(self, parent: Processor) -> None:
|
|
13
|
+
cat_metadata = parent.dataset._metadata.select_dtypes(
|
|
14
|
+
include=["object", "category"]
|
|
15
|
+
)
|
|
16
|
+
cat_options = {"": None, **{c: c for c in cat_metadata.columns}}
|
|
17
|
+
|
|
18
|
+
variable_controls = dict(
|
|
19
|
+
use_rep=Dropdown(
|
|
20
|
+
options=tuple(parent.dataset.adata.obsm.keys()),
|
|
21
|
+
value=None,
|
|
22
|
+
description="Use rep.",
|
|
23
|
+
),
|
|
24
|
+
group_by=Dropdown(
|
|
25
|
+
options=cat_options,
|
|
26
|
+
value="batch" if "batch" in cat_options else None,
|
|
27
|
+
description="GroupBy",
|
|
28
|
+
),
|
|
29
|
+
reference_batch=Dropdown(
|
|
30
|
+
description="Reference Batch",
|
|
31
|
+
),
|
|
32
|
+
flavor=Dropdown(
|
|
33
|
+
options=["cca", "harmony", "scanorama"],
|
|
34
|
+
value="cca",
|
|
35
|
+
description="Flavor",
|
|
36
|
+
),
|
|
37
|
+
max_iters=IntText(
|
|
38
|
+
value=20,
|
|
39
|
+
description="Max iters",
|
|
40
|
+
),
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
def update_reference_batch(*args, **kwargs):
|
|
44
|
+
group_by = variable_controls["group_by"].value
|
|
45
|
+
options = {
|
|
46
|
+
"": None,
|
|
47
|
+
**{
|
|
48
|
+
c: c
|
|
49
|
+
for c in self.parent.dataset.adata.obs[group_by]
|
|
50
|
+
.sort_values()
|
|
51
|
+
.unique()
|
|
52
|
+
},
|
|
53
|
+
}
|
|
54
|
+
variable_controls["reference_batch"].options = options
|
|
55
|
+
|
|
56
|
+
variable_controls["group_by"].observe(update_reference_batch, names="value")
|
|
57
|
+
|
|
58
|
+
super().__init__(
|
|
59
|
+
parent=parent,
|
|
60
|
+
fixed_params={},
|
|
61
|
+
variable_controls=variable_controls,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
def function(
|
|
65
|
+
self,
|
|
66
|
+
use_rep: str | None,
|
|
67
|
+
group_by: str,
|
|
68
|
+
flavor: str,
|
|
69
|
+
reference_batch: str | None,
|
|
70
|
+
max_iters: int,
|
|
71
|
+
):
|
|
72
|
+
adata = self.parent.dataset.adata
|
|
73
|
+
|
|
74
|
+
if use_rep is None:
|
|
75
|
+
use_rep = "X"
|
|
76
|
+
|
|
77
|
+
key_added = f"{use_rep}_{flavor}"
|
|
78
|
+
kvargs = {
|
|
79
|
+
"adata": adata,
|
|
80
|
+
"key": group_by,
|
|
81
|
+
"basis": use_rep,
|
|
82
|
+
"adjusted_basis": key_added,
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
self.broker.std_output.clear_output(wait=False)
|
|
86
|
+
with self.broker.std_output:
|
|
87
|
+
match flavor:
|
|
88
|
+
case "cca":
|
|
89
|
+
from sclab.preprocess import cca_integrate
|
|
90
|
+
|
|
91
|
+
cca_integrate(
|
|
92
|
+
**kvargs,
|
|
93
|
+
reference_batch=reference_batch,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
case "harmony":
|
|
97
|
+
from sclab.preprocess import harmony_integrate
|
|
98
|
+
|
|
99
|
+
harmony_integrate(
|
|
100
|
+
**kvargs,
|
|
101
|
+
reference_batch=reference_batch,
|
|
102
|
+
max_iter_harmony=max_iters,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
case "scanorama":
|
|
106
|
+
from scanpy.external.pp import scanorama_integrate
|
|
107
|
+
|
|
108
|
+
scanorama_integrate(**kvargs)
|
|
109
|
+
case _:
|
|
110
|
+
raise ValueError(f"Unknown flavor: {flavor}")
|
|
111
|
+
|
|
112
|
+
self.broker.publish(
|
|
113
|
+
"dset_data_dict_change",
|
|
114
|
+
self.parent.dataset.data_dict,
|
|
115
|
+
key_added,
|
|
116
|
+
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from ipywidgets import Dropdown, IntText
|
|
1
|
+
from ipywidgets import Dropdown, IntRangeSlider, IntText
|
|
2
2
|
|
|
3
3
|
from sclab.dataset.processor import Processor
|
|
4
4
|
from sclab.dataset.processor.step import ProcessorStepBase
|
|
@@ -6,6 +6,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
|
|
|
6
6
|
|
|
7
7
|
class Neighbors(ProcessorStepBase):
|
|
8
8
|
parent: Processor
|
|
9
|
+
name: str = "neighbors"
|
|
10
|
+
description: str = "Neighbors"
|
|
9
11
|
|
|
10
12
|
def __init__(self, parent: Processor) -> None:
|
|
11
13
|
try:
|
|
@@ -20,7 +22,12 @@ class Neighbors(ProcessorStepBase):
|
|
|
20
22
|
description="Use rep.",
|
|
21
23
|
),
|
|
22
24
|
n_neighbors=IntText(value=20, description="N neighbors"),
|
|
23
|
-
|
|
25
|
+
dims=IntRangeSlider(
|
|
26
|
+
min=1,
|
|
27
|
+
max=30,
|
|
28
|
+
value=(1, 10),
|
|
29
|
+
description="Use dims",
|
|
30
|
+
),
|
|
24
31
|
metric=Dropdown(
|
|
25
32
|
options=["euclidean", "cosine"],
|
|
26
33
|
value="euclidean",
|
|
@@ -29,10 +36,16 @@ class Neighbors(ProcessorStepBase):
|
|
|
29
36
|
**parent.make_groupbybatch_checkbox(),
|
|
30
37
|
)
|
|
31
38
|
|
|
39
|
+
def update_dims_range(*args, **kwargs):
|
|
40
|
+
adata = self.parent.dataset.adata
|
|
41
|
+
use_rep = variable_controls["use_rep"].value
|
|
42
|
+
max_dim = adata.obsm[use_rep].shape[1]
|
|
43
|
+
variable_controls["dims"].max = max_dim
|
|
44
|
+
|
|
45
|
+
variable_controls["use_rep"].observe(update_dims_range, names="value")
|
|
46
|
+
|
|
32
47
|
super().__init__(
|
|
33
48
|
parent=parent,
|
|
34
|
-
name="neighbors",
|
|
35
|
-
description="Neighbors",
|
|
36
49
|
fixed_params={},
|
|
37
50
|
variable_controls=variable_controls,
|
|
38
51
|
)
|
|
@@ -41,13 +54,20 @@ class Neighbors(ProcessorStepBase):
|
|
|
41
54
|
self,
|
|
42
55
|
n_neighbors: int = 20,
|
|
43
56
|
use_rep: str = "X_pca",
|
|
44
|
-
|
|
57
|
+
dims: tuple[int, int] = (1, 10),
|
|
45
58
|
metric: str = "euclidean",
|
|
46
59
|
group_by_batch: bool = False,
|
|
47
60
|
):
|
|
48
61
|
import scanpy as sc
|
|
49
62
|
|
|
50
63
|
adata = self.parent.dataset.adata
|
|
64
|
+
min_dim, max_dim = dims
|
|
65
|
+
min_dim = min_dim - 1
|
|
66
|
+
|
|
67
|
+
if min_dim > 0:
|
|
68
|
+
adata.obsm[use_rep + "_trimmed"] = adata.obsm[use_rep][:, min_dim:max_dim]
|
|
69
|
+
use_rep = use_rep + "_trimmed"
|
|
70
|
+
n_dims = max_dim - min_dim
|
|
51
71
|
|
|
52
72
|
if group_by_batch and self.parent.batch_key:
|
|
53
73
|
group_by = self.parent.batch_key
|
|
@@ -58,7 +78,7 @@ class Neighbors(ProcessorStepBase):
|
|
|
58
78
|
n_pcs=n_dims,
|
|
59
79
|
use_annoy=False,
|
|
60
80
|
metric=metric,
|
|
61
|
-
|
|
81
|
+
neighbors_within_batch=n_neighbors,
|
|
62
82
|
)
|
|
63
83
|
else:
|
|
64
84
|
sc.pp.neighbors(
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import numpy as np
|
|
2
1
|
import pandas as pd
|
|
3
2
|
import plotly.express as px
|
|
4
|
-
from ipywidgets import Button, Dropdown, IntText
|
|
3
|
+
from ipywidgets import Button, Checkbox, Dropdown, IntText
|
|
5
4
|
|
|
6
5
|
from sclab.dataset.processor import Processor
|
|
7
6
|
from sclab.dataset.processor.step import ProcessorStepBase
|
|
@@ -9,6 +8,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
|
|
|
9
8
|
|
|
10
9
|
class PCA(ProcessorStepBase):
|
|
11
10
|
parent: Processor
|
|
11
|
+
name: str = "pca"
|
|
12
|
+
description: str = "PCA"
|
|
12
13
|
|
|
13
14
|
def __init__(self, parent: Processor) -> None:
|
|
14
15
|
try:
|
|
@@ -25,12 +26,11 @@ class PCA(ProcessorStepBase):
|
|
|
25
26
|
n_comps=IntText(value=30, description="N comps."),
|
|
26
27
|
mask_var=Dropdown(options=mask_var_options, description="Genes mask"),
|
|
27
28
|
**parent.make_selectbatch_drowpdown(description="Reference Batch"),
|
|
29
|
+
zero_center=Checkbox(value=False, description="Zero center"),
|
|
28
30
|
)
|
|
29
31
|
|
|
30
32
|
super().__init__(
|
|
31
33
|
parent=parent,
|
|
32
|
-
name="pca",
|
|
33
|
-
description="PCA",
|
|
34
34
|
fixed_params={},
|
|
35
35
|
variable_controls=variable_controls,
|
|
36
36
|
)
|
|
@@ -57,6 +57,7 @@ class PCA(ProcessorStepBase):
|
|
|
57
57
|
n_comps: int = 30,
|
|
58
58
|
mask_var: str | None = None,
|
|
59
59
|
reference_batch: str | None = None,
|
|
60
|
+
zero_center: bool = False,
|
|
60
61
|
):
|
|
61
62
|
import scanpy as sc
|
|
62
63
|
|
|
@@ -64,7 +65,9 @@ class PCA(ProcessorStepBase):
|
|
|
64
65
|
counts_layer = self.parent.dataset.counts_layer
|
|
65
66
|
|
|
66
67
|
if reference_batch:
|
|
67
|
-
|
|
68
|
+
batch_key = self.parent.batch_key
|
|
69
|
+
|
|
70
|
+
obs_mask = adata.obs[batch_key] == reference_batch
|
|
68
71
|
adata_ref = adata[obs_mask].copy()
|
|
69
72
|
if mask_var == "highly_variable":
|
|
70
73
|
sc.pp.highly_variable_genes(
|
|
@@ -85,13 +88,15 @@ class PCA(ProcessorStepBase):
|
|
|
85
88
|
uns_pca = adata_ref.uns["pca"]
|
|
86
89
|
uns_pca["reference_batch"] = reference_batch
|
|
87
90
|
PCs = adata_ref.varm["PCs"]
|
|
88
|
-
X_pca
|
|
89
|
-
X_pca = X_pca - X_pca.mean(axis=0, keepdims=True)
|
|
90
|
-
adata.obsm["X_pca"] = X_pca
|
|
91
|
+
adata.obsm["X_pca"] = adata.X.dot(PCs)
|
|
91
92
|
adata.uns["pca"] = uns_pca
|
|
92
93
|
adata.varm["PCs"] = PCs
|
|
93
94
|
else:
|
|
94
95
|
sc.pp.pca(adata, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack")
|
|
96
|
+
adata.obsm["X_pca"] = adata.X.dot(adata.varm["PCs"])
|
|
97
|
+
|
|
98
|
+
if zero_center:
|
|
99
|
+
adata.obsm["X_pca"] -= adata.obsm["X_pca"].mean(axis=0, keepdims=True)
|
|
95
100
|
|
|
96
101
|
self.plot_variance_ratio_button.disabled = False
|
|
97
102
|
self.broker.publish(
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
1
3
|
import numpy as np
|
|
4
|
+
from anndata import ImplicitModificationWarning
|
|
2
5
|
from ipywidgets import Checkbox, Dropdown
|
|
3
6
|
from tqdm.auto import tqdm
|
|
4
7
|
|
|
@@ -8,6 +11,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
|
|
|
8
11
|
|
|
9
12
|
class Preprocess(ProcessorStepBase):
|
|
10
13
|
parent: Processor
|
|
14
|
+
name: str = "preprocess"
|
|
15
|
+
description: str = "Preprocess"
|
|
11
16
|
|
|
12
17
|
def __init__(self, parent: Processor) -> None:
|
|
13
18
|
try:
|
|
@@ -50,8 +55,6 @@ class Preprocess(ProcessorStepBase):
|
|
|
50
55
|
|
|
51
56
|
super().__init__(
|
|
52
57
|
parent=parent,
|
|
53
|
-
name="preprocess",
|
|
54
|
-
description="Preprocess",
|
|
55
58
|
fixed_params={},
|
|
56
59
|
variable_controls=variable_controls,
|
|
57
60
|
)
|
|
@@ -103,25 +106,41 @@ class Preprocess(ProcessorStepBase):
|
|
|
103
106
|
)
|
|
104
107
|
pbar.update(10)
|
|
105
108
|
|
|
106
|
-
|
|
107
|
-
adata
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
109
|
+
if group_by is not None:
|
|
110
|
+
adata.var["highly_variable"] = False
|
|
111
|
+
for name, idx in adata.obs.groupby(group_by, observed=True).groups.items():
|
|
112
|
+
hvg_seurat = sc.pp.highly_variable_genes(
|
|
113
|
+
adata[idx],
|
|
114
|
+
layer=f"{layer}_log1p",
|
|
115
|
+
flavor="seurat",
|
|
116
|
+
inplace=False,
|
|
117
|
+
)["highly_variable"]
|
|
118
|
+
|
|
119
|
+
hvg_seurat_v3 = sc.pp.highly_variable_genes(
|
|
120
|
+
adata[idx],
|
|
121
|
+
layer=layer,
|
|
122
|
+
flavor="seurat_v3_paper",
|
|
123
|
+
n_top_genes=hvg_seurat.sum(),
|
|
124
|
+
inplace=False,
|
|
125
|
+
)["highly_variable"]
|
|
126
|
+
|
|
127
|
+
adata.var[f"highly_variable_{name}"] = hvg_seurat | hvg_seurat_v3
|
|
128
|
+
adata.var["highly_variable"] |= adata.var[f"highly_variable_{name}"]
|
|
129
|
+
|
|
130
|
+
else:
|
|
131
|
+
sc.pp.highly_variable_genes(adata, layer=f"{layer}_log1p", flavor="seurat")
|
|
132
|
+
hvg_seurat = adata.var["highly_variable"]
|
|
133
|
+
|
|
134
|
+
sc.pp.highly_variable_genes(
|
|
135
|
+
adata,
|
|
136
|
+
layer=layer,
|
|
137
|
+
flavor="seurat_v3_paper",
|
|
138
|
+
n_top_genes=hvg_seurat.sum(),
|
|
139
|
+
)
|
|
140
|
+
hvg_seurat_v3 = adata.var["highly_variable"]
|
|
141
|
+
|
|
142
|
+
adata.var["highly_variable"] = hvg_seurat | hvg_seurat_v3
|
|
121
143
|
|
|
122
|
-
adata.var["highly_variable"] = hvg_seurat | hvg_seurat_v3
|
|
123
|
-
adata.var["highly_variable_seurat"] = hvg_seurat
|
|
124
|
-
adata.var["highly_variable_seurat_v3"] = hvg_seurat_v3
|
|
125
144
|
pbar.update(10)
|
|
126
145
|
pbar.update(10)
|
|
127
146
|
|
|
@@ -129,7 +148,6 @@ class Preprocess(ProcessorStepBase):
|
|
|
129
148
|
if normalize_total:
|
|
130
149
|
new_layer += "_normt"
|
|
131
150
|
sc.pp.normalize_total(adata, target_sum=1e4)
|
|
132
|
-
adata.layers[new_layer] = adata.X.copy()
|
|
133
151
|
|
|
134
152
|
pbar.update(10)
|
|
135
153
|
pbar.update(10)
|
|
@@ -138,7 +156,6 @@ class Preprocess(ProcessorStepBase):
|
|
|
138
156
|
new_layer += "_log1p"
|
|
139
157
|
adata.uns.pop("log1p", None)
|
|
140
158
|
sc.pp.log1p(adata)
|
|
141
|
-
adata.layers[new_layer] = adata.X.copy()
|
|
142
159
|
pbar.update(10)
|
|
143
160
|
|
|
144
161
|
vars_to_regress = []
|
|
@@ -154,13 +171,23 @@ class Preprocess(ProcessorStepBase):
|
|
|
154
171
|
if vars_to_regress:
|
|
155
172
|
new_layer += "_regr"
|
|
156
173
|
sc.pp.regress_out(adata, keys=vars_to_regress, n_jobs=1)
|
|
157
|
-
adata.layers[new_layer] = adata.X.copy()
|
|
158
174
|
pbar.update(10)
|
|
159
175
|
|
|
160
176
|
if scale:
|
|
161
177
|
new_layer += "_scale"
|
|
162
|
-
|
|
163
|
-
|
|
178
|
+
if group_by is not None:
|
|
179
|
+
for _, idx in adata.obs.groupby(group_by, observed=True).groups.items():
|
|
180
|
+
with warnings.catch_warnings():
|
|
181
|
+
warnings.filterwarnings(
|
|
182
|
+
"ignore",
|
|
183
|
+
category=ImplicitModificationWarning,
|
|
184
|
+
message="Modifying `X` on a view results in data being overridden",
|
|
185
|
+
)
|
|
186
|
+
adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
|
|
187
|
+
else:
|
|
188
|
+
sc.pp.scale(adata, zero_center=False)
|
|
189
|
+
|
|
190
|
+
adata.layers[new_layer] = adata.X.copy()
|
|
164
191
|
|
|
165
192
|
pbar.update(10)
|
|
166
193
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import numpy as np
|
|
1
2
|
from ipywidgets import Dropdown, IntText
|
|
2
3
|
|
|
3
4
|
from sclab.dataset.processor import Processor
|
|
@@ -6,6 +7,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
|
|
|
6
7
|
|
|
7
8
|
class QC(ProcessorStepBase):
|
|
8
9
|
parent: Processor
|
|
10
|
+
name: str = "qc"
|
|
11
|
+
description: str = "QC"
|
|
9
12
|
|
|
10
13
|
def __init__(self, parent: Processor) -> None:
|
|
11
14
|
try:
|
|
@@ -19,6 +22,7 @@ class QC(ProcessorStepBase):
|
|
|
19
22
|
value="counts",
|
|
20
23
|
description="Layer",
|
|
21
24
|
),
|
|
25
|
+
min_counts=IntText(value=50, description="Min. Counts"),
|
|
22
26
|
min_genes=IntText(value=5, description="Min. Genes"),
|
|
23
27
|
min_cells=IntText(value=0, description="Min. Cells"),
|
|
24
28
|
max_rank=IntText(value=0, description="Max. Rank"),
|
|
@@ -36,8 +40,6 @@ class QC(ProcessorStepBase):
|
|
|
36
40
|
|
|
37
41
|
super().__init__(
|
|
38
42
|
parent=parent,
|
|
39
|
-
name="qc",
|
|
40
|
-
description="QC",
|
|
41
43
|
fixed_params={},
|
|
42
44
|
variable_controls=variable_controls,
|
|
43
45
|
)
|
|
@@ -45,8 +47,10 @@ class QC(ProcessorStepBase):
|
|
|
45
47
|
def compute_qc_metrics(
|
|
46
48
|
self,
|
|
47
49
|
layer: str | None = None,
|
|
50
|
+
min_counts: int = 50,
|
|
48
51
|
min_genes: int = 5,
|
|
49
52
|
min_cells: int = 5,
|
|
53
|
+
max_rank: int = 0,
|
|
50
54
|
):
|
|
51
55
|
import scanpy as sc
|
|
52
56
|
|
|
@@ -58,6 +62,11 @@ class QC(ProcessorStepBase):
|
|
|
58
62
|
|
|
59
63
|
adata.layers["qc_tmp_current_X"] = adata.X
|
|
60
64
|
adata.X = adata.layers[layer].copy()
|
|
65
|
+
rowsums = np.asarray(adata.X.sum(axis=1)).squeeze()
|
|
66
|
+
|
|
67
|
+
obs_idx = adata.obs_names[rowsums >= min_counts]
|
|
68
|
+
adata._inplace_subset_obs(obs_idx)
|
|
69
|
+
|
|
61
70
|
sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
|
|
62
71
|
|
|
63
72
|
sc.pp.filter_cells(adata, min_genes=min_genes)
|
|
@@ -68,19 +77,26 @@ class QC(ProcessorStepBase):
|
|
|
68
77
|
# Restore original X
|
|
69
78
|
adata.X = adata.layers.pop("qc_tmp_current_X")
|
|
70
79
|
|
|
80
|
+
if max_rank > 0:
|
|
81
|
+
series = self.parent.dataset.adata.obs["barcode_rank"]
|
|
82
|
+
index = series.loc[series < max_rank].index
|
|
83
|
+
self.parent.dataset.filter_rows(index)
|
|
84
|
+
|
|
71
85
|
def function(
|
|
72
86
|
self,
|
|
73
87
|
layer: str | None = None,
|
|
88
|
+
min_counts: int = 50,
|
|
74
89
|
min_genes: int = 5,
|
|
75
90
|
min_cells: int = 5,
|
|
76
91
|
max_rank: int = 0,
|
|
77
92
|
):
|
|
78
|
-
self.compute_qc_metrics(
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
93
|
+
self.compute_qc_metrics(
|
|
94
|
+
layer,
|
|
95
|
+
min_counts,
|
|
96
|
+
min_genes,
|
|
97
|
+
min_cells,
|
|
98
|
+
max_rank,
|
|
99
|
+
)
|
|
84
100
|
|
|
85
101
|
self.broker.publish("dset_metadata_change", self.parent.dataset.metadata)
|
|
86
102
|
self.broker.publish(
|
|
@@ -7,6 +7,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
|
|
|
7
7
|
|
|
8
8
|
class UMAP(ProcessorStepBase):
|
|
9
9
|
parent: Processor
|
|
10
|
+
name: str = "umap"
|
|
11
|
+
description: str = "UMAP"
|
|
10
12
|
|
|
11
13
|
def __init__(self, parent: Processor) -> None:
|
|
12
14
|
try:
|
|
@@ -21,8 +23,6 @@ class UMAP(ProcessorStepBase):
|
|
|
21
23
|
|
|
22
24
|
super().__init__(
|
|
23
25
|
parent=parent,
|
|
24
|
-
name="umap",
|
|
25
|
-
description="UMAP",
|
|
26
26
|
fixed_params={},
|
|
27
27
|
variable_controls=variable_controls,
|
|
28
28
|
)
|
sclab/gui/__init__.py
ADDED
|
File without changes
|