sclab 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sclab/__init__.py +7 -0
- sclab/_io.py +32 -0
- sclab/_sclab.py +80 -0
- sclab/dataset/__init__.py +8 -0
- sclab/dataset/_dataset.py +398 -0
- sclab/dataset/_exceptions.py +2 -0
- sclab/dataset/plotter/__init__.py +7 -0
- sclab/dataset/plotter/_controls.py +594 -0
- sclab/dataset/plotter/_plotter.py +1017 -0
- sclab/dataset/plotter/_utils.py +437 -0
- sclab/dataset/processor/__init__.py +7 -0
- sclab/dataset/processor/_processor.py +1063 -0
- sclab/dataset/processor/step/__init__.py +7 -0
- sclab/dataset/processor/step/_basic_processor_step.py +109 -0
- sclab/dataset/processor/step/_processor_step_base.py +120 -0
- sclab/event/__init__.py +7 -0
- sclab/event/_broker.py +201 -0
- sclab/event/_client.py +81 -0
- sclab/event/_utils.py +14 -0
- sclab/examples/__init__.py +5 -0
- sclab/examples/processor_steps/__init__.py +15 -0
- sclab/examples/processor_steps/_cluster.py +37 -0
- sclab/examples/processor_steps/_neighbors.py +72 -0
- sclab/examples/processor_steps/_pca.py +124 -0
- sclab/examples/processor_steps/_preprocess.py +186 -0
- sclab/examples/processor_steps/_qc.py +93 -0
- sclab/examples/processor_steps/_umap.py +48 -0
- sclab-0.1.7.dist-info/METADATA +139 -0
- sclab-0.1.7.dist-info/RECORD +30 -0
- sclab-0.1.7.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import plotly.express as px
|
|
4
|
+
from ipywidgets import Button, Dropdown, IntText
|
|
5
|
+
|
|
6
|
+
from sclab.dataset.processor import Processor
|
|
7
|
+
from sclab.dataset.processor.step import ProcessorStepBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PCA(ProcessorStepBase):
    """Processor step that computes a PCA of the parent's dataset with scanpy.

    The step exposes three controls: the number of components, an optional
    boolean ``adata.var`` column used as gene mask, and an optional reference
    batch. When a reference batch is given, the principal components are
    fitted on that batch's cells only and every cell is then projected onto
    them; otherwise ``sc.pp.pca`` runs on the full dataset.

    NOTE(review): ``self.broker``, ``self.run_button``, ``self.output`` and
    ``self.variable_controls`` are not defined in this class; presumably they
    are provided by ``ProcessorStepBase`` — confirm there.
    """

    parent: Processor

    def __init__(self, parent: Processor) -> None:
        """Build the step's controls and initialise the base step.

        Args:
            parent: Processor whose dataset this step operates on.

        Raises:
            ImportError: If scanpy cannot be imported.
        """
        try:
            import scanpy as sc  # noqa: F401
        except ImportError as err:
            raise ImportError("Please install scanpy: `pip install scanpy`") from err

        # Every boolean column of ``adata.var`` is offered as a gene mask; the
        # empty label maps to None, i.e. no mask.
        bool_var_df = parent.dataset.adata.var.select_dtypes(include=bool)
        mask_var_options = {
            "": None,
            **{col: col for col in bool_var_df.columns},
        }
        variable_controls = dict(
            n_comps=IntText(value=30, description="N comps."),
            mask_var=Dropdown(options=mask_var_options, description="Genes mask"),
            # Defined on Processor (not visible here); presumably contributes
            # the ``reference_batch`` control consumed by ``function``.
            **parent.make_selectbatch_drowpdown(description="Reference Batch"),
        )

        super().__init__(
            parent=parent,
            name="pca",
            description="PCA",
            fixed_params={},
            variable_controls=variable_controls,
        )

    def make_controls(self):
        """Create the variance-ratio button and assemble the controls list.

        The button starts disabled and is enabled by ``function`` once a PCA
        has been computed. Returns whatever the base ``make_controls`` returns.
        """
        self.plot_variance_ratio_button = Button(
            description="Plot Variance Ratio",
            layout={"width": "auto"},
            button_style="info",
            disabled=True,
        )
        self.plot_variance_ratio_button.on_click(self.plot_variance_ratio_callback)

        self.controls_list = [
            *self.variable_controls.values(),
            self.run_button,
            self.plot_variance_ratio_button,
            self.output,
        ]
        return super().make_controls()

    def function(
        self,
        n_comps: int = 30,
        mask_var: str | None = None,
        reference_batch: str | None = None,
    ):
        """Run PCA and publish the new ``X_pca`` representation.

        Args:
            n_comps: Number of principal components requested from scanpy.
            mask_var: Name of a boolean ``adata.var`` column restricting the
                genes used, or None for no mask.
            reference_batch: Value of ``adata.obs[parent.batch_key]`` selecting
                the cells the components are fitted on, or a falsy value to
                fit on all cells.
        """
        import scanpy as sc

        adata = self.parent.dataset.adata
        counts_layer = self.parent.dataset.counts_layer

        if reference_batch:
            obs_mask = adata.obs[self.parent.batch_key] == reference_batch
            adata_ref = adata[obs_mask].copy()
            if mask_var == "highly_variable":
                # Re-derive the highly variable genes on the reference cells
                # only: the union of the "seurat" selection (log1p layer) and
                # a "seurat_v3_paper" selection (counts layer) of equal size.
                sc.pp.highly_variable_genes(
                    adata_ref, layer=f"{counts_layer}_log1p", flavor="seurat"
                )
                hvg_seurat = adata_ref.var["highly_variable"]
                sc.pp.highly_variable_genes(
                    adata_ref,
                    layer=counts_layer,
                    flavor="seurat_v3_paper",
                    n_top_genes=hvg_seurat.sum(),
                )
                hvg_seurat_v3 = adata_ref.var["highly_variable"]
                adata_ref.var["highly_variable"] = hvg_seurat | hvg_seurat_v3
            sc.pp.pca(
                adata_ref, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack"
            )
            uns_pca = adata_ref.uns["pca"]
            uns_pca["reference_batch"] = reference_batch
            PCs = adata_ref.varm["PCs"]
            # Project every cell onto the reference loadings, then centre the
            # resulting scores column-wise.
            X_pca: np.ndarray = adata.X.dot(PCs)
            X_pca = X_pca - X_pca.mean(axis=0, keepdims=True)
            adata.obsm["X_pca"] = X_pca
            adata.uns["pca"] = uns_pca
            adata.varm["PCs"] = PCs
        else:
            sc.pp.pca(adata, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack")

        self.plot_variance_ratio_button.disabled = False
        self.broker.publish(
            "dset_data_dict_change", self.parent.dataset.data_dict, "X_pca"
        )

    def plot_variance_ratio_callback(self, *args, **kwargs):
        """Request a scatter plot of the variance ratio per component.

        The component index is sized from the arrays stored in
        ``adata.uns["pca"]`` rather than from the ``n_comps`` widget: the
        widget may have been edited since the PCA was computed, and a length
        mismatch would make the DataFrame construction fail.
        """
        adata = self.parent.dataset.adata
        uns_pca = adata.uns["pca"]
        n_computed = len(uns_pca["variance_ratio"])

        df = pd.DataFrame(
            {k: uns_pca[k] for k in ["variance", "variance_ratio"]},
            # Components are numbered from 1.
            index=pd.Index(range(1, n_computed + 1), name="component"),
        )

        fig = px.scatter(df, y="variance_ratio")
        self.broker.publish("dplt_plot_figure_request", figure=fig)

    def dset_var_dataframe_change_callback(self, *args, **kwargs):
        """Refresh the gene-mask dropdown from the current ``adata.var``.

        The previously selected mask is kept when its column still exists;
        otherwise the selection falls back to None (no mask).
        """
        var_df = self.parent.dataset.adata.var
        df = var_df.select_dtypes(include=["bool"])
        options = {"": None, **{c: c for c in df.columns}}

        control: Dropdown = self.variable_controls["mask_var"]
        current_value = control.value
        control.options = options
        # Compare against the option values (column names / None), which is
        # what ``control.value`` holds.
        control.value = current_value if current_value in options.values() else None
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from ipywidgets import Checkbox, Dropdown
|
|
3
|
+
from tqdm.auto import tqdm
|
|
4
|
+
|
|
5
|
+
from sclab.dataset.processor import Processor
|
|
6
|
+
from sclab.dataset.processor.step import ProcessorStepBase
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Preprocess(ProcessorStepBase):
    """Processor step that filters, normalises and transforms a counts layer.

    ``function`` copies the chosen layer into ``adata.X``, applies minimal
    cell/gene filtering, flags highly variable genes, and then optionally
    normalises, log-transforms, regresses out covariates and scales ``X``.
    After each optional stage the current ``X`` is stored as a new layer whose
    name accumulates a suffix (``_normt``, ``_log1p``, ``_regr``, ``_scale``).

    NOTE(review): ``self.broker`` and ``self.output`` are not defined in this
    class; presumably they come from ``ProcessorStepBase`` — confirm there.
    """

    parent: Processor

    def __init__(self, parent: Processor) -> None:
        """Build the step's controls and initialise the base step.

        Args:
            parent: Processor whose dataset this step operates on.

        Raises:
            ImportError: If scanpy cannot be imported.
        """
        try:
            import scanpy as sc  # noqa: F401
        except ImportError as err:
            raise ImportError("Please install scanpy: `pip install scanpy`") from err

        # Object/categorical metadata columns are offered as grouping keys; the
        # empty label maps to None, i.e. no grouping.
        cat_metadata = parent.dataset._metadata.select_dtypes(
            include=["object", "category"]
        )
        cat_options = {"": None, **{c: c for c in cat_metadata.columns}}

        variable_controls = dict(
            layer=Dropdown(
                options=tuple(parent.dataset.adata.layers.keys()),
                value="counts",
                description="Layer",
            ),
            group_by=Dropdown(
                options=cat_options,
                value=None,
                description="GroupBy",
            ),
            regress_total_counts=Checkbox(description="Regr. out total counts"),
            regress_n_genes=Checkbox(description="Regr. out n genes"),
            normalize_total=Checkbox(value=True, description="Normalize total"),
            log1p=Checkbox(value=True, description="Log1p"),
            scale=Checkbox(value=True, description="Scale"),
        )

        def filter_layers(change: dict):
            """Drop layers ending in ``log1p`` whenever the options change."""
            new_options: tuple[str, ...] = change["new"]
            if any(s.endswith("log1p") for s in new_options):
                new_options = tuple(
                    filter(lambda y: not y.endswith("log1p"), new_options)
                )
                # Re-assigning triggers this observer once more, but the
                # filtered tuple no longer matches the condition above.
                variable_controls["layer"].options = new_options

        variable_controls["layer"].observe(filter_layers, "options", "change")

        super().__init__(
            parent=parent,
            name="preprocess",
            description="Preprocess",
            fixed_params={},
            variable_controls=variable_controls,
        )

    def function(
        self,
        layer: str | None = None,
        group_by: str | None = None,
        regress_total_counts: bool = False,
        regress_n_genes: bool = False,
        normalize_total: bool = False,
        log1p: bool = True,
        scale: bool = True,
    ):
        """Run the preprocessing pipeline and publish the resulting changes.

        Args:
            layer: Layer to start from; None selects ``dataset.counts_layer``.
            group_by: Metadata column passed as ``batch_key`` to the highly
                variable gene selection, or None.
            regress_total_counts: Regress out total counts (their log1p when
                ``log1p`` is also set).
            regress_n_genes: Regress out ``n_genes_by_counts``.
            normalize_total: Normalise each cell to a total of 1e4.
            log1p: Apply ``sc.pp.log1p`` to ``X``.
            scale: Apply ``sc.pp.scale`` without zero-centering.
        """
        import scanpy as sc

        self.output.clear_output(wait=True)
        with self.output:
            pbar = tqdm(total=100, bar_format="{percentage:3.0f}%|{bar}|")

        # Close the bar even when a stage below raises, so a failed run does
        # not leave a dangling progress bar behind.
        try:
            dataset = self.parent.dataset
            adata = dataset.adata
            if layer is None:
                layer = dataset.counts_layer

            if f"{layer}_log1p" not in adata.layers:
                adata.layers[f"{layer}_log1p"] = sc.pp.log1p(adata.layers[layer].copy())
            pbar.update(10)

            if layer != "X":
                adata.X = adata.layers[layer].copy()
            start_n_cells_i, start_n_genes = adata.shape

            sc.pp.calculate_qc_metrics(
                adata,
                percent_top=None,
                log1p=False,
                inplace=True,
            )
            sc.pp.filter_cells(adata, min_genes=5)
            sc.pp.filter_genes(adata, min_cells=5)
            pbar.update(10)

            # Recompute the metrics on the filtered data.
            sc.pp.calculate_qc_metrics(
                adata,
                percent_top=None,
                log1p=False,
                inplace=True,
            )
            pbar.update(10)

            # Highly variable genes: the union of the "seurat" selection
            # (log1p layer) and a "seurat_v3_paper" selection (raw layer) of
            # the same size. Both individual selections are kept as columns.
            sc.pp.highly_variable_genes(
                adata,
                layer=f"{layer}_log1p",
                flavor="seurat",
                batch_key=group_by,
            )
            hvg_seurat = adata.var["highly_variable"]
            sc.pp.highly_variable_genes(
                adata,
                layer=layer,
                flavor="seurat_v3_paper",
                batch_key=group_by,
                n_top_genes=hvg_seurat.sum(),
            )
            hvg_seurat_v3 = adata.var["highly_variable"]

            adata.var["highly_variable"] = hvg_seurat | hvg_seurat_v3
            adata.var["highly_variable_seurat"] = hvg_seurat
            adata.var["highly_variable_seurat_v3"] = hvg_seurat_v3
            # The bar advances in ten fixed steps of 10; some stages account
            # for two of them.
            pbar.update(10)
            pbar.update(10)

            new_layer = layer
            if normalize_total:
                new_layer += "_normt"
                sc.pp.normalize_total(adata, target_sum=1e4)
                adata.layers[new_layer] = adata.X.copy()

            pbar.update(10)
            pbar.update(10)

            if log1p:
                new_layer += "_log1p"
                # Remove any previous "log1p" entry from ``uns`` before
                # transforming again.
                adata.uns.pop("log1p", None)
                sc.pp.log1p(adata)
                adata.layers[new_layer] = adata.X.copy()
            pbar.update(10)

            vars_to_regress = []
            if regress_n_genes:
                vars_to_regress.append("n_genes_by_counts")

            if regress_total_counts and log1p:
                # Match the covariate to the log-transformed expression.
                adata.obs["log1p_total_counts"] = np.log1p(adata.obs["total_counts"])
                vars_to_regress.append("log1p_total_counts")
            elif regress_total_counts:
                vars_to_regress.append("total_counts")

            if vars_to_regress:
                new_layer += "_regr"
                sc.pp.regress_out(adata, keys=vars_to_regress, n_jobs=1)
                adata.layers[new_layer] = adata.X.copy()
            pbar.update(10)

            if scale:
                new_layer += "_scale"
                sc.pp.scale(adata, zero_center=False)
                adata.layers[new_layer] = adata.X.copy()

            pbar.update(10)

            self.broker.publish("dset_metadata_change", dataset.metadata)
            self.broker.publish("dset_data_dict_change", dataset.data_dict, "metadata")
            self.broker.publish(
                "dset_anndata_layers_change", dataset.adata.layers.keys()
            )
            self.broker.publish(
                "ctrl_value_change_request",
                data_key="metadata",
                selected_axes_1="n_genes_by_counts",
                selected_axes_2="total_counts",
            )
            self.broker.publish("dset_var_dataframe_change", dataset.adata.var)

            # Announce shape changes caused by the filtering above.
            end_n_cells_i, end_n_genes = adata.shape
            if start_n_cells_i != end_n_cells_i:
                self.broker.publish("dset_total_rows_change", dataset.metadata)
            if start_n_genes != end_n_genes:
                self.broker.publish("dset_total_vars_change", adata.var)

            adata.X = adata.X.astype(np.float32)
        finally:
            pbar.close()
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from ipywidgets import Dropdown, IntText
|
|
2
|
+
|
|
3
|
+
from sclab.dataset.processor import Processor
|
|
4
|
+
from sclab.dataset.processor.step import ProcessorStepBase
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class QC(ProcessorStepBase):
    """Processor step that computes QC metrics and filters cells and genes.

    The metrics are computed on a selectable layer; cells can additionally be
    restricted by their barcode rank (rank of ``total_counts``, largest first).

    NOTE(review): ``self.broker`` is not defined in this class; presumably it
    comes from ``ProcessorStepBase`` — confirm there.
    """

    parent: Processor

    def __init__(self, parent: Processor) -> None:
        """Build the step's controls and initialise the base step.

        Args:
            parent: Processor whose dataset this step operates on.

        Raises:
            ImportError: If scanpy cannot be imported.
        """
        try:
            import scanpy as sc  # noqa: F401
        except ImportError as err:
            raise ImportError("Please install scanpy: `pip install scanpy`") from err

        variable_controls = dict(
            layer=Dropdown(
                options=tuple(parent.dataset.adata.layers.keys()),
                value="counts",
                description="Layer",
            ),
            min_genes=IntText(value=5, description="Min. Genes"),
            min_cells=IntText(value=0, description="Min. Cells"),
            max_rank=IntText(value=0, description="Max. Rank"),
        )

        def filter_layers(change: dict):
            """Drop layers ending in ``log1p`` whenever the options change."""
            new_options: tuple[str, ...] = change["new"]
            if any(s.endswith("log1p") for s in new_options):
                new_options = tuple(
                    filter(lambda y: not y.endswith("log1p"), new_options)
                )
                # Re-assigning triggers this observer once more, but the
                # filtered tuple no longer matches the condition above.
                variable_controls["layer"].options = new_options

        variable_controls["layer"].observe(filter_layers, "options", "change")

        super().__init__(
            parent=parent,
            name="qc",
            description="QC",
            fixed_params={},
            variable_controls=variable_controls,
        )

    def compute_qc_metrics(
        self,
        layer: str | None = None,
        min_genes: int = 5,
        min_cells: int = 5,
    ):
        """Compute QC metrics on ``layer`` and filter cells and genes in place.

        ``adata.X`` is temporarily replaced by a copy of the layer and put
        back afterwards, also when one of the scanpy calls raises.

        Args:
            layer: Layer to compute the metrics on; None selects
                ``dataset.counts_layer``.
            min_genes: Passed to ``sc.pp.filter_cells``.
            min_cells: Passed to ``sc.pp.filter_genes``.
        """
        import scanpy as sc

        if layer is None:
            layer = self.parent.dataset.counts_layer

        dataset = self.parent.dataset
        adata = dataset.adata

        # Park the current X in a temporary layer; presumably so that it is
        # subset together with the rest of the object by the filters below.
        adata.layers["qc_tmp_current_X"] = adata.X
        try:
            adata.X = adata.layers[layer].copy()
            sc.pp.calculate_qc_metrics(
                adata, percent_top=None, log1p=False, inplace=True
            )

            sc.pp.filter_cells(adata, min_genes=min_genes)
            sc.pp.filter_genes(adata, min_cells=min_cells)
            # Recompute the metrics on the filtered data.
            sc.pp.calculate_qc_metrics(
                adata, percent_top=None, log1p=False, inplace=True
            )
            adata.obs["barcode_rank"] = adata.obs["total_counts"].rank(ascending=False)
        finally:
            # Restore original X and remove the temporary layer, even on
            # failure, so no stray layer or swapped X is left behind.
            adata.X = adata.layers.pop("qc_tmp_current_X")

    def function(
        self,
        layer: str | None = None,
        min_genes: int = 5,
        min_cells: int = 5,
        max_rank: int = 0,
    ):
        """Run QC, optionally filter by barcode rank, and publish the changes.

        Args:
            layer: Layer to compute the metrics on; None selects
                ``dataset.counts_layer``.
            min_genes: Passed to ``sc.pp.filter_cells``.
            min_cells: Passed to ``sc.pp.filter_genes``.
            max_rank: When positive, only cells whose barcode rank is below
                this value are kept; 0 or less disables the rank filter.
        """
        self.compute_qc_metrics(layer, min_genes, min_cells)

        if max_rank > 0:
            series = self.parent.dataset.adata.obs["barcode_rank"]
            # NOTE(review): the comparison is strict, so the cell ranked
            # exactly ``max_rank`` is dropped — confirm this is intended.
            index = series.loc[series < max_rank].index
            self.parent.dataset.filter_rows(index)

        self.broker.publish("dset_metadata_change", self.parent.dataset.metadata)
        self.broker.publish(
            "ctrl_value_change_request",
            data_key="metadata",
            selected_axes_1="barcode_rank",
            selected_axes_2="total_counts",
            log_axes_2=True,
        )
        self.broker.publish("dset_var_dataframe_change", self.parent.dataset.adata.var)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from ipywidgets import Checkbox, IntText
|
|
3
|
+
|
|
4
|
+
from sclab.dataset.processor import Processor
|
|
5
|
+
from sclab.dataset.processor.step import ProcessorStepBase
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class UMAP(ProcessorStepBase):
    """Processor step that computes a UMAP embedding with scanpy.

    The embedding is stored in ``adata.obsm`` under a key that encodes the
    number of components (``X_<n>Dumap``) instead of scanpy's ``X_umap``.

    NOTE(review): ``self.broker`` is not defined in this class; presumably it
    comes from ``ProcessorStepBase`` — confirm there.
    """

    parent: Processor

    def __init__(self, parent: Processor) -> None:
        """Create the two controls of the step and initialise the base step.

        Raises:
            ImportError: If scanpy cannot be imported.
        """
        try:
            import scanpy as sc  # noqa: F401
        except ImportError:
            raise ImportError("Please install scanpy: `pip install scanpy`")

        controls = {
            "n_components": IntText(value=2, description="N comp."),
            "zero_center": Checkbox(value=False, description="Zero center"),
        }

        super().__init__(
            parent=parent,
            name="umap",
            description="UMAP",
            fixed_params={},
            variable_controls=controls,
        )

    def function(
        self,
        n_components: int = 2,
        zero_center: bool = True,
    ):
        """Compute the embedding, store it and announce the new data key.

        Args:
            n_components: Dimensionality of the embedding.
            zero_center: Subtract the per-dimension mean from the embedding
                before storing it.
        """
        import scanpy as sc

        dataset = self.parent.dataset
        adata = dataset.adata

        sc.tl.umap(adata, n_components=n_components)

        # Take the result out of scanpy's default slot; it is stored again
        # below under a key that depends on the dimensionality.
        embedding: np.ndarray = adata.obsm.pop("X_umap")
        if zero_center:
            column_means = embedding.mean(axis=0, keepdims=True)
            embedding = embedding - column_means

        obsm_key = f"X_{n_components}Dumap"
        adata.obsm[obsm_key] = embedding

        self.broker.publish("dset_data_dict_change", dataset.data_dict, obsm_key)
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: sclab
|
|
3
|
+
Version: 0.1.7
|
|
4
|
+
Summary: sclab
|
|
5
|
+
Author-email: Argenis Arriojas <ArriojasMaldonado001@umb.edu>
|
|
6
|
+
Requires-Python: >=3.10,<3.13
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: anndata
|
|
14
|
+
Requires-Dist: anywidget
|
|
15
|
+
Requires-Dist: ipywidgets
|
|
16
|
+
Requires-Dist: itables
|
|
17
|
+
Requires-Dist: numpy<2.2
|
|
18
|
+
Requires-Dist: pandas
|
|
19
|
+
Requires-Dist: plotly
|
|
20
|
+
Requires-Dist: scanpy
|
|
21
|
+
Requires-Dist: scikit-learn
|
|
22
|
+
Requires-Dist: scikit-misc
|
|
23
|
+
Requires-Dist: svgpathtools
|
|
24
|
+
Requires-Dist: pytest>=8.3.4 ; extra == "test"
|
|
25
|
+
Project-URL: Bug Tracker, https://github.com/umbibio/sclab/issues
|
|
26
|
+
Project-URL: Changelog, https://github.com/umbibio/sclab/blob/main/CHANGELOG.md
|
|
27
|
+
Project-URL: Documentation, https://github.com/umbibio/sclab/docs
|
|
28
|
+
Project-URL: Homepage, https://github.com/umbibio/sclab
|
|
29
|
+
Project-URL: Repository, https://github.com/umbibio/sclab.git
|
|
30
|
+
Provides-Extra: test
|
|
31
|
+
|
|
32
|
+
# SCLab
|
|
33
|
+
|
|
34
|
+
SCLab is an interactive single-cell analysis toolkit that provides a seamless interface for analyzing and visualizing single-cell RNA sequencing data. Built on top of popular tools like scanpy and AnnData, SCLab offers an event-driven architecture for real-time updates and interactive visualizations.
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
- **Interactive Data Analysis**: Built-in dashboard with real-time updates
|
|
39
|
+
- **Quality Control**: Comprehensive QC metrics and filtering capabilities
|
|
40
|
+
- **Preprocessing**: Normalization, log transformation, and scaling with progress tracking
|
|
41
|
+
- **Dimensionality Reduction**: PCA with batch effect correction support
|
|
42
|
+
- **Visualization**: Interactive plots and tables using plotly and itables
|
|
43
|
+
- **Event System**: Robust event-driven architecture for real-time updates
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install sclab
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Quick Start
|
|
52
|
+
|
|
53
|
+
Open a Jupyter Notebook and run the following:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from sclab import SCLabDashboard
|
|
57
|
+
import scanpy as sc
|
|
58
|
+
from IPython.display import display
|
|
59
|
+
|
|
60
|
+
# Load your data
|
|
61
|
+
adata = sc.read_10x_h5("your_data.h5")
|
|
62
|
+
|
|
63
|
+
# Create dashboard
|
|
64
|
+
dashboard = SCLabDashboard(adata, name="My Analysis")
|
|
65
|
+
|
|
66
|
+
# Display dashboard
|
|
67
|
+
display(dashboard)
|
|
68
|
+
|
|
69
|
+
# The dashboard provides easy access to components:
|
|
70
|
+
dashboard.ds # Dataset (wrapper for AnnData)
|
|
71
|
+
dashboard.pl # Plotter
|
|
72
|
+
dashboard.pr # Processor
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Components
|
|
76
|
+
|
|
77
|
+
### SCLabDashboard
|
|
78
|
+
|
|
79
|
+
The main interface that integrates all components with a tabbed layout:
|
|
80
|
+
- Main graph for visualizations
|
|
81
|
+
- Observations table
|
|
82
|
+
- Genes table
|
|
83
|
+
- Event logs
|
|
84
|
+
|
|
85
|
+
### Dataset
|
|
86
|
+
|
|
87
|
+
Handles data management with:
|
|
88
|
+
- AnnData integration
|
|
89
|
+
- Interactive tables
|
|
90
|
+
- Row selection and filtering
|
|
91
|
+
- Metadata handling
|
|
92
|
+
|
|
93
|
+
### Processor
|
|
94
|
+
|
|
95
|
+
Handles data processing steps. It is configurable with custom steps implementing the `ProcessorStepBase` interface. This package provides multiple examples of steps:
|
|
96
|
+
|
|
97
|
+
- QC
|
|
98
|
+
- Preprocessing
|
|
99
|
+
- PCA
|
|
100
|
+
- Nearest Neighbors
|
|
101
|
+
- UMAP
|
|
102
|
+
- Clustering
|
|
103
|
+
|
|
104
|
+
### Plotter
|
|
105
|
+
|
|
106
|
+
Provides interactive visualizations with:
|
|
107
|
+
- Real-time updates
|
|
108
|
+
- Customizable plots
|
|
109
|
+
- Batch effect visualization
|
|
110
|
+
- Export capabilities
|
|
111
|
+
|
|
112
|
+
## Requirements
|
|
113
|
+
|
|
114
|
+
- Python ≥ 3.10, < 3.13
|
|
115
|
+
- anndata ≥ 0.11.3
|
|
116
|
+
- scanpy ≥ 1.10.4
|
|
117
|
+
- Other dependencies listed in pyproject.toml
|
|
118
|
+
|
|
119
|
+
## Contributing
|
|
120
|
+
|
|
121
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
This project is licensed under the BSD 3-Clause License - see the [LICENSE](LICENSE) file for details.
|
|
126
|
+
|
|
127
|
+
## Citation
|
|
128
|
+
|
|
129
|
+
If you use SCLab in your research, please cite:
|
|
130
|
+
|
|
131
|
+
```bibtex
|
|
132
|
+
@software{sclab2024,
|
|
133
|
+
author = {Arriojas, Argenis},
|
|
134
|
+
title = {SCLab: Interactive Single-Cell Analysis Toolkit},
|
|
135
|
+
year = {2024},
|
|
136
|
+
publisher = {GitHub},
|
|
137
|
+
url = {https://github.com/umbibio/sclab}
|
|
138
|
+
}
|
|
139
|
+
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
sclab/__init__.py,sha256=0RUdbuZt9XoQgNmLnMCR4-LYIXnwXUgNyuLEL1Gu9Ms,95
|
|
2
|
+
sclab/_io.py,sha256=mVazSViXfKpfY1iUNZXgQc1hutu6MXTaGZXF4rcqKLE,875
|
|
3
|
+
sclab/_sclab.py,sha256=Eln0nWlnFpnDabW94UK8qOdxIys9FWSyqsm8lvlnk4A,2156
|
|
4
|
+
sclab/dataset/__init__.py,sha256=f9PoXIMAPnC3Var1ODr3mXkotW6u6NRPQvlgcWYXk54,143
|
|
5
|
+
sclab/dataset/_dataset.py,sha256=51E8iwiUFxFBvka8EsCuZrc-nfNxHcB4iEAy_Ja5qPg,14409
|
|
6
|
+
sclab/dataset/_exceptions.py,sha256=g8RJL8PiRmD4--PkOs5CZth_qeaduvieMlKJNkrUIYA,45
|
|
7
|
+
sclab/dataset/plotter/__init__.py,sha256=pdWhAywnoc1_nnIcD-qNW0U364-yo4k5DwP79y9NVgA,129
|
|
8
|
+
sclab/dataset/plotter/_controls.py,sha256=NBDdtY1wQbIEvE_WToigR5bEEoi3z30_67T_dLSOSIo,20295
|
|
9
|
+
sclab/dataset/plotter/_plotter.py,sha256=EXwk6KSM7FOyLLJ8HCgIB_2m730t0kHsVKWfQOxL6kA,37023
|
|
10
|
+
sclab/dataset/plotter/_utils.py,sha256=ANm_R9PJd33-QJtZzqXCN5pJ_XrJ7AA4AvSHaDRbOMA,11595
|
|
11
|
+
sclab/dataset/processor/__init__.py,sha256=v8Qbusb6h8oSndv9q-12vzHrz5BDF3N2LCkQG9KC19I,104
|
|
12
|
+
sclab/dataset/processor/_processor.py,sha256=bvDdjcZDD7ighP-wvmebh4Tuc_-HyFT9KP8bFOB4POY,40811
|
|
13
|
+
sclab/dataset/processor/step/__init__.py,sha256=j8j4oU9NMdWHn6kVjft7Klm6xme8M6wzebxJj_zNehg,179
|
|
14
|
+
sclab/dataset/processor/step/_basic_processor_step.py,sha256=7NKC4W-I_EU3QBPi5BL9-NwAF_h6_oWl1l_kEk6gSAg,3640
|
|
15
|
+
sclab/dataset/processor/step/_processor_step_base.py,sha256=Yk8nCDu4atk-txJcEEC6kravzP6FDBjcrdh6kNyZjdM,3909
|
|
16
|
+
sclab/event/__init__.py,sha256=unkfpmoxitPVZvWcc9YhkUwcXODdcd9cw_toVXVIDMU,126
|
|
17
|
+
sclab/event/_broker.py,sha256=W4GgoBhJS1LxGQv8TYFg71o5jUQPO8BDLrKzahDUyEM,7121
|
|
18
|
+
sclab/event/_client.py,sha256=Kx7pza-CzLySR0JBBneiZQAjtllUnjWKNKSfw3K2ZQE,2362
|
|
19
|
+
sclab/event/_utils.py,sha256=LehiFhn7dopNEaTFERJhs6fiVgXCBvseRxWTSWc6u-k,456
|
|
20
|
+
sclab/examples/__init__.py,sha256=uSu4DMfF2K7xlZbLC_CmANyS3D56khGLMSVt6x9XXiI,68
|
|
21
|
+
sclab/examples/processor_steps/__init__.py,sha256=tm2YWKIkBPcBoHp4VS78tLyQX44Y7zUj2dgzFH058DA,279
|
|
22
|
+
sclab/examples/processor_steps/_cluster.py,sha256=fXI8J_ae8ZXjckmCff3WfwSa9HTdQmEDYQ3vTilmV9k,1124
|
|
23
|
+
sclab/examples/processor_steps/_neighbors.py,sha256=-FxRBi5Uaywrn2fXnlgmwtQrTefPPdnfOuI6RhKw314,2210
|
|
24
|
+
sclab/examples/processor_steps/_pca.py,sha256=XKEau9MtHr0LBDFTc9Y3QUQRv5fvNyKT8KeFujepypc,4589
|
|
25
|
+
sclab/examples/processor_steps/_preprocess.py,sha256=ftuWt9asef_a5OU195UazfI34MjA82nE1LWTPjhXPPY,6340
|
|
26
|
+
sclab/examples/processor_steps/_qc.py,sha256=VPiyY7o7YM8v6hetjxrSfnrn_fJhuC5EzeMi1bSUsw0,3242
|
|
27
|
+
sclab/examples/processor_steps/_umap.py,sha256=Geyme_biIHxGE7zD8dsECUfQy31ff-w1BrAN55BW5Ug,1406
|
|
28
|
+
sclab-0.1.7.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
|
|
29
|
+
sclab-0.1.7.dist-info/METADATA,sha256=UTX_EmyQXRn0F2r69fnTKwju9H3PnMExTN08LuTfCkI,3767
|
|
30
|
+
sclab-0.1.7.dist-info/RECORD,,
|