sclab 0.1.7__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sclab might be problematic. Click here for more details.

@@ -0,0 +1,80 @@
1
+ from ipywidgets import GridBox, Layout, Stack, ToggleButtons, link
2
+
3
+ from sclab.event import EventBroker, EventClient
4
+
5
+
6
class _Results:
    """Structural type for objects handed to ``ResultsPanel.add_result``.

    It is used only as an annotation in this module; ``add_result`` reads
    ``namespace`` from the object and stores the object itself as a child of
    the panel's ``Stack``.
    """

    # Key under which the result is listed in the panel's toggle buttons.
    namespace: str
8
+
9
+
10
+ class ResultsPanel(GridBox, EventClient):
11
+ available_results: ToggleButtons
12
+ results_stack: Stack
13
+
14
+ events: list[str] = [
15
+ # "rslt_add_result",
16
+ # "rslt_remove_result",
17
+ ]
18
+
19
def __init__(
    self,
    broker: EventBroker,
):
    """Build the two-column results panel.

    The left column holds one toggle button per registered result and the
    right column holds the stack of result widgets.

    Parameters
    ----------
    broker
        Event broker passed on to ``EventClient.__init__``.
    """
    EventClient.__init__(self, broker)

    self.available_results = ToggleButtons(options={})
    self.results_stack = Stack([])

    # The option values are stack indices, so the toggle value and the
    # stack's selected index are kept in sync in both directions.
    toggle_side = (self.available_results, "value")
    stack_side = (self.results_stack, "selected_index")
    link(toggle_side, stack_side)

    panel_layout = Layout(
        width="100%",
        grid_template_columns="150px auto",
        grid_template_areas=""" "available-results selected-results_stack" """,
        border="0px solid black",
    )
    panel_children = [self.available_results, self.results_stack]
    GridBox.__init__(self, panel_children, layout=panel_layout)
43
+
44
def add_result(self, results: _Results):
    """Register ``results`` under its namespace, replacing any previous entry.

    A namespace seen for the first time gets the next free index and the
    widget is appended to the stack; a namespace that is already known keeps
    its index and the widget at that position is swapped for ``results``.
    """
    key = results.namespace

    # Work on a copy so the widget only sees one complete assignment.
    index_by_name: dict[str, int] = self.available_results.options.copy()
    position = index_by_name.get(key, len(index_by_name))
    index_by_name[key] = position

    children = list(self.results_stack.children)
    if position < len(children):
        children[position] = results
    else:
        children.append(results)

    self.results_stack.children = tuple(children)
    self.available_results.options = index_by_name
60
+
61
def remove_result(self, name: str):
    """Remove the result registered under ``name`` from the panel.

    The widget is dropped from the stack and the remaining entries are
    re-numbered so that every option value is still the stack index of its
    widget. The selection follows the widget that was selected before; if the
    selected widget itself is removed, the previous entry (or the first one)
    is selected instead.

    Parameters
    ----------
    name
        Namespace of the result to remove.

    Raises
    ------
    KeyError
        If no result is registered under ``name``.
    """
    old_options: dict[str, int] = dict(self.available_results.options)
    removed_idx = old_options[name]

    remaining_stack = list(self.results_stack.children)
    remaining_stack.pop(removed_idx)

    # Entries behind the removed widget move up one slot in the stack, so
    # their option values have to move with them.
    new_options = {
        key: (position - 1 if position > removed_idx else position)
        for key, position in old_options.items()
        if key != name
    }

    previous = self.results_stack.selected_index
    if not new_options or previous is None:
        selection = None
    elif previous > removed_idx:
        selection = previous - 1
    elif previous == removed_idx:
        selection = max(removed_idx - 1, 0)
    else:
        selection = previous

    # Shrink the stack and the option list first; only then pick the new
    # selection, so the chosen index is valid for both widgets.
    self.results_stack.children = tuple(remaining_stack)
    self.available_results.options = new_options
    # ``value`` is linked to ``results_stack.selected_index`` in ``__init__``.
    self.available_results.value = selection
@@ -8,33 +8,36 @@ from ipywidgets.widgets.widget_description import DescriptionWidget
8
8
 
9
9
  from ....event import EventClient
10
10
  from .._processor import Processor
11
+ from .._results_panel import _Results
11
12
 
12
13
 
13
14
  class ProcessorStepBase(EventClient):
14
15
  events: list[str] = None
15
16
  parent: Processor
16
- name: str
17
- description: str
17
+ name: str = None
18
+ description: str = None
18
19
  fixed_params: dict[str, Any]
19
20
  variable_controls: dict[str, DescriptionWidget | ValueWidget]
20
21
  output: Output
21
22
  run_button: Button
22
23
  controls_list: list[DescriptionWidget | ValueWidget | Button]
23
24
  controls: VBox
25
+ results: _Results | None
26
+ order: int = 1000
24
27
 
25
28
  run_button_description = "Run"
26
29
 
27
30
  def __init__(
28
31
  self,
29
32
  parent: Processor,
30
- name: str,
31
- description: str,
32
33
  fixed_params: dict[str, Any],
33
34
  variable_controls: dict[str, DescriptionWidget | ValueWidget],
35
+ results: _Results | None = None,
34
36
  ):
37
+ assert self.name
38
+ assert self.description
39
+
35
40
  self.parent = parent
36
- self.name = name
37
- self.description = description
38
41
  self.fixed_params = fixed_params
39
42
  self.variable_controls = variable_controls
40
43
 
@@ -56,6 +59,9 @@ class ProcessorStepBase(EventClient):
56
59
  ]
57
60
  self.make_controls()
58
61
 
62
+ if results is not None:
63
+ self.results = results
64
+ parent.results_panel.add_result(self.results)
59
65
  super().__init__(parent.broker)
60
66
 
61
67
  def make_controls(self):
@@ -1,4 +1,5 @@
1
1
  from ._cluster import Cluster
2
+ from ._differential_expression import DifferentialExpression
2
3
  from ._neighbors import Neighbors
3
4
  from ._pca import PCA
4
5
  from ._preprocess import Preprocess
@@ -12,4 +13,5 @@ __all__ = [
12
13
  "Neighbors",
13
14
  "UMAP",
14
15
  "Cluster",
16
+ "DifferentialExpression",
15
17
  ]
@@ -6,6 +6,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
6
6
 
7
7
  class Cluster(ProcessorStepBase):
8
8
  parent: Processor
9
+ name: str = "cluster"
10
+ description: str = "Cluster"
9
11
 
10
12
  def __init__(self, parent: Processor) -> None:
11
13
  try:
@@ -21,8 +23,6 @@ class Cluster(ProcessorStepBase):
21
23
 
22
24
  super().__init__(
23
25
  parent=parent,
24
- name="cluster",
25
- description="Cluster",
26
26
  fixed_params={},
27
27
  variable_controls=variable_controls,
28
28
  )
@@ -0,0 +1,328 @@
1
+ from typing import Any, Iterable, Literal
2
+
3
+ import itables
4
+ import numpy as np
5
+ import pandas as pd
6
+ from anndata import AnnData
7
+ from IPython.display import Markdown, display
8
+ from ipywidgets import Dropdown, Output, SelectMultiple, Text, ToggleButtons
9
+ from ipywidgets.widgets.valuewidget import ValueWidget
10
+ from ipywidgets.widgets.widget_box import VBox
11
+ from ipywidgets.widgets.widget_description import DescriptionWidget
12
+ from packaging.version import Version
13
+
14
+ from sclab.dataset import SCLabDataset
15
+ from sclab.dataset.processor import Processor
16
+ from sclab.dataset.processor.step import ProcessorStepBase
17
+
18
+
19
+ class DifferentialExpressionResults(VBox):
20
+ dataset: SCLabDataset
21
+ result_selector: Dropdown
22
+ group_selector: ToggleButtons
23
+ table_output: Output
24
+ namespace: str = "differential_expression"
25
+
26
def __init__(self, dataset: SCLabDataset):
    """Create the result selector, group selector and table output.

    Parameters
    ----------
    dataset
        Dataset whose ``adata.uns`` entries are listed by this widget.
    """
    self.dataset = dataset
    self.result_selector = Dropdown()
    self.group_selector = ToggleButtons()
    self.table_output = Output()

    # Registration order matters for the result selector: the group list is
    # refreshed before the table is redrawn.
    value_observers = (
        (self.result_selector, self._update_group_selector),
        (self.result_selector, self._update_table),
        (self.group_selector, self._update_table),
    )
    for widget, handler in value_observers:
        widget.observe(handler, "value")

    children = [self.result_selector, self.group_selector, self.table_output]
    super().__init__(children)

    self.sync_results_list()
45
+
46
+ def sync_results_list(self, focus_result: str | None = None):
47
+ adata = self.dataset.adata
48
+ uns: dict[str, Any] = adata.uns
49
+ current_selection = self.result_selector.value
50
+ new_options = tuple(filter(lambda x: x.startswith(self.namespace), uns.keys()))
51
+
52
+ if focus_result is not None and focus_result in new_options:
53
+ current_selection = focus_result
54
+ elif current_selection not in new_options:
55
+ current_selection = None
56
+
57
+ self.result_selector.options = new_options
58
+ self.result_selector.value = current_selection
59
+
60
+ def _update_group_selector(self, *args, **kwargs):
61
+ selected_result = self.result_selector.value
62
+ uns: dict[str, Any] = self.dataset.adata.uns
63
+ gene_names: np.rec.recarray = uns[selected_result]["names"]
64
+ self.group_selector.options = ("all",) + gene_names.dtype.names
65
+ self.group_selector.value = "all"
66
+
67
+ def _update_table(self, *args, **kwargs):
68
+ selected_result = self.result_selector.value
69
+ selected_group = self.group_selector.value
70
+
71
+ adata = self.dataset.adata
72
+ params = adata.uns[selected_result]["params"]
73
+
74
+ groupby = params["groupby"]
75
+ reference = params["reference"]
76
+ table_name = f"{selected_result}_by_{groupby}_{selected_group}_vs_{reference}"
77
+
78
+ params_text = "Parameters:\n "
79
+ params_text += "\n ".join(f"{k}: {v}" for k, v in params.items())
80
+ params_text = f"```\n{params_text}\n```"
81
+
82
+ if "gene_name" in adata.var:
83
+ gene_symbols = "gene_name"
84
+ elif "name" in adata.var:
85
+ gene_symbols = "name"
86
+ elif "gene_symbol" in adata.var:
87
+ gene_symbols = "gene_symbol"
88
+ elif "symbol" in adata.var:
89
+ gene_symbols = "symbol"
90
+ else:
91
+ gene_symbols = None
92
+
93
+ group = selected_group if selected_group != "all" else None
94
+ df = _rank_genes_groups_df(
95
+ adata, group=group, key=selected_result, gene_symbols=gene_symbols
96
+ )
97
+
98
+ self.table_output.clear_output()
99
+ with self.table_output:
100
+ display(Markdown(f"## {table_name}"))
101
+ itables.show(
102
+ df,
103
+ buttons=[
104
+ "pageLength",
105
+ {
106
+ "extend": "colvis",
107
+ "collectionLayout": "fixed columns",
108
+ "popoverTitle": "Column visibility control",
109
+ },
110
+ "copyHtml5",
111
+ {"extend": "csvHtml5", "title": table_name},
112
+ ],
113
+ columnDefs=[
114
+ {"visible": True, "targets": [0]},
115
+ {"visible": True, "targets": "_all"},
116
+ ],
117
+ style="width:100%",
118
+ classes="display cell-border",
119
+ stateSave=False,
120
+ )
121
+ display(Markdown(params_text))
122
+
123
+
124
+ class DifferentialExpression(ProcessorStepBase):
125
+ parent: Processor
126
+ results: DifferentialExpressionResults
127
+ name: str = "differential_expression"
128
+ description: str = "Differential Expression"
129
+
130
def __init__(self, parent: Processor) -> None:
    """Build the controls and the results widget of the step.

    Parameters
    ----------
    parent
        Processor that owns this step; its dataset supplies the metadata
        columns and layers offered in the controls.

    Raises
    ------
    ImportError
        If scanpy cannot be imported.
    """
    try:
        import scanpy as sc  # noqa: F401
    except ImportError:
        raise ImportError("Please install scanpy: `pip install scanpy`")

    # Only object/category metadata columns are offered for grouping.
    groupable = parent.dataset._metadata.select_dtypes(
        include=["object", "category"]
    )
    groupby_options = (None, *groupable.columns)
    layer_options = (None, *parent.dataset.adata.layers.keys())

    variable_controls: dict[str, DescriptionWidget | ValueWidget] = {
        "groupby": Dropdown(options=groupby_options, description="Group by"),
        "groups": SelectMultiple(description="Groups"),
        "reference": Dropdown(description="Reference"),
        "layer": Dropdown(
            options=layer_options,
            value=None,
            description="Layer",
        ),
        "name": Text(description="Name", value="", continuous_update=False),
    }

    # Changing the grouping column refreshes the group and reference choices.
    groupby_control = variable_controls["groupby"]
    for handler in (self._update_groups_options, self._update_reference_options):
        groupby_control.observe(handler, "value", "change")

    results = DifferentialExpressionResults(parent.dataset)
    super().__init__(
        parent=parent,
        fixed_params={},
        variable_controls=variable_controls,
        results=results,
    )
168
+
169
def function(
    self,
    groupby: str,
    groups: Iterable[str] | Literal["all"],
    reference: str,
    layer: str | None,
    name: str | None,
):
    """Run ``scanpy.tl.rank_genes_groups`` and show the new result.

    The result is stored in ``adata.uns`` under ``differential_expression``,
    or ``differential_expression_<name>`` when ``name`` is given. If that key
    is already taken, a numeric suffix is appended and increased until the
    key is unused, so an existing result is never overwritten.

    Parameters
    ----------
    groupby
        Metadata column to group by; must not be empty.
    groups
        Groups to test; an empty selection means ``"all"``.
    reference
        Reference group passed on to scanpy.
    layer
        Layer passed on to scanpy.
    name
        Optional suffix for the result key.
    """
    import scanpy as sc

    assert groupby, "a 'Group by' column must be selected"

    if not groups:
        groups = "all"

    key_added = "differential_expression"
    if name:
        key_added = f"{key_added}_{name}"

    adata = self.parent.dataset.adata
    uns: dict[str, Any] = adata.uns
    if key_added in uns:
        base_key = key_added
        # Start from the number of related keys, then skip forward past any
        # key that happens to carry that number already.
        suffix = sum(1 for existing in uns.keys() if existing.startswith(base_key)) + 1
        key_added = f"{base_key}_{suffix}"
        while key_added in uns:
            suffix += 1
            key_added = f"{base_key}_{suffix}"

    sc.tl.rank_genes_groups(
        adata,
        groupby,
        groups=groups,
        reference=reference,
        layer=layer,
        key_added=key_added,
    )

    self.results.sync_results_list(focus_result=key_added)
204
+
205
+ def _update_groups_options(self, *args, **kwargs):
206
+ groupby = self.variable_controls["groupby"].value
207
+ metadata = self.parent.dataset._metadata
208
+ control: Dropdown = self.variable_controls["groups"]
209
+
210
+ if groupby is None:
211
+ control.options = ("",)
212
+ return
213
+
214
+ options = tuple(metadata[groupby].sort_values().unique())
215
+ control.options = options
216
+
217
+ def _update_reference_options(self, *args, **kwargs):
218
+ groupby = self.variable_controls["groupby"].value
219
+ metadata = self.parent.dataset._metadata
220
+ control: Dropdown = self.variable_controls["reference"]
221
+
222
+ if groupby is None:
223
+ control.options = ("",)
224
+ control.value = ""
225
+ return
226
+
227
+ options = ("rest",)
228
+ options += tuple(metadata[groupby].sort_values().unique())
229
+
230
+ current_value = control.value
231
+ control.options = options
232
+ if current_value not in control.options:
233
+ control.value = "rest"
234
+ else:
235
+ control.value = current_value
236
+
237
def dset_var_dataframe_change_callback(self, *args, **kwargs):
    """Refresh the optional "mask_var" control from the boolean ``var`` columns.

    The controls built by this step do not include "mask_var", so the
    callback returns without doing anything unless such a control has been
    added to ``variable_controls``. When the control exists, its options
    become an empty entry plus every boolean column of ``adata.var``, and the
    previous value is kept if it is still offered.
    """
    control: Dropdown | None = self.variable_controls.get("mask_var")
    if control is None:
        return

    var_df = self.parent.dataset.adata.var
    bool_columns = var_df.select_dtypes(include=["bool"]).columns
    options = {"": None, **{column: column for column in bool_columns}}

    current_value = control.value
    control.options = options
    control.value = current_value if current_value in options.values() else None
249
+
250
+
251
+ # from scanpy 1.10.4
252
+ # scanpy/src/scanpy/get/get.py
253
def _rank_genes_groups_df(
    adata: AnnData,
    group: str | Iterable[str] | None,
    *,
    key: str = "rank_genes_groups",
    pval_cutoff: float | None = None,
    log2fc_min: float | None = None,
    log2fc_max: float | None = None,
    gene_symbols: str | None = None,
) -> pd.DataFrame:
    """Return the results stored under ``adata.uns[key]`` as a long table.

    Params
    ------
    adata
        Object to get results from.
    group
        Which group (as in :func:`scanpy.tl.rank_genes_groups`'s `groupby`
        argument) to return results from. Can be any iterable of group names.
        All groups are returned if group is `None`.
    key
        Key differential expression groups were stored under.
    pval_cutoff
        Return only adjusted p-values below the cutoff.
    log2fc_min
        Minimum logfc to return.
    log2fc_max
        Maximum logfc to return.
    gene_symbols
        Column name in `.var` DataFrame that stores gene symbols. Specifying
        this will add that column to the returned dataframe.

    Returns
    -------
    One row per gene and group. The `group` column is dropped when a single
    group is requested.
    """
    if isinstance(group, str):
        group = [group]
    elif group is None:
        group = list(adata.uns[key]["names"].dtype.names)
    else:
        # A tuple used as a DataFrame key is looked up as ONE column label and
        # a generator can only be consumed once, so normalise to a list.
        group = list(group)

    method = adata.uns[key]["params"]["method"]
    if method == "logreg":
        colnames = ["names", "scores"]
    else:
        colnames = ["names", "scores", "logfoldchanges", "pvals", "pvals_adj"]

    d = [pd.DataFrame(adata.uns[key][c])[group] for c in colnames]
    d = pd.concat(d, axis=1, names=[None, "group"], keys=colnames)
    # ``future_stack`` is only passed from pandas 2.1 on.
    if Version(pd.__version__) >= Version("2.1"):
        d = d.stack(level=1, future_stack=True).reset_index()
    else:
        d = d.stack(level=1).reset_index()
    d["group"] = pd.Categorical(d["group"], categories=group)
    d = d.sort_values(["group", "level_0"]).drop(columns="level_0")

    # The p-value and fold-change columns are only built for non-logreg methods.
    if method != "logreg":
        if pval_cutoff is not None:
            d = d[d["pvals_adj"] < pval_cutoff]
        if log2fc_min is not None:
            d = d[d["logfoldchanges"] > log2fc_min]
        if log2fc_max is not None:
            d = d[d["logfoldchanges"] < log2fc_max]
    if gene_symbols is not None:
        d = d.join(adata.var[gene_symbols], on="names")

    for pts, name in {"pts": "pct_nz_group", "pts_rest": "pct_nz_reference"}.items():
        if pts in adata.uns[key]:
            pts_df = (
                adata.uns[key][pts][group]
                .rename_axis(index="names")
                .reset_index()
                .melt(id_vars="names", var_name="group", value_name=name)
            )
            d = d.merge(pts_df)

    # remove group column for backward compat if len(group) == 1
    if len(group) == 1:
        d.drop(columns="group", inplace=True)

    return d.reset_index(drop=True)
@@ -6,6 +6,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
6
6
 
7
7
  class Neighbors(ProcessorStepBase):
8
8
  parent: Processor
9
+ name: str = "neighbors"
10
+ description: str = "Neighbors"
9
11
 
10
12
  def __init__(self, parent: Processor) -> None:
11
13
  try:
@@ -31,8 +33,6 @@ class Neighbors(ProcessorStepBase):
31
33
 
32
34
  super().__init__(
33
35
  parent=parent,
34
- name="neighbors",
35
- description="Neighbors",
36
36
  fixed_params={},
37
37
  variable_controls=variable_controls,
38
38
  )
@@ -9,6 +9,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
9
9
 
10
10
  class PCA(ProcessorStepBase):
11
11
  parent: Processor
12
+ name: str = "pca"
13
+ description: str = "PCA"
12
14
 
13
15
  def __init__(self, parent: Processor) -> None:
14
16
  try:
@@ -29,8 +31,6 @@ class PCA(ProcessorStepBase):
29
31
 
30
32
  super().__init__(
31
33
  parent=parent,
32
- name="pca",
33
- description="PCA",
34
34
  fixed_params={},
35
35
  variable_controls=variable_controls,
36
36
  )
@@ -8,6 +8,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
8
8
 
9
9
  class Preprocess(ProcessorStepBase):
10
10
  parent: Processor
11
+ name: str = "preprocess"
12
+ description: str = "Preprocess"
11
13
 
12
14
  def __init__(self, parent: Processor) -> None:
13
15
  try:
@@ -50,8 +52,6 @@ class Preprocess(ProcessorStepBase):
50
52
 
51
53
  super().__init__(
52
54
  parent=parent,
53
- name="preprocess",
54
- description="Preprocess",
55
55
  fixed_params={},
56
56
  variable_controls=variable_controls,
57
57
  )
@@ -6,6 +6,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
6
6
 
7
7
  class QC(ProcessorStepBase):
8
8
  parent: Processor
9
+ name: str = "qc"
10
+ description: str = "QC"
9
11
 
10
12
  def __init__(self, parent: Processor) -> None:
11
13
  try:
@@ -36,8 +38,6 @@ class QC(ProcessorStepBase):
36
38
 
37
39
  super().__init__(
38
40
  parent=parent,
39
- name="qc",
40
- description="QC",
41
41
  fixed_params={},
42
42
  variable_controls=variable_controls,
43
43
  )
@@ -7,6 +7,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
7
7
 
8
8
  class UMAP(ProcessorStepBase):
9
9
  parent: Processor
10
+ name: str = "umap"
11
+ description: str = "UMAP"
10
12
 
11
13
  def __init__(self, parent: Processor) -> None:
12
14
  try:
@@ -21,8 +23,6 @@ class UMAP(ProcessorStepBase):
21
23
 
22
24
  super().__init__(
23
25
  parent=parent,
24
- name="umap",
25
- description="UMAP",
26
26
  fixed_params={},
27
27
  variable_controls=variable_controls,
28
28
  )
@@ -0,0 +1,28 @@
1
+ from .._methods_registry import register_sclab_method
2
+ from ..examples.processor_steps import (
3
+ PCA,
4
+ QC,
5
+ UMAP,
6
+ Cluster,
7
+ DifferentialExpression,
8
+ Neighbors,
9
+ Preprocess,
10
+ )
11
+
12
+ __all__ = [
13
+ "QC",
14
+ "Preprocess",
15
+ "PCA",
16
+ "Neighbors",
17
+ "UMAP",
18
+ "Cluster",
19
+ "DifferentialExpression",
20
+ ]
21
+
22
+ register_sclab_method("Processing")(QC)
23
+ register_sclab_method("Processing")(Preprocess)
24
+ register_sclab_method("Processing")(PCA)
25
+ register_sclab_method("Processing")(Neighbors)
26
+ register_sclab_method("Processing")(UMAP)
27
+ register_sclab_method("Processing")(Cluster)
28
+ register_sclab_method("Analysis")(DifferentialExpression)