sclab 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sclab/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ from ._sclab import SCLabDashboard
2
+
3
+ __all__ = [
4
+ "SCLabDashboard",
5
+ ]
6
+
7
+ __version__ = "0.1.7"
sclab/_io.py ADDED
@@ -0,0 +1,32 @@
1
+ from pathlib import Path
2
+
3
+ import anndata as ad
4
+
5
+
6
def read_adata(path: str | Path, var_names: str = "gene_ids") -> ad.AnnData:
    """Load an AnnData object from a 10x h5 file, an h5ad file or a 10x mtx folder.

    The reader is chosen as follows:

    * ``.h5ad`` suffix -> ``anndata.read_h5ad``
    * ``.h5`` suffix   -> ``scanpy.read_10x_h5``
    * a directory      -> ``scanpy.read_10x_mtx``

    Args:
        path: Location of the file or folder to read.
        var_names: Name of a column in ``adata.var``. When that column is
            present it becomes the index of ``adata.var``; otherwise
            ``adata.var`` is left untouched.

    Returns:
        The loaded AnnData object.

    Raises:
        ImportError: If a 10x input is requested and scanpy is not installed.
        ValueError: If ``path`` is neither a ``.h5``/``.h5ad`` file nor an
            existing directory.
    """
    path = Path(path)
    suffix = path.suffix

    if suffix == ".h5ad":
        # Plain anndata is enough here, so scanpy is not required.
        adata = ad.read_h5ad(path)
    elif suffix == ".h5" or path.is_dir():
        # Directories are detected with is_dir() rather than by an empty
        # suffix, so that folder names containing a dot are accepted too.
        try:
            import scanpy as sc
        except ImportError as err:
            raise ImportError("Please install scanpy: `pip install scanpy`") from err

        if suffix == ".h5":
            adata = sc.read_10x_h5(path)
        else:
            adata = sc.read_10x_mtx(path)
    else:
        raise ValueError(
            "Input file must be a 10x h5, h5ad or a folder of 10x mtx files"
        )

    if var_names in adata.var:
        adata.var = adata.var.set_index(var_names)

    return adata
sclab/_sclab.py ADDED
@@ -0,0 +1,80 @@
1
+ from pathlib import Path
2
+
3
+ from anndata import AnnData
4
+ from ipywidgets.widgets import GridBox, Layout, Tab
5
+
6
+ from ._io import read_adata
7
+ from .dataset import SCLabDataset
8
+ from .dataset.plotter import Plotter
9
+ from .dataset.processor import Processor
10
+ from .event import EventBroker
11
+
12
+
13
class SCLabDashboard(GridBox):
    """Two-column grid widget combining the processor panel and the data tabs.

    The left column holds ``processor.main_accordion``; the right column holds
    a ``Tab`` widget with the plotter, the observation table, the gene table
    and the broker's log tab.
    """

    broker: EventBroker

    def __init__(
        self,
        adata: AnnData | None = None,
        filepath: str | Path | None = None,
        name: str = "SCLab Dashboard",
        counts_layer: str = "counts",
        batch_key: str | None = None,
        copy: bool = True,
    ):
        """Build the dataset, plotter and processor, then lay them out.

        Args:
            adata: Data to display. When omitted, ``filepath`` is read instead.
            filepath: Path handed to ``read_adata`` when ``adata`` is ``None``.
            name: Forwarded to ``SCLabDataset`` as its name.
            counts_layer: Forwarded to ``SCLabDataset``.
            batch_key: Forwarded to ``Processor``.
            copy: Forwarded to ``SCLabDataset``.

        Raises:
            ValueError: If both ``adata`` and ``filepath`` are ``None``.
        """
        if adata is None:
            if filepath is None:
                raise ValueError("Either adata or filepath must be provided")
            adata = read_adata(filepath)

        # One broker is shared by every component created below.
        shared_broker = EventBroker()
        self.broker = shared_broker

        self.dataset = SCLabDataset(
            adata,
            name=name,
            counts_layer=counts_layer,
            copy=copy,
            broker=shared_broker,
        )
        self.plotter = Plotter(self.dataset)
        self.processor = Processor(self.dataset, self.plotter, batch_key=batch_key)

        # (title, widget) pairs keep each tab label next to its content.
        tab_entries = (
            ("Main graph", self.plotter),
            ("Observations", self.dataset.obs_table),
            ("Genes", self.dataset.var_table),
            ("Logs", self.broker.logs_tab),
        )
        self.main_content = Tab(
            children=[widget for _, widget in tab_entries],
            titles=[title for title, _ in tab_entries],
        )

        grid_layout = Layout(
            width="100%",
            grid_template_columns="350px auto",
            grid_template_areas=""" "processor plotter" """,
            border="0px solid black",
        )
        super().__init__(
            [self.processor.main_accordion, self.main_content],
            layout=grid_layout,
        )

    @property
    def ds(self):
        """Short alias for ``self.dataset``."""
        return self.dataset

    @property
    def pr(self):
        """Short alias for ``self.processor``."""
        return self.processor

    @property
    def pl(self):
        """Short alias for ``self.plotter``."""
        return self.plotter
@@ -0,0 +1,8 @@
1
+ from . import plotter, processor
2
+ from ._dataset import SCLabDataset
3
+
4
+ __all__ = [
5
+ "plotter",
6
+ "processor",
7
+ "SCLabDataset",
8
+ ]
@@ -0,0 +1,398 @@
1
+ from collections.abc import Sequence
2
+
3
+ import itables
4
+ import itables.options
5
+ import numpy as np
6
+ import pandas as pd
7
+ from anndata import AnnData
8
+ from ipywidgets import GridBox, Layout, Output
9
+ from numpy.typing import NDArray
10
+
11
+ from ..event import EventBroker, EventClient
12
+ from ._exceptions import InvalidRowSubset
13
+
14
+ itables.options.maxBytes = "50MB"
15
+
16
+
17
class SCLabDataset(EventClient):
    """Event-publishing wrapper around an ``AnnData`` object.

    The wrapped object lives in ``self.adata``. Observation metadata is read
    from ``adata.obs`` and the per-observation data tables from ``adata.obsm``.
    Changes are announced through ``self.broker`` using the event names listed
    in ``events``.

    NOTE(review): ``self.broker`` and ``self.uuid`` are presumably provided by
    ``EventClient.__init__`` -- confirm against the ``event`` module.
    """

    adata: AnnData
    name: str
    # `_data_dict` and `_metadata` are implemented further down as read-only
    # properties that derive their value from `adata.obsm` / `adata.obs`.
    _data_dict: dict[str, pd.DataFrame]
    _metadata: pd.DataFrame
    _selected_data_key: str | None = None
    events: list[str] = [
        "dset_data_dict_change",
        "dset_data_key_selection_change",
        "dset_metadata_change",
        "dset_selected_rows_change",
        "dset_total_rows_change",
        "dset_anndata_layers_change",
        "dset_anndata_neighbors_change",
        "dset_var_dataframe_change",
        "dset_total_vars_change",
    ]
    preemptions: dict[str, list[str]] = {
        "dset_data_key_selection_change": [
            "ctrl_selected_axes_1_change",
            "ctrl_selected_axes_2_change",
            "ctrl_selected_axes_3_change",
            "ctrl_n_dimensions_change",
        ],
        "dset_metadata_change": [
            "dspr_selection_values_change",
        ],
        "dset_total_rows_change": [
            "dspr_selection_values_change",
        ],
    }
    _selected_rows: pd.Index | None = None

    def __init__(
        self,
        adata: AnnData,
        name: str = "SCLabDataset",
        counts_layer: str = "counts",
        copy: bool = True,
        broker: EventBroker | None = None,
    ):
        """Wrap ``adata`` and build the observation and gene table widgets.

        Args:
            adata: The data to wrap.
            name: Dataset name; also used in the titles of exported tables.
            counts_layer: Layer name that is created from ``adata.X`` when it
                does not exist yet (see ``load_adata``).
            copy: When true, work on ``adata.copy()`` instead of ``adata``.
            broker: Event broker to use; a new one is created when omitted.

        Raises:
            TypeError: If ``adata`` is not an ``AnnData`` instance.
        """
        if not isinstance(adata, AnnData):
            raise TypeError("adata must be an instance of AnnData")

        self.name = name

        # we keep the original counts layer to be able to reset it
        self.counts_layer = counts_layer

        self.load_adata(adata, copy=copy)

        self.obs_table_output = Output(style={"width": "98%"})
        self.var_table_output = Output(style={"width": "98%"})

        self.obs_table = GridBox(
            [
                self.obs_table_output,
            ],
            layout=Layout(
                width="100%",
                grid_template_columns="auto",
                grid_template_areas=""" "obs_table" """,
                border="0px solid black",
            ),
        )

        self.var_table = GridBox(
            [
                self.var_table_output,
            ],
            layout=Layout(
                width="100%",
                grid_template_columns="auto",
                grid_template_areas=""" "var_table" """,
                border="0px solid black",
            ),
        )

        if broker is None:
            broker = EventBroker()

        super().__init__(broker)

        # The two callbacks differ only in the axis they render, so both
        # delegate to `_render_table`.
        def update_obs_table(incoming_change: pd.DataFrame | dict, *args, **kvargs):
            self._render_table(incoming_change, "obs", "cells")

        def update_var_table(incoming_change: pd.DataFrame | dict, *args, **kvargs):
            self._render_table(incoming_change, "var", "genes")

        update_obs_table(self.adata.obs)
        update_var_table(self.adata.var)

        broker.subscribe("dset_metadata_change", update_obs_table)
        broker.subscribe("dset_total_rows_change", update_obs_table)

        broker.subscribe("dset_var_dataframe_change", update_var_table)
        broker.subscribe("dset_total_vars_change", update_var_table)

    def _render_table(
        self, incoming_change: pd.DataFrame | dict, axis: str, export_suffix: str
    ) -> None:
        """Redraw the interactive table for ``axis`` (``"obs"`` or ``"var"``).

        Args:
            incoming_change: Either the DataFrame to display, or a dict, in
                which case the current ``adata.obs`` / ``adata.var`` is shown.
            axis: ``"obs"`` or ``"var"``; selects the frame, the output widget
                and the table id.
            export_suffix: Appended to the dataset name in the title of the
                CSV and Excel exports.

        Raises:
            TypeError: If ``incoming_change`` is neither a DataFrame nor a dict.
        """
        if isinstance(incoming_change, dict):
            df = getattr(self.adata, axis)
        elif isinstance(incoming_change, pd.DataFrame):
            df = incoming_change
        else:
            raise TypeError("incoming_change must be a DataFrame or a dict")

        output = self.obs_table_output if axis == "obs" else self.var_table_output
        export_title = f"{self.name}_{export_suffix}"

        output.clear_output(wait=True)
        with output:
            itables.show(
                df.reset_index(),
                tableId=f"singlecell_dataset_{axis}_itable_{self.uuid}",
                layout={"top1": "searchBuilder"},
                buttons=[
                    "pageLength",
                    {
                        "extend": "colvis",
                        "collectionLayout": "fixed columns",
                        "popoverTitle": "Column visibility control",
                    },
                    "copyHtml5",
                    {"extend": "csvHtml5", "title": export_title},
                    {"extend": "excelHtml5", "title": export_title},
                ],
                columnDefs=[
                    {"visible": True, "targets": [0]},
                    {"visible": False, "targets": "_all"},
                ],
                style="width:100%",
                classes="display cell-border",
                stateSave=True,
            )

    def load_adata(self, adata: AnnData, copy: bool = True):
        """Store ``adata`` (or a copy of it) and ensure the counts layer exists."""
        if copy:
            self.adata = adata.copy()
        else:
            self.adata = adata

        if self.counts_layer not in self.adata.layers:
            self.adata.layers[self.counts_layer] = self.adata.X.copy()

    @property
    def data_dict(self) -> dict:
        """Numeric metadata under ``"metadata"`` plus every validated obsm table."""
        return {
            "metadata": self.metadata.select_dtypes(include="number"),
            **self._data_dict,
        }

    @data_dict.setter
    def data_dict(self, value: dict[str, pd.DataFrame | NDArray]):
        """Validate ``value``, store it in ``adata.obsm`` and publish the change.

        ``_data_dict`` is a read-only view derived from ``adata.obsm``, so the
        new content has to be written to ``adata.obsm`` itself; assigning to
        ``self._data_dict`` would raise ``AttributeError``.
        """
        validated = self._validate_data_dict(value)

        # The "metadata" entry returned by the getter is derived from obs and
        # therefore never stored. The caller's objects are stored as given so
        # that arrays stay arrays; only ``None`` is replaced by its validated
        # (empty) frame.
        self.adata.obsm = {
            key: (validated[key] if raw is None else raw)
            for key, raw in value.items()
            if key != "metadata"
        }
        self.broker.publish("dset_data_dict_change", self.data_dict)

    @property
    def _data_dict(self):
        """Validated DataFrame view of the current ``adata.obsm`` entries."""
        # dict(...) relies only on the public mapping interface of obsm.
        return self._validate_data_dict(dict(self.adata.obsm))

    @property
    def _metadata(self):
        """The raw ``adata.obs`` frame (not a copy)."""
        return self.adata.obs

    def update_data_dict(self):
        """Re-read ``adata.obsm`` and publish ``dset_data_dict_change``."""
        # The getter re-validates obsm on every access, so nothing needs to be
        # stored here; subscribers only have to be notified.
        self.broker.publish("dset_data_dict_change", self.data_dict)

    def _validate_data_dict(self, value: dict[str, pd.DataFrame | NDArray]) -> dict:
        """Validate every entry of ``value`` and return them as DataFrames.

        All entries must share the same index, in the same order.
        """
        assert isinstance(value, dict), "data_dict must be a dictionary"

        index = None
        tmp_dict = {}
        for key, val in value.items():
            assert isinstance(key, str), "data_dict keys must be strings"

            val = self._validate_data(key, val)

            if index is None:
                index = val.index
            else:
                # TODO: improve matching of index. We should accept index in different order
                assert val.index.equals(index), "all data must have the same index"

            tmp_dict[key] = val

        return tmp_dict

    @property
    def data(self) -> pd.DataFrame:
        """The table for the selected data key, or an empty frame if none is selected."""
        if not self._selected_data_key:
            return pd.DataFrame(index=self.row_names)
        return self.data_dict[self._selected_data_key]

    def select_data_key(self, key: str):
        """Select ``key`` as the active table and publish the change.

        Raises:
            ValueError: If ``key`` is not a key of ``data_dict``.
        """
        if key not in self.data_dict:
            raise ValueError(f"key '{key}' not found in data_dict")

        self._selected_data_key = key

        self.broker.publish("dset_data_key_selection_change", self.data)

    def reset_data_key(self):
        """Clear the active data key and publish the change."""
        self._selected_data_key = None

        self.broker.publish("dset_data_key_selection_change", self.data)

    def _validate_data(
        self, dk: str, value: pd.DataFrame | NDArray | None
    ) -> pd.DataFrame:
        """Coerce one data entry into a DataFrame aligned with the observations.

        Args:
            dk: Key of the entry; its upper-cased form prefixes the generated
                column names of array inputs (``"<DK> 1"``, ``"<DK> 2"``, ...).
            value: ``None`` (yields an empty frame), a 1D/2D array, or a
                DataFrame.

        Raises:
            TypeError: If ``value`` is none of the accepted types.
        """
        # Read the index straight from adata.obs. It equals
        # `self.metadata.index`, but skips rebuilding the metadata frame for
        # every entry being validated.
        obs_index = self._metadata.index
        has_rows = len(obs_index) > 0

        if value is None:
            value = pd.DataFrame(index=obs_index)

        elif isinstance(value, np.ndarray):
            assert value.ndim <= 2, "data array must be 1D or 2D"

            if has_rows:
                assert value.shape[0] == len(obs_index), (
                    "data must have same length as metadata"
                )
                value = pd.DataFrame(value, index=obs_index)

            else:
                value = pd.DataFrame(value)
            value.columns = [f"{dk.upper()} {i + 1}" for i in range(value.shape[1])]

        elif isinstance(value, pd.DataFrame):
            if has_rows:
                assert value.index.equals(obs_index), (
                    "data must have same index as metadata"
                )

        else:
            raise TypeError("data must be a pandas DataFrame or numpy array")

        return value

    @property
    def metadata(self) -> pd.DataFrame:
        """A cleaned copy of ``adata.obs`` with an extra ``is_selected`` column."""
        # Retain only numerical, categorical and string columns.
        # If a column has object dtype (string) and there are no more than 10 unique values,
        # convert it to categorical; object columns with more values are dropped.
        metadata = self._metadata.select_dtypes(
            include=["number", "object", "category", "boolean"]
        ).copy()
        # Iterate over a snapshot because columns may be dropped in the loop.
        for col in list(metadata.columns):
            if metadata[col].dtype == "object":
                if metadata[col].nunique() <= 10:
                    metadata[col] = metadata[col].astype("category")
                else:
                    metadata.drop(col, axis=1, inplace=True)

        # is_selected may be a boolean column or a column of NaNs
        # if it is a boolean column, a selection has been defined (possible all False)
        # if it is a column of NaNs, no selection has been defined
        if self._selected_rows is not None:
            metadata["is_selected"] = metadata.index.isin(self._selected_rows)
        else:
            metadata["is_selected"] = pd.NA
        # NOTE(review): the cast is applied in both cases so the column always
        # has the nullable "boolean" dtype -- confirm this matches upstream.
        metadata["is_selected"] = metadata["is_selected"].astype("boolean")
        return metadata

    @metadata.setter
    def metadata(self, value: pd.DataFrame | None):
        """Replace ``adata.obs`` with ``value`` and publish ``dset_metadata_change``.

        ``None`` clears all columns while keeping the observation names.

        Raises:
            TypeError: If ``value`` is neither ``None`` nor a DataFrame.
        """
        if value is None:
            value = pd.DataFrame(index=self.adata.obs_names)

        if not isinstance(value, pd.DataFrame):
            raise TypeError("metadata must be a pandas DataFrame")

        # `_metadata` is a read-only view of adata.obs, so write to obs directly.
        self.adata.obs = value
        self.broker.publish("dset_metadata_change", self.metadata)

    @property
    def row_names(self) -> pd.Index:
        """Index of the observations."""
        return self._metadata.index

    @property
    def selected_rows(self) -> pd.Index:
        """The current selection; an empty index when nothing is selected."""
        if self._selected_rows is None:
            index = pd.Index([], name="selected_rows")
        else:
            index = self._selected_rows
        return index

    @selected_rows.setter
    def selected_rows(self, value: pd.Index | None):
        """Store the selection (``None`` clears it) and publish the change."""
        if value is None:
            self._selected_rows = None
        else:
            # Cast to the dtype of the row names so later isin/.loc lookups
            # compare like with like.
            row_names_dtype = self.row_names.dtype

            self._selected_rows = value.astype(row_names_dtype).rename("selected_rows")

        self.broker.publish("dset_selected_rows_change", value)

    @property
    def selected_rows_mask(self) -> NDArray[np.bool_]:
        """Boolean mask over the observations marking the selected rows."""
        return self.row_names.isin(self.selected_rows)

    @property
    def selected_rows_data(self) -> pd.DataFrame:
        """Rows of ``data`` that belong to the current selection."""
        return self.data.loc[self.selected_rows]

    @property
    def selected_rows_metadata(self) -> pd.DataFrame:
        """Rows of ``metadata`` that belong to the current selection."""
        return self.metadata.loc[self.selected_rows]

    def select_rows(self, index: pd.Index):
        """Add ``index`` to the current selection."""
        assert isinstance(index, pd.Index), "index must be a pandas Index"
        assert index.isin(self.row_names).all(), "index contains invalid values"
        self.selected_rows = self.selected_rows.union(index)

    def deselect_rows(self, index: pd.Index):
        """Remove ``index`` from the current selection."""
        assert isinstance(index, pd.Index), "index must be a pandas Index"
        assert index.isin(self.row_names).all(), "index contains invalid values"
        self.selected_rows = self.selected_rows.difference(index)

    def clear_selected_rows(self):
        """Drop the current selection."""
        self.selected_rows = None

    def filter_rows(self, index: pd.Index | Sequence):
        """Keep only the observations in ``index`` and publish the change.

        Raises:
            InvalidRowSubset: If ``index`` contains names that are not
                observations of the dataset.
        """
        if not isinstance(index, pd.Index):
            index = pd.Index(index)

        if not index.isin(self.row_names).all():
            raise InvalidRowSubset("index contains invalid values")

        self.adata = self.adata[index].copy()

        # Forget selected rows that were filtered out; otherwise the
        # `.loc[self.selected_rows]` lookups would raise KeyError.
        if self._selected_rows is not None:
            still_present = self._selected_rows.isin(self.adata.obs_names)
            self._selected_rows = self._selected_rows[still_present]

        self.broker.publish("dset_total_rows_change", self.metadata)

    def apply_label(self, index: pd.Index, column: str, label: str):
        """Write ``label`` into the categorical obs column ``column`` for ``index``.

        The column is created when missing. An empty ``label`` sets the rows
        to NaN instead.
        """
        if column not in self._metadata.columns:
            # Start without categories when the label is empty, so that no
            # "" category is created.
            dtype = pd.CategoricalDtype([label] if label else [], ordered=False)
            self._metadata[column] = pd.Series(index=self.row_names, dtype=dtype)

        if label and label not in self._metadata[column].cat.categories:
            self._metadata[column] = self._metadata[column].cat.add_categories(label)
        elif not label:
            label = np.nan

        self._metadata.loc[index, column] = label

        self.broker.publish("dset_metadata_change", self.metadata, column)

    def ctrl_data_key_change_callback(self, new_value: str):
        """Select ``new_value`` as the data key, or reset it when ``None``."""
        if new_value is None:
            self.reset_data_key()
        else:
            self.select_data_key(new_value)

    def dplt_selected_points_change_callback(self, new_value: pd.Index):
        """Replace the current selection with ``new_value``."""
        self.selected_rows = new_value

    def dspr_clear_selection_click_callback(self):
        """Clear the current selection."""
        self.clear_selected_rows()
@@ -0,0 +1,2 @@
1
class InvalidRowSubset(Exception):
    """Raised when a requested row subset contains names that are not in the dataset."""
@@ -0,0 +1,7 @@
1
+ from ._controls import PlotterControls
2
+ from ._plotter import Plotter
3
+
4
+ __all__ = [
5
+ "Plotter",
6
+ "PlotterControls",
7
+ ]