sclab 0.1.8__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sclab might be problematic. Click here for more details.

sclab/__init__.py CHANGED
@@ -1,7 +1,9 @@
1
+ from . import methods
1
2
  from ._sclab import SCLabDashboard
2
3
 
3
4
  __all__ = [
5
+ "methods",
4
6
  "SCLabDashboard",
5
7
  ]
6
8
 
7
- __version__ = "0.1.8"
9
+ __version__ = "0.2.3"
sclab/_io.py CHANGED
@@ -1,26 +1,25 @@
1
+ from io import BytesIO
1
2
  from pathlib import Path
3
+ from urllib.parse import urlparse
2
4
 
3
- import anndata as ad
5
+ import requests
6
+ from anndata import AnnData, read_h5ad
7
+ from tqdm.auto import tqdm
4
8
 
5
9
 
6
- def read_adata(path: str | Path, var_names: str = "gene_ids") -> ad.AnnData:
7
- path = Path(path)
10
+ def read_adata(path: str | Path, var_names: str = "gene_ids") -> AnnData:
11
+ from .scanpy.readwrite import read_10x_h5, read_10x_mtx
8
12
 
9
- match path.suffix:
10
- case ".h5" | "":
11
- try:
12
- import scanpy as sc
13
- except ImportError:
14
- raise ImportError("Please install scanpy: `pip install scanpy`")
13
+ path = Path(path)
15
14
 
16
15
  match path.suffix:
17
16
  case ".h5":
18
- adata = sc.read_10x_h5(path)
17
+ adata = read_10x_h5(path)
19
18
  case ".h5ad":
20
- adata = ad.read_h5ad(path)
19
+ adata = read_h5ad(path)
21
20
  case "":
22
21
  assert path.is_dir()
23
- adata = sc.read_10x_mtx(path)
22
+ adata = read_10x_mtx(path)
24
23
  case _:
25
24
  raise ValueError(
26
25
  "Input file must be a 10x h5, h5ad or a folder of 10x mtx files"
@@ -30,3 +29,81 @@ def read_adata(path: str | Path, var_names: str = "gene_ids") -> ad.AnnData:
30
29
  adata.var = adata.var.set_index(var_names)
31
30
 
32
31
  return adata
32
+
33
+
34
+ def load_adata_from_url(
35
+ url: str,
36
+ var_names: str = "gene_ids",
37
+ progress: bool = True,
38
+ ) -> AnnData:
39
+ """
40
+ Load an AnnData object from a URL to an .h5ad file.
41
+
42
+ Parameters:
43
+ -----------
44
+ url : str
45
+ URL to the .h5ad file
46
+ var_names : str
47
+ Name of the variable column in the .h5ad file
48
+ progress : bool
49
+ Whether to show a progress bar
50
+
51
+ Returns:
52
+ --------
53
+ anndata.AnnData
54
+ Loaded AnnData object
55
+ """
56
+ from .scanpy.readwrite import read_10x_h5
57
+
58
+ assert is_valid_url(url), "URL is not valid"
59
+ url_path = Path(urlparse(url).path)
60
+
61
+ if url_path.suffix == ".h5":
62
+ try:
63
+ import scanpy as sc
64
+ except ImportError:
65
+ raise ImportError("Please install scanpy: `pip install scanpy`")
66
+
67
+ file_content = fetch_file(url, progress=progress)
68
+ match url_path.suffix:
69
+ case ".h5":
70
+ adata = read_10x_h5(file_content)
71
+ case ".h5ad":
72
+ adata = read_h5ad(file_content)
73
+ case _:
74
+ raise ValueError("Input file must be a 10x h5 or h5ad file")
75
+
76
+ if var_names in adata.var:
77
+ adata.var = adata.var.set_index(var_names)
78
+
79
+ return adata
80
+
81
+
82
+ def fetch_file(url: str, progress: bool = True) -> BytesIO:
83
+ response = requests.get(url, stream=True)
84
+ response.raise_for_status()
85
+
86
+ total_size_in_bytes = int(response.headers.get("content-length", 0))
87
+ block_size = 1024 # 1 Kibibyte
88
+
89
+ if progress:
90
+ progress_bar = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True)
91
+
92
+ result = BytesIO()
93
+ for data in response.iter_content(block_size):
94
+ result.write(data)
95
+ if progress:
96
+ progress_bar.update(len(data))
97
+
98
+ if progress:
99
+ progress_bar.close()
100
+
101
+ return result
102
+
103
+
104
+ def is_valid_url(url: str) -> bool:
105
+ if not isinstance(url, str):
106
+ return False
107
+
108
+ result = urlparse(url)
109
+ return all([result.scheme, result.netloc])
sclab/_methods_registry.py ADDED
@@ -0,0 +1,65 @@
1
+ from typing import Callable, Type
2
+
3
+
4
+ # full class definition is in .dataset/processor/step/_processor_step_base.py
5
+ class ProcessorStepBase:
6
+ name: str
7
+ description: str
8
+
9
+
10
+ methods_registry: dict[str, list[ProcessorStepBase]] = {}
11
+
12
+
13
+ def register_sclab_method(
14
+ category: str,
15
+ name: str | None = None,
16
+ description: str | None = None,
17
+ order: int | None = None,
18
+ ) -> Callable:
19
+ """
20
+ Decorator to register a class as a sclab method.
21
+
22
+ Args:
23
+ category: The category to register the method under (e.g., "Processing")
24
+ name: Optional display name for the method. If None, uses the class name.
25
+ description: Optional description of the method.
26
+ order: Optional ordering within the category. Lower numbers appear first.
27
+
28
+ Returns:
29
+ Decorated class
30
+ """
31
+
32
+ def decorator(cls: Type[ProcessorStepBase]) -> Type[ProcessorStepBase]:
33
+ if name:
34
+ cls.name = name
35
+
36
+ if description:
37
+ cls.description = description
38
+
39
+ if order is not None:
40
+ cls.order = order
41
+
42
+ # Initialize the category in the registry if it doesn't exist
43
+ if category not in methods_registry:
44
+ methods_registry[category] = []
45
+
46
+ methods_list = methods_registry[category]
47
+
48
+ # Add the class to the registry
49
+ methods_list.append(cls)
50
+
51
+ # Sort the methods by order
52
+ methods_registry[category] = sorted(methods_list, key=lambda x: x.order)
53
+
54
+ return cls
55
+
56
+ return decorator
57
+
58
+
59
+ def get_sclab_methods():
60
+ methods = {}
61
+
62
+ for category, methods_list in methods_registry.items():
63
+ methods[category] = sorted(methods_list, key=lambda x: x.order)
64
+
65
+ return methods
sclab/_sclab.py CHANGED
@@ -1,9 +1,24 @@
1
+ import inspect
2
+ from io import BytesIO
1
3
  from pathlib import Path
2
4
 
3
5
  from anndata import AnnData
4
- from ipywidgets.widgets import GridBox, Layout, Tab
6
+ from IPython.display import display
7
+ from ipywidgets.widgets import (
8
+ Button,
9
+ FileUpload,
10
+ GridBox,
11
+ HBox,
12
+ Label,
13
+ Layout,
14
+ Output,
15
+ Tab,
16
+ Text,
17
+ ToggleButtons,
18
+ VBox,
19
+ )
5
20
 
6
- from ._io import read_adata
21
+ from ._io import is_valid_url, load_adata_from_url, read_adata
7
22
  from .dataset import SCLabDataset
8
23
  from .dataset.plotter import Plotter
9
24
  from .dataset.processor import Processor
@@ -12,60 +27,106 @@ from .event import EventBroker
12
27
 
13
28
  class SCLabDashboard(GridBox):
14
29
  broker: EventBroker
30
+ dataset: SCLabDataset
31
+ plotter: Plotter
32
+ processor: Processor
33
+ main_content: Tab
15
34
 
16
35
  def __init__(
17
36
  self,
18
- adata: AnnData | None = None,
19
- filepath: str | Path | None = None,
37
+ adata_or_filepath_or_url: AnnData | str | None = None,
20
38
  name: str = "SCLab Dashboard",
21
39
  counts_layer: str = "counts",
22
40
  batch_key: str | None = None,
23
- copy: bool = True,
41
+ copy: bool = False,
24
42
  ):
25
- if adata is None and filepath is None:
26
- raise ValueError("Either adata or filepath must be provided")
43
+ if adata_or_filepath_or_url is None:
44
+ adata = None
27
45
 
28
- if adata is None:
46
+ elif isinstance(adata_or_filepath_or_url, AnnData):
47
+ adata = adata_or_filepath_or_url
48
+
49
+ elif is_valid_url(adata_or_filepath_or_url):
50
+ url = adata_or_filepath_or_url
51
+ adata = load_adata_from_url(url)
52
+
53
+ elif isinstance(adata_or_filepath_or_url, str):
54
+ filepath = adata_or_filepath_or_url
29
55
  adata = read_adata(filepath)
30
56
 
57
+ self.name = name
58
+ self.counts_layer = counts_layer
59
+ self.batch_key = batch_key
60
+
31
61
  self.broker = EventBroker()
62
+
63
+ self.dataset = None
64
+ self.plotter = None
65
+ self.processor = None
66
+ self.main_content = None
67
+
68
+ self.data_loader_layout = Layout(
69
+ width="100%",
70
+ height="500px",
71
+ grid_template_columns="auto",
72
+ grid_template_areas=""" "data_loader" """,
73
+ border="0px solid black",
74
+ )
75
+
76
+ self.dashboard_layout = Layout(
77
+ width="100%",
78
+ height="100%",
79
+ grid_template_columns="350px auto",
80
+ grid_template_areas=""" "processor plotter" """,
81
+ border="0px solid black",
82
+ )
83
+
84
+ self.data_loader = DataLoader(self)
85
+
86
+ GridBox.__init__(self)
87
+ if adata is not None:
88
+ self._load(adata, copy=copy)
89
+ else:
90
+ self.children = (self.data_loader,)
91
+ self.layout = self.data_loader_layout
92
+
93
+ def _load(self, adata: AnnData, copy: bool = False):
32
94
  self.dataset = SCLabDataset(
33
- adata, name=name, counts_layer=counts_layer, copy=copy, broker=self.broker
95
+ adata,
96
+ name=self.name,
97
+ counts_layer=self.counts_layer,
98
+ copy=copy,
99
+ broker=self.broker,
34
100
  )
35
101
  self.plotter = Plotter(self.dataset)
36
102
  self.processor = Processor(
37
103
  self.dataset,
38
104
  self.plotter,
39
- batch_key=batch_key,
105
+ batch_key=self.batch_key,
40
106
  )
41
107
 
42
108
  self.main_content = Tab(
43
109
  children=[
44
110
  self.plotter,
111
+ self.processor.results_panel,
45
112
  self.dataset.obs_table,
46
113
  self.dataset.var_table,
47
114
  self.broker.logs_tab,
48
115
  ],
49
116
  titles=[
50
117
  "Main graph",
118
+ "Results",
51
119
  "Observations",
52
120
  "Genes",
53
121
  "Logs",
54
122
  ],
55
123
  )
56
124
 
57
- super().__init__(
58
- [
59
- self.processor.main_accordion,
60
- self.main_content,
61
- ],
62
- layout=Layout(
63
- width="100%",
64
- grid_template_columns="350px auto",
65
- grid_template_areas=""" "processor plotter" """,
66
- border="0px solid black",
67
- ),
125
+ self.children = (
126
+ self.processor.main_accordion,
127
+ self.main_content,
68
128
  )
129
+ self.layout = self.dashboard_layout
69
130
 
70
131
  @property
71
132
  def ds(self):
@@ -78,3 +139,150 @@ class SCLabDashboard(GridBox):
78
139
  @property
79
140
  def pl(self):
80
141
  return self.plotter
142
+
143
+
144
+ class DataLoader(VBox):
145
+ dashboard: SCLabDashboard
146
+ adata: AnnData
147
+
148
+ upload: FileUpload
149
+ upload_info: Output
150
+ upload_row: HBox
151
+ upload_row_label: Label
152
+
153
+ url: Text
154
+ load_button: Button
155
+ url_row: HBox
156
+ url_row_label: Label
157
+
158
+ defined_adatas_dict: dict[str, AnnData]
159
+ defined_adatas: ToggleButtons
160
+ defined_adatas_row: HBox
161
+ defined_adatas_label: Label
162
+
163
+ progress_output: Output
164
+ continue_button: Button
165
+
166
+ def __init__(self, dashboard: SCLabDashboard):
167
+ self.dashboard = dashboard
168
+
169
+ self.upload_row_label = Label("Load from file:", layout=Layout(width="120px"))
170
+ self.upload = FileUpload(layout=Layout(width="200px"))
171
+ self.upload_info = Output(layout=Layout(width="95%"))
172
+ self.upload_row = HBox(
173
+ [self.upload_row_label, self.upload, self.upload_info],
174
+ layout=Layout(width="100%"),
175
+ )
176
+ self.upload.observe(self.on_upload, "value")
177
+
178
+ self.url_row_label = Label("Load from URL:", layout=Layout(width="120px"))
179
+ self.url = Text(placeholder="https://...", layout=Layout(width="100%"))
180
+ self.load_button = Button(description="Load", layout=Layout(width="200px"))
181
+ self.url_row = HBox(
182
+ [self.url_row_label, self.url, self.load_button],
183
+ layout=Layout(width="100%"),
184
+ )
185
+ self.load_button.on_click(self.on_load_url)
186
+
187
+ user_f_locals = inspect.stack()[2].frame.f_locals
188
+ self.defined_adatas_dict = {}
189
+ for name, variable_type in [(k, type(v)) for k, v in user_f_locals.items()]:
190
+ if variable_type is AnnData:
191
+ self.defined_adatas_dict[name] = user_f_locals[name]
192
+
193
+ self.defined_adatas_label = Label(
194
+ "Defined datasets:", layout=Layout(width="120px")
195
+ )
196
+ self.defined_adatas = ToggleButtons(
197
+ options=list(self.defined_adatas_dict.keys()),
198
+ value=None,
199
+ layout=Layout(width="100%"),
200
+ )
201
+ self.defined_adatas_row = HBox(
202
+ [self.defined_adatas_label, self.defined_adatas],
203
+ layout=Layout(width="100%"),
204
+ )
205
+ self.defined_adatas.observe(self.on_defined_adatas_toggle, "value")
206
+
207
+ self.progress_output = Output(layout=Layout(width="95%"))
208
+ self.continue_button = Button(
209
+ description="Continue", layout=Layout(width="100%"), button_style="success"
210
+ )
211
+ self.continue_button.on_click(self.on_continue)
212
+
213
+ VBox.__init__(
214
+ self,
215
+ [
216
+ self.url_row,
217
+ self.upload_row,
218
+ self.defined_adatas_row,
219
+ self.progress_output,
220
+ ],
221
+ layout=Layout(width="100%"),
222
+ )
223
+
224
+ def on_defined_adatas_toggle(self, *args, **kwargs):
225
+ adata = self.defined_adatas_dict[self.defined_adatas.value]
226
+
227
+ self.progress_output.clear_output()
228
+ with self.progress_output:
229
+ print(f"Loaded {adata.shape[0]} observations and {adata.shape[1]} genes\n")
230
+ print(adata)
231
+ display(self.continue_button)
232
+
233
+ self.adata = adata
234
+
235
+ def on_upload(self, *args, **kwargs):
236
+ from .scanpy.readwrite import read_10x_h5, read_h5ad
237
+
238
+ files = self.upload.value
239
+ if len(files) == 0:
240
+ return
241
+
242
+ file = files[0]
243
+
244
+ self.upload_info.clear_output()
245
+ with self.upload_info:
246
+ for k, v in file.items():
247
+ if k == "content":
248
+ continue
249
+ print(f"{k}: {v}")
250
+
251
+ filename = file["name"]
252
+ contents = BytesIO(file["content"].tobytes())
253
+ var_names = "gene_ids"
254
+
255
+ path = Path(filename)
256
+
257
+ match path.suffix:
258
+ case ".h5":
259
+ adata = read_10x_h5(contents)
260
+ case ".h5ad":
261
+ adata = read_h5ad(contents)
262
+ case _:
263
+ self.upload_info.clear_output()
264
+ with self.upload_info:
265
+ print(f"`{filename}` is not valid")
266
+ print("Please upload a 10x h5 or h5ad file")
267
+ return
268
+
269
+ if var_names in adata.var:
270
+ adata.var = adata.var.set_index(var_names)
271
+
272
+ with self.progress_output:
273
+ print(f"Loaded {adata.shape[0]} observations and {adata.shape[1]} genes\n")
274
+ print(adata)
275
+ display(self.continue_button)
276
+
277
+ self.adata = adata
278
+
279
+ def on_load_url(self, *args, **kwargs):
280
+ self.progress_output.clear_output()
281
+ with self.progress_output:
282
+ self.adata = load_adata_from_url(self.url.value)
283
+ display(self.continue_button)
284
+
285
+ def on_continue(self, *args, **kwargs):
286
+ self.dashboard._load(self.adata)
287
+ self.adata = None
288
+ self.defined_adatas_dict = {}
sclab/dataset/_dataset.py CHANGED
@@ -120,7 +120,6 @@ class SCLabDataset(EventClient):
120
120
  },
121
121
  "copyHtml5",
122
122
  {"extend": "csvHtml5", "title": f"{self.name}_cells"},
123
- {"extend": "excelHtml5", "title": f"{self.name}_cells"},
124
123
  ],
125
124
  columnDefs=[
126
125
  {"visible": True, "targets": [0]},
@@ -128,7 +127,7 @@ class SCLabDataset(EventClient):
128
127
  ],
129
128
  style="width:100%",
130
129
  classes="display cell-border",
131
- stateSave=True,
130
+ stateSave=False,
132
131
  )
133
132
 
134
133
  def update_var_table(incoming_change: pd.DataFrame | dict, *args, **kvargs):
@@ -154,7 +153,6 @@ class SCLabDataset(EventClient):
154
153
  },
155
154
  "copyHtml5",
156
155
  {"extend": "csvHtml5", "title": f"{self.name}_genes"},
157
- {"extend": "excelHtml5", "title": f"{self.name}_genes"},
158
156
  ],
159
157
  columnDefs=[
160
158
  {"visible": True, "targets": [0]},
@@ -162,7 +160,7 @@ class SCLabDataset(EventClient):
162
160
  ],
163
161
  style="width:100%",
164
162
  classes="display cell-border",
165
- stateSave=True,
163
+ stateSave=False,
166
164
  )
167
165
 
168
166
  update_obs_table(self.adata.obs)
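sclab/dataset/processor/ (module file name missing from the extracted diff; package inferred from the relative imports in this hunk) CHANGED
@@ -33,9 +33,11 @@ from pandas.api.types import (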
33
33
  )
34
34
  from traitlets import TraitError
35
35
 
36
+ from ..._methods_registry import get_sclab_methods
36
37
  from ...event import EventBroker, EventClient
37
38
  from .._dataset import SCLabDataset
38
39
  from ..plotter import Plotter
40
+ from ._results_panel import ResultsPanel
39
41
 
40
42
  logger = logging.getLogger(__name__)
41
43
 
@@ -75,6 +77,7 @@ _ProcessorStep = BasicProcessorStep | ProcessorStepBase
75
77
  class Processor(EventClient):
76
78
  dataset: SCLabDataset
77
79
  plotter: Plotter
80
+ results_panel: ResultsPanel
78
81
  batch_key: str | None
79
82
  batch_values: list[str] | None
80
83
  broker: EventBroker
@@ -109,6 +112,7 @@ class Processor(EventClient):
109
112
  ):
110
113
  self.dataset = dataset
111
114
  self.plotter = plotter
115
+ self.results_panel = ResultsPanel(self.dataset.broker)
112
116
  self.broker = self.dataset.broker
113
117
  self.selection_controls_list = []
114
118
  self.selection_controls_dict = {}
@@ -187,6 +191,9 @@ class Processor(EventClient):
187
191
  super().__init__(self.broker)
188
192
  self.broker.subscribe("dset_metadata_change", self._make_selection_controls)
189
193
 
194
+ registered_methods = get_sclab_methods()
195
+ self.add_steps(registered_methods)
196
+
190
197
  @property
191
198
  def step_groups(self) -> dict[str, Accordion]:
192
199
  return dict(zip(self.main_accordion.titles, self.main_accordion.children))
@@ -277,9 +284,9 @@ class Processor(EventClient):
277
284
  for step_group_name, steps_list in steps.items():
278
285
  for i, step in enumerate(steps_list):
279
286
  if isinstance(step, type):
280
- assert issubclass(
281
- step, ProcessorStepBase
282
- ), f"{step} must be a subclass of {ProcessorStepBase}"
287
+ assert issubclass(step, ProcessorStepBase), (
288
+ f"{step} must be a subclass of {ProcessorStepBase}"
289
+ )
283
290
  steps_list[i] = step(self)
284
291
 
285
292
  steps: dict[str, list[_ProcessorStep]]
@@ -288,9 +295,9 @@ class Processor(EventClient):
288
295
  # we make sure the steps have not been previously added
289
296
  for step_group_name, steps_list in steps.items():
290
297
  for step in steps_list:
291
- assert (
292
- step.description not in self.steps
293
- ), f"Step {step.description} already exists"
298
+ assert step.description not in self.steps, (
299
+ f"Step {step.description} already exists"
300
+ )
294
301
 
295
302
  # we add the new steps
296
303
  group_steps_dict: dict[str, _ProcessorStep]
@@ -327,9 +334,9 @@ class Processor(EventClient):
327
334
  def add_step_object(
328
335
  self, step: ProcessorStepBase, accordion: Accordion | None = None
329
336
  ):
330
- assert (
331
- step.description not in self.steps
332
- ), f"Step {step.description} already exists"
337
+ assert step.description not in self.steps, (
338
+ f"Step {step.description} already exists"
339
+ )
333
340
  self.steps[step.description] = step
334
341
 
335
342
  if accordion is not None:
@@ -532,12 +539,12 @@ class Processor(EventClient):
532
539
  ]
533
540
  )
534
541
 
535
- self.selection_labeling_controls_dict[
536
- "create_new_key_checkbox"
537
- ] = create_new_key_checkbox
538
- self.selection_labeling_controls_dict[
539
- "existing_metadata_key"
540
- ] = existing_metadata_key
542
+ self.selection_labeling_controls_dict["create_new_key_checkbox"] = (
543
+ create_new_key_checkbox
544
+ )
545
+ self.selection_labeling_controls_dict["existing_metadata_key"] = (
546
+ existing_metadata_key
547
+ )
541
548
  self.selection_labeling_controls_dict["existing_label"] = existing_label
542
549
  self.selection_labeling_controls_dict["new_medatadata_key"] = new_medatadata_key
543
550
  self.selection_labeling_controls_dict["new_label"] = new_label