sclab 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1063 @@
1
+ import json
2
+ import logging
3
+ from hashlib import sha256
4
+ from pathlib import Path
5
+ from typing import Any, Literal
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from ipywidgets.widgets import (
10
+ HTML,
11
+ Accordion,
12
+ Button,
13
+ Checkbox,
14
+ Combobox,
15
+ Dropdown,
16
+ FloatRangeSlider,
17
+ HBox,
18
+ IntRangeSlider,
19
+ Output,
20
+ SelectMultiple,
21
+ Tab,
22
+ Text,
23
+ VBox,
24
+ )
25
+ from ipywidgets.widgets.valuewidget import ValueWidget
26
+ from ipywidgets.widgets.widget_description import DescriptionWidget
27
+ from pandas import CategoricalDtype
28
+ from pandas.api.types import (
29
+ is_bool_dtype,
30
+ is_float_dtype,
31
+ is_integer_dtype,
32
+ is_numeric_dtype,
33
+ )
34
+ from traitlets import TraitError
35
+
36
+ from ...event import EventBroker, EventClient
37
+ from .._dataset import SCLabDataset
38
+ from ..plotter import Plotter
39
+
40
# Module-level logger named after this module; used by Processor.apply_step_history
# to report which steps are skipped and which are applied.
logger = logging.getLogger(__name__)
41
+
42
+
43
+ # forward declaration
44
class ProcessorStepBase(EventClient):
    """Annotation-only forward declaration used for type hints in this module.

    Only attribute annotations are declared here. ``Processor.add_steps``
    imports a class of the same name from ``.step._processor_step_base`` at
    call time and uses that one for its ``issubclass`` check.
    """

    # No events are declared on this stand-in.
    events: list[str] | None = None
    # The Processor this step is attached to.
    parent: "Processor"
    name: str
    # Processor uses ``description`` as the key in ``Processor.steps`` and as
    # the accordion title of the step.
    description: str
    fixed_params: dict[str, Any]
    variable_controls: dict[str, DescriptionWidget | ValueWidget]
    output: Output
    run_button: Button
    controls_list: list[DescriptionWidget | ValueWidget | Button]
    # Processor places ``controls`` inside the step-group accordions.
    controls: VBox
    run_button_description = "Run"
56
+
57
+
58
+ # forward declaration
59
class BasicProcessorStep(EventClient):
    """Annotation-only forward declaration used for type hints in this module.

    ``Processor.add_step_func`` imports a class of the same name from ``.step``
    at call time and instantiates that one.
    """

    # No events are declared on this stand-in.
    events: list[str] | None = None
    # The Processor this step is attached to.
    parent: "Processor"
    # Processor uses ``description`` as the key in ``Processor.steps``.
    description: str
    function_name: str
    function: callable
    fixed_params: dict[str, Any]
    variable_controls: dict[str, DescriptionWidget | ValueWidget]
    output: Output
    run_button: Button
    # Processor places ``controls`` inside accordions.
    controls: VBox
70
+
71
+
72
# Type alias: any object accepted by Processor as a step (either declaration above).
_ProcessorStep = BasicProcessorStep | ProcessorStepBase
73
+
74
+
75
class Processor(EventClient):
    """Widget-driven controller for row selection and processing steps on a dataset.

    The class-level annotations below describe the instance attributes that
    ``__init__`` assigns. ``events`` lists the event names whose handlers are
    the ``<event>_callback`` methods defined further down in this class.
    """

    dataset: SCLabDataset
    plotter: Plotter
    # Name of the obs column used for batches and its sorted unique values (as str),
    # or None for both when no batch column is used.
    batch_key: str | None
    batch_values: list[str] | None
    broker: EventBroker
    metadata_table: pd.DataFrame
    # One selection widget per metadata column, as a list and keyed by column name.
    selection_controls_list: list[DescriptionWidget | ValueWidget]
    selection_controls_dict: dict[str, DescriptionWidget | ValueWidget]
    selection_controls_container: VBox
    selection_labeling_controls_dict: dict[
        str, DescriptionWidget | ValueWidget | Accordion
    ]
    selection_buttons_dict: dict[str, Button | Accordion]
    all_controls_list: list[DescriptionWidget | ValueWidget]
    # Registered steps keyed by their description.
    steps: dict[str, BasicProcessorStep | ProcessorStepBase]
    # Records of the form {"step_description": ..., "params": ...}.
    step_history: list[dict[str, str | tuple | dict]]
    main_accordion: Accordion
    # Set by load_step_history(); consumed by apply_step_history().
    _loaded_step_history: list[dict[str, str | tuple | dict]] | None = None

    events = [
        "dspr_selection_values_change",
        "dspr_clear_selection_click",
        "dspr_keep_selected_click",
        "dspr_drop_selected_click",
        "dspr_apply_label_click",
    ]
102
+
103
def __init__(
    self,
    dataset: SCLabDataset,
    plotter: Plotter,
    *,
    batch_key: str | None = None,
):
    """Build the selection widgets, register the built-in steps and resolve the batch key.

    Parameters
    ----------
    dataset : SCLabDataset
        Dataset to operate on; its ``broker`` is reused as this object's broker.
    plotter : Plotter
        Stored on the instance; not otherwise used in this method.
    batch_key : str | None, optional
        Column of ``dataset.adata.obs`` holding batch labels. When falsy and a
        ``"batch"`` column exists, ``"batch"`` is used.
    """
    self.dataset = dataset
    self.plotter = plotter
    self.broker = self.dataset.broker
    self.selection_controls_list = []
    self.selection_controls_dict = {}
    self.selection_controls_container = VBox(layout=dict(width="100%"))
    self.selection_labeling_controls_dict = {}
    self.selection_buttons_dict = {}
    self.all_controls_list = []
    self.step_history = []

    def update_category_control_visibility_callback(change):
        # Observer for the "Category" dropdown: show only the SelectMultiple
        # that belongs to the newly chosen column.
        if "new" not in change or not change["new"]:
            return
        new_category = change["new"]

        if "old" in change and change["old"] == change["new"]:
            return

        if "old" in change and change["old"]:
            # Collapse the previously visible control.
            old_category = change["old"]
            control = self.selection_controls_dict[old_category]
            control.layout.visibility = "hidden"
            control.layout.height = "0px"
        else:
            # No previous value: collapse every categorical control.
            for control in self.selection_controls_list:
                if isinstance(control, SelectMultiple):
                    control.layout.visibility = "hidden"
                    control.layout.height = "0px"

        control = self.selection_controls_dict[new_category]
        n_options = len(control.options)
        # Height grows with the number of options, clamped to [20, 150] px.
        h = np.clip(n_options * 18 + 15, 20, 150)
        control.layout.height = f"{h}px"
        control.layout.visibility = "visible"

    self.visible_category_dropdown = Dropdown(options=[], description="Category")
    self.visible_category_dropdown.layout.width = "95%"
    self.visible_category_dropdown.layout.margin = "0px 0px 10px"
    self.visible_category_dropdown.style.description_width = "0px"
    self.visible_category_dropdown.observe(
        update_category_control_visibility_callback, "value", "change"
    )
    # Buttons and labeling controls are created before the per-column controls
    # because _make_selection_controls_container() reads them from the dicts.
    self._make_selection_buttons()
    self._make_selection_labeling_controls()
    self._make_selection_controls()

    self.main_accordion = Accordion(
        [
            self.selection_controls_container,
        ],
        titles=[
            "Selection Controls",
        ],
    )
    self.steps = {}
    # Built-in steps; the click callbacks run them via self.steps[...].run(...).
    self.add_step_func("filter_rows", self.filter_rows, use_run_button=False)
    self.add_step_func("apply_label", self.apply_label, use_run_button=False)

    # Fall back to the "batch" column when no key was given.
    if "batch" in self.dataset.adata.obs.columns and not batch_key:
        batch_key = "batch"

    if batch_key:
        batch_values = (
            self.dataset.adata.obs[batch_key]
            .sort_values()
            .astype(str)
            .unique()
            .tolist()
        )

    else:
        batch_values = None

    self.batch_key = batch_key
    self.batch_values = batch_values

    super().__init__(self.broker)
    # Rebuild/refresh the per-column controls whenever the metadata changes.
    self.broker.subscribe("dset_metadata_change", self._make_selection_controls)
189
+
190
@property
def step_groups(self) -> dict[str, Accordion]:
    """Map each title of the main accordion to the child widget shown under it."""
    accordion = self.main_accordion
    return {
        title: child for title, child in zip(accordion.titles, accordion.children)
    }
193
+
194
def _create_new_step_group(
    self,
    group_name: str,
    group_steps: dict[str, _ProcessorStep] | None = None,
):
    """Append a new accordion named ``group_name`` to the main accordion.

    Parameters
    ----------
    group_name : str
        Title of the new group in the main accordion.
    group_steps : dict[str, _ProcessorStep] | None, optional
        Steps keyed by title; each step's ``controls`` becomes a panel of the
        new accordion. ``None`` creates an empty group.
    """
    # Accordion children must be widgets, so take each step's controls.
    panels = {
        title: step.controls
        for title, step in ({} if group_steps is None else group_steps).items()
    }

    group_accordion = Accordion(
        titles=list(panels),
        children=list(panels.values()),
    )

    # Read both tuples before writing either one back.
    main = self.main_accordion
    current_children = main.children
    current_titles = main.titles
    main.children = (*current_children, group_accordion)
    main.titles = (*current_titles, group_name)
218
+
219
+ def _update_step_group(self, group_name: str, new_steps: dict[str, _ProcessorStep]):
220
+ # get the current group steps accordion
221
+ group_steps_accordion = self.step_groups[group_name]
222
+ current_steps_dict = dict(
223
+ zip(group_steps_accordion.titles, group_steps_accordion.children)
224
+ )
225
+
226
+ # accordion childrens are widgets, we need to extract the corresponding controls
227
+ new_steps_dict = {k: v.controls for k, v in new_steps.items()}
228
+
229
+ # update the current group steps accordion. We merge the current and new steps
230
+ steps = {**current_steps_dict, **new_steps_dict}
231
+ group_steps_accordion.children = tuple(steps.values())
232
+ group_steps_accordion.titles = tuple(steps.keys())
233
+
234
def add_steps(
    self,
    _steps: _ProcessorStep
    | type
    | list[_ProcessorStep | type]
    | dict[str, _ProcessorStep | type]
    | dict[str, list[_ProcessorStep | type]],
    step_group_name: str = "Processing",
):
    """
    Add one or more steps to the dataset processor. The steps can be given as a single
    step instance, a type of step to be instantiated, a list of steps, or a dictionary
    of step group names to lists of steps.

    The containers passed in are copied, so the caller's list/dict is never modified.

    Parameters
    ----------
    _steps: _ProcessorStep | type | list[_ProcessorStep | type] | dict[str, _ProcessorStep | type] | dict[str, list[_ProcessorStep | type]]
        The steps to add to the dataset processor.
    step_group_name: str, optional
        The name of the step group to add the steps to. If the step group does not exist,
        it will be created. Defaults to "Processing". Ignored when ``_steps`` is a
        dictionary, whose keys name the groups.

    Raises
    ------
    AssertionError
        If a step type is not a subclass of ``ProcessorStepBase`` or a step with the
        same description has already been added to the dataset processor.
    """
    from .step._processor_step_base import ProcessorStepBase

    # Normalize to a dictionary of lists of steps. Lists are copied because
    # step types are replaced by instances in place below.
    steps: dict[str, list]
    if isinstance(_steps, dict):
        steps = {
            group: list(value) if isinstance(value, list) else [value]
            for group, value in _steps.items()
        }
    elif isinstance(_steps, list):
        steps = {step_group_name: list(_steps)}
    else:
        steps = {step_group_name: [_steps]}

    # if there are uninstantiated steps, we instantiate them
    for steps_list in steps.values():
        for i, step in enumerate(steps_list):
            if isinstance(step, type):
                assert issubclass(
                    step, ProcessorStepBase
                ), f"{step} must be a subclass of {ProcessorStepBase}"
                steps_list[i] = step(self)

    # we make sure the steps have not been previously added; nothing is
    # registered or displayed until every step passed this check
    for steps_list in steps.values():
        for step in steps_list:
            assert (
                step.description not in self.steps
            ), f"Step {step.description} already exists"

    # we add the new steps
    for group_name, steps_list in steps.items():
        group_steps_dict = {step.description: step for step in steps_list}

        if group_name in self.step_groups:
            # update the existing step group
            self._update_step_group(group_name, group_steps_dict)
        else:
            # create a new step group
            self._create_new_step_group(group_name, group_steps_dict)

        # we register the new steps of this group
        for step in steps_list:
            self.steps[step.description] = step
310
+
311
def add_step_func(
    self,
    description: str,
    function: callable,
    fixed_params: dict[str, Any] | None = None,
    variable_controls: dict[str, DescriptionWidget | ValueWidget] | None = None,
    use_run_button: bool = True,
    accordion: Accordion | None = None,
):
    """Wrap ``function`` in a ``BasicProcessorStep`` and register it.

    Parameters
    ----------
    description : str
        Key under which the step is stored in ``self.steps``.
    function : callable
        Callable handed to the step.
    fixed_params : dict[str, Any] | None, optional
        Passed through to the step; ``None`` means an empty dict.
    variable_controls : dict[str, DescriptionWidget | ValueWidget] | None, optional
        Passed through to the step; ``None`` means an empty dict.
    use_run_button : bool, optional
        Passed through to the step.
    accordion : Accordion | None, optional
        When given, the step's controls are appended to this accordion.

    Raises
    ------
    AssertionError
        If a step with the same description is already registered.
    """
    from .step import BasicProcessorStep

    # A fresh dict per call: the former ``{}`` defaults were shared between
    # every step created without explicit arguments.
    if fixed_params is None:
        fixed_params = {}
    if variable_controls is None:
        variable_controls = {}

    step = BasicProcessorStep(
        self, description, function, fixed_params, variable_controls, use_run_button
    )
    self.add_step_object(step, accordion)
326
+
327
def add_step_object(
    self, step: ProcessorStepBase, accordion: Accordion | None = None
):
    """Register ``step`` under its description and optionally show its controls.

    Raises ``AssertionError`` when a step with the same description exists.
    """
    key = step.description
    assert key not in self.steps, f"Step {key} already exists"
    self.steps[key] = step

    if accordion is None:
        return

    self.append_to_accordion(accordion, step.controls, key)
337
+
338
def append_to_accordion(self, accordion: Accordion, panel: VBox, title: str):
    """Add ``panel`` as the last child of ``accordion`` and give it ``title``."""
    panels = (*accordion.children, panel)
    accordion.children = panels
    # The new panel sits at the last index.
    accordion.set_title(len(panels) - 1, title)
343
+
344
+ def _make_selection_controls(self, *args, **kwargs):
345
+ for column in self.dataset.metadata.columns:
346
+ if column in self.selection_controls_dict:
347
+ self.update_column_selection_control(column)
348
+ else:
349
+ self.add_column_selection_control(column)
350
+
351
+ for column in self.selection_controls_dict.keys():
352
+ if column not in self.dataset.metadata.columns:
353
+ self.remove_column_selection_control(column)
354
+
355
def _make_selection_buttons(self):
    """
    Create the buttons that act on the current selection.

    The buttons are stored in ``selection_buttons_dict`` so that they are built
    once and reused whenever the selection controls container is rebuilt.
    """
    publisher_for = self.button_click_event_publisher

    clear_button = Button(
        description="Clear Selection",
        button_style="primary",
        layout=dict(width="98%"),
    )
    clear_button.on_click(publisher_for("dspr", "clear_selection"))

    keep_button = Button(description="Keep Selected", button_style="danger")
    keep_button.on_click(publisher_for("dspr", "keep_selected"))

    drop_button = Button(description="Drop Selected", button_style="danger")
    drop_button.on_click(publisher_for("dspr", "drop_selected"))

    keep_drop_row = HBox([keep_button, drop_button])
    keep_drop_accordion = Accordion([keep_drop_row], titles=["Keep/Drop"])

    self.selection_buttons_dict.update(
        clear_selection=clear_button,
        keep_drop_buttons=keep_drop_row,
        keep_drop_accordion=keep_drop_accordion,
    )
394
+
395
def _make_selection_labeling_controls(self):
    """Create the widgets used to label the selected rows.

    Two pairs of key/label widgets are built: free-text inputs for creating a
    new metadata key and dropdowns for updating an existing one. A checkbox
    switches between the pairs. All widgets are stored in
    ``selection_labeling_controls_dict``.
    """
    # widgets visible when a new key is to be created
    new_medatadata_key = Text(
        description="Key",
        layout=dict(width="98%"),
        placeholder="New metadata key",
    )
    new_label = Text(
        description="Label",
        layout=dict(width="98%"),
        disabled=True,
    )
    # The "new key" pair starts hidden and collapsed (checkbox defaults to False).
    new_medatadata_key.layout.visibility = "hidden"
    new_label.layout.visibility = "hidden"
    new_medatadata_key.layout.height = "0px"
    new_medatadata_key.layout.margin = "0px"
    new_label.layout.height = "0px"
    new_label.layout.margin = "0px"

    # widgets visible when an existing key is to be updated
    existing_metadata_key = Dropdown(
        description="Key",
        layout=dict(width="98%"),
    )
    existing_label = Dropdown(
        description="Label",
        layout=dict(width="98%"),
        disabled=True,
    )

    create_new_key_checkbox = Checkbox(
        value=False,
        description="Create new key/label",
        layout=dict(width="98%"),
    )

    def _update_widget_visibility(change: dict):
        # Checkbox observer: show one key/label pair and collapse the other.
        df = self.dataset.metadata.select_dtypes(include=["bool", "category"])
        create_new_key = change["new"]
        if create_new_key:
            new_medatadata_key.disabled = False
            new_label.disabled = False
            existing_metadata_key.disabled = True
            existing_label.disabled = True

            new_medatadata_key.layout.visibility = "visible"
            new_label.layout.visibility = "visible"
            existing_metadata_key.layout.visibility = "hidden"
            existing_label.layout.visibility = "hidden"

            new_medatadata_key.layout.height = "28px"
            new_medatadata_key.layout.margin = "2px"
            new_label.layout.height = "28px"
            new_label.layout.margin = "2px"
            existing_metadata_key.layout.height = "0px"
            existing_metadata_key.layout.margin = "0px"
            existing_label.layout.height = "0px"
            existing_label.layout.margin = "0px"

            # Start from empty text inputs.
            new_medatadata_key.value = ""
            new_label.value = ""
        else:
            new_medatadata_key.disabled = True
            new_label.disabled = True
            existing_metadata_key.disabled = False
            existing_label.disabled = False

            new_medatadata_key.layout.visibility = "hidden"
            new_label.layout.visibility = "hidden"
            existing_metadata_key.layout.visibility = "visible"
            existing_label.layout.visibility = "visible"

            new_medatadata_key.layout.height = "0px"
            new_medatadata_key.layout.margin = "0px"
            new_label.layout.height = "0px"
            new_label.layout.margin = "0px"
            existing_metadata_key.layout.height = "28px"
            existing_metadata_key.layout.margin = "2px"
            existing_label.layout.height = "28px"
            existing_label.layout.margin = "2px"

            # Only bool/category columns are offered as existing keys.
            existing_metadata_key.options = [""] + df.columns.to_list()
            existing_metadata_key.value = ""
            existing_label.options = [""]
            existing_label.value = ""

    create_new_key_checkbox.observe(_update_widget_visibility, "value", "change")

    def _update_new_label_options_callback(change: dict):
        # Observer of the existing-key dropdown: refresh the label choices.
        metadata_key_value = change["new"]
        if metadata_key_value is None or metadata_key_value == "":
            existing_label.disabled = True
            existing_label.options = [""]
            existing_label.value = ""
        else:
            series: pd.Series = self.dataset.metadata[metadata_key_value]
            if isinstance(series.dtype, CategoricalDtype):
                existing_label.options = [""] + series.cat.categories.to_list()
            elif is_bool_dtype(series):
                existing_label.options = ["", True, False]
            elif is_integer_dtype(series) or is_float_dtype(series):
                # NOTE(review): "" is not among these options, so the value
                # reset below looks like it would be rejected for numeric
                # columns; the key dropdown only offers bool/category columns.
                existing_label.options = []

            existing_label.disabled = False
            existing_label.value = ""

    existing_metadata_key.observe(
        _update_new_label_options_callback, "value", "change"
    )

    apply_button = Button(
        description="Apply", disabled=True, button_style="primary"
    )
    apply_button.on_click(self.button_click_event_publisher("dspr", "apply_label"))

    def _update_button_disabled(_):
        # "Apply" is enabled only when the active key widget is non-empty.
        new_key = new_medatadata_key.value
        existing_key = existing_metadata_key.value

        if create_new_key_checkbox.value:
            apply_button.disabled = new_key == ""
        else:
            apply_button.disabled = existing_key == ""

    new_medatadata_key.observe(_update_button_disabled, "value", "change")
    new_label.observe(_update_button_disabled, "value", "change")
    existing_metadata_key.observe(_update_button_disabled, "value", "change")
    existing_label.observe(_update_button_disabled, "value", "change")

    container = VBox(
        [
            create_new_key_checkbox,
            new_medatadata_key,
            new_label,
            existing_metadata_key,
            existing_label,
            apply_button,
        ]
    )

    # These keys are read back by dspr_apply_label_click_callback and
    # _make_selection_controls_container.
    self.selection_labeling_controls_dict[
        "create_new_key_checkbox"
    ] = create_new_key_checkbox
    self.selection_labeling_controls_dict[
        "existing_metadata_key"
    ] = existing_metadata_key
    self.selection_labeling_controls_dict["existing_label"] = existing_label
    self.selection_labeling_controls_dict["new_medatadata_key"] = new_medatadata_key
    self.selection_labeling_controls_dict["new_label"] = new_label
    self.selection_labeling_controls_dict["apply_button"] = apply_button
    self.selection_labeling_controls_dict["container"] = container
    self.selection_labeling_controls_dict["accordion"] = Accordion(
        [container],
        titles=["Label"],
    )
550
+
551
def add_column_selection_control(self, column: str):
    """Create the selection widget for metadata column ``column``.

    Categorical and boolean columns get a ``SelectMultiple``; integer and
    float columns get a range slider whose initial value ``(min, min)`` means
    "nothing selected". Does nothing for ``"is_selected"`` or for a column
    that already has a control.

    Raises
    ------
    TypeError
        If the column dtype is none of the supported kinds.
    """
    if column == "is_selected" or column in self.selection_controls_dict:
        return

    series = self.dataset.metadata[column]
    all_missing = series.isna().all()
    dtype = series.dtype
    is_categorical = isinstance(dtype, CategoricalDtype)

    if is_categorical or is_bool_dtype(dtype):
        choices = series.cat.categories if is_categorical else [True, False]
        control = SelectMultiple(
            options=choices,
            description=column,
            tooltip=column,
        )
        control.style.description_width = "0px"
        control.layout.margin = "0px"

    elif is_integer_dtype(dtype):
        low, high = (0.0, 0.0) if all_missing else (series.min(), series.max())
        control = IntRangeSlider(
            min=low,
            max=high,
            value=(low, low),
            description=column,
            tooltip=column,
        )
        control.style.description_width = "0px"

    elif is_float_dtype(dtype):
        # Widen the range by machine epsilon on both sides.
        eps = np.finfo(dtype).eps
        if all_missing:
            low, high = 0.0, 0.0
        else:
            low, high = series.min() - eps, series.max() + eps

        control = FloatRangeSlider(
            min=low,
            max=high,
            step=(high - low) / 100,
            description=column,
            tooltip=column,
            value=(low, low),
        )
        control.style.description_width = "0px"

    else:
        raise TypeError(f"Unsupported dtype: {series.dtype}")

    control.layout.width = "98%"
    control.observe(self._publish_selection_value_change, "value")

    self.selection_controls_list.append(control)
    self.selection_controls_dict[column] = control
    self._make_selection_controls_container()
622
+
623
def _make_selection_controls_container(self):
    """Rebuild the children of ``selection_controls_container`` from the current controls.

    Categorical controls go into a "Categorical" tab headed by the category
    dropdown; range sliders go into a "Numeric" tab, each preceded by an HTML
    label. The labeling controls, keep/drop buttons and the clear button are
    taken from the dicts filled by the ``_make_selection_*`` helpers.

    Raises
    ------
    RuntimeError
        If ``selection_controls_list`` holds a control of an unexpected type.
    """
    categorical_controls = [self.visible_category_dropdown]
    range_slider_controls = []

    for control in self.selection_controls_list:
        if isinstance(control, SelectMultiple):
            # Every categorical control starts collapsed; the dropdown
            # observer installed in __init__ makes one of them visible.
            control.layout.visibility = "hidden"
            control.layout.height = "0px"
            control.layout.margin = "0px"
            categorical_controls.append(control)
        elif isinstance(control, IntRangeSlider | FloatRangeSlider):
            label = HTML(f"{control.description}", layout={"height": "20px"})
            control.layout.height = "20px"
            range_slider_controls.append(label)
            range_slider_controls.append(control)
        else:
            raise RuntimeError(f"Unsupported control type {type(control)}")

    # Keep the previously chosen category when it is still available.
    old_value = self.visible_category_dropdown.value
    options = [c.description for c in categorical_controls[1:]]
    # NOTE(review): assigning options presumably resets the dropdown value and
    # fires its observer - confirm against the ipywidgets Dropdown docs.
    self.visible_category_dropdown.options = options
    if old_value in options:
        self.visible_category_dropdown.value = old_value

    tabs = Tab(
        [
            VBox(categorical_controls, layout={"height": "200px"}),
            VBox(range_slider_controls, layout={"height": "200px"}),
        ],
        layout=dict(width="98%"),
        titles=["Categorical", "Numeric"],
    )
    selection_criteria = VBox(
        [
            tabs,
        ]
    )
    selection_actions = VBox(
        [
            Accordion(
                [
                    self.selection_labeling_controls_dict["container"],
                    self.selection_buttons_dict["keep_drop_buttons"],
                ],
                titles=["Label", "Keep/Drop"],
            ),
        ]
    )

    self.selection_controls_container.children = tuple(
        [
            Accordion([selection_criteria], titles=["Selection Criteria"]),
            Accordion([selection_actions], titles=["Selection Actions"]),
            self.selection_buttons_dict["clear_selection"],
        ]
    )
679
+
680
def update_column_selection_control(self, column: str):
    """Refresh the options/bounds of an existing control from the current metadata.

    The control's value is reset to "nothing selected" (an empty tuple for
    select widgets, ``(min, min)`` for range sliders). Does nothing when the
    column has no control.

    Raises
    ------
    TypeError
        If the column dtype is none of the supported kinds.
    """
    if column not in self.selection_controls_dict:
        return

    control = self.selection_controls_dict[column]
    series = self.dataset.metadata[column]
    allna = series.isna().all()
    dtype = series.dtype

    if isinstance(dtype, CategoricalDtype):
        control.options = series.cat.categories
        control.value = tuple()

    elif is_bool_dtype(dtype):
        control.value = tuple()

    elif is_integer_dtype(dtype):
        if allna:
            min_value, max_value = 0.0, 0.0
        else:
            min_value, max_value = series.min(), series.max()

        # The slider rejects min > max, so the safe assignment order depends
        # on how the new range relates to the old one: try min first, then
        # fall back to max first.
        try:
            control.min, control.max = min_value, max_value
        except TraitError:
            try:
                control.max, control.min = max_value, min_value
            except TraitError:
                # Best effort: keep the previous bounds, but leave a trace.
                logger.warning("Could not update slider bounds for %r", column)

        control.value = (control.min, control.min)

    elif is_float_dtype(dtype):
        eps = np.finfo(dtype).eps
        # Same fallback as add_column_selection_control: an all-NaN column
        # would otherwise yield NaN bounds.
        if allna:
            min_value, max_value = 0.0, 0.0
        else:
            min_value, max_value = series.min() - eps, series.max() + eps

        span = max_value - min_value
        step = span / 100
        try:
            control.min = min_value
            control.max = max_value
            control.step = step
        except TraitError:
            try:
                control.max = max_value
                control.min = min_value
                control.step = step
            except TraitError:
                # Best effort: keep the previous bounds, but leave a trace.
                logger.warning("Could not update slider bounds for %r", column)
        control.value = (control.min, control.min)

    else:
        raise TypeError(f"Unsupported dtype: {series.dtype}")
732
+
733
def remove_column_selection_control(self, column: str):
    """Drop the selection control of ``column`` and rebuild the container.

    Does nothing when the column has no control.
    """
    if column not in self.selection_controls_dict:
        return

    control = self.selection_controls_dict.pop(column)

    # Controls are registered in selection_controls_list (see
    # add_column_selection_control); all_controls_list is cleaned as well in
    # case the control was placed there. Removal is by identity and in place,
    # so a control missing from either list is not an error.
    for registry in (self.selection_controls_list, self.all_controls_list):
        registry[:] = [item for item in registry if item is not control]

    self._make_selection_controls_container()
740
+
741
def append_to_step_history(self, step_description: str, params: dict):
    """Record a step and its parameters at the end of ``step_history``.

    ``pd.Index`` parameter values are stored as plain lists; every other value
    is stored unchanged.
    """
    recorded_params = {
        name: value.to_list() if isinstance(value, pd.Index) else value
        for name, value in params.items()
    }
    entry = {
        "step_description": step_description,
        "params": recorded_params,
    }
    self.step_history.append(entry)
754
+
755
def save_step_history(
    self, path: str | Path, format: Literal["pickle", "json"] | None = None
):
    """Write ``step_history`` to ``path`` as pickle or JSON.

    Parameters
    ----------
    path : str | Path
        Destination file.
    format : "pickle" | "json" | None, optional
        Serialisation format. When ``None`` it is taken from the file suffix
        (without the leading dot).

    Raises
    ------
    ValueError
        If the format is neither ``"pickle"`` nor ``"json"``.
    """
    path = Path(path)
    chosen = path.suffix[1:] if format is None else format

    if chosen == "json":
        with path.open("w") as handle:
            json.dump(self.step_history, handle, indent=4)
        return

    if chosen == "pickle":
        import pickle

        with path.open("wb") as handle:
            pickle.dump(self.step_history, handle)
        return

    raise ValueError(f"Unsupported format {chosen}")
774
+
775
def load_step_history(self, path: str):
    """Read a saved step history into ``_loaded_step_history``.

    The format is taken from the file suffix (``.pickle`` or ``.json``). The
    loaded history is only stored; ``apply_step_history`` replays it.

    Raises
    ------
    ValueError
        If the suffix is neither ``pickle`` nor ``json``.
    """
    path = Path(path)
    suffix = path.suffix[1:]

    if suffix == "pickle":
        import pickle

        # NOTE: unpickling can execute arbitrary code - only load trusted files.
        with path.open("rb") as handle:
            loaded = pickle.load(handle)
    elif suffix == "json":
        with path.open("r") as handle:
            loaded = json.load(handle)
    else:
        raise ValueError(f"Unsupported format {suffix}")

    self._loaded_step_history = loaded
791
+
792
def apply_step_history(self):
    """Replay the steps of the loaded history that have not been applied yet.

    The current ``step_history`` must be a prefix of the loaded history. The
    matching prefix is only logged; every remaining entry is executed through
    ``self.steps[<description>].run(**params)``.

    Raises
    ------
    AssertionError
        If no history was loaded, or the current history is longer than or
        differs from the loaded one.
    """
    assert self._loaded_step_history is not None, "No step history loaded"

    applied = self.step_history
    incoming = self._loaded_step_history
    n, N = len(applied), len(incoming)
    assert N >= n, "Step history mismatch"

    # Verify the already-applied prefix entry by entry.
    for i, pair in enumerate(zip(applied, incoming)):
        present_step, incoming_step = pair
        assert present_step == incoming_step, "Step history mismatch"
        step_description = present_step["step_description"]
        logger.info(f"Step {i + 1: 2d}/{N} already applied: {step_description}")

    # Run what is left; the slice is taken once, before any step runs.
    for i, step in enumerate(incoming[n:]):
        step_description, params = step["step_description"], step["params"]
        logger.info(f"Applying step {n + i + 1: 2d}/{N}: {step_description}")
        self.steps[step_description].run(**params)
814
+
815
+ def print_step_history(self, with_hash: bool = False):
816
+ for i, step in enumerate(self.step_history):
817
+ desc = step["step_description"]
818
+ params = step["params"]
819
+ p = []
820
+ for k, v in params.items():
821
+ if isinstance(v, list):
822
+ v = f"list({len(v)})"
823
+ if v is None:
824
+ v = "None"
825
+ if not isinstance(v, int | float | str):
826
+ v = type(v)
827
+ p.append(f"{k}={v}")
828
+ if with_hash:
829
+ history_hash = self._get_step_history_hash(self.step_history[: i + 1])
830
+ history_hash = history_hash[:8] + " ..."
831
+ else:
832
+ history_hash = ""
833
+ print(f"({i + 1: 3d}) {history_hash} {desc}({', '.join(p)})")
834
+
835
+ def _get_step_history_hash(self, history: list[dict]):
836
+ history_json = json.dumps(history)
837
+ history_json_hash = sha256(history_json.encode()).hexdigest()
838
+ return history_json_hash
839
+
840
@property
def step_history_hash(self):
    """Hash of the complete step history recorded so far."""
    recorded = self.step_history
    return self._get_step_history_hash(recorded)
843
+
844
@property
def selection_values(self):
    """Current value of every selection control, keyed by column name."""
    values = {}
    for column, control in self.selection_controls_dict.items():
        values[column] = control.value
    return values
850
+
851
+ def _publish_selection_value_change(self, change: dict):
852
+ owner: DescriptionWidget = change["owner"]
853
+ column = owner.description
854
+ new_value = change["new"]
855
+ self.broker.publish("dspr_selection_values_change", column, new_value=new_value)
856
+
857
+ def filter_rows(self, index: pd.Index | list):
858
+ if isinstance(index, list):
859
+ index = pd.Index(index)
860
+ self.dataset.filter_rows(index)
861
+
862
+ def apply_label(self, index: pd.Index | list, column: str, label: str):
863
+ if isinstance(index, list):
864
+ index = pd.Index(index)
865
+ self.dataset.apply_label(index, column, label)
866
+
867
def make_selectbatch_drowpdown(self, description="Select Batch"):
    """Return ``{key: Dropdown}`` for choosing a batch, or ``{}`` without a batch key.

    The key is ``description`` lower-cased with spaces replaced by underscores.
    The dropdown offers an empty entry (value ``None``) plus every batch value.
    """
    if not self.batch_key:
        return dict()

    control_key = description.lower().replace(" ", "_")
    choices = {"": None}
    choices.update({value: value for value in self.batch_values})
    dropdown = Dropdown(options=choices, value=None, description=description)
    return {control_key: dropdown}
877
+
878
def make_groupbybatch_checkbox(self, description="Group By Batch"):
    """Return ``{"group_by_batch": Checkbox}`` (checked), or ``{}`` without a batch key."""
    if not self.batch_key:
        return dict()

    checkbox = Checkbox(value=True, description=description)
    return {"group_by_batch": checkbox}
886
+
887
def dspr_selection_values_change_callback(self, column_changed: str, new_value):
    """Recompute ``dataset.selected_rows`` from the values of all selection controls.

    Every control that expresses a criterion contributes the rows matching it;
    the selection is the intersection of those row sets. A range slider
    expresses a criterion when ``max > min``; a select widget when at least one
    option is chosen.

    ``dataset.selected_rows`` is set to ``None`` when no control expresses a
    criterion, and to an empty index when criteria exist but no row satisfies
    all of them - including the case where a single criterion matches no row.

    Parameters
    ----------
    column_changed : str
        Column whose control changed (unused; all controls are re-read).
    new_value
        New value of that control (unused).
    """
    metadata = self.dataset.metadata
    row_names = metadata.index

    # None means "no criterion seen yet", which is different from "criteria
    # seen, nothing matches" (an empty index).
    selected_rows = None

    for column, value in self.selection_values.items():
        series = metadata[column]

        if is_numeric_dtype(series) and not is_bool_dtype(series):
            # this must be a range slider with value = tuple(min, max)
            min_value, max_value = value
            if not max_value > min_value:
                continue
            subset = row_names[(series >= min_value) & (series <= max_value)]

        elif value:
            # this must be a select multiple with value = tuple(selected_values)
            subset = row_names[series.isin(value)]

        else:
            continue

        # An empty subset is a real constraint: it must empty the selection
        # rather than be skipped as if the control had no criterion.
        if selected_rows is None:
            selected_rows = subset
        else:
            selected_rows = selected_rows.intersection(subset)

        if selected_rows.empty:
            # further criteria cannot add rows back
            break

    self.dataset.selected_rows = selected_rows
925
+
926
def dspr_clear_selection_click_callback(self):
    """Reset every selection control to its "nothing selected" value.

    Raises ``RuntimeError`` for a control that is neither a select widget nor
    a range slider.
    """
    for control in self.selection_controls_dict.values():
        if isinstance(control, IntRangeSlider | FloatRangeSlider):
            # (min, min) is the empty range.
            control.value = control.min, control.min
            continue

        if not isinstance(control, SelectMultiple):
            raise RuntimeError(f"Unsupported control type {type(control)}")

        control.value = tuple()
936
+
937
def dspr_apply_label_click_callback(self):
    """Run the ``apply_label`` step on the selected rows using the labeling widgets.

    The key/label pair is read from the "new" widgets when the checkbox is
    ticked and from the "existing" widgets otherwise. Nothing happens without
    a selection or with an empty key.
    """
    rows_to_label = self.dataset.selected_rows
    if rows_to_label is None:
        return

    widgets = self.selection_labeling_controls_dict
    if widgets["create_new_key_checkbox"].value:
        key_widget, label_widget = "new_medatadata_key", "new_label"
    else:
        key_widget, label_widget = "existing_metadata_key", "existing_label"

    column = widgets[key_widget].value
    label = widgets[label_widget].value

    if column == "":
        return

    self.steps["apply_label"].run(index=rows_to_label, column=column, label=label)
959
+
960
def dspr_keep_selected_click_callback(self):
    """Filter the dataset down to the currently selected rows.

    A ``None`` selection is a no-op; otherwise the selection is passed as
    ``index`` to ``self.steps["filter_rows"].run``.
    """
    selection = self.dataset.selected_rows
    if selection is None:
        return

    self.steps["filter_rows"].run(index=selection)
964
+
965
def dspr_drop_selected_click_callback(self):
    """Remove the currently selected rows from the dataset.

    A ``None`` selection is a no-op; otherwise the rows to keep are
    computed as ``row_names.difference(selected_rows)`` and passed as
    ``index`` to ``self.steps["filter_rows"].run``.
    """
    selection = self.dataset.selected_rows
    if selection is None:
        return

    # NOTE(review): pandas Index.difference sorts its result by default, so
    # the surviving rows may not be in their original order - confirm that
    # the filter step does not depend on the order of ``index``.
    remaining = self.dataset.row_names.difference(selection)
    self.steps["filter_rows"].run(index=remaining)
969
+
970
def dplt_selected_points_change_callback(self, new_value: pd.Index):
    """Clear all selection widgets when the selected points change.

    ``new_value`` is accepted but not used here. Every widget in
    ``self.selection_controls_dict`` is reset: ``SelectMultiple`` to the
    empty tuple, range sliders to the zero-width range ``(min, min)``.

    Raises:
        RuntimeError: if a widget of any other type is encountered; the
            message names the offending column.
    """
    for column, widget in self.selection_controls_dict.items():
        if isinstance(widget, SelectMultiple):
            widget.value = ()
            continue

        if isinstance(widget, (IntRangeSlider, FloatRangeSlider)):
            # Collapse the range onto the slider's lower bound.
            lower_bound = widget.min
            widget.value = (lower_bound, lower_bound)
            continue

        raise RuntimeError(
            f"Unsupported control type {type(widget)} for column {column}"
        )
982
+
983
def dset_total_rows_change_callback(self, metadata: pd.DataFrame):
    """Refresh every column selection control.

    ``metadata`` is accepted but not used here; each key of
    ``self.selection_controls_dict`` is passed, in order, to
    ``self.update_column_selection_control``.
    """
    for column_name in self.selection_controls_dict:
        self.update_column_selection_control(column_name)
986
+
987
def dset_metadata_change_callback(self, *args, **kwargs):
    """Re-populate metadata-driven dropdowns after the metadata changed.

    Three groups of widgets are updated, in this order:

    1. the ``existing_metadata_key`` labeling dropdown gets ``""`` plus all
       bool/category columns;
    2. every ``Dropdown`` whose normalized description starts with
       ``"group"`` gets an empty entry plus all object/category columns;
    3. every ``Dropdown`` whose normalized description ends with ``"axis"``
       gets an empty entry plus all columns.

    For groups 2 and 3 the previous value is re-applied when it is still
    among the new options, otherwise the value is set to ``None``.
    Positional and keyword arguments are ignored.
    """

    def _reload_matching(choices, matches):
        # Swap the options of each Dropdown selected by ``matches`` (applied
        # to the lowercased description stripped of " :." characters).
        for widget in self.all_controls_list:
            if not isinstance(widget, Dropdown):
                continue
            tag = widget.description.lower().strip(" :.")
            if not matches(tag):
                continue
            # Read the value before replacing the options so it can be
            # restored afterwards.
            previous = widget.value
            widget.options = choices
            widget.value = previous if previous in widget.options else None

    # 1. key dropdown used when labeling into an existing column
    labelable = self.dataset._metadata.select_dtypes(include=["bool", "category"])
    key_dropdown: Dropdown = self.selection_labeling_controls_dict[
        "existing_metadata_key"
    ]
    key_dropdown.options = ["", *labelable.columns.to_list()]

    # 2. "group ..." dropdowns: object / category columns only
    groupable = self.dataset._metadata.select_dtypes(include=["object", "category"])
    group_choices = {"": None}
    group_choices.update((name, name) for name in groupable.columns)
    _reload_matching(group_choices, lambda tag: tag.startswith("group"))

    # 3. "... axis" dropdowns: every metadata column
    every_column = self.dataset._metadata
    axis_choices = {"": None}
    axis_choices.update((name, name) for name in every_column.columns)
    _reload_matching(axis_choices, lambda tag: tag.endswith("axis"))
1020
+
1021
def dset_anndata_layers_change_callback(self, layers):
    """Refresh every "layer" dropdown with the given layer names.

    Each ``Dropdown`` in ``self.all_controls_list`` whose description,
    lowercased and stripped of " :." characters, equals ``"layer"`` gets
    ``layers`` as its options. The previous value is kept when it is still
    among the options; otherwise the value is set to ``None``.
    """
    choices = dict((name, name) for name in layers)

    for widget in self.all_controls_list:
        if not isinstance(widget, Dropdown):
            continue
        if widget.description.lower().strip(" :.") != "layer":
            continue

        # Read the value before replacing the options so it can be restored.
        previous = widget.value
        widget.options = choices
        widget.value = previous if previous in widget.options else None
1034
+
1035
def dset_data_dict_change_callback(self, *args, **kwargs):
    """Refresh every "use rep" dropdown with the keys of ``adata.obsm``.

    Each ``Dropdown`` in ``self.all_controls_list`` whose description,
    lowercased and stripped of " :." characters, equals ``"use rep"`` gets
    the current ``obsm`` keys as options. The new value is chosen as:

    * ``"X_pca"`` when nothing was selected before and ``"X_pca"`` exists;
    * the previous value when it is still among the options;
    * ``None`` otherwise.

    Positional and keyword arguments are ignored.
    """
    representations = dict((key, key) for key in self.dataset.adata.obsm.keys())

    for widget in self.all_controls_list:
        if not isinstance(widget, Dropdown):
            continue
        if widget.description.lower().strip(" :.") != "use rep":
            continue

        # Read the value before replacing the options so it can be restored.
        previous = widget.value
        widget.options = representations

        if previous is None and "X_pca" in widget.options:
            chosen = "X_pca"  # default representation when none was chosen
        elif previous in widget.options:
            chosen = previous
        else:
            chosen = None
        widget.value = chosen
1050
+
1051
def dset_total_vars_change_callback(self, *args, **kwargs):
    """Refresh every "gene" control with the current variable names.

    Each ``Dropdown`` or ``Combobox`` in ``self.all_controls_list`` whose
    description, lowercased and stripped of " :." characters, equals
    ``"gene"`` gets ``self.dataset.adata.var_names`` as its options. The
    previous value is kept when it is still a valid gene name; otherwise
    the control is cleared.

    The two widget types are handled separately because their traits
    differ: a ``Dropdown`` accepts a label->value mapping as options and
    ``None`` as "no selection", whereas a ``Combobox`` expects a tuple of
    strings as options and a string value, so it is cleared with ``""``.
    Assigning a dict / ``None`` to a ``Combobox`` raises ``TraitError``.

    Positional and keyword arguments are ignored.
    """
    gene_names = {v: v for v in self.dataset.adata.var_names}

    for control in self.all_controls_list:
        if not isinstance(control, (Dropdown, Combobox)):
            continue
        if control.description.lower().strip(" :.") != "gene":
            continue

        # Read the value before replacing the options so it can be restored.
        current_value = control.value

        if isinstance(control, Combobox):
            control.options = tuple(str(name) for name in gene_names)
            # Membership is tested against the dict for O(1) lookups.
            control.value = current_value if current_value in gene_names else ""
        else:
            control.options = gene_names
            if current_value not in control.options:
                control.value = None
            else:
                control.value = current_value