sclab 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sclab/__init__.py +7 -0
- sclab/_io.py +32 -0
- sclab/_sclab.py +80 -0
- sclab/dataset/__init__.py +8 -0
- sclab/dataset/_dataset.py +398 -0
- sclab/dataset/_exceptions.py +2 -0
- sclab/dataset/plotter/__init__.py +7 -0
- sclab/dataset/plotter/_controls.py +594 -0
- sclab/dataset/plotter/_plotter.py +1017 -0
- sclab/dataset/plotter/_utils.py +437 -0
- sclab/dataset/processor/__init__.py +7 -0
- sclab/dataset/processor/_processor.py +1063 -0
- sclab/dataset/processor/step/__init__.py +7 -0
- sclab/dataset/processor/step/_basic_processor_step.py +109 -0
- sclab/dataset/processor/step/_processor_step_base.py +120 -0
- sclab/event/__init__.py +7 -0
- sclab/event/_broker.py +201 -0
- sclab/event/_client.py +81 -0
- sclab/event/_utils.py +14 -0
- sclab/examples/__init__.py +5 -0
- sclab/examples/processor_steps/__init__.py +15 -0
- sclab/examples/processor_steps/_cluster.py +37 -0
- sclab/examples/processor_steps/_neighbors.py +72 -0
- sclab/examples/processor_steps/_pca.py +124 -0
- sclab/examples/processor_steps/_preprocess.py +186 -0
- sclab/examples/processor_steps/_qc.py +93 -0
- sclab/examples/processor_steps/_umap.py +48 -0
- sclab-0.1.7.dist-info/METADATA +139 -0
- sclab-0.1.7.dist-info/RECORD +30 -0
- sclab-0.1.7.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1063 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from hashlib import sha256
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from ipywidgets.widgets import (
|
|
10
|
+
HTML,
|
|
11
|
+
Accordion,
|
|
12
|
+
Button,
|
|
13
|
+
Checkbox,
|
|
14
|
+
Combobox,
|
|
15
|
+
Dropdown,
|
|
16
|
+
FloatRangeSlider,
|
|
17
|
+
HBox,
|
|
18
|
+
IntRangeSlider,
|
|
19
|
+
Output,
|
|
20
|
+
SelectMultiple,
|
|
21
|
+
Tab,
|
|
22
|
+
Text,
|
|
23
|
+
VBox,
|
|
24
|
+
)
|
|
25
|
+
from ipywidgets.widgets.valuewidget import ValueWidget
|
|
26
|
+
from ipywidgets.widgets.widget_description import DescriptionWidget
|
|
27
|
+
from pandas import CategoricalDtype
|
|
28
|
+
from pandas.api.types import (
|
|
29
|
+
is_bool_dtype,
|
|
30
|
+
is_float_dtype,
|
|
31
|
+
is_integer_dtype,
|
|
32
|
+
is_numeric_dtype,
|
|
33
|
+
)
|
|
34
|
+
from traitlets import TraitError
|
|
35
|
+
|
|
36
|
+
from ...event import EventBroker, EventClient
|
|
37
|
+
from .._dataset import SCLabDataset
|
|
38
|
+
from ..plotter import Plotter
|
|
39
|
+
|
|
40
|
+
# Module-level logger named after this module; Processor.apply_step_history
# reports its progress through it at INFO level.
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# forward declaration
|
|
44
|
+
class ProcessorStepBase(EventClient):
    """
    Typing stub listing the attributes this module expects a step to expose.

    ``Processor.add_steps`` imports the class of the same name from
    ``.step._processor_step_base`` at call time and uses that class for its
    subclass check; this declaration only serves the annotations below.
    """

    events: list[str] = None  # annotated as a list but left unset (None) here
    parent: "Processor"
    name: str
    description: str  # key under which a step is stored in ``Processor.steps``
    fixed_params: dict[str, Any]
    variable_controls: dict[str, DescriptionWidget | ValueWidget]
    output: Output
    run_button: Button
    controls_list: list[DescriptionWidget | ValueWidget | Button]
    controls: VBox  # widget that ``Processor`` places inside a step-group accordion
    run_button_description = "Run"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# forward declaration
|
|
59
|
+
class BasicProcessorStep(EventClient):
    """
    Typing stub for function-backed steps.

    ``Processor.add_step_func`` imports the class of the same name from
    ``.step`` at call time and instantiates that one; this declaration only
    serves the annotations in this module.
    """

    events: list[str] = None  # annotated as a list but left unset (None) here
    parent: "Processor"
    description: str  # key under which a step is stored in ``Processor.steps``
    function_name: str
    function: callable
    fixed_params: dict[str, Any]
    variable_controls: dict[str, DescriptionWidget | ValueWidget]
    output: Output
    run_button: Button
    controls: VBox  # widget that ``Processor`` places inside an accordion
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Union alias used in annotations wherever either kind of step is accepted.
_ProcessorStep = BasicProcessorStep | ProcessorStepBase
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class Processor(EventClient):
    """
    Widget front end for selecting, labelling and processing a dataset.

    The processor keeps one selection control per metadata column, a registry
    of named processing steps and a history of the steps that were recorded
    through ``append_to_step_history``.
    """

    dataset: SCLabDataset
    plotter: Plotter
    batch_key: str | None  # name of the ``adata.obs`` column that identifies batches
    batch_values: list[str] | None  # unique batch labels as strings, in sorted-value order
    broker: EventBroker  # taken from ``dataset.broker`` in ``__init__``
    # NOTE(review): not assigned in ``__init__`` -- confirm it is still used.
    metadata_table: pd.DataFrame
    selection_controls_list: list[DescriptionWidget | ValueWidget]
    selection_controls_dict: dict[str, DescriptionWidget | ValueWidget]  # keyed by column name
    selection_controls_container: VBox
    selection_labeling_controls_dict: dict[
        str, DescriptionWidget | ValueWidget | Accordion
    ]
    selection_buttons_dict: dict[str, Button | Accordion]
    all_controls_list: list[DescriptionWidget | ValueWidget]
    steps: dict[str, BasicProcessorStep | ProcessorStepBase]  # keyed by step description
    step_history: list[dict[str, str | tuple | dict]]
    main_accordion: Accordion
    # History read by ``load_step_history``; None until a file has been loaded.
    _loaded_step_history: list[dict[str, str | tuple | dict]] | None = None

    # Event names declared by this client.
    # NOTE(review): presumably consumed by EventClient/EventBroker -- confirm there.
    events = [
        "dspr_selection_values_change",
        "dspr_clear_selection_click",
        "dspr_keep_selected_click",
        "dspr_drop_selected_click",
        "dspr_apply_label_click",
    ]
|
|
102
|
+
|
|
103
|
+
def __init__(
    self,
    dataset: SCLabDataset,
    plotter: Plotter,
    *,
    batch_key: str | None = None,
):
    """
    Build the selection widgets, register the built-in steps and subscribe
    to metadata changes.

    Parameters
    ----------
    dataset: SCLabDataset
        Dataset to operate on. Its ``broker`` becomes this processor's broker.
    plotter: Plotter
        Plotter stored on the instance.
    batch_key: str | None, optional
        Column of ``dataset.adata.obs`` that identifies batches. When it is
        falsy and a column named "batch" exists, "batch" is used.
    """
    self.dataset = dataset
    self.plotter = plotter
    self.broker = self.dataset.broker
    self.selection_controls_list = []
    self.selection_controls_dict = {}
    self.selection_controls_container = VBox(layout=dict(width="100%"))
    self.selection_labeling_controls_dict = {}
    self.selection_buttons_dict = {}
    self.all_controls_list = []
    self.step_history = []

    def update_category_control_visibility_callback(change):
        # Observer of the category dropdown: collapse the previously shown
        # categorical control and expand the newly chosen one.
        if "new" not in change or not change["new"]:
            return
        new_category = change["new"]

        if "old" in change and change["old"] == change["new"]:
            return

        if "old" in change and change["old"]:
            old_category = change["old"]
            # NOTE(review): raises KeyError if the old category's control was
            # removed from the dict in the meantime -- confirm this cannot happen.
            control = self.selection_controls_dict[old_category]
            control.layout.visibility = "hidden"
            control.layout.height = "0px"
        else:
            # no previous value: collapse every categorical control
            for control in self.selection_controls_list:
                if isinstance(control, SelectMultiple):
                    control.layout.visibility = "hidden"
                    control.layout.height = "0px"

        control = self.selection_controls_dict[new_category]
        n_options = len(control.options)
        # height grows with the number of options, clamped to 20-150 px
        h = np.clip(n_options * 18 + 15, 20, 150)
        control.layout.height = f"{h}px"
        control.layout.visibility = "visible"

    self.visible_category_dropdown = Dropdown(options=[], description="Category")
    self.visible_category_dropdown.layout.width = "95%"
    self.visible_category_dropdown.layout.margin = "0px 0px 10px"
    self.visible_category_dropdown.style.description_width = "0px"
    self.visible_category_dropdown.observe(
        update_category_control_visibility_callback, "value", "change"
    )
    # buttons and labeling widgets must exist before the container is built
    # by _make_selection_controls (it reads both dictionaries)
    self._make_selection_buttons()
    self._make_selection_labeling_controls()
    self._make_selection_controls()

    self.main_accordion = Accordion(
        [
            self.selection_controls_container,
        ],
        titles=[
            "Selection Controls",
        ],
    )
    self.steps = {}
    # built-in steps; no accordion is passed, so they get no panel of their own
    self.add_step_func("filter_rows", self.filter_rows, use_run_button=False)
    self.add_step_func("apply_label", self.apply_label, use_run_button=False)

    if "batch" in self.dataset.adata.obs.columns and not batch_key:
        batch_key = "batch"

    if batch_key:
        batch_values = (
            self.dataset.adata.obs[batch_key]
            .sort_values()
            .astype(str)
            .unique()
            .tolist()
        )

    else:
        batch_values = None

    self.batch_key = batch_key
    self.batch_values = batch_values

    # NOTE(review): the EventClient initialiser runs only here, after the
    # helper methods above were already used -- confirm none of them needs it.
    super().__init__(self.broker)
    # rebuild/refresh the per-column controls whenever the metadata changes
    self.broker.subscribe("dset_metadata_change", self._make_selection_controls)
|
|
189
|
+
|
|
190
|
+
@property
def step_groups(self) -> dict[str, Accordion]:
    """Map every section title of the main accordion to its child widget."""
    accordion = self.main_accordion
    return {
        title: child
        for title, child in zip(accordion.titles, accordion.children)
    }
|
|
193
|
+
|
|
194
|
+
def _create_new_step_group(
    self,
    group_name: str,
    group_steps: dict[str, _ProcessorStep] | None = None,
):
    """
    Append a new step-group accordion to the main accordion.

    Parameters
    ----------
    group_name: str
        Title of the new section in the main accordion.
    group_steps: dict[str, _ProcessorStep] | None, optional
        Steps shown in the group, keyed by panel title. Each step
        contributes its ``controls`` widget. Defaults to an empty group.
    """
    if group_steps is None:
        group_steps = {}

    # an accordion holds widgets, so every step is represented by its controls
    panel_titles = []
    panel_widgets = []
    for title, step in group_steps.items():
        panel_titles.append(title)
        panel_widgets.append(step.controls)

    new_group = Accordion(titles=panel_titles, children=panel_widgets)

    # read both traits before writing either one, then extend them together
    main = self.main_accordion
    existing_children = tuple(main.children)
    existing_titles = tuple(main.titles)
    main.children = existing_children + (new_group,)
    main.titles = existing_titles + (group_name,)
|
|
218
|
+
|
|
219
|
+
def _update_step_group(self, group_name: str, new_steps: dict[str, _ProcessorStep]):
|
|
220
|
+
# get the current group steps accordion
|
|
221
|
+
group_steps_accordion = self.step_groups[group_name]
|
|
222
|
+
current_steps_dict = dict(
|
|
223
|
+
zip(group_steps_accordion.titles, group_steps_accordion.children)
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
# accordion childrens are widgets, we need to extract the corresponding controls
|
|
227
|
+
new_steps_dict = {k: v.controls for k, v in new_steps.items()}
|
|
228
|
+
|
|
229
|
+
# update the current group steps accordion. We merge the current and new steps
|
|
230
|
+
steps = {**current_steps_dict, **new_steps_dict}
|
|
231
|
+
group_steps_accordion.children = tuple(steps.values())
|
|
232
|
+
group_steps_accordion.titles = tuple(steps.keys())
|
|
233
|
+
|
|
234
|
+
def add_steps(
    self,
    _steps: _ProcessorStep
    | type
    | list[_ProcessorStep | type]
    | dict[str, _ProcessorStep | type]
    | dict[str, list[_ProcessorStep | type]],
    step_group_name: str = "Processing",
):
    """
    Add one or more steps to the dataset processor. The steps can be given as a single
    step instance, a type of step to be instantiated, a list of steps, or a dictionary
    of step group names to lists of steps.

    Parameters
    ----------
    _steps: _ProcessorStep | type | list[_ProcessorStep | type] | dict[str, _ProcessorStep | type] | dict[str, list[_ProcessorStep | type]]
        The steps to add to the dataset processor. The object passed in is
        never modified.
    step_group_name: str, optional
        The name of the step group to add the steps to. If the step group does not exist,
        it will be created. Defaults to "Processing". Ignored when ``_steps`` is a
        dictionary, whose keys name the groups.

    Raises
    ------
    TypeError
        If a class is given that is not a subclass of ``ProcessorStepBase``.
    ValueError
        If the step has already been added to the dataset processor, or if two
        steps in the same call share a description.
    """
    from .step._processor_step_base import ProcessorStepBase

    # Normalise the input into {group name: [steps]}. Fresh containers are
    # built so the caller's list/dict is left untouched.
    if isinstance(_steps, dict):
        grouped = {
            group: list(value) if isinstance(value, list) else [value]
            for group, value in _steps.items()
        }
    elif isinstance(_steps, list):
        grouped = {step_group_name: list(_steps)}
    else:
        grouped = {step_group_name: [_steps]}

    # if there are uninstantiated steps, we instantiate them
    for group_steps in grouped.values():
        for i, step in enumerate(group_steps):
            if isinstance(step, type):
                if not issubclass(step, ProcessorStepBase):
                    raise TypeError(
                        f"{step} must be a subclass of {ProcessorStepBase}"
                    )
                group_steps[i] = step(self)

    # Validate every step before touching any widget, so a failure leaves
    # both the accordions and the registry unchanged.
    seen = set()
    for group_steps in grouped.values():
        for step in group_steps:
            if step.description in self.steps or step.description in seen:
                raise ValueError(f"Step {step.description} already exists")
            seen.add(step.description)

    for group, group_steps in grouped.items():
        group_steps_dict = {step.description: step for step in group_steps}

        if group in self.step_groups:
            # update the existing step group
            self._update_step_group(group, group_steps_dict)
        else:
            # create a new step group
            self._create_new_step_group(group, group_steps_dict)

        # we register the new steps
        for step in group_steps:
            self.steps[step.description] = step
|
|
310
|
+
|
|
311
|
+
def add_step_func(
    self,
    description: str,
    function: callable,
    fixed_params: dict[str, Any] | None = None,
    variable_controls: dict[str, DescriptionWidget | ValueWidget] | None = None,
    use_run_button: bool = True,
    accordion: Accordion | None = None,
):
    """
    Wrap ``function`` in a ``BasicProcessorStep`` and register it.

    Parameters
    ----------
    description: str
        Name of the step; it is the key used in ``self.steps``.
    function: callable
        Callable handed to the ``BasicProcessorStep``.
    fixed_params: dict[str, Any] | None, optional
        Passed through to ``BasicProcessorStep``. Defaults to a new empty dict.
    variable_controls: dict[str, DescriptionWidget | ValueWidget] | None, optional
        Passed through to ``BasicProcessorStep``. Defaults to a new empty dict.
    use_run_button: bool, optional
        Passed through to ``BasicProcessorStep``. Defaults to True.
    accordion: Accordion | None, optional
        When given, the step's controls are appended to it as a new panel.
    """
    from .step import BasicProcessorStep

    # A fresh dict per call: a ``{}`` default would be one shared object that
    # every step created without explicit parameters ends up holding.
    if fixed_params is None:
        fixed_params = {}
    if variable_controls is None:
        variable_controls = {}

    step = BasicProcessorStep(
        self, description, function, fixed_params, variable_controls, use_run_button
    )
    self.add_step_object(step, accordion)
|
|
326
|
+
|
|
327
|
+
def add_step_object(
    self, step: ProcessorStepBase, accordion: Accordion | None = None
):
    """
    Register ``step`` under its description and optionally show its controls.

    An ``AssertionError`` is raised when a step with the same description is
    already registered. When ``accordion`` is given, the step's controls are
    appended to it as a panel titled with the step description.
    """
    key = step.description
    assert key not in self.steps, f"Step {step.description} already exists"
    self.steps[key] = step

    if accordion is None:
        return

    self.append_to_accordion(accordion, step.controls, step.description)
|
|
337
|
+
|
|
338
|
+
def append_to_accordion(self, accordion: Accordion, panel: VBox, title: str):
    """Add ``panel`` as the last child of ``accordion`` and title it ``title``."""
    new_index = len(accordion.children)
    accordion.children = (*accordion.children, panel)
    accordion.set_title(new_index, title)
|
|
343
|
+
|
|
344
|
+
def _make_selection_controls(self, *args, **kwargs):
|
|
345
|
+
for column in self.dataset.metadata.columns:
|
|
346
|
+
if column in self.selection_controls_dict:
|
|
347
|
+
self.update_column_selection_control(column)
|
|
348
|
+
else:
|
|
349
|
+
self.add_column_selection_control(column)
|
|
350
|
+
|
|
351
|
+
for column in self.selection_controls_dict.keys():
|
|
352
|
+
if column not in self.dataset.metadata.columns:
|
|
353
|
+
self.remove_column_selection_control(column)
|
|
354
|
+
|
|
355
|
+
def _make_selection_buttons(self):
    """
    Build the selection action buttons and store them on the instance.

    The widgets are created here once and only looked up afterwards, so
    rebuilding the selection controls never recreates them. Each button
    publishes its click through ``button_click_event_publisher`` with the
    "dspr" prefix.
    """
    publisher = self.button_click_event_publisher

    def danger_button(label: str, action: str) -> Button:
        # keep/drop share the same look; only label and action differ
        button = Button(description=label, button_style="danger")
        button.on_click(publisher("dspr", action))
        return button

    clear_button = Button(
        description="Clear Selection",
        button_style="primary",
        layout=dict(width="98%"),
    )
    clear_button.on_click(publisher("dspr", "clear_selection"))

    keep_button = danger_button("Keep Selected", "keep_selected")
    drop_button = danger_button("Drop Selected", "drop_selected")

    keep_drop_row = HBox([keep_button, drop_button])
    keep_drop_panel = Accordion([keep_drop_row], titles=["Keep/Drop"])

    self.selection_buttons_dict.update(
        clear_selection=clear_button,
        keep_drop_buttons=keep_drop_row,
        keep_drop_accordion=keep_drop_panel,
    )
|
|
394
|
+
|
|
395
|
+
def _make_selection_labeling_controls(self):
    """
    Build the widgets used to label the current selection.

    Two alternative widget pairs are created: text fields for a new
    key/label and dropdowns for an existing key/label. A checkbox switches
    between them by toggling ``disabled``, visibility, height and margin.
    All widgets are stored in ``self.selection_labeling_controls_dict``.
    """
    # widgets visible when a new key is to be created
    # NOTE: the misspelling "medatadata" is also the dictionary key stored at
    # the end of this method, so it is part of the runtime interface.
    new_medatadata_key = Text(
        description="Key",
        layout=dict(width="98%"),
        placeholder="New metadata key",
    )
    new_label = Text(
        description="Label",
        layout=dict(width="98%"),
        disabled=True,
    )
    # the "new key" widgets start collapsed (checkbox starts unchecked)
    new_medatadata_key.layout.visibility = "hidden"
    new_label.layout.visibility = "hidden"
    new_medatadata_key.layout.height = "0px"
    new_medatadata_key.layout.margin = "0px"
    new_label.layout.height = "0px"
    new_label.layout.margin = "0px"

    # widgets visible when an existing key is to be updated
    existing_metadata_key = Dropdown(
        description="Key",
        layout=dict(width="98%"),
    )
    existing_label = Dropdown(
        description="Label",
        layout=dict(width="98%"),
        disabled=True,
    )

    create_new_key_checkbox = Checkbox(
        value=False,
        description="Create new key/label",
        layout=dict(width="98%"),
    )

    def _update_widget_visibility(change: dict):
        # Checkbox observer: show one widget pair, collapse the other and
        # reset the values of the pair that becomes active.
        df = self.dataset.metadata.select_dtypes(include=["bool", "category"])
        create_new_key = change["new"]
        if create_new_key:
            new_medatadata_key.disabled = False
            new_label.disabled = False
            existing_metadata_key.disabled = True
            existing_label.disabled = True

            new_medatadata_key.layout.visibility = "visible"
            new_label.layout.visibility = "visible"
            existing_metadata_key.layout.visibility = "hidden"
            existing_label.layout.visibility = "hidden"

            new_medatadata_key.layout.height = "28px"
            new_medatadata_key.layout.margin = "2px"
            new_label.layout.height = "28px"
            new_label.layout.margin = "2px"
            existing_metadata_key.layout.height = "0px"
            existing_metadata_key.layout.margin = "0px"
            existing_label.layout.height = "0px"
            existing_label.layout.margin = "0px"

            new_medatadata_key.value = ""
            new_label.value = ""
        else:
            new_medatadata_key.disabled = True
            new_label.disabled = True
            existing_metadata_key.disabled = False
            existing_label.disabled = False

            new_medatadata_key.layout.visibility = "hidden"
            new_label.layout.visibility = "hidden"
            existing_metadata_key.layout.visibility = "visible"
            existing_label.layout.visibility = "visible"

            new_medatadata_key.layout.height = "0px"
            new_medatadata_key.layout.margin = "0px"
            new_label.layout.height = "0px"
            new_label.layout.margin = "0px"
            existing_metadata_key.layout.height = "28px"
            existing_metadata_key.layout.margin = "2px"
            existing_label.layout.height = "28px"
            existing_label.layout.margin = "2px"

            # NOTE(review): this reset is assumed to belong to the "existing
            # key" branch, mirroring the reset that ends the branch above --
            # confirm against the upstream file.
            # only bool/category columns are offered as existing keys
            existing_metadata_key.options = [""] + df.columns.to_list()
            existing_metadata_key.value = ""
            existing_label.options = [""]
            existing_label.value = ""

    create_new_key_checkbox.observe(_update_widget_visibility, "value", "change")

    def _update_new_label_options_callback(change: dict):
        # Observer of the existing-key dropdown: offer the labels that the
        # chosen column can take.
        metadata_key_value = change["new"]
        if metadata_key_value is None or metadata_key_value == "":
            existing_label.disabled = True
            existing_label.options = [""]
            existing_label.value = ""
        else:
            series: pd.Series = self.dataset.metadata[metadata_key_value]
            if isinstance(series.dtype, CategoricalDtype):
                existing_label.options = [""] + series.cat.categories.to_list()
            elif is_bool_dtype(series):
                existing_label.options = ["", True, False]
            elif is_integer_dtype(series) or is_float_dtype(series):
                # NOTE(review): "" is not among these (empty) options, yet it
                # is assigned as the value below -- confirm this branch is
                # unreachable for the keys the dropdown offers.
                existing_label.options = []

            existing_label.disabled = False
            existing_label.value = ""

    existing_metadata_key.observe(
        _update_new_label_options_callback, "value", "change"
    )

    apply_button = Button(
        description="Apply", disabled=True, button_style="primary"
    )
    apply_button.on_click(self.button_click_event_publisher("dspr", "apply_label"))

    def _update_button_disabled(_):
        # "Apply" is enabled only when the key of the active mode is non-empty
        new_key = new_medatadata_key.value
        existing_key = existing_metadata_key.value

        if create_new_key_checkbox.value:
            apply_button.disabled = new_key == ""
        else:
            apply_button.disabled = existing_key == ""

    new_medatadata_key.observe(_update_button_disabled, "value", "change")
    new_label.observe(_update_button_disabled, "value", "change")
    existing_metadata_key.observe(_update_button_disabled, "value", "change")
    existing_label.observe(_update_button_disabled, "value", "change")

    container = VBox(
        [
            create_new_key_checkbox,
            new_medatadata_key,
            new_label,
            existing_metadata_key,
            existing_label,
            apply_button,
        ]
    )

    self.selection_labeling_controls_dict[
        "create_new_key_checkbox"
    ] = create_new_key_checkbox
    self.selection_labeling_controls_dict[
        "existing_metadata_key"
    ] = existing_metadata_key
    self.selection_labeling_controls_dict["existing_label"] = existing_label
    self.selection_labeling_controls_dict["new_medatadata_key"] = new_medatadata_key
    self.selection_labeling_controls_dict["new_label"] = new_label
    self.selection_labeling_controls_dict["apply_button"] = apply_button
    self.selection_labeling_controls_dict["container"] = container
    self.selection_labeling_controls_dict["accordion"] = Accordion(
        [container],
        titles=["Label"],
    )
|
|
550
|
+
|
|
551
|
+
def add_column_selection_control(self, column: str):
    """
    Create the selection control for metadata column ``column``.

    Categorical and boolean columns get a ``SelectMultiple``, integer
    columns an ``IntRangeSlider`` and float columns a ``FloatRangeSlider``
    whose bounds are widened by the dtype's machine epsilon. Nothing happens
    for "is_selected" or for a column that already has a control.

    Raises
    ------
    TypeError
        If the column dtype is none of the supported kinds.
    """
    if column == "is_selected" or column in self.selection_controls_dict:
        return

    series = self.dataset.metadata[column]
    all_missing = series.isna().all()
    dtype = series.dtype
    naming = dict(description=column, tooltip=column)

    is_categorical = isinstance(dtype, CategoricalDtype)
    if is_categorical or is_bool_dtype(dtype):
        choices = series.cat.categories if is_categorical else [True, False]
        control = SelectMultiple(options=choices, **naming)
        control.style.description_width = "0px"
        control.layout.margin = "0px"

    elif is_integer_dtype(dtype):
        # an all-missing column has no usable extrema, fall back to zero
        low, high = (0.0, 0.0) if all_missing else (series.min(), series.max())
        control = IntRangeSlider(min=low, max=high, value=(low, low), **naming)
        control.style.description_width = "0px"

    elif is_float_dtype(dtype):
        eps = np.finfo(dtype).eps
        if all_missing:
            low, high = 0.0, 0.0
        else:
            low, high = series.min() - eps, series.max() + eps

        control = FloatRangeSlider(
            min=low,
            max=high,
            step=(high - low) / 100,
            **naming,
            value=(low, low),
        )
        control.style.description_width = "0px"

    else:
        raise TypeError(f"Unsupported dtype: {series.dtype}")

    control.layout.width = "98%"
    control.observe(self._publish_selection_value_change, "value")

    self.selection_controls_list.append(control)
    self.selection_controls_dict[column] = control
    self._make_selection_controls_container()
|
|
622
|
+
|
|
623
|
+
def _make_selection_controls_container(self):
    """
    Rebuild the children of ``self.selection_controls_container``.

    Categorical controls are collapsed and listed under the category
    dropdown, range sliders are listed with an HTML caption each, and both
    groups are shown as tabs above the labeling/keep-drop actions and the
    clear-selection button.

    Raises
    ------
    RuntimeError
        If a control is neither a ``SelectMultiple`` nor a range slider.
    """
    dropdown = self.visible_category_dropdown
    select_widgets = []
    slider_widgets = []

    for widget in self.selection_controls_list:
        if isinstance(widget, SelectMultiple):
            # collapsed here; the dropdown observer expands the chosen one
            widget.layout.visibility = "hidden"
            widget.layout.height = "0px"
            widget.layout.margin = "0px"
            select_widgets.append(widget)
            continue

        if not isinstance(widget, (IntRangeSlider, FloatRangeSlider)):
            raise RuntimeError(f"Unsupported control type {type(widget)}")

        caption = HTML(f"{widget.description}", layout={"height": "20px"})
        widget.layout.height = "20px"
        slider_widgets.extend((caption, widget))

    # replace the dropdown options, keeping the previous choice when possible
    previous_choice = dropdown.value
    category_names = [widget.description for widget in select_widgets]
    dropdown.options = category_names
    if previous_choice in category_names:
        dropdown.value = previous_choice

    categorical_tab = VBox([dropdown, *select_widgets], layout={"height": "200px"})
    numeric_tab = VBox(slider_widgets, layout={"height": "200px"})
    criteria_tabs = Tab(
        [categorical_tab, numeric_tab],
        layout=dict(width="98%"),
        titles=["Categorical", "Numeric"],
    )
    criteria_box = VBox([criteria_tabs])

    actions_panel = Accordion(
        [
            self.selection_labeling_controls_dict["container"],
            self.selection_buttons_dict["keep_drop_buttons"],
        ],
        titles=["Label", "Keep/Drop"],
    )
    actions_box = VBox([actions_panel])

    self.selection_controls_container.children = (
        Accordion([criteria_box], titles=["Selection Criteria"]),
        Accordion([actions_box], titles=["Selection Actions"]),
        self.selection_buttons_dict["clear_selection"],
    )
|
|
679
|
+
|
|
680
|
+
def update_column_selection_control(self, column: str):
    """
    Refresh the existing selection control of ``column`` from the metadata.

    Categorical controls get the current categories and an empty selection,
    boolean controls an empty selection, and range sliders new bounds with
    both handles placed on the minimum. A column without a control is
    ignored. An all-missing numeric column yields the range ``(0, 0)``, the
    same fallback ``add_column_selection_control`` uses.

    Raises
    ------
    TypeError
        If the column dtype is none of the supported kinds.
    """
    if column not in self.selection_controls_dict:
        return

    control = self.selection_controls_dict[column]
    series = self.dataset.metadata[column]
    allna = series.isna().all()
    dtype = series.dtype

    def _apply_bounds(low, high, step=None):
        # Assign min first, and retry with max first when the widget rejects
        # that order (presumably because the new minimum lies above the old
        # maximum -- the original code used the same retry).
        try:
            control.min = low
            control.max = high
            if step is not None:
                control.step = step
        except TraitError:
            try:
                control.max = high
                control.min = low
                if step is not None:
                    control.step = step
            except TraitError:
                # best effort, as before, but no longer silent
                logger.warning(
                    "Could not update the range of the selection control for %r",
                    column,
                )

    if isinstance(dtype, CategoricalDtype):
        control.options = series.cat.categories
        control.value = tuple()

    elif is_bool_dtype(dtype):
        control.value = tuple()

    elif is_integer_dtype(dtype):
        if allna:
            min_value, max_value = 0.0, 0.0
        else:
            min_value, max_value = series.min(), series.max()

        _apply_bounds(min_value, max_value)
        control.value = (control.min, control.min)

    elif is_float_dtype(dtype):
        if allna:
            # min()/max() of an all-missing column are NaN; never hand NaN
            # bounds to the slider
            min_value, max_value = 0.0, 0.0
        else:
            eps = np.finfo(dtype).eps
            min_value, max_value = series.min() - eps, series.max() + eps

        span = max_value - min_value
        _apply_bounds(min_value, max_value, span / 100)
        control.value = (control.min, control.min)

    else:
        raise TypeError(f"Unsupported dtype: {series.dtype}")
|
|
732
|
+
|
|
733
|
+
def remove_column_selection_control(self, column: str):
    """
    Drop the selection control of ``column`` and rebuild the container.

    The control is removed from ``selection_controls_dict`` and from
    ``selection_controls_list`` (the list the container is built from). It
    is also removed from ``all_controls_list`` when present there. A column
    without a control is ignored.
    """
    if column not in self.selection_controls_dict:
        return

    control = self.selection_controls_dict.pop(column)

    # Controls are registered in ``selection_controls_list``; removing only
    # from ``all_controls_list`` raised ValueError and left the widget in the
    # rebuilt container. Filter by identity, in place.
    for registry in (self.selection_controls_list, self.all_controls_list):
        registry[:] = [item for item in registry if item is not control]

    self._make_selection_controls_container()
|
|
740
|
+
|
|
741
|
+
def append_to_step_history(self, step_description: str, params: dict):
    """
    Record a step and its parameters at the end of ``self.step_history``.

    ``pandas.Index`` values are stored as plain lists; every other value is
    stored unchanged.
    """
    recorded_params = {
        name: value.to_list() if isinstance(value, pd.Index) else value
        for name, value in params.items()
    }
    entry = {"step_description": step_description, "params": recorded_params}
    self.step_history.append(entry)
|
|
754
|
+
|
|
755
|
+
def save_step_history(
|
|
756
|
+
self, path: str | Path, format: Literal["pickle", "json"] | None = None
|
|
757
|
+
):
|
|
758
|
+
path = Path(path)
|
|
759
|
+
if format is None:
|
|
760
|
+
format = path.suffix[1:]
|
|
761
|
+
|
|
762
|
+
if format == "pickle":
|
|
763
|
+
import pickle
|
|
764
|
+
|
|
765
|
+
with open(path, "wb") as f:
|
|
766
|
+
pickle.dump(self.step_history, f)
|
|
767
|
+
elif format == "json":
|
|
768
|
+
import json
|
|
769
|
+
|
|
770
|
+
with open(path, "w") as f:
|
|
771
|
+
json.dump(self.step_history, f, indent=4)
|
|
772
|
+
else:
|
|
773
|
+
raise ValueError(f"Unsupported format {format}")
|
|
774
|
+
|
|
775
|
+
def load_step_history(self, path: str):
|
|
776
|
+
path = Path(path)
|
|
777
|
+
format = path.suffix[1:]
|
|
778
|
+
|
|
779
|
+
if format == "pickle":
|
|
780
|
+
import pickle
|
|
781
|
+
|
|
782
|
+
with open(path, "rb") as f:
|
|
783
|
+
self._loaded_step_history: list = pickle.load(f)
|
|
784
|
+
elif format == "json":
|
|
785
|
+
import json
|
|
786
|
+
|
|
787
|
+
with open(path, "r") as f:
|
|
788
|
+
self._loaded_step_history: list = json.load(f)
|
|
789
|
+
else:
|
|
790
|
+
raise ValueError(f"Unsupported format {format}")
|
|
791
|
+
|
|
792
|
+
def apply_step_history(self):
    """Replay the loaded step history on top of the steps already applied.

    The current ``step_history`` must be a prefix of the loaded history:
    each already-applied step is compared with its loaded counterpart and
    logged. Every remaining loaded step is then run through
    ``self.steps[step_description].run(**params)``.

    Raises:
        AssertionError: if no history is loaded, the loaded history is
            shorter than the current one, or a shared step differs.
    """
    assert self._loaded_step_history is not None, "No step history loaded"

    applied = self.step_history
    loaded = self._loaded_step_history
    n_applied = len(applied)
    n_total = len(loaded)
    assert n_total >= n_applied, "Step history mismatch"

    # Verify the common prefix before running anything new.
    for position, (done, expected) in enumerate(zip(applied, loaded), start=1):
        assert done == expected, "Step history mismatch"
        step_description = done["step_description"]
        logger.info(f"Step {position: 2d}/{n_total} already applied: {step_description}")

    pending_steps = loaded[n_applied:]
    for position, pending in enumerate(pending_steps, start=n_applied + 1):
        step_description = pending["step_description"]
        params = pending["params"]
        logger.info(f"Applying step {position: 2d}/{n_total}: {step_description}")
        self.steps[step_description].run(**params)
|
|
814
|
+
|
|
815
|
+
def print_step_history(self, with_hash: bool = False):
    """Print one line per recorded step: ``(<n>) <hash> <description>(<params>)``.

    Parameter values are abbreviated for display: lists become
    ``list(<length>)``, None becomes ``None``, ints/floats/strings are shown
    as they are, and anything else is replaced by its type. With
    ``with_hash`` set, each line carries the first 8 characters of the hash
    of the history up to and including that step.
    """
    for number, entry in enumerate(self.step_history, start=1):
        desc = entry["step_description"]

        rendered = []
        for name, val in entry["params"].items():
            if isinstance(val, list):
                val = f"list({len(val)})"
            elif val is None:
                val = "None"
            if not isinstance(val, (int, float, str)):
                val = type(val)
            rendered.append(f"{name}={val}")

        if with_hash:
            digest = self._get_step_history_hash(self.step_history[:number])
            history_hash = digest[:8] + " ..."
        else:
            history_hash = ""

        print(f"({number: 3d}) {history_hash} {desc}({', '.join(rendered)})")
|
|
834
|
+
|
|
835
|
+
def _get_step_history_hash(self, history: list[dict]):
|
|
836
|
+
history_json = json.dumps(history)
|
|
837
|
+
history_json_hash = sha256(history_json.encode()).hexdigest()
|
|
838
|
+
return history_json_hash
|
|
839
|
+
|
|
840
|
+
@property
def step_history_hash(self):
    """Hash of the full ``step_history``, as computed by ``_get_step_history_hash``."""
    compute_hash = self._get_step_history_hash
    return compute_hash(self.step_history)
|
|
843
|
+
|
|
844
|
+
@property
def selection_values(self):
    """Current value of every selection control, keyed by metadata column."""
    current = {}
    for column, control in self.selection_controls_dict.items():
        current[column] = control.value
    return current
|
|
850
|
+
|
|
851
|
+
def _publish_selection_value_change(self, change: dict):
|
|
852
|
+
owner: DescriptionWidget = change["owner"]
|
|
853
|
+
column = owner.description
|
|
854
|
+
new_value = change["new"]
|
|
855
|
+
self.broker.publish("dspr_selection_values_change", column, new_value=new_value)
|
|
856
|
+
|
|
857
|
+
def filter_rows(self, index: pd.Index | list):
|
|
858
|
+
if isinstance(index, list):
|
|
859
|
+
index = pd.Index(index)
|
|
860
|
+
self.dataset.filter_rows(index)
|
|
861
|
+
|
|
862
|
+
def apply_label(self, index: pd.Index | list, column: str, label: str):
|
|
863
|
+
if isinstance(index, list):
|
|
864
|
+
index = pd.Index(index)
|
|
865
|
+
self.dataset.apply_label(index, column, label)
|
|
866
|
+
|
|
867
|
+
def make_selectbatch_drowpdown(self, description="Select Batch"):
    """Build the batch-selection dropdown as a one-entry dict of controls.

    Returns an empty dict when ``self.batch_key`` is falsy. Otherwise the
    dict key is ``description`` lower-cased with spaces replaced by
    underscores, and the dropdown offers an empty entry (value None)
    followed by each of ``self.batch_values``; nothing is preselected.
    """
    if not self.batch_key:
        return dict()

    control_key = description.lower().replace(" ", "_")
    batch_options = {"": None}
    batch_options.update({v: v for v in self.batch_values})
    dropdown = Dropdown(
        options=batch_options,
        value=None,
        description=description,
    )
    return {control_key: dropdown}
|
|
877
|
+
|
|
878
|
+
def make_groupbybatch_checkbox(self, description="Group By Batch"):
    """Build the group-by-batch checkbox as a one-entry dict of controls.

    Returns an empty dict when ``self.batch_key`` is falsy. Otherwise the
    checkbox is stored under the fixed key "group_by_batch" and starts
    checked.
    """
    if not self.batch_key:
        return dict()

    checkbox = Checkbox(value=True, description=description)
    return {"group_by_batch": checkbox}
|
|
886
|
+
|
|
887
|
+
def dspr_selection_values_change_callback(self, column_changed: str, new_value):
    """Recompute ``dataset.selected_rows`` from the state of all selection controls.

    ``column_changed`` and ``new_value`` are not used; every control is
    re-read through ``self.selection_values``.

    * Numeric, non-boolean columns carry a ``(min, max)`` pair and count as a
      selection only when ``max > min``; rows inside the closed range match.
    * Any other column counts as a selection when its value is truthy; rows
      whose value is among the selected values match.

    Matching rows of the individual controls are intersected. The result
    written to ``dataset.selected_rows`` is None when no control holds a
    selection, otherwise a (possibly empty) index.
    """
    row_names = self.dataset.metadata.index
    selected_rows = pd.Index([])

    # Distinguishes "nothing selected anywhere" from "selection matched no rows".
    selection_attempted = False

    for column, value in self.selection_values.items():
        series = self.dataset.metadata[column]
        is_range_column = is_numeric_dtype(series) and not is_bool_dtype(series)

        mask = None
        if is_range_column:
            # range slider: value is (lower, upper)
            low, high = value
            if high > low:
                mask = (series >= low) & (series <= high)
        elif value:
            # select-multiple: value is a tuple of chosen entries
            mask = series.isin(value)

        if mask is None:
            subset = pd.Index([])
        else:
            subset = row_names[mask]
            selection_attempted = True

        if selected_rows.empty:
            # nothing accumulated yet: start from this control's rows
            selected_rows = subset
        elif not subset.empty:
            selected_rows = selected_rows.intersection(subset)
            # NOTE(review): the early exit is placed inside the intersection
            # branch; confirm this nesting against the upstream file.
            if selected_rows.empty:
                # the controls do not overlap: the selection is empty
                break

    self.dataset.selected_rows = selected_rows if selection_attempted else None
|
|
925
|
+
|
|
926
|
+
def dspr_clear_selection_click_callback(self):
    """Reset every selection control to its "nothing selected" state.

    Select-multiple widgets get an empty tuple; range sliders get
    ``(min, min)``.

    Raises:
        RuntimeError: for a control of any other type.
    """
    for control in self.selection_controls_dict.values():
        if isinstance(control, SelectMultiple):
            control.value = tuple()
            continue

        if isinstance(control, (IntRangeSlider, FloatRangeSlider)):
            control.value = control.min, control.min
            continue

        raise RuntimeError(f"Unsupported control type {type(control)}")
|
|
936
|
+
|
|
937
|
+
def dspr_apply_label_click_callback(self):
    """Run the "apply_label" step on the currently selected rows.

    Does nothing when ``dataset.selected_rows`` is None or when the chosen
    column name is the empty string. Column and label are read from the
    "new ..." widgets when the create-new-key checkbox is ticked, otherwise
    from the "existing ..." widgets.
    """
    rows_to_label = self.dataset.selected_rows
    if rows_to_label is None:
        return

    widgets = self.selection_labeling_controls_dict
    if widgets["create_new_key_checkbox"].value:
        column_widget, label_widget = "new_medatadata_key", "new_label"
    else:
        column_widget, label_widget = "existing_metadata_key", "existing_label"

    column = widgets[column_widget].value
    label = widgets[label_widget].value

    if column == "":
        return

    self.steps["apply_label"].run(index=rows_to_label, column=column, label=label)
|
|
959
|
+
|
|
960
|
+
def dspr_keep_selected_click_callback(self):
    """Run the "filter_rows" step with the selected rows; no-op without a selection."""
    if self.dataset.selected_rows is None:
        return

    rows_to_keep = self.dataset.selected_rows
    self.steps["filter_rows"].run(index=rows_to_keep)
|
|
964
|
+
|
|
965
|
+
def dspr_drop_selected_click_callback(self):
    """Run the "filter_rows" step with every row that is NOT selected.

    The rows to keep are ``dataset.row_names.difference(selected_rows)``.
    Nothing happens when there is no selection.
    """
    if self.dataset.selected_rows is None:
        return

    all_rows = self.dataset.row_names
    rows_to_keep = all_rows.difference(self.dataset.selected_rows)
    self.steps["filter_rows"].run(index=rows_to_keep)
|
|
969
|
+
|
|
970
|
+
def dplt_selected_points_change_callback(self, new_value: pd.Index):
    """Reset all selection controls; ``new_value`` itself is not used.

    Select-multiple widgets get an empty tuple; range sliders get
    ``(min, min)``.

    Raises:
        RuntimeError: for a control of any other type (the message names
            the offending column).
    """
    for column, control in self.selection_controls_dict.items():
        if isinstance(control, SelectMultiple):
            control.value = tuple()
            continue

        if isinstance(control, (IntRangeSlider, FloatRangeSlider)):
            control.value = control.min, control.min
            continue

        raise RuntimeError(
            f"Unsupported control type {type(control)} for column {column}"
        )
|
|
982
|
+
|
|
983
|
+
def dset_total_rows_change_callback(self, metadata: pd.DataFrame):
    """Refresh every column selection control; ``metadata`` itself is not used."""
    for column in self.selection_controls_dict:
        self.update_column_selection_control(column)
|
|
986
|
+
|
|
987
|
+
def dset_metadata_change_callback(self, *args, **kwargs):
    """Sync metadata-driven dropdowns with the dataset's current metadata columns.

    Three updates are made, in this order:

    1. the "existing_metadata_key" labeling dropdown lists "" plus all
       bool/category columns;
    2. dropdowns whose description starts with "group" offer "" (None)
       plus all object/category columns;
    3. dropdowns whose description ends with "axis" offer "" (None) plus
       all columns.

    Descriptions are compared lower-cased with surrounding spaces, colons
    and dots stripped. A dropdown keeps its value when it is still among
    the new options and is reset to None otherwise.
    """

    def refresh_dropdowns(matches, options):
        # One full pass over the controls for a single family of dropdowns.
        for control in self.all_controls_list:
            if not isinstance(control, Dropdown):
                continue
            description: str = control.description
            if not matches(description.lower().strip(" :.")):
                continue
            previous = control.value
            control.options = options
            control.value = previous if previous in control.options else None

    labelable = self.dataset._metadata.select_dtypes(include=["bool", "category"])
    ctrl: Dropdown = self.selection_labeling_controls_dict["existing_metadata_key"]
    ctrl.options = [""] + labelable.columns.to_list()

    groupable = self.dataset._metadata.select_dtypes(include=["object", "category"])
    group_options = {"": None, **{c: c for c in groupable.columns}}
    refresh_dropdowns(lambda text: text.startswith("group"), group_options)

    every_column = self.dataset._metadata
    axis_options = {"": None, **{c: c for c in every_column.columns}}
    refresh_dropdowns(lambda text: text.endswith("axis"), axis_options)
|
|
1020
|
+
|
|
1021
|
+
def dset_anndata_layers_change_callback(self, layers):
    """Offer the given layer names in every dropdown described as "layer".

    The description is compared lower-cased with surrounding spaces, colons
    and dots stripped. A dropdown keeps its value when it is still among
    the new options and is reset to None otherwise.
    """
    options = {name: name for name in layers}

    for control in self.all_controls_list:
        if not isinstance(control, Dropdown):
            continue

        description: str = control.description
        if description.lower().strip(" :.") != "layer":
            continue

        previous = control.value
        control.options = options
        control.value = previous if previous in control.options else None
|
|
1034
|
+
|
|
1035
|
+
def dset_data_dict_change_callback(self, *args, **kwargs):
    """Offer the keys of ``dataset.adata.obsm`` in every "use rep" dropdown.

    The description is compared lower-cased with surrounding spaces, colons
    and dots stripped. A dropdown without a value switches to "X_pca" when
    that option exists; otherwise it keeps its value if still offered and
    is reset to None if not.
    """
    options = {key: key for key in self.dataset.adata.obsm.keys()}

    for control in self.all_controls_list:
        if not isinstance(control, Dropdown):
            continue

        description: str = control.description
        if description.lower().strip(" :.") != "use rep":
            continue

        previous = control.value
        control.options = options

        if previous is None and "X_pca" in control.options:
            chosen = "X_pca"
        elif previous not in control.options:
            chosen = None
        else:
            chosen = previous
        control.value = chosen
|
|
1050
|
+
|
|
1051
|
+
def dset_total_vars_change_callback(self, *args, **kwargs):
    """Offer ``dataset.adata.var_names`` in every "gene" dropdown or combobox.

    The description is compared lower-cased with surrounding spaces, colons
    and dots stripped. A control keeps its value when it is still among the
    new options and is reset to None otherwise.
    """
    options = {name: name for name in self.dataset.adata.var_names}

    for control in self.all_controls_list:
        if not isinstance(control, (Dropdown, Combobox)):
            continue

        description: str = control.description
        if description.lower().strip(" :.") != "gene":
            continue

        previous = control.value
        # NOTE(review): a dict for ``options`` and None for ``value`` are
        # Dropdown conventions; presumably a Combobox expects a sequence of
        # strings and a string value - confirm against ipywidgets.
        control.options = options
        control.value = previous if previous in control.options else None
|