feedback-forensics 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feedback_forensics/app/__init__.py +0 -0
- feedback_forensics/app/callbacks.py +683 -0
- feedback_forensics/app/constants.py +181 -0
- feedback_forensics/app/data_loader.py +102 -0
- feedback_forensics/app/datasets.py +226 -0
- feedback_forensics/app/info_texts.py +35 -0
- feedback_forensics/app/interface.py +334 -0
- feedback_forensics/app/loader.py +118 -0
- feedback_forensics/app/main.py +61 -0
- feedback_forensics/app/metrics.py +293 -0
- feedback_forensics/app/metrics_test.py +143 -0
- feedback_forensics/app/plotting/__init__.py +1 -0
- feedback_forensics/app/plotting/main.py +350 -0
- feedback_forensics/app/plotting/metrics_table.py +68 -0
- feedback_forensics/app/plotting/multiple.py +132 -0
- feedback_forensics/app/plotting/single.py +91 -0
- feedback_forensics/app/plotting/utils.py +67 -0
- feedback_forensics/app/plotting_v2/__init__.py +1 -0
- feedback_forensics/app/plotting_v2/constants.py +52 -0
- feedback_forensics/app/plotting_v2/main.py +33 -0
- feedback_forensics/app/plotting_v2/table.py +604 -0
- feedback_forensics/app/styling.py +25 -0
- feedback_forensics/app/url_parser.py +80 -0
- feedback_forensics/app/utils.py +35 -0
- feedback_forensics/assets/feedback_forensics_logo.png +0 -0
- feedback_forensics-0.1.2.dist-info/METADATA +132 -0
- feedback_forensics-0.1.2.dist-info/RECORD +31 -0
- feedback_forensics-0.1.2.dist-info/WHEEL +4 -0
- feedback_forensics-0.1.2.dist-info/entry_points.txt +2 -0
- feedback_forensics-0.1.2.dist-info/licenses/LICENSE +201 -0
|
File without changes
|
|
@@ -0,0 +1,683 @@
|
|
|
1
|
+
"""Call backs to be used in the app."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import pathlib
|
|
5
|
+
|
|
6
|
+
import gradio as gr
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from loguru import logger
|
|
9
|
+
|
|
10
|
+
from feedback_forensics.app.loader import get_votes_df
|
|
11
|
+
import feedback_forensics.app.plotting
|
|
12
|
+
import feedback_forensics.app.plotting_v2
|
|
13
|
+
from feedback_forensics.app.utils import get_csv_columns
|
|
14
|
+
from feedback_forensics.app.constants import NONE_SELECTED_VALUE
|
|
15
|
+
from feedback_forensics.app.datasets import (
|
|
16
|
+
get_config_from_name,
|
|
17
|
+
get_dataset_from_name,
|
|
18
|
+
BuiltinDataset,
|
|
19
|
+
Config,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from feedback_forensics.app.url_parser import (
|
|
23
|
+
get_config_from_query_params,
|
|
24
|
+
get_url_with_query_params,
|
|
25
|
+
get_list_member_from_url_string,
|
|
26
|
+
transfer_url_list_to_nonurl_list,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def split_votes_dfs(
|
|
31
|
+
votes_dfs: dict[str, pd.DataFrame],
|
|
32
|
+
split_col: str,
|
|
33
|
+
selected_vals: list[str] | None = None,
|
|
34
|
+
) -> dict[str, pd.DataFrame]:
|
|
35
|
+
"""Split votes_dfs by split_col.
|
|
36
|
+
|
|
37
|
+
First assert that only one votes_df in vote_dfs, and split that votes_df into multiple, based on the unique values of split_col.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
votes_dfs: Dictionary mapping dataset names to DataFrames
|
|
41
|
+
split_col: Column to split on
|
|
42
|
+
selected_vals: Optional list of values to filter split_col by. If None, use all values.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Dictionary mapping split values to filtered DataFrames
|
|
46
|
+
"""
|
|
47
|
+
assert len(votes_dfs) == 1, "Only one votes_df is supported for now"
|
|
48
|
+
votes_df = list(votes_dfs.values())[0]
|
|
49
|
+
split_dfs = {}
|
|
50
|
+
votes_df[split_col] = votes_df[split_col].astype(str)
|
|
51
|
+
|
|
52
|
+
if selected_vals:
|
|
53
|
+
# Filter to only selected values before grouping
|
|
54
|
+
votes_df = votes_df[votes_df[split_col].isin(selected_vals)]
|
|
55
|
+
|
|
56
|
+
grouped_df = votes_df.groupby(split_col)
|
|
57
|
+
for name, group in grouped_df:
|
|
58
|
+
split_dfs[name] = group
|
|
59
|
+
|
|
60
|
+
return split_dfs
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def generate_callbacks(inp: dict, state: dict, out: dict) -> dict:
    """Generate callbacks for the ICAI app.

    Args:
        inp: Mapping of input-component names to Gradio components.
        state: Mapping of state names to Gradio state components.
        out: Mapping of output-component names to Gradio components.

    Returns:
        Dict mapping callback names to the closures defined in this scope,
        for wiring up in ``attach_callbacks``.
    """
|
|
65
|
+
|
|
66
|
+
    def load_data(
        data: dict,
        *,
        reset_filters_if_new: bool = True,
        used_from_button: bool = False,
        filterable_columns: list[str] | None = None,
        dataset_name: str = None,
        dataset_description: str = None,
    ) -> dict:
        """Load data with dictionary inputs instead of individual arguments.

        Args:
            data: Gradio event data mapping input/state components to their
                current values.
            reset_filters_if_new: Not referenced in this body; kept for
                caller compatibility.
            used_from_button: Not referenced in this body; kept for caller
                compatibility.
            filterable_columns: Not referenced in this body; kept for caller
                compatibility.
            dataset_name: Not referenced in this body; kept for caller
                compatibility.
            dataset_description: Not referenced in this body; kept for
                caller compatibility.

        Returns:
            Dict mapping output components (plot, cache state, share link)
            to their new values.
        """
        datasets = data[inp["active_datasets_dropdown"]]
        cache = data[state["cache"]]
        split_col = data[inp["split_col_dropdown"]]
        selected_vals = data[inp["split_col_selected_vals_dropdown"]]
        gr.Info(f"Loading data for {datasets}...", duration=3)

        # Load one votes DataFrame per selected dataset.
        votes_dfs = {}
        for dataset in datasets:
            dataset_config = data[state["avail_datasets"]][dataset]
            path = dataset_config.path
            # check results dir inside the path
            results_dir = pathlib.Path(path) / "results"
            votes_df: pd.DataFrame = get_votes_df(results_dir, cache=cache)

            votes_dfs[dataset] = votes_df

        # parsing of potential url params
        if split_col != NONE_SELECTED_VALUE and split_col is not None:

            if len(votes_dfs) > 1:
                raise gr.Error(
                    "Only one votes_df is supported for now when splitting by column"
                )
            # Split on all values when nothing (or everything) is selected,
            # otherwise only on the selected values.
            # NOTE(review): Gradio Dropdown `.choices` may hold
            # (label, value) tuples, in which case the set comparison below
            # with plain values never matches — confirm intended behavior.
            if (
                selected_vals is None
                or selected_vals == []
                or set(selected_vals)
                == set(inp["split_col_selected_vals_dropdown"].choices)
            ):
                votes_dfs = split_votes_dfs(votes_dfs, split_col)
            else:
                votes_dfs = split_votes_dfs(votes_dfs, split_col, selected_vals)

        fig = feedback_forensics.app.plotting_v2.generate_plot(
            votes_df_dict=votes_dfs,
        )

        plot = gr.Plot(fig)

        return {
            out["plot"]: plot,
            state["cache"]: cache,
            # Shareable URL reproducing the current dataset/split selection.
            out["share_link"]: get_url_with_query_params(
                datasets=datasets,
                col=data[inp["split_col_dropdown"]],
                col_vals=data[inp["split_col_selected_vals_dropdown"]],
                base_url=data[state["app_url"]],
            ),
        }
|
|
136
|
+
|
|
137
|
+
def _get_columns_in_dataset(dataset_name, data) -> str:
|
|
138
|
+
dataset_config = data[state["avail_datasets"]][dataset_name]
|
|
139
|
+
avail_cols = get_csv_columns(
|
|
140
|
+
dataset_config.path / "results" / "000_train_data.csv",
|
|
141
|
+
)
|
|
142
|
+
if dataset_config.filterable_columns:
|
|
143
|
+
avail_cols = [
|
|
144
|
+
col for col in avail_cols if col in dataset_config.filterable_columns
|
|
145
|
+
]
|
|
146
|
+
return avail_cols
|
|
147
|
+
|
|
148
|
+
def update_col_split_dropdowns(data: dict):
|
|
149
|
+
"""Update column and split value dropdowns."""
|
|
150
|
+
|
|
151
|
+
datasets = data[inp["active_datasets_dropdown"]]
|
|
152
|
+
|
|
153
|
+
if len(datasets) == 1:
|
|
154
|
+
menus_inactive = False
|
|
155
|
+
else:
|
|
156
|
+
menus_inactive = True
|
|
157
|
+
|
|
158
|
+
if menus_inactive:
|
|
159
|
+
return {
|
|
160
|
+
inp["split_col_dropdown"]: gr.Dropdown(
|
|
161
|
+
choices=[NONE_SELECTED_VALUE],
|
|
162
|
+
value=NONE_SELECTED_VALUE,
|
|
163
|
+
interactive=False,
|
|
164
|
+
visible=False,
|
|
165
|
+
),
|
|
166
|
+
inp["split_col_selected_vals_dropdown"]: gr.Dropdown(
|
|
167
|
+
choices=[],
|
|
168
|
+
value=None,
|
|
169
|
+
interactive=False,
|
|
170
|
+
visible=False,
|
|
171
|
+
),
|
|
172
|
+
inp["split_col_non_available_md"]: gr.Markdown(
|
|
173
|
+
visible=True,
|
|
174
|
+
),
|
|
175
|
+
}
|
|
176
|
+
else:
|
|
177
|
+
split_col = data[inp["split_col_dropdown"]]
|
|
178
|
+
|
|
179
|
+
avail_cols = _get_columns_in_dataset(datasets[0], data)
|
|
180
|
+
|
|
181
|
+
if split_col not in avail_cols:
|
|
182
|
+
split_col = NONE_SELECTED_VALUE
|
|
183
|
+
|
|
184
|
+
tuple_avail_cols = [(col, col) for col in avail_cols]
|
|
185
|
+
|
|
186
|
+
return {
|
|
187
|
+
inp["split_col_dropdown"]: gr.Dropdown(
|
|
188
|
+
choices=[
|
|
189
|
+
(
|
|
190
|
+
"(No grouping applied, click to select column)",
|
|
191
|
+
NONE_SELECTED_VALUE,
|
|
192
|
+
)
|
|
193
|
+
]
|
|
194
|
+
+ tuple_avail_cols,
|
|
195
|
+
value=split_col,
|
|
196
|
+
interactive=True,
|
|
197
|
+
visible=True,
|
|
198
|
+
),
|
|
199
|
+
inp["split_col_selected_vals_dropdown"]: gr.Dropdown(
|
|
200
|
+
choices=[],
|
|
201
|
+
value=None,
|
|
202
|
+
interactive=False,
|
|
203
|
+
visible=False,
|
|
204
|
+
),
|
|
205
|
+
inp["split_col_non_available_md"]: gr.Markdown(
|
|
206
|
+
visible=False,
|
|
207
|
+
),
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
def _get_avail_col_values(col_name, data):
|
|
211
|
+
dataset = data[inp["active_datasets_dropdown"]][0]
|
|
212
|
+
dataset_config = data[state["avail_datasets"]][dataset]
|
|
213
|
+
results_dir = pathlib.Path(dataset_config.path) / "results"
|
|
214
|
+
cache = data[state["cache"]]
|
|
215
|
+
votes_df: pd.DataFrame = get_votes_df(results_dir, cache=cache)
|
|
216
|
+
votes_df = votes_df.groupby("comparison_id").first()
|
|
217
|
+
value_counts = votes_df[col_name].value_counts()
|
|
218
|
+
avail_values = [
|
|
219
|
+
(count, f"{val} ({count})", str(val)) for val, count in value_counts.items()
|
|
220
|
+
]
|
|
221
|
+
# sort by count descending
|
|
222
|
+
avail_values = sorted(avail_values, key=lambda x: x[0], reverse=True)
|
|
223
|
+
# remove count from avail_values
|
|
224
|
+
avail_values = [(val[1], val[2]) for val in avail_values]
|
|
225
|
+
return avail_values
|
|
226
|
+
|
|
227
|
+
def update_col_split_value_dropdown(data: dict):
|
|
228
|
+
"""Update column split value dropdown."""
|
|
229
|
+
split_col = data[inp["split_col_dropdown"]]
|
|
230
|
+
|
|
231
|
+
if split_col != NONE_SELECTED_VALUE:
|
|
232
|
+
avail_values = _get_avail_col_values(split_col, data)
|
|
233
|
+
return {
|
|
234
|
+
inp["split_col_selected_vals_dropdown"]: gr.Dropdown(
|
|
235
|
+
choices=avail_values,
|
|
236
|
+
value=[val[1] for val in avail_values[: min(len(avail_values), 3)]],
|
|
237
|
+
multiselect=True,
|
|
238
|
+
interactive=True,
|
|
239
|
+
visible=True,
|
|
240
|
+
),
|
|
241
|
+
}
|
|
242
|
+
else:
|
|
243
|
+
return {
|
|
244
|
+
inp["split_col_selected_vals_dropdown"]: gr.Dropdown(
|
|
245
|
+
choices=[],
|
|
246
|
+
value=None,
|
|
247
|
+
interactive=False,
|
|
248
|
+
visible=False,
|
|
249
|
+
),
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
def update_dataset_buttons(active_dataset: str) -> dict:
|
|
253
|
+
"""Update dataset button variants based on active dataset."""
|
|
254
|
+
updates = {}
|
|
255
|
+
for name, btn in inp["dataset_btns"].items():
|
|
256
|
+
updates[btn] = gr.Button(
|
|
257
|
+
variant="primary" if name == active_dataset else "secondary"
|
|
258
|
+
)
|
|
259
|
+
return updates
|
|
260
|
+
|
|
261
|
+
    def update_advanced_config_and_load_data(data: dict):
        """Switch active dataset and/or config preset, then reload data.

        Triggered by the per-dataset buttons and by the simple-config
        dropdown. Determines which dataset was requested, resolves the
        advanced config to apply, refreshes the relevant widgets, and
        delegates the actual reload to ``load_data``.

        Args:
            data: Gradio event data mapping input/state components to their
                current values; when triggered by a dataset button, that
                button is additionally present as a key.

        Returns:
            Dict mapping components to their updated values, merged with the
            outputs of ``load_data``.
        """
        prior_state_datapath = data[state["datapath"]]
        selected_adv_config = data[inp["simple_config_dropdown"]]
        cache = data[state["cache"]]

        # get dataset name from button clicked
        # other buttons are not in data dict
        dataset_name = None
        for button in inp["dataset_btns"].values():
            if button in data:
                dataset_name = data[button]

        # No button in the event data: keep the currently active dataset.
        if dataset_name is None:
            dataset_name = data[state["active_dataset"]]

        # load dataset specific setup
        dataset_config: BuiltinDataset = get_dataset_from_name(dataset_name)

        # True when the user switched to a different dataset directory.
        new_path = True if dataset_config.path != prior_state_datapath else False

        if not dataset_config.options:
            simple_config_avail = False
        else:
            simple_config_avail = True

        # load selected advanced config
        # On a dataset switch, default to the dataset's first config option.
        # NOTE(review): the inner conditional is redundant — options is
        # already known truthy in this branch.
        if new_path:
            if dataset_config.options:
                selected_adv_config = (
                    dataset_config.options[0].name
                    if dataset_config.options
                    else NONE_SELECTED_VALUE
                )
            else:
                selected_adv_config = NONE_SELECTED_VALUE

        adv_config: Config = get_config_from_name(
            selected_adv_config, dataset_config.options
        )

        # Update button variants
        button_updates = update_dataset_buttons(dataset_name)

        return {
            **button_updates,
            # Placeholder text is shown only when no config presets exist.
            inp["simple_config_dropdown_placeholder"]: gr.Text(
                visible=not simple_config_avail
            ),
            inp["simple_config_dropdown"]: gr.Dropdown(
                choices=(
                    [config.name for config in dataset_config.options]
                    + [NONE_SELECTED_VALUE]
                    if dataset_config.options
                    else [NONE_SELECTED_VALUE]
                ),
                value=selected_adv_config,
                interactive=True,
                visible=simple_config_avail,
            ),
            state["active_dataset"]: dataset_name,  # Update active dataset state
            inp["datapath"]: dataset_config.path,
            state["datapath"]: dataset_config.path,
            state["dataset_name"]: dataset_name,
            # NOTE(review): load_data reads keys (active_datasets_dropdown,
            # split_col_dropdown, ...) that are not present in the dict
            # constructed below — confirm this call path is exercised.
            **load_data(
                {
                    inp["datapath"]: dataset_config.path,
                    state["datapath"]: prior_state_datapath,
                    inp[
                        "show_individual_prefs_dropdown"
                    ]: adv_config.show_individual_prefs,
                    inp["pref_order_dropdown"]: adv_config.pref_order,
                    inp["plot_col_name_dropdown"]: adv_config.plot_col_name,
                    inp["plot_col_value_dropdown"]: adv_config.plot_col_values,
                    inp["filter_col_dropdown"]: adv_config.filter_col,
                    inp["filter_value_dropdown"]: adv_config.filter_value,
                    inp["filter_col_dropdown_2"]: adv_config.filter_col_2,
                    inp["filter_value_dropdown_2"]: adv_config.filter_value_2,
                    inp["metrics_dropdown"]: adv_config.metrics,
                    state["cache"]: cache,
                },
                reset_filters_if_new=False,
                used_from_button=True,
                filterable_columns=dataset_config.filterable_columns,
                dataset_name=dataset_config.name,
                dataset_description=dataset_config.description,
            ),
            inp["filter_value_dropdown"]: gr.Dropdown(
                choices=[adv_config.filter_value],
                value=adv_config.filter_value,
                interactive=True,
            ),
            inp["filter_value_dropdown_2"]: gr.Dropdown(
                choices=[adv_config.filter_value_2],
                value=adv_config.filter_value_2,
                interactive=True,
            ),
            inp["show_individual_prefs_dropdown"]: gr.Dropdown(
                value=adv_config.show_individual_prefs,
                interactive=True,
            ),
            inp["pref_order_dropdown"]: gr.Dropdown(
                value=adv_config.pref_order,
                interactive=True,
            ),
            inp["metrics_dropdown"]: gr.Dropdown(
                value=adv_config.metrics,
                interactive=True,
            ),
        }
|
|
371
|
+
|
|
372
|
+
    def set_filter_val_dropdown(data: dict):
        """Populate a filter-value dropdown for the newly chosen column.

        Args:
            data: Expected to contain exactly two entries: the unfiltered
                DataFrame state and the dropdown component that triggered
                the event.

        Returns:
            A ``gr.Dropdown`` update whose choices are the chosen column's
            unique values (or only the none-sentinel if no column chosen).
        """
        # Pop the DataFrame first; the remaining entry (retrieved via
        # popitem, relying on dict insertion order) is the triggering
        # dropdown's current value, i.e. the selected column name.
        votes_df = data.pop(state["unfiltered_df"])
        column = data.popitem()[1]

        # The sentinel doubles as a dropdown choice, so a real column with
        # that name would be indistinguishable from "nothing selected".
        if NONE_SELECTED_VALUE in votes_df.columns:
            raise gr.Error(
                f"Column '{NONE_SELECTED_VALUE}' is in the "
                "dataframe. This is currently not "
                "supported."
            )
        if column == NONE_SELECTED_VALUE:
            return gr.Dropdown(
                choices=[NONE_SELECTED_VALUE],
                value=NONE_SELECTED_VALUE,
                interactive=True,
            )
        else:
            avail_values = votes_df[column].unique().tolist()
            return gr.Dropdown(
                choices=[NONE_SELECTED_VALUE] + avail_values,
                value=NONE_SELECTED_VALUE,
                interactive=True,
            )
|
|
396
|
+
|
|
397
|
+
def load_from_query_params(data: dict, request: gr.Request):
|
|
398
|
+
"""Load data from query params."""
|
|
399
|
+
config = get_config_from_query_params(request)
|
|
400
|
+
app_url = request.headers["origin"]
|
|
401
|
+
return_dict = {
|
|
402
|
+
state["app_url"]: app_url,
|
|
403
|
+
}
|
|
404
|
+
data[state["app_url"]] = app_url
|
|
405
|
+
if "datasets" in config:
|
|
406
|
+
data[inp["active_datasets_dropdown"]] = config["datasets"]
|
|
407
|
+
return_dict[inp["active_datasets_dropdown"]] = gr.Dropdown(
|
|
408
|
+
value=config["datasets"],
|
|
409
|
+
)
|
|
410
|
+
if "col" not in config:
|
|
411
|
+
# update split col dropdowns even if no column is selected
|
|
412
|
+
split_col_interface_dict = update_col_split_dropdowns(data)
|
|
413
|
+
return_dict = {
|
|
414
|
+
**return_dict,
|
|
415
|
+
**split_col_interface_dict,
|
|
416
|
+
}
|
|
417
|
+
return_dict = {
|
|
418
|
+
**return_dict,
|
|
419
|
+
**update_col_split_value_dropdown(data),
|
|
420
|
+
}
|
|
421
|
+
else:
|
|
422
|
+
# parse out column and value params from url
|
|
423
|
+
if "datasets" in config and len(config["datasets"]) > 1:
|
|
424
|
+
gr.Warning(
|
|
425
|
+
f"URL problem: only one dataset is supported when splitting by column. Requested {len(config['datasets'])} datasets in URL ({config['datasets']}), and requested splitting by column {config['col']}.",
|
|
426
|
+
duration=15,
|
|
427
|
+
)
|
|
428
|
+
split_col = None
|
|
429
|
+
else:
|
|
430
|
+
url_split_col = config["col"]
|
|
431
|
+
|
|
432
|
+
# adapt split col to match available columns in dataset
|
|
433
|
+
avail_cols = _get_columns_in_dataset(config["datasets"][0], data)
|
|
434
|
+
split_col = get_list_member_from_url_string(
|
|
435
|
+
url_string=url_split_col, list_members=avail_cols
|
|
436
|
+
)
|
|
437
|
+
|
|
438
|
+
if split_col is None:
|
|
439
|
+
gr.Warning(
|
|
440
|
+
f"URL problem: column '{url_split_col}' not found in dataset '{config['datasets'][0]}' (available columns: {avail_cols}).",
|
|
441
|
+
duration=15,
|
|
442
|
+
)
|
|
443
|
+
data[inp["split_col_dropdown"]] = NONE_SELECTED_VALUE
|
|
444
|
+
else:
|
|
445
|
+
data[inp["split_col_dropdown"]] = split_col
|
|
446
|
+
|
|
447
|
+
split_col_interface_dict = update_col_split_dropdowns(data)
|
|
448
|
+
return_dict = {
|
|
449
|
+
**return_dict,
|
|
450
|
+
**split_col_interface_dict,
|
|
451
|
+
}
|
|
452
|
+
return_dict = {
|
|
453
|
+
**return_dict,
|
|
454
|
+
**update_col_split_value_dropdown(data),
|
|
455
|
+
}
|
|
456
|
+
if (
|
|
457
|
+
"col_vals" in config
|
|
458
|
+
and split_col is not None
|
|
459
|
+
and split_col != NONE_SELECTED_VALUE
|
|
460
|
+
):
|
|
461
|
+
avail_values = _get_avail_col_values(split_col, data)
|
|
462
|
+
init_selected_vals = config["col_vals"]
|
|
463
|
+
selected_vals = transfer_url_list_to_nonurl_list(
|
|
464
|
+
url_list=init_selected_vals,
|
|
465
|
+
nonurl_list=[val[1] for val in avail_values],
|
|
466
|
+
)
|
|
467
|
+
if len(selected_vals) != len(init_selected_vals):
|
|
468
|
+
gr.Warning(
|
|
469
|
+
f"URL problem: not all values for column {split_col} in URL ({init_selected_vals}) could be read succesfully. Requested values: {init_selected_vals}, retrieved values: {selected_vals}.",
|
|
470
|
+
duration=15,
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
data[inp["split_col_selected_vals_dropdown"]] = selected_vals
|
|
474
|
+
return_dict[inp["split_col_selected_vals_dropdown"]] = gr.Dropdown(
|
|
475
|
+
choices=avail_values,
|
|
476
|
+
value=selected_vals,
|
|
477
|
+
interactive=True,
|
|
478
|
+
visible=True,
|
|
479
|
+
)
|
|
480
|
+
return_dict = {**return_dict, **load_data(data)}
|
|
481
|
+
return return_dict
|
|
482
|
+
|
|
483
|
+
    # Expose the closures by name so attach_callbacks can wire them up.
    return {
        "load_data": load_data,
        "load_from_query_params": load_from_query_params,
        "set_filter_val_dropdown": set_filter_val_dropdown,
        "update_advanced_config_and_load_data": update_advanced_config_and_load_data,
        "update_col_split_dropdowns": update_col_split_dropdowns,
        "update_col_split_value_dropdown": update_col_split_value_dropdown,
    }
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def create_dataset_info(
|
|
494
|
+
unfiltered_df: pd.DataFrame,
|
|
495
|
+
filtered_df: pd.DataFrame,
|
|
496
|
+
dataset_name: str | None = None,
|
|
497
|
+
dataset_path: str | None = None,
|
|
498
|
+
dataset_description: str | None = None,
|
|
499
|
+
) -> str:
|
|
500
|
+
"""Create dataset info markdown string.
|
|
501
|
+
|
|
502
|
+
Args:
|
|
503
|
+
df: DataFrame containing the dataset
|
|
504
|
+
dataset_name: Name of the dataset
|
|
505
|
+
dataset_description: Description of the dataset
|
|
506
|
+
|
|
507
|
+
Returns:
|
|
508
|
+
str: Markdown formatted dataset info
|
|
509
|
+
"""
|
|
510
|
+
if unfiltered_df.empty:
|
|
511
|
+
return "*No dataset loaded*"
|
|
512
|
+
|
|
513
|
+
if dataset_name is None:
|
|
514
|
+
dataset_name = "N/A"
|
|
515
|
+
if dataset_description is None:
|
|
516
|
+
dataset_description = "N/A"
|
|
517
|
+
if dataset_path is None:
|
|
518
|
+
dataset_path = "N/A"
|
|
519
|
+
|
|
520
|
+
metrics = {}
|
|
521
|
+
|
|
522
|
+
for name, df in [("Unfiltered", unfiltered_df), ("Filtered", filtered_df)]:
|
|
523
|
+
metrics[name] = {}
|
|
524
|
+
metrics[name]["num_comparisons"] = df["comparison_id"].nunique()
|
|
525
|
+
metrics[name]["num_principles"] = df["principle"].nunique()
|
|
526
|
+
metrics[name]["num_total_votes"] = len(df)
|
|
527
|
+
|
|
528
|
+
info = f"""
|
|
529
|
+
**Name**: {dataset_name}
|
|
530
|
+
|
|
531
|
+
**Path**: {dataset_path}
|
|
532
|
+
|
|
533
|
+
**Description**: {dataset_description}
|
|
534
|
+
|
|
535
|
+
**Metrics:**
|
|
536
|
+
- *Total pairwise comparisons*: {metrics["Unfiltered"]["num_comparisons"]:,} (shown: {metrics["Filtered"]["num_comparisons"]:,})
|
|
537
|
+
- *Total tested principles*: {metrics["Unfiltered"]["num_principles"]:,} (shown: {metrics["Filtered"]["num_principles"]:,})
|
|
538
|
+
- *Total votes (comparisons x principles)*: {metrics["Unfiltered"]["num_total_votes"]:,} (shown: {metrics["Filtered"]["num_total_votes"]:,})
|
|
539
|
+
"""
|
|
540
|
+
|
|
541
|
+
return info
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def attach_callbacks(
    inp: dict, state: dict, out: dict, callbacks: dict, demo: gr.Blocks
) -> None:
    """Attach callbacks using dictionary inputs.

    Wires the closures produced by ``generate_callbacks`` to the Gradio
    components in ``inp``/``state``/``out`` and registers the app-start
    callback on ``demo``.
    """

    # Every callback receives the same full set of inputs; each reads only
    # what it needs from the resulting data dict.
    all_inputs = {
        inp["active_datasets_dropdown"],
        state["avail_datasets"],
        inp["split_col_dropdown"],
        inp["split_col_selected_vals_dropdown"],
        inp["datapath"],
        state["datapath"],
        state["dataset_name"],
        state["active_dataset"],
        state["app_url"],
        inp["show_individual_prefs_dropdown"],
        inp["pref_order_dropdown"],
        inp["plot_col_name_dropdown"],
        inp["plot_col_value_dropdown"],
        inp["filter_col_dropdown"],
        inp["filter_value_dropdown"],
        inp["filter_col_dropdown_2"],
        inp["filter_value_dropdown_2"],
        inp["metrics_dropdown"],
        inp["simple_config_dropdown"],
        state["cache"],
    }

    # Components refreshed when the dataset selection changes.
    dataset_selection_outputs = [
        inp["split_col_dropdown"],
        inp["split_col_selected_vals_dropdown"],
        inp["split_col_non_available_md"],
        inp["load_btn"],
    ]

    # Components refreshed after a (re)load of the data.
    load_data_outputs = [
        inp["split_col_dropdown"],
        inp["split_col_selected_vals_dropdown"],
        inp["split_col_non_available_md"],
        inp["plot_col_name_dropdown"],
        inp["plot_col_value_dropdown"],
        inp["filter_col_dropdown"],
        inp["filter_col_dropdown_2"],
        out["share_link"],
        out["plot"],
        state["df"],
        state["unfiltered_df"],
        state["datapath"],
        state["active_dataset"],
        state["dataset_name"],
        state["cache"],
        inp["datapath"],
        inp["dataset_info"],
        inp["load_btn"],
    ] + list(inp["dataset_btns"].values())

    # reload data when load button is clicked or view config is changed
    inp["load_btn"].click(
        callbacks["load_data"],
        inputs=all_inputs,
        outputs=load_data_outputs,
    )

    for config_value_dropdown in [
        inp["pref_order_dropdown"],
        inp["show_individual_prefs_dropdown"],
        inp["plot_col_value_dropdown"],
        inp["filter_value_dropdown"],
        inp["filter_value_dropdown_2"],
        inp["metrics_dropdown"],
    ]:
        config_value_dropdown.input(
            callbacks["load_data"],
            inputs=all_inputs,
            outputs=load_data_outputs,
        )

    update_load_data_outputs = (
        load_data_outputs
        + [
            inp["simple_config_dropdown"],
            inp["simple_config_dropdown_placeholder"],
            inp["plot_col_value_dropdown"],
            inp["filter_value_dropdown"],
            inp["filter_value_dropdown_2"],
            inp["show_individual_prefs_dropdown"],
            inp["pref_order_dropdown"],
            inp["metrics_dropdown"],
            state["active_dataset"],  # Add active dataset state
        ]
        + list(inp["dataset_btns"].values())
    )  # Add all dataset buttons as outputs

    inp["active_datasets_dropdown"].input(
        callbacks["update_col_split_dropdowns"],
        inputs=all_inputs,
        outputs=dataset_selection_outputs,
    )
    inp["split_col_dropdown"].input(
        callbacks["update_col_split_value_dropdown"],
        inputs=all_inputs,
        outputs=dataset_selection_outputs,
    )

    # TODO: remove old dataset selection panel (including from callbacks etc.)
    for dataset_button in inp["dataset_btns"].values():
        # The clicked button is added to the inputs so the callback can
        # identify which dataset was requested.
        dataset_button.click(
            callbacks["update_advanced_config_and_load_data"],
            inputs=all_inputs.union({dataset_button}),
            outputs=update_load_data_outputs,
        )

    inp["simple_config_dropdown"].input(
        callbacks["update_advanced_config_and_load_data"],
        inputs=all_inputs,
        outputs=update_load_data_outputs,
    )

    # update filter value dropdowns when
    # corresponding filter column dropdown is changed
    for dropdown, output in [
        (inp["plot_col_name_dropdown"], inp["plot_col_value_dropdown"]),
        (inp["filter_col_dropdown"], inp["filter_value_dropdown"]),
        (inp["filter_col_dropdown_2"], inp["filter_value_dropdown_2"]),
    ]:
        dropdown.input(
            callbacks["set_filter_val_dropdown"],
            inputs={state["unfiltered_df"], dropdown},
            outputs=[output],
        )

    # finally add callbacks that run on start of app
    demo.load(
        callbacks["load_from_query_params"],
        inputs=all_inputs,
        outputs=load_data_outputs
        + [inp["active_datasets_dropdown"]]
        + [state["app_url"]],
        trigger_mode="always_last",
    )
|