scanpath-studio 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scanpath_studio/__init__.py +13 -0
- scanpath_studio/__main__.py +18 -0
- scanpath_studio/annotations.py +320 -0
- scanpath_studio/app.py +752 -0
- scanpath_studio/constants.py +83 -0
- scanpath_studio/controls.py +467 -0
- scanpath_studio/data.py +961 -0
- scanpath_studio/export.py +510 -0
- scanpath_studio/measures.py +505 -0
- scanpath_studio/onestop_shard.py +139 -0
- scanpath_studio/plots.py +2186 -0
- scanpath_studio/sample_data/fixations.csv +3210 -0
- scanpath_studio/sample_data/fixations.parquet +0 -0
- scanpath_studio/sample_data/ia.csv +3923 -0
- scanpath_studio/sample_data/ia.parquet +0 -0
- scanpath_studio/sample_data/raw_gaze.csv +2234 -0
- scanpath_studio/sample_data/raw_gaze.parquet +0 -0
- scanpath_studio/styles.py +72 -0
- scanpath_studio/synthetic.py +133 -0
- scanpath_studio/tabs.py +1916 -0
- scanpath_studio/update_sample_data.py +500 -0
- scanpath_studio/utils.py +556 -0
- scanpath_studio-0.14.0.dist-info/METADATA +229 -0
- scanpath_studio-0.14.0.dist-info/RECORD +28 -0
- scanpath_studio-0.14.0.dist-info/WHEEL +5 -0
- scanpath_studio-0.14.0.dist-info/entry_points.txt +2 -0
- scanpath_studio-0.14.0.dist-info/licenses/LICENSE +21 -0
- scanpath_studio-0.14.0.dist-info/top_level.txt +1 -0
scanpath_studio/app.py
ADDED
|
@@ -0,0 +1,752 @@
|
|
|
1
|
+
"""Scanpath Studio Streamlit app.
|
|
2
|
+
|
|
3
|
+
This is the main entry point for the Streamlit application that visualizes
|
|
4
|
+
eye-tracking scanpaths over text.
|
|
5
|
+
|
|
6
|
+
Architecture:
|
|
7
|
+
- Entry point: main() function configures Streamlit and orchestrates the UI
|
|
8
|
+
- Data flow: table upload (CSV/Parquet/Feather) or bundled data → schema inference → normalization → filtering → plotting
|
|
9
|
+
- UI structure: Sidebar controls + 4 tabbed views (Interactive, Animation, Raw Data, Stats)
|
|
10
|
+
|
|
11
|
+
Data Pipeline:
|
|
12
|
+
1. Load raw tables (words + fixations + optional raw gaze)
|
|
13
|
+
2. Infer schema via candidate column matching
|
|
14
|
+
3. Normalize to canonical column names
|
|
15
|
+
4. Apply participant/trial/paragraph filters
|
|
16
|
+
5. Build trial combinations for selection
|
|
17
|
+
6. Render visualizations with user-controlled settings
|
|
18
|
+
|
|
19
|
+
Usage:
|
|
20
|
+
# Development mode (watch for changes):
|
|
21
|
+
$ streamlit run scanpath_studio/app.py
|
|
22
|
+
|
|
23
|
+
# Package mode:
|
|
24
|
+
$ python -m scanpath_studio
|
|
25
|
+
# or
|
|
26
|
+
$ scanpath-studio
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
from typing import Optional, Tuple
|
|
32
|
+
|
|
33
|
+
import pandas as pd
|
|
34
|
+
import streamlit as st
|
|
35
|
+
|
|
36
|
+
# Script-mode bootstrap: `streamlit run scanpath_studio/app.py` executes this
# file without a package context, so the repository root is not importable.
# Put it at the front of sys.path before the absolute package imports below.
if not __package__:
    import sys
    from pathlib import Path

    root = Path(__file__).resolve().parent.parent
    if sys.path.count(str(root)) == 0:
        sys.path.insert(0, str(root))
|
|
45
|
+
|
|
46
|
+
from scanpath_studio.annotations import (
|
|
47
|
+
filter_keys,
|
|
48
|
+
render_annotations_sidebar,
|
|
49
|
+
)
|
|
50
|
+
from scanpath_studio.constants import DEFAULT_LINE_SPACING, FONT_FAMILY
|
|
51
|
+
from scanpath_studio.controls import (
|
|
52
|
+
FIX_FIELD_SPECS,
|
|
53
|
+
RAW_GAZE_FIELD_SPECS,
|
|
54
|
+
WORD_FIELD_SPECS,
|
|
55
|
+
column_mapping_ui,
|
|
56
|
+
data_dictionary_help_text,
|
|
57
|
+
sidebar_controls,
|
|
58
|
+
sidebar_trial_filters,
|
|
59
|
+
)
|
|
60
|
+
from scanpath_studio.data import (
|
|
61
|
+
compute_canvas_size,
|
|
62
|
+
default_filters,
|
|
63
|
+
filter_data,
|
|
64
|
+
filter_raw_gaze,
|
|
65
|
+
filter_to_keys,
|
|
66
|
+
filter_trials,
|
|
67
|
+
infer_fix_schema,
|
|
68
|
+
infer_raw_gaze_schema,
|
|
69
|
+
infer_word_schema,
|
|
70
|
+
load_onestop_server_bundle,
|
|
71
|
+
load_sample_data,
|
|
72
|
+
load_sample_raw_gaze,
|
|
73
|
+
normalize_fixations,
|
|
74
|
+
normalize_raw_gaze,
|
|
75
|
+
normalize_words,
|
|
76
|
+
onestop_data_dir,
|
|
77
|
+
propose_fix_schema,
|
|
78
|
+
propose_raw_gaze_schema,
|
|
79
|
+
propose_word_schema,
|
|
80
|
+
read_table,
|
|
81
|
+
validate_fix_schema,
|
|
82
|
+
validate_raw_gaze_schema,
|
|
83
|
+
validate_word_schema,
|
|
84
|
+
)
|
|
85
|
+
from scanpath_studio.styles import get_app_css
|
|
86
|
+
from scanpath_studio.tabs import (
|
|
87
|
+
render_animation_tab,
|
|
88
|
+
render_data_statistics_tab,
|
|
89
|
+
render_raw_data_tab,
|
|
90
|
+
render_single_trial_tab,
|
|
91
|
+
)
|
|
92
|
+
from scanpath_studio.utils import ( # noqa: F401
|
|
93
|
+
build_combo_options,
|
|
94
|
+
build_comparison_options as _build_comparison_options,
|
|
95
|
+
compute_trial_stats,
|
|
96
|
+
friendly_trial_label as _friendly_trial_label,
|
|
97
|
+
gather_trial_metadata,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
UPLOAD_CHOICE = "Upload tables"
DEMO_CHOICE = "Use bundled demo"
# A tiny, fully-specified synthetic trial (scanpath_studio.synthetic)
# with known ground-truth reading measures — handy for sanity-checking the viz
# against documented expected values.
SYNTHETIC_CHOICE = "Synthetic test trial"
# Server-side OneStop lacclab bundle. Only offered when $ONESTOP_DATA_DIR is
# set; selected automatically when the page is opened with `?source=onestop`
# in the URL. See data.load_onestop_server_bundle().
ONESTOP_CHOICE = "OneStop server bundle"

# Selection prefixes — every selectable tab (Interactive Plot, Animated
# Scanpath, …) renders its own `select_trial` with a different `key_prefix`,
# so a URL deep link has to seed all of them or only the first tab lands on
# the requested trial. Keep this list in sync with the `key_prefix=` values
# passed to `select_trial` in tabs.py.
_SELECTION_PREFIXES = ("single", "anim")

# Spellings that switch a boolean URL flag off. Anything else (including an
# empty value) counts as "on".
_URL_FALSE_VALUES = frozenset({"0", "false", "no"})


def _url_flag_on(raw: object) -> bool:
    """Return True unless *raw* spells a false value.

    The value is stripped and lower-cased first, so `False`, `NO` and ` 0 `
    are treated the same as `false`, `no` and `0` — matching how the other
    URL parameters in this module are lower-cased before comparison.
    """
    return str(raw).strip().lower() not in _URL_FALSE_VALUES


def _url_flag_off(raw: object) -> bool:
    """Inverse of `_url_flag_on`, for `hide_*` parameters that drive a `show_*` key."""
    return not _url_flag_on(raw)


# URL query-param → (session_state key, coercion) map for the deep-link API.
# Used by `_apply_url_preset()` to preset widgets when the page is opened from
# an external tool with a deep link. Values are applied with `setdefault`, so
# when two params target the same key the one listed first wins.
_URL_PRESETS = {
    # viz prefs (`controls.sidebar_controls`)
    "show_order": ("global_show_order", _url_flag_on),
    "hide_fixation_numbers": ("global_show_order", _url_flag_off),
    "show_saccades": ("global_show_saccades", _url_flag_on),
    "show_heatmap": ("global_show_heatmap", _url_flag_on),
    "show_words": ("global_show_words", _url_flag_on),
    "show_labels": ("global_show_labels", _url_flag_on),
    "show_fixations": ("global_show_fix", _url_flag_on),
    "heatmap_colorscale": ("global_heatmap_colorscale", str),
    "fixation_colorscale": ("global_fixation_colorscale", str),
}
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _apply_url_preset() -> Optional[str]:
    """Read `st.query_params` and preset Streamlit session state for deep links.

    Every value is written with `session_state.setdefault`, so a key that is
    already present in session state is never overwritten.

    Call this before the widgets it seeds are rendered, so the session_state
    values are picked up as those widgets' initial values.

    URL schema (all params optional):
        ?source=onestop           → returned to the caller (lower-cased)
        &participant=p001         → preselect participant (Participant mode)
        &trial=37                 → preselect the trial slider (must be an int)
        &heatmap_colorscale=Greens
        &hide_fixation_numbers=1
        &show_saccades=1
        &show_heatmap=1
        ...etc — one entry per key of `_URL_PRESETS`

    `?tab=` is not handled here; `main()` reads it directly.

    Side effect: when either colorscale param is present, `global_advanced`
    is also defaulted to True so the "Advanced styling" expander opens and
    the value is visible/editable.

    Returns:
        The lower-cased `source` query parameter, or None when there are no
        query parameters or `source` is absent/empty.
    """
    qp = st.query_params
    if not qp:
        return None

    # Seed selection state for every tab that exposes a `select_trial` widget.
    # Without seeding each prefix, only the first tab would land on the
    # deep-linked trial.
    has_participant = "participant" in qp
    has_trial = "trial" in qp
    if has_participant or has_trial:
        # Parse ?trial= once, up front: a malformed value then yields a single
        # warning instead of one per selection prefix.
        trial_index: Optional[int] = None
        if has_trial:
            try:
                trial_index = int(qp["trial"])
            except (ValueError, TypeError):
                st.warning(f"Ignored bad URL param ?trial={qp['trial']!r}")

        for prefix in _SELECTION_PREFIXES:
            st.session_state.setdefault(f"{prefix}_select_trial_mode", "Participant")
            if has_participant:
                st.session_state.setdefault(
                    f"{prefix}_participant", str(qp["participant"])
                )
            if trial_index is not None:
                st.session_state.setdefault(f"{prefix}_slider", trial_index)

    # Generic presets: coerce the raw string and seed the mapped state key.
    for url_key, (state_key, coerce) in _URL_PRESETS.items():
        if url_key not in qp:
            continue
        raw = qp[url_key]
        try:
            value = coerce(raw)
        except (ValueError, TypeError):
            st.warning(f"Ignored bad URL param ?{url_key}={raw!r}")
            continue
        st.session_state.setdefault(state_key, value)

    # Heatmap / fixation colorscale only render under the Advanced expander —
    # auto-open it so the URL value is exposed in the sidebar.
    if "heatmap_colorscale" in qp or "fixation_colorscale" in qp:
        st.session_state.setdefault("global_advanced", True)

    source = qp.get("source")
    return source.lower() if source else None
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def configure_page() -> None:
    """Apply the Streamlit page config and inject the app's custom CSS.

    The sidebar starts collapsed when the page is opened with `?embed=true`
    (or `?embed=1`, case-insensitive) so an embedding iframe is mostly plot;
    otherwise Streamlit's "auto" sidebar behaviour is used.
    """
    embed_param = st.query_params.get("embed") or ""
    sidebar_state = "collapsed" if embed_param.lower() in {"true", "1"} else "auto"

    st.set_page_config(
        page_title="Scanpath Studio",
        page_icon="👀",
        layout="wide",
        initial_sidebar_state=sidebar_state,
    )

    # The stylesheet is raw HTML/CSS, hence unsafe_allow_html.
    st.markdown(get_app_css(), unsafe_allow_html=True)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _render_about_panel() -> None:
    """Render the page header: title and caption on the left, Lab/Code links on the right."""
    from scanpath_studio.constants import CITATION

    # 5:2 split — wide heading column, narrow link column.
    heading_area, link_area = st.columns([5, 2])

    heading_area.title("Scanpath Studio")
    heading_area.caption("Interactive exploration of eye movements in reading.")

    # Raw HTML so the links can be styled as pills by the app stylesheet.
    link_area.markdown(
        f"""<div class="header-link-row">
<a class="header-link lab" href="https://lacclab.github.io/" target="_blank" rel="noopener">🧪 LaCC Lab</a>
<a class="header-link code" href="{CITATION["url"]}" target="_blank" rel="noopener">💻 Code</a>
</div>""",
        unsafe_allow_html=True,
    )
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# -----------------------------------------------------------------------------
|
|
237
|
+
# Data loading
|
|
238
|
+
# -----------------------------------------------------------------------------
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def load_words_and_fixations(
    data_choice: str,
    participant: Optional[str] = None,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the raw words and fixations tables for the selected data source.

    Args:
        data_choice: One of the module's `*_CHOICE` constants (synthetic
            trial, uploaded tables, OneStop server bundle, bundled demo).
            Any unrecognised value is treated as the bundled demo.
        participant: Participant id from the deep link. Forwarded unchanged
            to `load_onestop_server_bundle` when the OneStop source is
            selected; ignored for every other source.

    Returns:
        Tuple of (words_df, fixations_df) as raw DataFrames before
        normalization.

    UI Effects:
        - Upload mode renders two sidebar file uploaders (csv/parquet/feather).
        - If either upload is missing, shows a sidebar info message and
          returns the bundled sample data instead.
        - If the OneStop bundle comes back with an empty table, shows a
          sidebar warning and returns the bundled sample data instead.
    """
    if data_choice == SYNTHETIC_CHOICE:
        # Imported lazily: only needed for this data source.
        from scanpath_studio.synthetic import load_synthetic_data

        return load_synthetic_data()

    if data_choice == UPLOAD_CHOICE:
        accepted = ("csv", "parquet", "feather")
        words_file = st.sidebar.file_uploader("Words/IA table", type=list(accepted))
        fixations_file = st.sidebar.file_uploader(
            "Fixations table", type=list(accepted)
        )
        if not (words_file and fixations_file):
            st.sidebar.info("Upload both files or switch to demo data.")
            return load_sample_data()
        return read_table(words_file), read_table(fixations_file)

    if data_choice == ONESTOP_CHOICE:
        bundle_words, bundle_fixations = load_onestop_server_bundle(
            participant=participant
        )
        if not (bundle_words.empty or bundle_fixations.empty):
            return bundle_words, bundle_fixations
        st.sidebar.warning("OneStop bundle unavailable — falling back to demo data.")

    # Bundled demo, unknown choice, or OneStop fallback.
    return load_sample_data()
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def prepare_data(
    words_df: pd.DataFrame,
    fixations_df: pd.DataFrame,
    allow_override: bool,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Resolve a column schema for each table and normalize both tables.

    Args:
        words_df: Raw words/IA table.
        fixations_df: Raw fixations table.
        allow_override: When True, render the sidebar column-mapping UI for
            each table (pre-filled with the proposed schema) and validate the
            user's final mapping. When False, rely on schema inference alone.

    Returns:
        Tuple of (normalized words, normalized fixations).

    The Streamlit run is halted with `st.stop()` when a schema cannot be
    resolved. In override mode the validation problems are shown as a sidebar
    error first; in inference mode no message is emitted here.
    """

    def _confirmed_schema(frame, label, prefix, specs, propose, validate):
        """Propose a schema, let the user adjust it, and stop on remaining problems."""
        suggestion = propose(frame)
        chosen = column_mapping_ui(
            frame,
            table_label=label,
            state_key_prefix=prefix,
            field_specs=specs,
            proposed=suggestion,
            problems=validate(suggestion),
        )
        # Re-validate: the user may have changed the mapping in the UI.
        remaining = validate(chosen)
        if remaining:
            st.sidebar.error(f"{label}: " + "; ".join(remaining))
            st.stop()
        return chosen

    if not allow_override:
        word_schema = infer_word_schema(words_df)
        fix_schema = infer_fix_schema(fixations_df)
        if not (word_schema and fix_schema):
            st.stop()
    else:
        word_schema = _confirmed_schema(
            words_df,
            "Words/IA",
            "col_map_words",
            WORD_FIELD_SPECS,
            propose_word_schema,
            validate_word_schema,
        )
        fix_schema = _confirmed_schema(
            fixations_df,
            "Fixations",
            "col_map_fix",
            FIX_FIELD_SPECS,
            propose_fix_schema,
            validate_fix_schema,
        )

    normalized_words = normalize_words(words_df, word_schema)
    normalized_fixations = normalize_fixations(fixations_df, fix_schema)
    return normalized_words, normalized_fixations
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def load_raw_gaze_data(data_choice: str) -> pd.DataFrame:
    """Load and normalize the optional raw gaze table for the selected source.

    Args:
        data_choice: One of the module's `*_CHOICE` constants.

    Returns:
        The normalized raw gaze DataFrame, or an empty DataFrame when the
        source has no raw gaze, nothing was uploaded, or no valid schema
        could be resolved.

    Behaviour per source:
        - Synthetic trial: always empty; no uploader is rendered.
        - Bundled demo: loads the sample raw gaze and infers its schema; a
          sidebar warning is shown if inference fails.
        - Any other source: renders an optional sidebar uploader
          (csv/parquet/feather) plus the column-mapping UI; a sidebar warning
          is shown and the upload ignored if the mapping has problems.
    """
    if data_choice == SYNTHETIC_CHOICE:
        # The synthetic trial has no raw gaze; skip the uploader entirely.
        return pd.DataFrame()

    if data_choice == DEMO_CHOICE:
        sample = load_sample_raw_gaze()
        if sample.empty:
            return sample
        sample_schema = infer_raw_gaze_schema(sample)
        if sample_schema:
            return normalize_raw_gaze(sample, sample_schema)
        st.sidebar.warning("Could not infer raw gaze schema from sample data")
        return pd.DataFrame()

    upload = st.sidebar.file_uploader(
        "Raw gaze table (optional)",
        type=["csv", "parquet", "feather"],
        help="Optional: millisecond-level gaze with participant_id, trial_id, x, y.",
    )
    if not upload:
        return pd.DataFrame()

    gaze = read_table(upload)
    suggestion = propose_raw_gaze_schema(gaze)
    gaze_schema = column_mapping_ui(
        gaze,
        table_label="Raw gaze",
        state_key_prefix="col_map_raw_gaze",
        field_specs=RAW_GAZE_FIELD_SPECS,
        proposed=suggestion,
        problems=validate_raw_gaze_schema(suggestion),
    )
    # Re-validate the mapping the user ended up with.
    remaining = validate_raw_gaze_schema(gaze_schema)
    if remaining:
        st.sidebar.warning("Raw gaze ignored — " + "; ".join(remaining))
        return pd.DataFrame()
    return normalize_raw_gaze(gaze, gaze_schema)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# -----------------------------------------------------------------------------
|
|
405
|
+
# Sidebar controls
|
|
406
|
+
# -----------------------------------------------------------------------------
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def _sidebar_group(title: str) -> None:
    """Write *title* to the sidebar as a level-3 markdown heading."""
    heading = f"### {title}"
    st.sidebar.markdown(heading)
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def render_sidebar_data_source() -> str:
    """Render the data-source radio inside a sidebar expander.

    Returns:
        The selected option: `DEMO_CHOICE`, `SYNTHETIC_CHOICE`,
        `UPLOAD_CHOICE`, or — only when `onestop_data_dir()` is not None —
        `ONESTOP_CHOICE`.

    The radio is keyed as "data_source_choice". A value already stored under
    that session-state key (e.g. seeded by a deep link) selects the initial
    option; a missing or unavailable value falls back to the first option.
    """
    choices = [DEMO_CHOICE, SYNTHETIC_CHOICE, UPLOAD_CHOICE]
    # Offer the OneStop bundle (listed first) only when the server provides it;
    # elsewhere the option would be a dead end.
    if onestop_data_dir() is not None:
        choices = [ONESTOP_CHOICE, *choices]

    preselected = st.session_state.get("data_source_choice", choices[0])
    start_index = choices.index(preselected) if preselected in choices else 0

    panel = st.sidebar.expander("Data source", expanded=True)
    return panel.radio(
        "Data source",
        choices,
        index=start_index,
        help=data_dictionary_help_text(),
        key="data_source_choice",
        label_visibility="collapsed",
    )
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def render_sidebar_canvas_controls(
    words_filtered: pd.DataFrame,
    fixations_filtered: pd.DataFrame,
    data_choice: Optional[str] = None,
) -> Tuple[int, int, int, str, float, bool]:
    """Render the "Display settings" sidebar expander and return its values.

    Args:
        words_filtered: Filtered words table, passed to `compute_canvas_size`
            for the default dimensions.
        fixations_filtered: Filtered fixations table, passed to
            `compute_canvas_size` alongside the words.
        data_choice: Currently selected data source. For the OneStop server
            bundle and the bundled demo the default canvas is fixed at
            2560x1440; for any other value it comes from
            `compute_canvas_size`.

    Returns:
        Tuple of (canvas_width, canvas_height, base_font_size, font_family,
        line_spacing, scale_text_to_boxes).
    """
    # OneStop server bundle + bundled demo share the same experimental setup
    # (2560x1440 monitor). Data-derived extents undershoot — text only fills
    # part of the screen — so hard-default to the real monitor here.
    if data_choice not in (ONESTOP_CHOICE, DEMO_CHOICE):
        default_w, default_h = compute_canvas_size(words_filtered, fixations_filtered)
    else:
        default_w, default_h = 2560, 1440

    def _clamp(pixels):
        """Keep a default inside the bounds accepted by the inputs below."""
        return min(max(pixels, 100), 10000)

    # Bounds and step shared by the two monitor-dimension inputs.
    dimension_bounds = {"min_value": 100, "max_value": 10000, "step": 10}

    panel = st.sidebar.expander("Display settings", expanded=False)
    width_px = panel.number_input(
        "Monitor width (px)",
        value=_clamp(default_w),
        help="Use the real monitor width in pixels to keep coordinates true to scale.",
        **dimension_bounds,
    )
    height_px = panel.number_input(
        "Monitor height (px)",
        value=_clamp(default_h),
        help="Use the real monitor height in pixels to keep coordinates true to scale.",
        **dimension_bounds,
    )

    # Reading text is true-to-scale by default: it auto-sizes to the word boxes
    # and scales with the figure. Unticking falls back to a fixed font size.
    fit_text = panel.checkbox(
        "Scale text to boxes",
        value=True,
        help="Size the reading text from the word boxes (height = box height ÷ "
        "line spacing) so it stays true to the real experiment at any zoom. "
        "Untick to use the fixed 'Figure font size' below instead.",
    )
    # Line spacing only matters while text is sized from the boxes.
    spacing = panel.number_input(
        "Line spacing",
        min_value=1.0,
        max_value=10.0,
        value=float(DEFAULT_LINE_SPACING),
        step=0.5,
        disabled=not fit_text,
        help="Line slots per line of text. OneStop rendered one blank line above "
        "and one below each text line, so the box spans 3 line heights → 3.",
    )
    font_px = panel.number_input(
        "Figure font size (px)",
        min_value=6,
        max_value=72,
        value=16,
        step=1,
        help="Real (monitor-pixel) font size, scaled true-to-scale with the "
        "figure. Used for the reading text when 'Scale text to boxes' is off or "
        "the data has no word boxes, and always for axis/legend chrome.",
    )
    typeface = panel.text_input(
        "Text font",
        value=FONT_FAMILY,
        help="Font for the word labels. Use the exact font from your experiment "
        "(e.g. 'Courier New') or a CSS fallback stack.",
    )

    # Coerce to plain Python types for the callers.
    return (
        int(width_px),
        int(height_px),
        int(font_px),
        typeface,
        float(spacing),
        bool(fit_text),
    )
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
# -----------------------------------------------------------------------------
|
|
546
|
+
# Main application
|
|
547
|
+
# -----------------------------------------------------------------------------
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def main() -> None:
    """Run the Streamlit app from page setup to the tabbed views.

    Steps, in order:
        1. Configure the page and render the header.
        2. Apply URL deep-link presets and seed the data-source choice.
        3. Sidebar: pick the data source; load and normalize words and
           fixations; load optional raw gaze.
        4. Sidebar: trial filters (participants, metadata, annotation state),
           then the default participant/trial/paragraph filters.
        5. Sidebar: display settings, visualization controls, annotations.
        6. Main area: four tabs (Interactive Plot, Animated Scanpath,
           Raw Data, Data Statistics).

    Returns early, after a warning, when filtering leaves no words or no
    fixations. `prepare_data` may halt the run earlier via `st.stop()`.
    """
    configure_page()
    _render_about_panel()

    # Deep-link presets must land in session_state before the widgets they
    # seed are rendered — see _apply_url_preset for the URL schema.
    url_source = _apply_url_preset()
    if url_source == "onestop":
        # Only honour the request when the server actually has the bundle.
        if onestop_data_dir() is not None:
            st.session_state.setdefault("data_source_choice", ONESTOP_CHOICE)
    elif url_source == "demo":
        st.session_state.setdefault("data_source_choice", DEMO_CHOICE)
    elif url_source == "upload":
        st.session_state.setdefault("data_source_choice", UPLOAD_CHOICE)

    # --- Data source (sidebar) ------------------------------------------------
    _sidebar_group("📂 Data")
    data_choice = render_sidebar_data_source()

    # The participant stored under "single_participant" is forwarded to the
    # loader, which passes it on for the OneStop source only.
    words_df, fixations_df = load_words_and_fixations(
        data_choice,
        participant=st.session_state.get("single_participant"),
    )
    # The column-mapping UI is only offered for uploaded tables.
    words_df, fixations_df = prepare_data(
        words_df, fixations_df, allow_override=(data_choice == UPLOAD_CHOICE)
    )

    raw_gaze_df = load_raw_gaze_data(data_choice)

    # --- Trial-level filtering (sidebar) --------------------------------------
    trial_filters = sidebar_trial_filters(words_df, fixations_df)
    words_df, fixations_df = filter_trials(
        words_df,
        fixations_df,
        participants=trial_filters["participants"],
        metadata=trial_filters["metadata"],
    )

    annotation_filter_active = bool(
        trial_filters["favorites_only"]
        or trial_filters["required_tags"]
        or trial_filters["excluded_tags"]
    )
    if annotation_filter_active and not fixations_df.empty:
        # Annotation filters work on (participant, trial) string pairs.
        trial_keys = {
            (str(pid), str(tid))
            for pid, tid in zip(
                fixations_df["participant_id"], fixations_df["trial_id"]
            )
        }
        surviving = set(
            filter_keys(
                list(trial_keys),
                favorites_only=trial_filters["favorites_only"],
                required_tags=trial_filters["required_tags"],
                excluded_tags=trial_filters["excluded_tags"],
            )
        )
        words_df, fixations_df = filter_to_keys(words_df, fixations_df, surviving)

    # --- Default participant/trial/paragraph filters --------------------------
    filters = default_filters(words_df, fixations_df)
    words_filtered, fixations_filtered = filter_data(words_df, fixations_df, filters)

    # Narrow raw gaze to the same participants/trials.
    if raw_gaze_df.empty:
        raw_gaze_filtered = pd.DataFrame()
    else:
        raw_gaze_filtered = filter_raw_gaze(
            raw_gaze_df,
            filters.get("participants", []),
            filters.get("trials", []),
        )
        if raw_gaze_filtered.empty:
            # Informational, not an error: the loaded raw-gaze samples just
            # don't cover any trial in the current filter. The overlay is
            # optional.
            st.sidebar.caption(
                f"ℹ️ The loaded raw-gaze samples ({len(raw_gaze_df):,} rows) don't "
                "overlap the current trial filter, so the raw-gaze overlay is "
                "unavailable here."
            )

    # Nothing to plot: tell the user how to recover and end this run.
    if words_filtered.empty or fixations_filtered.empty:
        st.warning(
            "No data after filtering. Loosen the **Filter trials** panel "
            "(participants, condition, or annotation filters) in the sidebar."
        )
        return

    # Trial combinations for the selection widgets; the other two return
    # values are not needed here.
    combos, _, _ = build_combo_options(fixations_filtered)

    # --- Visualization controls (sidebar) -------------------------------------
    _sidebar_group("🎨 Visualization")
    (
        canvas_width,
        canvas_height,
        base_font_size,
        font_family,
        line_spacing,
        scale_text_to_boxes,
    ) = render_sidebar_canvas_controls(words_filtered, fixations_filtered, data_choice)

    viz_settings = sidebar_controls(
        fixations_filtered,
        base_font_size,
        has_raw_gaze=not raw_gaze_filtered.empty,
    )

    # --- Annotations panel (sidebar) ------------------------------------------
    _sidebar_group("📝 Annotations")
    render_annotations_sidebar()

    # The tab cannot be pre-selected from here, so when the deep link asks for
    # the animation we surface a banner pointing to it instead.
    requested_tab = (st.query_params.get("tab") or "").lower()
    if requested_tab == "animation":
        st.info("🎬 For the animated view, click the **Animated Scanpath** tab below.")

    # --- Tabs -----------------------------------------------------------------
    tab_single, tab_animation, tab_raw, tab_stats = st.tabs(
        ["Interactive Plot", "Animated Scanpath", "Raw Data", "Data Statistics"]
    )

    # Keyword arguments shared by the two plot tabs.
    plot_kwargs = {
        "canvas_width": canvas_width,
        "canvas_height": canvas_height,
        "base_font_size": base_font_size,
        "font_family": font_family,
        "viz_settings": viz_settings,
        "line_spacing": line_spacing,
        "scale_text_to_boxes": scale_text_to_boxes,
    }

    with tab_single:
        render_single_trial_tab(
            words_filtered,
            fixations_filtered,
            combos,
            raw_gaze=raw_gaze_filtered,
            **plot_kwargs,
        )

    with tab_animation:
        render_animation_tab(
            words_filtered,
            fixations_filtered,
            combos,
            **plot_kwargs,
        )

    with tab_raw:
        render_raw_data_tab(words_filtered, fixations_filtered, raw_gaze_filtered)

    with tab_stats:
        render_data_statistics_tab(
            words_filtered,
            fixations_filtered,
            raw_gaze_filtered,
            combos,
            canvas_width=canvas_width,
            base_font_size=base_font_size,
            font_family=font_family,
        )
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
# Run the app when this file is executed as a script (e.g. by
# `streamlit run scanpath_studio/app.py`); importing the module does not
# start the UI.
if __name__ == "__main__":
    main()
|