scanpath-studio 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scanpath_studio/app.py ADDED
@@ -0,0 +1,752 @@
1
+ """Scanpath Studio Streamlit app.
2
+
3
+ This is the main entry point for the Streamlit application that visualizes
4
+ eye-tracking scanpaths over text.
5
+
6
+ Architecture:
7
+ - Entry point: main() function configures Streamlit and orchestrates the UI
8
+ - Data flow: CSV upload → schema inference → normalization → filtering → plotting
9
+ - UI structure: Sidebar controls + 4 tabbed views (Interactive, Animation, Raw Data, Stats)
10
+
11
+ Data Pipeline:
12
+ 1. Load raw CSVs (words + fixations + optional raw gaze)
13
+ 2. Infer schema via candidate column matching
14
+ 3. Normalize to canonical column names
15
+ 4. Apply participant/trial/paragraph filters
16
+ 5. Build trial combinations for selection
17
+ 6. Render visualizations with user-controlled settings
18
+
19
+ Usage:
20
+ # Development mode (watch for changes):
21
+ $ streamlit run scanpath_studio/app.py
22
+
23
+ # Package mode:
24
+ $ python -m scanpath_studio
25
+ # or
26
+ $ scanpath-studio
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ from typing import Optional, Tuple
32
+
33
+ import pandas as pd
34
+ import streamlit as st
35
+
36
# Script-mode bootstrap: `streamlit run scanpath_studio/app.py` executes this
# file without a package context, so the absolute `scanpath_studio.*` imports
# below only resolve once the repository root is on sys.path.
if not __package__:
    import sys
    from pathlib import Path

    # Two levels up from this file: <root>/scanpath_studio/app.py -> <root>.
    root = Path(__file__).resolve().parents[1]
    if str(root) not in sys.path:
        sys.path.insert(0, str(root))
45
+
46
+ from scanpath_studio.annotations import (
47
+ filter_keys,
48
+ render_annotations_sidebar,
49
+ )
50
+ from scanpath_studio.constants import DEFAULT_LINE_SPACING, FONT_FAMILY
51
+ from scanpath_studio.controls import (
52
+ FIX_FIELD_SPECS,
53
+ RAW_GAZE_FIELD_SPECS,
54
+ WORD_FIELD_SPECS,
55
+ column_mapping_ui,
56
+ data_dictionary_help_text,
57
+ sidebar_controls,
58
+ sidebar_trial_filters,
59
+ )
60
+ from scanpath_studio.data import (
61
+ compute_canvas_size,
62
+ default_filters,
63
+ filter_data,
64
+ filter_raw_gaze,
65
+ filter_to_keys,
66
+ filter_trials,
67
+ infer_fix_schema,
68
+ infer_raw_gaze_schema,
69
+ infer_word_schema,
70
+ load_onestop_server_bundle,
71
+ load_sample_data,
72
+ load_sample_raw_gaze,
73
+ normalize_fixations,
74
+ normalize_raw_gaze,
75
+ normalize_words,
76
+ onestop_data_dir,
77
+ propose_fix_schema,
78
+ propose_raw_gaze_schema,
79
+ propose_word_schema,
80
+ read_table,
81
+ validate_fix_schema,
82
+ validate_raw_gaze_schema,
83
+ validate_word_schema,
84
+ )
85
+ from scanpath_studio.styles import get_app_css
86
+ from scanpath_studio.tabs import (
87
+ render_animation_tab,
88
+ render_data_statistics_tab,
89
+ render_raw_data_tab,
90
+ render_single_trial_tab,
91
+ )
92
+ from scanpath_studio.utils import ( # noqa: F401
93
+ build_combo_options,
94
+ build_comparison_options as _build_comparison_options,
95
+ compute_trial_stats,
96
+ friendly_trial_label as _friendly_trial_label,
97
+ gather_trial_metadata,
98
+ )
99
+
100
UPLOAD_CHOICE = "Upload tables"
DEMO_CHOICE = "Use bundled demo"
# A tiny, fully-specified synthetic trial (scanpath_studio.synthetic)
# with known ground-truth reading measures — handy for sanity-checking the viz
# against documented expected values.
SYNTHETIC_CHOICE = "Synthetic test trial"
# Server-side OneStop lacclab bundle. Only offered when $ONESTOP_DATA_DIR is
# set; selected automatically when the page is opened with `?source=onestop`
# in the URL. See data.load_onestop_server_bundle().
ONESTOP_CHOICE = "OneStop server bundle"

# Selection prefixes — every selectable tab (Interactive Plot, Animated
# Scanpath, …) renders its own `select_trial` with a different `key_prefix`,
# so a URL deep link has to seed all of them or only the first tab lands on
# the requested trial. Keep this list in sync with the `key_prefix=` values
# passed to `select_trial` in tabs.py.
_SELECTION_PREFIXES = ("single", "anim")

# Query-string spellings that switch a boolean flag off. Anything else
# (including an empty value) switches it on.
_FALSY_URL_VALUES = frozenset({"0", "false", "no"})


def _url_flag(raw: str) -> bool:
    """Coerce a URL query value to a bool, ignoring case and padding.

    Hand-written links routinely use `False`, `NO` or a stray space; without
    normalisation those would silently count as "on".
    """
    return str(raw).strip().lower() not in _FALSY_URL_VALUES


def _url_flag_negated(raw: str) -> bool:
    """Inverse of `_url_flag`, for `hide_*` params that drive a `show_*` key."""
    return not _url_flag(raw)


# URL query-param → (session_state key, coercion callable) map for the
# deep-link API. Used by `_apply_url_preset()` to preset widgets when the page
# is opened from an external tool with a deep link. A coercion callable may
# raise ValueError/TypeError to reject a value.
_URL_PRESETS = {
    # viz prefs (`controls.sidebar_controls`)
    "show_order": ("global_show_order", _url_flag),
    # Same target key as `show_order`, with inverted meaning.
    "hide_fixation_numbers": ("global_show_order", _url_flag_negated),
    "show_saccades": ("global_show_saccades", _url_flag),
    "show_heatmap": ("global_show_heatmap", _url_flag),
    "show_words": ("global_show_words", _url_flag),
    "show_labels": ("global_show_labels", _url_flag),
    "show_fixations": ("global_show_fix", _url_flag),
    "heatmap_colorscale": ("global_heatmap_colorscale", str),
    "fixation_colorscale": ("global_fixation_colorscale", str),
}
133
+
134
+
135
def _apply_url_preset() -> Optional[str]:
    """Read `st.query_params` and preset Streamlit session state for deep links.

    Every value is written with `setdefault`, so state that already exists
    (for example from an earlier rerun) is never overwritten by the URL.
    Call this before the widgets it targets are rendered, so the seeded
    session_state values are picked up as the widgets' initial values.

    URL schema (all params optional):
        ?source=onestop              → returned to the caller (lower-cased)
        &participant=p001            → preselect participant (Participant mode)
        &trial=37                    → preselect trial_index slider
        &heatmap_colorscale=Greens
        &hide_fixation_numbers=1
        &show_saccades=1
        &show_heatmap=1
        ...etc — see _URL_PRESETS

    Side effect: when any colorscale is set via URL, `global_advanced` is also
    seeded to True so the "Advanced styling" expander exposes the value.

    Returns:
        The lower-cased `source` query value, or `None` when there are no
        query params or no `source` param.
    """
    qp = st.query_params
    if not qp:
        return None

    has_participant = "participant" in qp
    has_trial = "trial" in qp

    # Parse ?trial= once, outside the per-prefix loop, so a malformed value
    # produces a single warning instead of one per selection prefix.
    trial_index: Optional[int] = None
    if has_trial:
        try:
            trial_index = int(qp["trial"])
        except (ValueError, TypeError):
            st.warning(f"Ignored bad URL param ?trial={qp['trial']!r}")

    # Seed selection state for every tab that exposes a `select_trial` widget;
    # otherwise only the first tab would land on the deep-linked trial.
    if has_participant or has_trial:
        for prefix in _SELECTION_PREFIXES:
            st.session_state.setdefault(f"{prefix}_select_trial_mode", "Participant")
            if has_participant:
                st.session_state.setdefault(
                    f"{prefix}_participant", str(qp["participant"])
                )
            if trial_index is not None:
                st.session_state.setdefault(f"{prefix}_slider", trial_index)

    # Visualization preferences: coerce each known param and seed its key.
    for url_key, (state_key, coerce) in _URL_PRESETS.items():
        if url_key not in qp:
            continue
        raw = qp[url_key]
        try:
            value = coerce(raw)
        except (ValueError, TypeError):
            st.warning(f"Ignored bad URL param ?{url_key}={raw!r}")
            continue
        st.session_state.setdefault(state_key, value)

    # Heatmap / fixation colorscale only render under the Advanced expander —
    # auto-open it so the URL value is exposed in the sidebar.
    if "heatmap_colorscale" in qp or "fixation_colorscale" in qp:
        st.session_state.setdefault("global_advanced", True)

    source = qp.get("source")
    return source.lower() if source else None
199
+
200
+
201
def configure_page() -> None:
    """Apply the Streamlit page config and inject the app's custom CSS.

    The sidebar starts collapsed when the URL carries `?embed=true` (or
    `?embed=1`), so an embedding iframe is mostly the plot; otherwise
    Streamlit's "auto" sidebar behaviour is used.
    """
    embed_param = st.query_params.get("embed") or ""
    sidebar_state = "collapsed" if embed_param.lower() in {"true", "1"} else "auto"

    st.set_page_config(
        page_title="Scanpath Studio",
        page_icon="👀",
        layout="wide",
        initial_sidebar_state=sidebar_state,
    )
    st.markdown(get_app_css(), unsafe_allow_html=True)
216
+
217
+
218
def _render_about_panel() -> None:
    """Render the page header: title and caption on the left, link pills on the right."""
    from scanpath_studio.constants import CITATION

    # Assemble the pill row line by line; the code link comes from CITATION.
    link_row_html = "\n".join(
        [
            '<div class="header-link-row">',
            '<a class="header-link lab" href="https://lacclab.github.io/" target="_blank" rel="noopener">🧪 LaCC Lab</a>',
            f'<a class="header-link code" href="{CITATION["url"]}" target="_blank" rel="noopener">💻 Code</a>',
            "</div>",
        ]
    )

    title_col, links_col = st.columns([5, 2])
    title_col.title("Scanpath Studio")
    title_col.caption("Interactive exploration of eye movements in reading.")
    links_col.markdown(link_row_html, unsafe_allow_html=True)
234
+
235
+
236
+ # -----------------------------------------------------------------------------
237
+ # Data loading
238
+ # -----------------------------------------------------------------------------
239
+
240
+
241
def load_words_and_fixations(
    data_choice: str,
    participant: Optional[str] = None,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the raw words and fixations tables for the chosen data source.

    Args:
        data_choice: One of SYNTHETIC_CHOICE, UPLOAD_CHOICE, ONESTOP_CHOICE or
            DEMO_CHOICE. Any other value is treated like the bundled demo.
        participant: Forwarded to `load_onestop_server_bundle(participant=...)`
            when `data_choice == ONESTOP_CHOICE`; ignored for every other
            source.

    Returns:
        Tuple of (words_df, fixations_df) as raw DataFrames, before schema
        mapping / normalization.

    UI Effects:
        - Upload mode renders two file uploaders in the sidebar and, until
          both files are provided, shows an info message and returns the
          sample data instead.
        - OneStop mode shows a sidebar warning and returns the sample data
          when either table of the bundle comes back empty.
    """
    if data_choice == SYNTHETIC_CHOICE:
        # Imported lazily: only needed when the synthetic trial is selected.
        from scanpath_studio.synthetic import load_synthetic_data

        return load_synthetic_data()

    if data_choice == UPLOAD_CHOICE:
        words_file = st.sidebar.file_uploader(
            "Words/IA table", type=["csv", "parquet", "feather"]
        )
        fixations_file = st.sidebar.file_uploader(
            "Fixations table", type=["csv", "parquet", "feather"]
        )
        if not (words_file and fixations_file):
            st.sidebar.info("Upload both files or switch to demo data.")
            return load_sample_data()
        return read_table(words_file), read_table(fixations_file)

    if data_choice == ONESTOP_CHOICE:
        bundle_words, bundle_fixations = load_onestop_server_bundle(
            participant=participant
        )
        if not bundle_words.empty and not bundle_fixations.empty:
            return bundle_words, bundle_fixations
        st.sidebar.warning("OneStop bundle unavailable — falling back to demo data.")
        return load_sample_data()

    # Bundled demo, and the fallback for any unrecognised choice.
    return load_sample_data()
286
+
287
+
288
def prepare_data(
    words_df: pd.DataFrame,
    fixations_df: pd.DataFrame,
    allow_override: bool,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Resolve a schema for each table and normalize both to canonical columns.

    Args:
        words_df: Raw words / interest-area table.
        fixations_df: Raw fixations table.
        allow_override: When True, each table gets a column-mapping UI
            pre-filled with the proposed schema, and the user's mapping is
            validated. When False, schemas are inferred without any UI.

    Returns:
        Tuple of (normalized words, normalized fixations).

    UI Effects:
        Halts the script run via `st.stop()` when a mapped schema still has
        validation problems (after showing them in the sidebar) or when
        inference yields no schema for either table.
    """

    def _mapped_schema(frame, label, state_prefix, specs, propose, validate):
        # Propose → let the user adjust → re-validate; halt on remaining problems.
        suggestion = propose(frame)
        chosen = column_mapping_ui(
            frame,
            table_label=label,
            state_key_prefix=state_prefix,
            field_specs=specs,
            proposed=suggestion,
            problems=validate(suggestion),
        )
        remaining = validate(chosen)
        if remaining:
            st.sidebar.error(label + ": " + "; ".join(remaining))
            st.stop()
        return chosen

    if not allow_override:
        word_schema = infer_word_schema(words_df)
        fix_schema = infer_fix_schema(fixations_df)
        if not (word_schema and fix_schema):
            st.stop()
    else:
        # Words first, then fixations — this is also the sidebar order.
        word_schema = _mapped_schema(
            words_df,
            "Words/IA",
            "col_map_words",
            WORD_FIELD_SPECS,
            propose_word_schema,
            validate_word_schema,
        )
        fix_schema = _mapped_schema(
            fixations_df,
            "Fixations",
            "col_map_fix",
            FIX_FIELD_SPECS,
            propose_fix_schema,
            validate_fix_schema,
        )

    normalized_words = normalize_words(words_df, word_schema)
    normalized_fixations = normalize_fixations(fixations_df, fix_schema)
    return normalized_words, normalized_fixations
338
+
339
+
340
def load_raw_gaze_data(data_choice: str) -> pd.DataFrame:
    """Load and normalize the optional raw gaze table for the chosen source.

    Args:
        data_choice: The selected data source. SYNTHETIC_CHOICE has no raw
            gaze, DEMO_CHOICE uses the bundled sample, and every other choice
            offers an optional sidebar uploader.

    Returns:
        The normalized raw gaze DataFrame, or an empty DataFrame when nothing
        is available, nothing was uploaded, or no valid schema was found.

    UI Effects:
        - Demo: sidebar warning when the sample's schema cannot be inferred.
        - Upload/other: optional file uploader plus a column-mapping UI; a
          sidebar warning lists the problems when the mapping is invalid.
    """
    if data_choice == SYNTHETIC_CHOICE:
        # The synthetic trial has no raw gaze; skip the uploader entirely.
        return pd.DataFrame()

    if data_choice == DEMO_CHOICE:
        sample = load_sample_raw_gaze()
        if sample.empty:
            return sample
        sample_schema = infer_raw_gaze_schema(sample)
        if sample_schema:
            return normalize_raw_gaze(sample, sample_schema)
        st.sidebar.warning("Could not infer raw gaze schema from sample data")
        return pd.DataFrame()

    upload = st.sidebar.file_uploader(
        "Raw gaze table (optional)",
        type=["csv", "parquet", "feather"],
        help="Optional: millisecond-level gaze with participant_id, trial_id, x, y.",
    )
    if not upload:
        return pd.DataFrame()

    gaze_table = read_table(upload)
    suggestion = propose_raw_gaze_schema(gaze_table)
    gaze_schema = column_mapping_ui(
        gaze_table,
        table_label="Raw gaze",
        state_key_prefix="col_map_raw_gaze",
        field_specs=RAW_GAZE_FIELD_SPECS,
        proposed=suggestion,
        problems=validate_raw_gaze_schema(suggestion),
    )
    remaining = validate_raw_gaze_schema(gaze_schema)
    if remaining:
        # Raw gaze is optional, so a bad mapping is dropped instead of halting.
        st.sidebar.warning("Raw gaze ignored — " + "; ".join(remaining))
        return pd.DataFrame()
    return normalize_raw_gaze(gaze_table, gaze_schema)
402
+
403
+
404
+ # -----------------------------------------------------------------------------
405
+ # Sidebar controls
406
+ # -----------------------------------------------------------------------------
407
+
408
+
409
def _sidebar_group(title: str) -> None:
    """Write `title` to the sidebar as a level-3 markdown heading."""
    heading = f"### {title}"
    st.sidebar.markdown(heading)
412
+
413
+
414
def render_sidebar_data_source() -> str:
    """Render the data-source radio inside a sidebar expander.

    Returns:
        The selected source: DEMO_CHOICE, SYNTHETIC_CHOICE, UPLOAD_CHOICE, or
        ONESTOP_CHOICE (the latter only when `onestop_data_dir()` is set).

    The initial selection comes from `st.session_state["data_source_choice"]`
    (e.g. seeded by a deep link); when that value is missing or not currently
    offered, it is reset to the first option.
    """
    # Only offer the OneStop bundle when $ONESTOP_DATA_DIR is set on the
    # server. Outside that context the choice would be a dead-end, so we hide it.
    options = [DEMO_CHOICE, SYNTHETIC_CHOICE, UPLOAD_CHOICE]
    if onestop_data_dir() is not None:
        options.insert(0, ONESTOP_CHOICE)

    # Session state is the single source of truth for the selection. Repair a
    # missing/stale value here, before the widget is created, and do not also
    # pass `index=`: combining a non-default `index` with a pre-seeded key
    # makes Streamlit warn about conflicting defaults.
    if st.session_state.get("data_source_choice") not in options:
        st.session_state["data_source_choice"] = options[0]

    source = st.sidebar.expander("Data source", expanded=True)
    return source.radio(
        "Data source",
        options,
        help=data_dictionary_help_text(),
        key="data_source_choice",
        label_visibility="collapsed",
    )
444
+
445
+
446
def render_sidebar_canvas_controls(
    words_filtered: pd.DataFrame,
    fixations_filtered: pd.DataFrame,
    data_choice: Optional[str] = None,
) -> Tuple[int, int, int, str, float, bool]:
    """Render the "Display settings" expander (monitor size and text sizing).

    Args:
        words_filtered: Filtered words table, passed to `compute_canvas_size`.
        fixations_filtered: Filtered fixations table, passed to
            `compute_canvas_size`.
        data_choice: Selected data source. For ONESTOP_CHOICE and DEMO_CHOICE
            the monitor size defaults to 2560x1440; otherwise the default is
            whatever `compute_canvas_size` returns. Either way the default is
            clamped to the widgets' 100–10000 range.

    Returns:
        Tuple of (canvas_width, canvas_height, base_font_size, font_family,
        line_spacing, scale_text_to_boxes).
    """
    px_min, px_max = 100, 10000

    def _clamp(px):
        # Keep the default inside the number_input's accepted range.
        return min(max(px, px_min), px_max)

    # OneStop server bundle + bundled demo share one experimental setup
    # (2560x1440 monitor); data-derived extents would undershoot it because
    # the text only fills part of the screen.
    uses_fixed_monitor = data_choice == ONESTOP_CHOICE or data_choice == DEMO_CHOICE
    if uses_fixed_monitor:
        default_size = (2560, 1440)
    else:
        default_size = compute_canvas_size(words_filtered, fixations_filtered)
    start_w, start_h = default_size

    panel = st.sidebar.expander("Display settings", expanded=False)
    width_px = panel.number_input(
        "Monitor width (px)",
        min_value=px_min,
        max_value=px_max,
        value=_clamp(start_w),
        step=10,
        help="Use the real monitor width in pixels to keep coordinates true to scale.",
    )
    height_px = panel.number_input(
        "Monitor height (px)",
        min_value=px_min,
        max_value=px_max,
        value=_clamp(start_h),
        step=10,
        help="Use the real monitor height in pixels to keep coordinates true to scale.",
    )
    # Text auto-sizes to the word boxes by default; unticking falls back to
    # the fixed font size and disables the line-spacing input.
    fit_text = panel.checkbox(
        "Scale text to boxes",
        value=True,
        help="Size the reading text from the word boxes (height = box height ÷ "
        "line spacing) so it stays true to the real experiment at any zoom. "
        "Untick to use the fixed 'Figure font size' below instead.",
    )
    spacing = panel.number_input(
        "Line spacing",
        min_value=1.0,
        max_value=10.0,
        value=float(DEFAULT_LINE_SPACING),
        step=0.5,
        disabled=not fit_text,
        help="Line slots per line of text. OneStop rendered one blank line above "
        "and one below each text line, so the box spans 3 line heights → 3.",
    )
    font_px = panel.number_input(
        "Figure font size (px)",
        min_value=6,
        max_value=72,
        value=16,
        step=1,
        help="Real (monitor-pixel) font size, scaled true-to-scale with the "
        "figure. Used for the reading text when 'Scale text to boxes' is off or "
        "the data has no word boxes, and always for axis/legend chrome.",
    )
    typeface = panel.text_input(
        "Text font",
        value=FONT_FAMILY,
        help="Font for the word labels. Use the exact font from your experiment "
        "(e.g. 'Courier New') or a CSS fallback stack.",
    )

    # Coerce widget values to the plain types promised by the signature.
    return int(width_px), int(height_px), int(font_px), typeface, float(spacing), bool(fit_text)
543
+
544
+
545
+ # -----------------------------------------------------------------------------
546
+ # Main application
547
+ # -----------------------------------------------------------------------------
548
+
549
+
550
def main() -> None:
    """Run the Streamlit app from page setup to the four tabs.

    Steps, in render order:
        1. Page config, CSS and the header panel.
        2. Deep-link presets from the URL (see `_apply_url_preset`).
        3. Data source selection, then loading and normalizing words,
           fixations and the optional raw gaze table.
        4. Trial filters (participants, metadata, annotation state), then
           the default participant/trial/paragraph filters.
        5. Sidebar display, visualization and annotation controls.
        6. Tabs: Interactive Plot, Animated Scanpath, Raw Data, Data Statistics.

    Returns early with a warning when filtering leaves no words or no
    fixations. `prepare_data` may also halt the run via `st.stop()`.
    """
    configure_page()
    _render_about_panel()

    # Deep-link presets are applied before the sidebar widgets are created so
    # the seeded session_state values become their initial values.
    url_source = _apply_url_preset()
    forced_choice = {"demo": DEMO_CHOICE, "upload": UPLOAD_CHOICE}.get(url_source)
    if url_source == "onestop" and onestop_data_dir() is not None:
        forced_choice = ONESTOP_CHOICE
    if forced_choice is not None:
        st.session_state.setdefault("data_source_choice", forced_choice)

    # --- Data source + loading -------------------------------------------
    _sidebar_group("📂 Data")
    data_choice = render_sidebar_data_source()

    # The selected participant (if any) is forwarded to the OneStop loader.
    raw_words, raw_fixations = load_words_and_fixations(
        data_choice, participant=st.session_state.get("single_participant")
    )
    words_df, fixations_df = prepare_data(
        raw_words, raw_fixations, allow_override=(data_choice == UPLOAD_CHOICE)
    )
    raw_gaze_df = load_raw_gaze_data(data_choice)

    # --- Trial-level filtering -------------------------------------------
    trial_filters = sidebar_trial_filters(words_df, fixations_df)
    words_df, fixations_df = filter_trials(
        words_df,
        fixations_df,
        participants=trial_filters["participants"],
        metadata=trial_filters["metadata"],
    )

    annotation_filter_on = bool(
        trial_filters["favorites_only"]
        or trial_filters["required_tags"]
        or trial_filters["excluded_tags"]
    )
    if annotation_filter_on and not fixations_df.empty:
        # Annotation filters work on (participant, trial) string keys.
        trial_keys = {
            (str(pid), str(tid))
            for pid, tid in zip(
                fixations_df["participant_id"], fixations_df["trial_id"]
            )
        }
        surviving_keys = set(
            filter_keys(
                list(trial_keys),
                favorites_only=trial_filters["favorites_only"],
                required_tags=trial_filters["required_tags"],
                excluded_tags=trial_filters["excluded_tags"],
            )
        )
        words_df, fixations_df = filter_to_keys(words_df, fixations_df, surviving_keys)

    # Participant/trial/paragraph selection on top of the trial filters.
    filters = default_filters(words_df, fixations_df)
    words_filtered, fixations_filtered = filter_data(words_df, fixations_df, filters)

    # Raw gaze is optional: keep an empty frame unless samples were loaded.
    raw_gaze_filtered = pd.DataFrame()
    if not raw_gaze_df.empty:
        raw_gaze_filtered = filter_raw_gaze(
            raw_gaze_df,
            filters.get("participants", []),
            filters.get("trials", []),
        )
        if raw_gaze_filtered.empty:
            # Informational only: the samples don't cover the current filter.
            st.sidebar.caption(
                f"ℹ️ The loaded raw-gaze samples ({len(raw_gaze_df):,} rows) don't "
                "overlap the current trial filter, so the raw-gaze overlay is "
                "unavailable here."
            )

    if words_filtered.empty or fixations_filtered.empty:
        st.warning(
            "No data after filtering. Loosen the **Filter trials** panel "
            "(participants, condition, or annotation filters) in the sidebar."
        )
        return

    # Only the first element (the trial combinations) is used here.
    combos, _, _ = build_combo_options(fixations_filtered)

    # --- Sidebar: display, visualization, annotations ----------------------
    _sidebar_group("🎨 Visualization")
    display_settings = render_sidebar_canvas_controls(
        words_filtered, fixations_filtered, data_choice
    )
    (
        canvas_width,
        canvas_height,
        base_font_size,
        font_family,
        line_spacing,
        scale_text_to_boxes,
    ) = display_settings

    viz_settings = sidebar_controls(
        fixations_filtered,
        base_font_size,
        has_raw_gaze=not raw_gaze_filtered.empty,
    )

    _sidebar_group("📝 Annotations")
    render_annotations_sidebar()

    # A tab cannot be preselected here, so a deep link asking for the
    # animation gets a banner pointing at the right tab instead.
    if (st.query_params.get("tab") or "").lower() == "animation":
        st.info("🎬 For the animated view, click the **Animated Scanpath** tab below.")

    # --- Tabs --------------------------------------------------------------
    tab_single, tab_animation, tab_raw, tab_stats = st.tabs(
        ["Interactive Plot", "Animated Scanpath", "Raw Data", "Data Statistics"]
    )

    # Keyword arguments shared by the two plotting tabs.
    plot_kwargs = {
        "canvas_width": canvas_width,
        "canvas_height": canvas_height,
        "base_font_size": base_font_size,
        "font_family": font_family,
        "viz_settings": viz_settings,
        "line_spacing": line_spacing,
        "scale_text_to_boxes": scale_text_to_boxes,
    }

    with tab_single:
        render_single_trial_tab(
            words_filtered,
            fixations_filtered,
            combos,
            raw_gaze=raw_gaze_filtered,
            **plot_kwargs,
        )

    with tab_animation:
        render_animation_tab(words_filtered, fixations_filtered, combos, **plot_kwargs)

    with tab_raw:
        render_raw_data_tab(words_filtered, fixations_filtered, raw_gaze_filtered)

    with tab_stats:
        render_data_statistics_tab(
            words_filtered,
            fixations_filtered,
            raw_gaze_filtered,
            combos,
            canvas_width=canvas_width,
            base_font_size=base_font_size,
            font_family=font_family,
        )
749
+
750
+
751
# Script entry point: run the app only when this file is executed directly
# (e.g. `streamlit run scanpath_studio/app.py`), not when it is imported.
if __name__ == "__main__":
    main()