openadapt-ml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. openadapt_ml/__init__.py +0 -0
  2. openadapt_ml/benchmarks/__init__.py +125 -0
  3. openadapt_ml/benchmarks/agent.py +825 -0
  4. openadapt_ml/benchmarks/azure.py +761 -0
  5. openadapt_ml/benchmarks/base.py +366 -0
  6. openadapt_ml/benchmarks/cli.py +884 -0
  7. openadapt_ml/benchmarks/data_collection.py +432 -0
  8. openadapt_ml/benchmarks/runner.py +381 -0
  9. openadapt_ml/benchmarks/waa.py +704 -0
  10. openadapt_ml/cloud/__init__.py +5 -0
  11. openadapt_ml/cloud/azure_inference.py +441 -0
  12. openadapt_ml/cloud/lambda_labs.py +2445 -0
  13. openadapt_ml/cloud/local.py +790 -0
  14. openadapt_ml/config.py +56 -0
  15. openadapt_ml/datasets/__init__.py +0 -0
  16. openadapt_ml/datasets/next_action.py +507 -0
  17. openadapt_ml/evals/__init__.py +23 -0
  18. openadapt_ml/evals/grounding.py +241 -0
  19. openadapt_ml/evals/plot_eval_metrics.py +174 -0
  20. openadapt_ml/evals/trajectory_matching.py +486 -0
  21. openadapt_ml/grounding/__init__.py +45 -0
  22. openadapt_ml/grounding/base.py +236 -0
  23. openadapt_ml/grounding/detector.py +570 -0
  24. openadapt_ml/ingest/__init__.py +43 -0
  25. openadapt_ml/ingest/capture.py +312 -0
  26. openadapt_ml/ingest/loader.py +232 -0
  27. openadapt_ml/ingest/synthetic.py +1102 -0
  28. openadapt_ml/models/__init__.py +0 -0
  29. openadapt_ml/models/api_adapter.py +171 -0
  30. openadapt_ml/models/base_adapter.py +59 -0
  31. openadapt_ml/models/dummy_adapter.py +42 -0
  32. openadapt_ml/models/qwen_vl.py +426 -0
  33. openadapt_ml/runtime/__init__.py +0 -0
  34. openadapt_ml/runtime/policy.py +182 -0
  35. openadapt_ml/schemas/__init__.py +53 -0
  36. openadapt_ml/schemas/sessions.py +122 -0
  37. openadapt_ml/schemas/validation.py +252 -0
  38. openadapt_ml/scripts/__init__.py +0 -0
  39. openadapt_ml/scripts/compare.py +1490 -0
  40. openadapt_ml/scripts/demo_policy.py +62 -0
  41. openadapt_ml/scripts/eval_policy.py +287 -0
  42. openadapt_ml/scripts/make_gif.py +153 -0
  43. openadapt_ml/scripts/prepare_synthetic.py +43 -0
  44. openadapt_ml/scripts/run_qwen_login_benchmark.py +192 -0
  45. openadapt_ml/scripts/train.py +174 -0
  46. openadapt_ml/training/__init__.py +0 -0
  47. openadapt_ml/training/benchmark_viewer.py +1538 -0
  48. openadapt_ml/training/shared_ui.py +157 -0
  49. openadapt_ml/training/stub_provider.py +276 -0
  50. openadapt_ml/training/trainer.py +2446 -0
  51. openadapt_ml/training/viewer.py +2970 -0
  52. openadapt_ml-0.1.0.dist-info/METADATA +818 -0
  53. openadapt_ml-0.1.0.dist-info/RECORD +55 -0
  54. openadapt_ml-0.1.0.dist-info/WHEEL +4 -0
  55. openadapt_ml-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,2970 @@
1
+ """Unified viewer HTML generation.
2
+
3
+ This module generates the Viewer HTML with step-by-step playback,
4
+ transcript/audio sync, and model prediction comparison.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from pathlib import Path
11
+
12
+ from openadapt_ml.training.shared_ui import (
13
+ get_shared_header_css as _get_shared_header_css,
14
+ generate_shared_header_html as _generate_shared_header_html,
15
+ )
16
+
17
+
18
def _copy_transcript_and_audio(capture_path: Path | None, output_dir: Path) -> None:
    """Copy transcript.json and convert audio to mp3 for viewer playback.

    Both steps are best-effort: nothing happens when the capture directory
    is missing, existing files in ``output_dir`` are never overwritten, and
    ffmpeg problems are reported with a printed warning instead of raising.

    Args:
        capture_path: Path to the capture directory (may be None)
        output_dir: Output directory for the viewer
    """
    import shutil
    import subprocess

    if capture_path is None or not capture_path.exists():
        return

    # Copy transcript.json if it exists and has not been copied already.
    transcript_src = capture_path / "transcript.json"
    transcript_dst = output_dir / "transcript.json"
    if transcript_src.exists() and not transcript_dst.exists():
        shutil.copy2(transcript_src, transcript_dst)
        print(" Copied transcript.json from capture")

    # Convert audio to mp3 if it has not been converted yet (ffmpeg required).
    audio_dst = output_dir / "audio.mp3"
    if audio_dst.exists():
        return

    # Only the first recording found (in this preference order) is tried.
    candidates = (
        capture_path / f"audio{audio_ext}"
        for audio_ext in (".flac", ".wav", ".m4a", ".aac", ".ogg")
    )
    audio_src = next((path for path in candidates if path.exists()), None)
    if audio_src is None:
        return

    converted = False
    try:
        result = subprocess.run(
            ["ffmpeg", "-i", str(audio_src), "-y", "-q:a", "2", str(audio_dst)],
            capture_output=True,
            timeout=60,
        )
        converted = result.returncode == 0
        if converted:
            print(f" Converted {audio_src.name} to audio.mp3")
        else:
            print(f" Warning: ffmpeg conversion failed for {audio_src.name}")
    except FileNotFoundError:
        print(" Warning: ffmpeg not found, cannot convert audio")
    except subprocess.TimeoutExpired:
        print(" Warning: ffmpeg timed out")
    finally:
        if not converted:
            # audio_dst did not exist before this call, so anything present
            # now is an incomplete file from the failed run. Leaving it would
            # make the exists() guard above skip every future conversion.
            try:
                audio_dst.unlink(missing_ok=True)
            except OSError as exc:
                print(f" Warning: could not remove incomplete audio.mp3: {exc}")
60
+
61
+
62
def _read_embedded_comparison_data(html_path: Path) -> list[dict] | None:
    """Return the ``comparisonData`` array embedded in a comparison HTML file.

    Looks for ``const comparisonData = [...]`` or
    ``window.comparisonData = [...]`` and decodes the JSON array that starts
    at the opening bracket.

    Returns:
        The decoded array, or None when the file has no such assignment.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the embedded array is not valid JSON.
    """
    import re

    html_content = html_path.read_text()
    assignment = re.search(
        r'(?:const\s+|window\.)comparisonData\s*=\s*(?=\[)',
        html_content,
    )
    if assignment is None:
        return None
    # Decode from the opening bracket instead of regex-matching up to the
    # first "];": that sequence can legitimately occur inside a JSON string
    # (e.g. typed text), which would truncate the array.
    comparison_data, _end = json.JSONDecoder().raw_decode(
        html_content, assignment.end()
    )
    return comparison_data


def _base_rows_from_comparison(comparison_data: list[dict]) -> list[dict]:
    """Project comparison items onto the checkpoint-independent step fields."""
    return [
        {
            "index": item.get("index", 0),
            "time": item.get("time", 0),
            "image_path": item.get("image_path", ""),
            "human_action": item.get("human_action", {}),
        }
        for item in comparison_data
    ]


def _prediction_rows_from_comparison(comparison_data: list[dict]) -> list[dict]:
    """Project comparison items onto the per-checkpoint prediction fields."""
    return [
        {
            "predicted_action": item.get("predicted_action"),
            "match": item.get("match"),
        }
        for item in comparison_data
    ]


def generate_unified_viewer_from_output_dir(output_dir: Path) -> Path | None:
    """Generate the unified viewer.html from existing data in output_dir.

    Collects predictions from predictions_*.json files (preferred) and from
    any comparison_epoch*.html or comparison_preview.html files, and
    consolidates them into a single viewer with checkpoint dropdown.

    If the original capture is not available locally, extracts all data from
    the existing prediction/comparison files.

    Args:
        output_dir: Directory containing training_log.json and the
            prediction/comparison files; viewer.html is written here.

    Returns:
        Path to the generated viewer.html, or None when no base step data
        could be found in any of the files.
    """
    import datetime
    import re

    output_dir = Path(output_dir)

    # Try to load training log to get capture path and goal
    training_log_path = output_dir / "training_log.json"
    capture_path = None
    goal = "Complete the recorded workflow"  # Fallback default
    capture_id = "unknown"
    evaluations: list[dict] = []

    if training_log_path.exists():
        with open(training_log_path) as f:
            log_data = json.load(f)

        capture_path_str = log_data.get("capture_path", "")

        # Load goal from training log (CRITICAL: must match training prompt)
        goal = log_data.get("goal", "")
        if not goal and capture_path_str:
            # Fallback: derive readable text from the kebab-case/snake_case
            # capture directory name.
            dir_name = Path(capture_path_str).name
            goal = dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
        if not goal:
            goal = "Complete the recorded workflow"

        if capture_path_str:
            capture_path = Path(capture_path_str)
            capture_id = capture_path.name
            if not capture_path.exists():
                print(f"Capture path not found locally: {capture_path}")
                capture_path = None  # Will extract from prediction/HTML files

        evaluations = log_data.get("evaluations", [])
        if evaluations:
            print(f" Loaded {len(evaluations)} evaluations")

    # Collect predictions and base data from JSON files or HTML files
    predictions_by_checkpoint: dict[str, list[dict]] = {"None": []}
    base_data: list[dict] | None = None

    # First, try to load from JSON files (preferred)
    for json_file in sorted(output_dir.glob("predictions_*.json")):
        try:
            with open(json_file) as f:
                data = json.load(f)

            # Determine checkpoint name from filename
            name_match = re.search(r'predictions_(.+)\.json', json_file.name)
            if name_match:
                raw_name = name_match.group(1)
                if raw_name.startswith('epoch'):
                    checkpoint_name = f"Epoch {raw_name[5:]}"
                elif raw_name == 'preview':
                    checkpoint_name = "Preview"
                else:
                    checkpoint_name = raw_name.title()
            else:
                checkpoint_name = json_file.stem

            # Extract base data from first file that carries it
            if base_data is None and 'base_data' in data:
                base_data = data['base_data']

            if 'predictions' in data:
                predictions_by_checkpoint[checkpoint_name] = data['predictions']
                print(f" Loaded predictions from {json_file.name}")
        except Exception as e:
            # Best-effort: one unreadable file must not abort viewer generation.
            print(f" Warning: Could not load {json_file.name}: {e}")

    # Fallback: look for comparison_epoch*.html files and extract their data
    for comp_file in sorted(output_dir.glob("comparison_epoch*.html")):
        match = re.search(r'epoch(\d+)', comp_file.name)
        if not match:
            continue
        checkpoint_name = f"Epoch {match.group(1)}"

        try:
            comparison_data = _read_embedded_comparison_data(comp_file)
            if comparison_data is None:
                continue

            # Extract base data from the first file we find
            if base_data is None:
                base_data = _base_rows_from_comparison(comparison_data)

            # HTML is only a fallback: keep predictions that were already
            # loaded from the preferred predictions_*.json file.
            if checkpoint_name not in predictions_by_checkpoint:
                predictions_by_checkpoint[checkpoint_name] = (
                    _prediction_rows_from_comparison(comparison_data)
                )
                print(f" Loaded predictions from {comp_file.name}")
        except Exception as e:
            print(f" Warning: Could not extract data from {comp_file.name}: {e}")

    # Also check comparison_preview.html
    preview_file = output_dir / "comparison_preview.html"
    if preview_file.exists():
        try:
            comparison_data = _read_embedded_comparison_data(preview_file)
            if comparison_data is not None:
                # Extract base data if we haven't yet
                if base_data is None:
                    base_data = _base_rows_from_comparison(comparison_data)

                predictions = _prediction_rows_from_comparison(comparison_data)
                # Only add if it has actual predictions
                has_predictions = any(p.get("predicted_action") for p in predictions)
                if has_predictions and "Preview" not in predictions_by_checkpoint:
                    predictions_by_checkpoint["Preview"] = predictions
                    print(" Loaded predictions from comparison_preview.html")
        except Exception as e:
            print(f" Warning: Could not extract data from comparison_preview.html: {e}")

    # If we still don't have base data, we can't generate the viewer
    if base_data is None:
        print("No comparison data found, cannot generate unified viewer")
        return None

    # Copy transcript and audio files from capture if available
    _copy_transcript_and_audio(capture_path, output_dir)

    # Get capture modification time if available
    capture_modified_time = None
    if capture_path and capture_path.exists():
        mtime = capture_path.stat().st_mtime
        capture_modified_time = datetime.datetime.fromtimestamp(mtime).isoformat()

    # Generate the unified viewer using standalone HTML template
    # (Consolidated approach - always use standalone for reliability)
    viewer_path = output_dir / "viewer.html"

    _generate_unified_viewer_from_extracted_data(
        base_data=base_data,
        predictions_by_checkpoint=predictions_by_checkpoint,
        output_path=viewer_path,
        capture_id=capture_id,
        goal=goal,
        evaluations=evaluations,
        capture_modified_time=capture_modified_time,
    )

    return viewer_path
258
+
259
+
260
+ def _generate_unified_viewer_from_extracted_data(
261
+ base_data: list[dict],
262
+ predictions_by_checkpoint: dict[str, list[dict]],
263
+ output_path: Path,
264
+ capture_id: str = "unknown",
265
+ goal: str = "Untitled",
266
+ evaluations: list[dict] | None = None,
267
+ capture_modified_time: str | None = None,
268
+ ) -> None:
269
+ """Generate unified viewer from extracted comparison data.
270
+
271
+ This is used when the original capture isn't available locally
272
+ but we have comparison HTML files to extract from.
273
+ """
274
+ # Get shared header components for consistent nav
275
+ shared_header_css = _get_shared_header_css()
276
+ shared_header_html = _generate_shared_header_html("viewer")
277
+
278
+ # Build base HTML from extracted data (standalone, no openadapt-capture dependency)
279
+ base_data_json = json.dumps(base_data)
280
+ predictions_json = json.dumps(predictions_by_checkpoint)
281
+ evaluations_json = json.dumps(evaluations or [])
282
+ captures_json = json.dumps([{
283
+ "id": capture_id,
284
+ "name": goal,
285
+ "steps": len(base_data),
286
+ }])
287
+ current_capture_json = json.dumps(capture_id)
288
+ capture_modified_time_json = json.dumps(capture_modified_time)
289
+
290
+ # Find first image to get dimensions (for display)
291
+ first_image_path = base_data[0].get("image_path", "") if base_data else ""
292
+
293
+ html = f'''<!DOCTYPE html>
294
+ <html lang="en">
295
+ <head>
296
+ <meta charset="UTF-8">
297
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
298
+ <title>Unified Viewer - {capture_id}</title>
299
+ <style>
300
+ :root {{
301
+ --bg-primary: #0a0a0f;
302
+ --bg-secondary: #12121a;
303
+ --bg-tertiary: #1a1a24;
304
+ --border-color: rgba(255, 255, 255, 0.06);
305
+ --text-primary: #f0f0f0;
306
+ --text-secondary: #888;
307
+ --text-muted: #555;
308
+ --accent: #00d4aa;
309
+ --accent-dim: rgba(0, 212, 170, 0.15);
310
+ }}
311
+ * {{ box-sizing: border-box; margin: 0; padding: 0; }}
312
+ body {{
313
+ font-family: "SF Pro Display", -apple-system, BlinkMacSystemFont, "Inter", sans-serif;
314
+ background: var(--bg-primary);
315
+ color: var(--text-primary);
316
+ min-height: 100vh;
317
+ line-height: 1.5;
318
+ }}
319
+ .container {{
320
+ max-width: 1440px;
321
+ margin: 0 auto;
322
+ padding: 24px;
323
+ }}
324
+ {shared_header_css}
325
+ .nav-bar {{
326
+ display: flex;
327
+ gap: 8px;
328
+ padding: 12px 16px;
329
+ background: var(--bg-secondary);
330
+ border: 1px solid var(--border-color);
331
+ border-radius: 8px;
332
+ margin-bottom: 16px;
333
+ flex-wrap: wrap;
334
+ align-items: center;
335
+ }}
336
+ .nav-link {{
337
+ padding: 8px 16px;
338
+ border-radius: 6px;
339
+ font-size: 0.8rem;
340
+ text-decoration: none;
341
+ color: var(--text-secondary);
342
+ background: var(--bg-tertiary);
343
+ border: 1px solid var(--border-color);
344
+ transition: all 0.2s;
345
+ }}
346
+ .nav-link:hover {{ border-color: var(--accent); color: var(--text-primary); }}
347
+ .nav-link.active {{
348
+ background: var(--accent);
349
+ color: var(--bg-primary);
350
+ border-color: var(--accent);
351
+ font-weight: 600;
352
+ }}
353
+ .nav-label {{
354
+ font-size: 0.75rem;
355
+ color: var(--text-secondary);
356
+ margin-right: 8px;
357
+ }}
358
+ .viewer-controls {{
359
+ display: flex;
360
+ gap: 16px;
361
+ padding: 12px 16px;
362
+ background: var(--bg-secondary);
363
+ border: 1px solid var(--border-color);
364
+ border-radius: 8px;
365
+ margin-bottom: 16px;
366
+ flex-wrap: wrap;
367
+ align-items: center;
368
+ }}
369
+ .control-group {{
370
+ display: flex;
371
+ align-items: center;
372
+ gap: 8px;
373
+ }}
374
+ .control-label {{
375
+ font-size: 0.75rem;
376
+ color: var(--text-secondary);
377
+ text-transform: uppercase;
378
+ letter-spacing: 0.05em;
379
+ }}
380
+ .control-select {{
381
+ padding: 10px 14px;
382
+ border-radius: 8px;
383
+ font-size: 0.85rem;
384
+ background: var(--bg-tertiary);
385
+ color: var(--text-primary);
386
+ border: 1px solid var(--border-color);
387
+ cursor: pointer;
388
+ min-width: 200px;
389
+ appearance: none;
390
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23888' d='M3 4.5L6 7.5L9 4.5'/%3E%3C/svg%3E");
391
+ background-repeat: no-repeat;
392
+ background-position: right 12px center;
393
+ padding-right: 32px;
394
+ transition: all 0.2s;
395
+ }}
396
+ .control-select:hover {{ border-color: var(--accent); background-color: var(--bg-secondary); }}
397
+ .control-select:focus {{ outline: none; border-color: var(--accent); box-shadow: 0 0 0 2px var(--accent-dim); }}
398
+ .control-hint {{ font-size: 0.7rem; color: var(--text-muted); }}
399
+ .comparison-panel {{
400
+ background: var(--bg-secondary);
401
+ border: 1px solid var(--border-color);
402
+ border-radius: 12px;
403
+ margin-bottom: 16px;
404
+ }}
405
+ .comparison-header {{
406
+ display: flex;
407
+ align-items: center;
408
+ gap: 16px;
409
+ padding: 12px 18px;
410
+ border-bottom: 1px solid var(--border-color);
411
+ flex-wrap: wrap;
412
+ }}
413
+ .comparison-panel h2 {{ font-size: 0.9rem; font-weight: 600; margin: 0; }}
414
+ .comparison-content {{
415
+ padding: 14px 18px;
416
+ display: grid;
417
+ grid-template-columns: 1fr 1fr auto;
418
+ gap: 16px;
419
+ align-items: start;
420
+ }}
421
+ .action-box {{ padding: 12px; border-radius: 8px; }}
422
+ .action-box.human {{
423
+ background: rgba(0, 212, 170, 0.1);
424
+ border: 1px solid rgba(0, 212, 170, 0.3);
425
+ }}
426
+ .action-box.predicted {{
427
+ background: rgba(167, 139, 250, 0.1);
428
+ border: 1px solid rgba(167, 139, 250, 0.3);
429
+ }}
430
+ .action-box.predicted.disabled {{ opacity: 0.5; }}
431
+ .action-label {{
432
+ font-size: 0.75rem;
433
+ text-transform: uppercase;
434
+ letter-spacing: 0.05em;
435
+ color: var(--text-muted);
436
+ margin-bottom: 6px;
437
+ }}
438
+ .action-details {{ font-family: "SF Mono", Monaco, monospace; font-size: 0.85rem; }}
439
+ .match-indicator {{
440
+ text-align: center;
441
+ padding: 8px;
442
+ border-radius: 6px;
443
+ font-weight: 600;
444
+ min-width: 80px;
445
+ }}
446
+ .match-indicator.match {{ background: rgba(52, 211, 153, 0.2); color: #34d399; }}
447
+ .match-indicator.mismatch {{ background: rgba(255, 95, 95, 0.2); color: #ff5f5f; }}
448
+ .match-indicator.pending {{ background: var(--bg-tertiary); color: var(--text-muted); }}
449
+ .metrics-summary {{
450
+ display: flex;
451
+ gap: 16px;
452
+ padding: 6px 12px;
453
+ background: var(--bg-tertiary);
454
+ border-radius: 6px;
455
+ }}
456
+ .metric-item {{ display: flex; align-items: center; gap: 6px; }}
457
+ .metric-value {{ font-size: 0.9rem; font-weight: 600; color: var(--accent); }}
458
+ .metric-label {{ font-size: 0.7rem; color: var(--text-muted); text-transform: uppercase; }}
459
+ .overlay-toggles {{ display: flex; gap: 6px; margin-left: auto; }}
460
+ .toggle-btn {{
461
+ padding: 6px 12px;
462
+ border: 1px solid var(--border-color);
463
+ background: var(--bg-tertiary);
464
+ color: var(--text-primary);
465
+ border-radius: 6px;
466
+ cursor: pointer;
467
+ font-size: 0.75rem;
468
+ }}
469
+ .toggle-btn.active {{ background: var(--accent); color: var(--bg-primary); border-color: var(--accent); }}
470
+ .main-content {{ display: grid; grid-template-columns: 1fr 340px; gap: 24px; }}
471
+ .viewer-section {{
472
+ background: var(--bg-secondary);
473
+ border: 1px solid var(--border-color);
474
+ border-radius: 12px;
475
+ overflow: hidden;
476
+ }}
477
+ .frame-container {{
478
+ position: relative;
479
+ background: #000;
480
+ display: flex;
481
+ align-items: center;
482
+ justify-content: center;
483
+ min-height: 420px;
484
+ }}
485
+ .frame-container img {{ max-width: 100%; max-height: 70vh; object-fit: contain; }}
486
+ .click-marker {{
487
+ position: absolute;
488
+ width: 30px;
489
+ height: 30px;
490
+ border-radius: 50%;
491
+ transform: translate(-50%, -50%);
492
+ display: flex;
493
+ align-items: center;
494
+ justify-content: center;
495
+ font-size: 12px;
496
+ font-weight: bold;
497
+ pointer-events: none;
498
+ z-index: 100;
499
+ }}
500
+ .click-marker.human {{
501
+ background: rgba(0, 212, 170, 0.3);
502
+ border: 3px solid #00d4aa;
503
+ color: #00d4aa;
504
+ }}
505
+ .click-marker.predicted {{
506
+ background: rgba(167, 139, 250, 0.3);
507
+ border: 3px solid #a78bfa;
508
+ color: #a78bfa;
509
+ }}
510
+ .click-marker.human::after {{ content: 'H'; }}
511
+ .click-marker.predicted::after {{ content: 'AI'; font-size: 10px; }}
512
+ .sidebar {{
513
+ display: flex;
514
+ flex-direction: column;
515
+ gap: 16px;
516
+ }}
517
+ .step-list {{
518
+ background: var(--bg-secondary);
519
+ border: 1px solid var(--border-color);
520
+ border-radius: 12px;
521
+ max-height: 500px;
522
+ overflow-y: auto;
523
+ }}
524
+ .step-item {{
525
+ padding: 12px 16px;
526
+ border-bottom: 1px solid var(--border-color);
527
+ cursor: pointer;
528
+ transition: background 0.2s;
529
+ }}
530
+ .step-item:hover {{ background: var(--bg-tertiary); }}
531
+ .step-item.active {{ background: var(--accent-dim); border-left: 3px solid var(--accent); }}
532
+ .step-index {{ font-weight: 600; color: var(--accent); }}
533
+ .step-action {{ font-size: 0.85rem; color: var(--text-secondary); }}
534
+ .eval-badges {{
535
+ display: flex;
536
+ gap: 4px;
537
+ margin-top: 4px;
538
+ }}
539
+ .eval-badge {{
540
+ display: inline-flex;
541
+ align-items: center;
542
+ gap: 2px;
543
+ padding: 2px 6px;
544
+ border-radius: 4px;
545
+ font-size: 0.65rem;
546
+ font-weight: 600;
547
+ }}
548
+ .eval-badge.correct {{
549
+ background: rgba(52, 211, 153, 0.2);
550
+ color: #34d399;
551
+ }}
552
+ .eval-badge.incorrect {{
553
+ background: rgba(255, 95, 95, 0.2);
554
+ color: #ff5f5f;
555
+ }}
556
+ .eval-badge .epoch {{
557
+ opacity: 0.7;
558
+ }}
559
+ .playback-controls {{
560
+ display: flex;
561
+ gap: 8px;
562
+ padding: 12px;
563
+ background: var(--bg-secondary);
564
+ border: 1px solid var(--border-color);
565
+ border-radius: 8px;
566
+ flex-wrap: wrap;
567
+ align-items: center;
568
+ }}
569
+ .playback-btn {{
570
+ padding: 8px 12px;
571
+ border: 1px solid var(--border-color);
572
+ background: var(--bg-tertiary);
573
+ color: var(--text-primary);
574
+ border-radius: 6px;
575
+ cursor: pointer;
576
+ font-size: 0.85rem;
577
+ min-width: 40px;
578
+ text-align: center;
579
+ }}
580
+ .playback-btn:hover {{ border-color: var(--accent); }}
581
+ .playback-btn.active {{ background: var(--accent); color: var(--bg-primary); border-color: var(--accent); }}
582
+ .playback-btn.primary {{ flex: 1; min-width: 60px; }}
583
+ .speed-control {{
584
+ display: flex;
585
+ align-items: center;
586
+ gap: 6px;
587
+ margin-left: auto;
588
+ }}
589
+ .speed-control label {{
590
+ font-size: 0.7rem;
591
+ color: var(--text-muted);
592
+ text-transform: uppercase;
593
+ }}
594
+ .speed-control select {{
595
+ padding: 4px 8px;
596
+ border-radius: 4px;
597
+ background: var(--bg-tertiary);
598
+ color: var(--text-primary);
599
+ border: 1px solid var(--border-color);
600
+ font-size: 0.8rem;
601
+ cursor: pointer;
602
+ }}
603
+ .progress-bar {{
604
+ width: 100%;
605
+ height: 4px;
606
+ background: var(--bg-tertiary);
607
+ border-radius: 2px;
608
+ margin-top: 8px;
609
+ overflow: hidden;
610
+ cursor: pointer;
611
+ }}
612
+ .progress-bar .progress {{
613
+ height: 100%;
614
+ background: var(--accent);
615
+ transition: width 0.1s ease;
616
+ }}
617
+ .timeline {{
618
+ width: 100%;
619
+ height: 32px;
620
+ background: var(--bg-tertiary);
621
+ border-radius: 6px;
622
+ margin-top: 12px;
623
+ position: relative;
624
+ cursor: pointer;
625
+ overflow: hidden;
626
+ }}
627
+ .timeline-segments {{
628
+ position: absolute;
629
+ top: 0;
630
+ left: 0;
631
+ right: 0;
632
+ height: 16px;
633
+ display: flex;
634
+ }}
635
+ .timeline-segment {{
636
+ height: 100%;
637
+ background: rgba(0, 212, 170, 0.2);
638
+ border-right: 1px solid var(--bg-secondary);
639
+ transition: background 0.15s;
640
+ position: relative;
641
+ }}
642
+ .timeline-segment:hover {{
643
+ background: rgba(0, 212, 170, 0.4);
644
+ }}
645
+ .timeline-segment.active {{
646
+ background: rgba(0, 212, 170, 0.5);
647
+ }}
648
+ .timeline-segment-tooltip {{
649
+ position: absolute;
650
+ bottom: 100%;
651
+ left: 50%;
652
+ transform: translateX(-50%);
653
+ background: var(--bg-primary);
654
+ border: 1px solid var(--border-color);
655
+ padding: 4px 8px;
656
+ border-radius: 4px;
657
+ font-size: 0.7rem;
658
+ white-space: nowrap;
659
+ opacity: 0;
660
+ pointer-events: none;
661
+ transition: opacity 0.15s;
662
+ z-index: 100;
663
+ }}
664
+ .timeline-segment:hover .timeline-segment-tooltip {{
665
+ opacity: 1;
666
+ }}
667
+ .timeline-markers {{
668
+ position: absolute;
669
+ bottom: 0;
670
+ left: 0;
671
+ right: 0;
672
+ height: 16px;
673
+ }}
674
+ .timeline-marker {{
675
+ position: absolute;
676
+ bottom: 2px;
677
+ width: 8px;
678
+ height: 8px;
679
+ border-radius: 50%;
680
+ transform: translateX(-50%);
681
+ border: 2px solid var(--bg-primary);
682
+ cursor: pointer;
683
+ transition: transform 0.1s;
684
+ }}
685
+ .timeline-marker:hover {{
686
+ transform: translateX(-50%) scale(1.3);
687
+ }}
688
+ .timeline-marker.click {{ background: #ff5f5f; }}
689
+ .timeline-marker.double_click {{ background: #ff5f5f; }}
690
+ .timeline-marker.type {{ background: #34d399; }}
691
+ .timeline-marker.scroll {{ background: #a78bfa; }}
692
+ .timeline-marker.drag {{ background: #00d4aa; }}
693
+ .timeline-marker.done {{ background: #888; }}
694
+ .timeline-marker.active {{
695
+ box-shadow: 0 0 8px currentColor;
696
+ }}
697
+ .timeline-playhead {{
698
+ position: absolute;
699
+ top: 0;
700
+ bottom: 0;
701
+ width: 2px;
702
+ background: var(--accent);
703
+ transform: translateX(-50%);
704
+ pointer-events: none;
705
+ z-index: 10;
706
+ }}
707
+ .timeline-playhead::after {{
708
+ content: '';
709
+ position: absolute;
710
+ top: -4px;
711
+ left: 50%;
712
+ transform: translateX(-50%);
713
+ width: 0;
714
+ height: 0;
715
+ border-left: 5px solid transparent;
716
+ border-right: 5px solid transparent;
717
+ border-top: 6px solid var(--accent);
718
+ }}
719
+ .details-panel {{
720
+ background: var(--bg-secondary);
721
+ border: 1px solid var(--border-color);
722
+ border-radius: 12px;
723
+ margin-top: 16px;
724
+ }}
725
+ .details-header {{
726
+ display: flex;
727
+ justify-content: space-between;
728
+ align-items: center;
729
+ padding: 12px 16px;
730
+ border-bottom: 1px solid var(--border-color);
731
+ }}
732
+ .details-content {{
733
+ padding: 12px 16px;
734
+ font-size: 0.82rem;
735
+ max-height: 400px;
736
+ overflow-y: auto;
737
+ }}
738
+ .detail-row {{
739
+ display: flex;
740
+ margin-bottom: 6px;
741
+ }}
742
+ .detail-key {{
743
+ color: var(--text-muted);
744
+ min-width: 70px;
745
+ font-size: 0.75rem;
746
+ text-transform: uppercase;
747
+ }}
748
+ .detail-value {{
749
+ font-family: "SF Mono", Monaco, monospace;
750
+ color: var(--text-secondary);
751
+ }}
752
+ .copy-btn {{
753
+ background: var(--bg-tertiary);
754
+ border: 1px solid var(--border-color);
755
+ color: var(--text-secondary);
756
+ padding: 4px 10px;
757
+ border-radius: 6px;
758
+ cursor: pointer;
759
+ font-size: 0.7rem;
760
+ text-transform: uppercase;
761
+ }}
762
+ .copy-btn:hover {{ background: var(--bg-secondary); color: var(--text-primary); }}
763
+ .copy-btn.copied {{ background: var(--accent-dim); color: var(--accent); border-color: var(--accent); }}
764
+ .cost-panel {{
765
+ background: linear-gradient(135deg, rgba(239, 68, 68, 0.1), rgba(220, 38, 38, 0.05));
766
+ border: 1px solid rgba(239, 68, 68, 0.3);
767
+ border-radius: 8px;
768
+ padding: 12px 16px;
769
+ margin-bottom: 16px;
770
+ display: none;
771
+ }}
772
+ .cost-panel.visible {{ display: flex; }}
773
+ .cost-panel .cost-items {{
774
+ display: flex;
775
+ gap: 24px;
776
+ align-items: center;
777
+ flex: 1;
778
+ }}
779
+ .cost-panel .cost-item {{
780
+ display: flex;
781
+ flex-direction: column;
782
+ gap: 2px;
783
+ }}
784
+ .cost-panel .cost-label {{
785
+ font-size: 0.7rem;
786
+ color: var(--text-secondary);
787
+ text-transform: uppercase;
788
+ letter-spacing: 0.05em;
789
+ }}
790
+ .cost-panel .cost-value {{
791
+ font-size: 1.1rem;
792
+ font-weight: 600;
793
+ color: #ef4444;
794
+ font-family: "SF Mono", Monaco, monospace;
795
+ }}
796
+ .cost-panel .cost-info {{
797
+ font-size: 0.75rem;
798
+ color: var(--text-muted);
799
+ margin-left: auto;
800
+ }}
801
+ .timestamp-info-panel {{
802
+ background: var(--bg-secondary);
803
+ border: 1px solid var(--border-color);
804
+ border-radius: 8px;
805
+ padding: 10px 16px;
806
+ margin-bottom: 16px;
807
+ display: flex;
808
+ gap: 24px;
809
+ align-items: center;
810
+ flex-wrap: wrap;
811
+ }}
812
+ .timestamp-item {{
813
+ display: flex;
814
+ flex-direction: column;
815
+ gap: 2px;
816
+ }}
817
+ .timestamp-label {{
818
+ font-size: 0.7rem;
819
+ color: var(--text-muted);
820
+ text-transform: uppercase;
821
+ letter-spacing: 0.05em;
822
+ }}
823
+ .timestamp-value {{
824
+ font-size: 0.85rem;
825
+ color: var(--text-secondary);
826
+ font-family: "SF Mono", Monaco, monospace;
827
+ }}
828
+ .transcript-panel {{
829
+ background: var(--bg-secondary);
830
+ border: 1px solid var(--border-color);
831
+ border-radius: 12px;
832
+ }}
833
+ .transcript-header {{
834
+ display: flex;
835
+ justify-content: space-between;
836
+ align-items: center;
837
+ padding: 14px 18px;
838
+ border-bottom: 1px solid var(--border-color);
839
+ }}
840
+ .transcript-panel h2 {{
841
+ font-size: 0.9rem;
842
+ font-weight: 600;
843
+ margin: 0;
844
+ }}
845
+ .transcript-follow-btn {{
846
+ background: var(--bg-tertiary);
847
+ border: 1px solid var(--border-color);
848
+ color: var(--text-muted);
849
+ padding: 4px 10px;
850
+ border-radius: 6px;
851
+ cursor: pointer;
852
+ font-size: 0.7rem;
853
+ transition: all 0.2s;
854
+ }}
855
+ .transcript-follow-btn:hover {{
856
+ border-color: var(--accent);
857
+ color: var(--text-secondary);
858
+ }}
859
+ .transcript-follow-btn.active {{
860
+ background: var(--accent-dim);
861
+ border-color: var(--accent);
862
+ color: var(--accent);
863
+ }}
864
+ .transcript-content {{
865
+ padding: 14px 18px;
866
+ font-size: 0.85rem;
867
+ line-height: 1.9;
868
+ color: var(--text-secondary);
869
+ max-height: 150px;
870
+ overflow-y: auto;
871
+ }}
872
+ .transcript-segment {{
873
+ display: inline;
874
+ cursor: pointer;
875
+ padding: 2px 6px;
876
+ border-radius: 4px;
877
+ transition: all 0.15s ease;
878
+ }}
879
+ .transcript-segment:hover {{
880
+ background: var(--bg-tertiary);
881
+ color: var(--text-primary);
882
+ }}
883
+ .transcript-segment.active {{
884
+ background: var(--accent-dim);
885
+ color: var(--accent);
886
+ }}
887
+ .transcript-time {{
888
+ color: var(--text-muted);
889
+ font-size: 0.7rem;
890
+ font-family: "SF Mono", Monaco, monospace;
891
+ margin-right: 4px;
892
+ }}
893
+ .transcript-empty {{
894
+ color: var(--text-muted);
895
+ font-style: italic;
896
+ text-align: center;
897
+ padding: 16px;
898
+ }}
899
+ .step-list-header {{
900
+ display: flex;
901
+ justify-content: space-between;
902
+ align-items: center;
903
+ padding: 12px 16px;
904
+ border-bottom: 1px solid var(--border-color);
905
+ }}
906
+ .step-list-header h3 {{
907
+ font-size: 0.85rem;
908
+ font-weight: 600;
909
+ margin: 0;
910
+ }}
911
+ .copy-all-btn {{
912
+ background: var(--bg-tertiary);
913
+ border: 1px solid var(--border-color);
914
+ color: var(--text-secondary);
915
+ padding: 4px 10px;
916
+ border-radius: 6px;
917
+ cursor: pointer;
918
+ font-size: 0.7rem;
919
+ text-transform: uppercase;
920
+ }}
921
+ .copy-all-btn:hover {{ background: var(--bg-secondary); color: var(--text-primary); }}
922
+ .copy-all-btn.copied {{ background: var(--accent-dim); color: var(--accent); border-color: var(--accent); }}
923
+
924
+ /* Gallery Panel (compact - shown below the frame in the viewer section) */
925
+ .gallery-panel {{
926
+ background: var(--bg-secondary);
927
+ border: 1px solid var(--border-color);
928
+ border-radius: 12px;
929
+ margin-top: 16px;
930
+ }}
931
+ .gallery-panel-header {{
932
+ display: flex;
933
+ justify-content: space-between;
934
+ align-items: center;
935
+ padding: 12px 16px;
936
+ border-bottom: 1px solid var(--border-color);
937
+ cursor: pointer;
938
+ }}
939
+ .gallery-panel-controls {{
940
+ display: flex;
941
+ gap: 8px;
942
+ }}
943
+ .gallery-maximize-btn, .gallery-collapse-btn, .gallery-close-btn {{
944
+ background: var(--bg-tertiary);
945
+ border: 1px solid var(--border-color);
946
+ color: var(--text-secondary);
947
+ padding: 4px 8px;
948
+ border-radius: 4px;
949
+ cursor: pointer;
950
+ font-size: 0.75rem;
951
+ }}
952
+ .gallery-maximize-btn:hover, .gallery-collapse-btn:hover, .gallery-close-btn:hover {{
953
+ background: var(--bg-secondary);
954
+ color: var(--text-primary);
955
+ }}
956
+ .gallery-panel-content {{
957
+ padding: 12px;
958
+ max-height: 300px;
959
+ overflow-y: auto;
960
+ }}
961
+ .gallery-panel.collapsed .gallery-panel-content {{
962
+ display: none;
963
+ }}
964
+ .gallery-panel.collapsed .gallery-collapse-btn {{
965
+ transform: rotate(-90deg);
966
+ }}
967
+ .gallery-filters-compact {{
968
+ display: flex;
969
+ gap: 8px;
970
+ align-items: center;
971
+ margin-bottom: 12px;
972
+ flex-wrap: wrap;
973
+ }}
974
+ .gallery-filter-select {{
975
+ padding: 4px 8px;
976
+ border-radius: 4px;
977
+ background: var(--bg-tertiary);
978
+ color: var(--text-primary);
979
+ border: 1px solid var(--border-color);
980
+ font-size: 0.75rem;
981
+ cursor: pointer;
982
+ }}
983
+ .gallery-grid-compact {{
984
+ display: grid;
985
+ grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
986
+ gap: 12px;
987
+ }}
988
+ .gallery-card {{
989
+ background: var(--bg-tertiary);
990
+ border-radius: 6px;
991
+ overflow: hidden;
992
+ border: 1px solid var(--border-color);
993
+ cursor: pointer;
994
+ transition: border-color 0.2s;
995
+ }}
996
+ .gallery-card:hover {{
997
+ border-color: var(--accent);
998
+ }}
999
+ .gallery-card.hidden {{
1000
+ display: none;
1001
+ }}
1002
+ .gallery-card .image-wrapper {{
1003
+ position: relative;
1004
+ background: #000;
1005
+ min-height: 80px;
1006
+ display: flex;
1007
+ align-items: center;
1008
+ justify-content: center;
1009
+ }}
1010
+ .gallery-card img {{
1011
+ width: 100%;
1012
+ height: auto;
1013
+ max-height: 120px;
1014
+ object-fit: contain;
1015
+ }}
1016
+ .gallery-card .overlay {{
1017
+ position: absolute;
1018
+ top: 0;
1019
+ left: 0;
1020
+ right: 0;
1021
+ bottom: 0;
1022
+ pointer-events: none;
1023
+ }}
1024
+ .gallery-card .marker {{
1025
+ position: absolute;
1026
+ width: 16px;
1027
+ height: 16px;
1028
+ border-radius: 50%;
1029
+ transform: translate(-50%, -50%);
1030
+ border: 2px solid white;
1031
+ display: flex;
1032
+ align-items: center;
1033
+ justify-content: center;
1034
+ font-size: 7px;
1035
+ font-weight: 700;
1036
+ z-index: 10;
1037
+ }}
1038
+ .gallery-card .marker.human {{
1039
+ background: rgba(0, 212, 170, 0.5);
1040
+ border-color: #00d4aa;
1041
+ color: #00d4aa;
1042
+ }}
1043
+ .gallery-card .marker.predicted {{
1044
+ background: rgba(167, 139, 250, 0.5);
1045
+ border-color: #a78bfa;
1046
+ color: #a78bfa;
1047
+ }}
1048
+ .gallery-card .card-content {{
1049
+ padding: 8px;
1050
+ }}
1051
+ .gallery-card .card-header {{
1052
+ display: flex;
1053
+ justify-content: space-between;
1054
+ align-items: center;
1055
+ font-size: 0.7rem;
1056
+ }}
1057
+ .gallery-card .step-num {{
1058
+ font-weight: 600;
1059
+ color: var(--text-primary);
1060
+ }}
1061
+ .gallery-card .status {{
1062
+ font-size: 0.65rem;
1063
+ font-weight: 600;
1064
+ }}
1065
+ .gallery-card .status.correct {{
1066
+ color: #34d399;
1067
+ }}
1068
+ .gallery-card .status.incorrect {{
1069
+ color: #ff5f5f;
1070
+ }}
1071
+ .gallery-empty {{
1072
+ text-align: center;
1073
+ padding: 20px;
1074
+ color: var(--text-muted);
1075
+ font-size: 0.8rem;
1076
+ }}
1077
+
1078
+ /* Gallery Maximized Overlay */
1079
+ .gallery-maximized-overlay {{
1080
+ display: none;
1081
+ position: fixed;
1082
+ top: 0;
1083
+ left: 0;
1084
+ right: 0;
1085
+ bottom: 0;
1086
+ background: rgba(0, 0, 0, 0.9);
1087
+ z-index: 1000;
1088
+ padding: 20px;
1089
+ overflow-y: auto;
1090
+ }}
1091
+ .gallery-maximized-overlay.active {{
1092
+ display: block;
1093
+ }}
1094
+ .gallery-maximized-content {{
1095
+ max-width: 1600px;
1096
+ margin: 0 auto;
1097
+ }}
1098
+ .gallery-maximized-header {{
1099
+ display: flex;
1100
+ justify-content: space-between;
1101
+ align-items: center;
1102
+ margin-bottom: 20px;
1103
+ padding-bottom: 16px;
1104
+ border-bottom: 1px solid var(--border-color);
1105
+ }}
1106
+ .gallery-maximized-controls {{
1107
+ display: flex;
1108
+ gap: 12px;
1109
+ align-items: center;
1110
+ }}
1111
+ .gallery-grid-maximized {{
1112
+ display: grid;
1113
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
1114
+ gap: 20px;
1115
+ }}
1116
+ .gallery-grid-maximized .gallery-card .image-wrapper {{
1117
+ min-height: 150px;
1118
+ }}
1119
+ .gallery-grid-maximized .gallery-card img {{
1120
+ max-height: 250px;
1121
+ }}
1122
+ .gallery-grid-maximized .gallery-card .marker {{
1123
+ width: 22px;
1124
+ height: 22px;
1125
+ font-size: 9px;
1126
+ }}
1127
+ .gallery-grid-maximized .gallery-card .card-content {{
1128
+ padding: 12px;
1129
+ }}
1130
+ .gallery-grid-maximized .gallery-card .card-header {{
1131
+ font-size: 0.85rem;
1132
+ margin-bottom: 8px;
1133
+ padding-bottom: 8px;
1134
+ border-bottom: 1px solid var(--border-color);
1135
+ }}
1136
+ .gallery-grid-maximized .gallery-card .card-details {{
1137
+ font-size: 0.75rem;
1138
+ color: var(--text-secondary);
1139
+ }}
1140
+ .gallery-grid-maximized .gallery-card .coord-row {{
1141
+ display: flex;
1142
+ justify-content: space-between;
1143
+ margin-bottom: 4px;
1144
+ }}
1145
+ .gallery-grid-maximized .gallery-card .coord-human {{
1146
+ color: #34d399;
1147
+ }}
1148
+ .gallery-grid-maximized .gallery-card .coord-pred {{
1149
+ color: #a78bfa;
1150
+ }}
1151
+ </style>
1152
+ </head>
1153
+ <body>
1154
+ {shared_header_html}
1155
+
1156
+ <div class="container">
1157
+ <div class="viewer-controls">
1158
+ <div class="control-group">
1159
+ <span class="control-label">Training Example:</span>
1160
+ <select class="control-select" id="capture-select"></select>
1161
+ <span class="control-hint" id="capture-hint"></span>
1162
+ </div>
1163
+ <div class="control-group">
1164
+ <span class="control-label">Checkpoint:</span>
1165
+ <select class="control-select" id="checkpoint-select"></select>
1166
+ </div>
1167
+ </div>
1168
+
1169
+ <div class="cost-panel" id="cost-panel">
1170
+ <div class="cost-items">
1171
+ <div class="cost-item">
1172
+ <div class="cost-label">Running Cost</div>
1173
+ <div class="cost-value" id="cost-running">$0.00</div>
1174
+ </div>
1175
+ <div class="cost-item">
1176
+ <div class="cost-label">Total Cost</div>
1177
+ <div class="cost-value" id="cost-total">$0.00</div>
1178
+ </div>
1179
+ <div class="cost-info" id="cost-info"></div>
1180
+ </div>
1181
+ </div>
1182
+
1183
+ <div class="timestamp-info-panel" id="timestamp-info-panel">
1184
+ <div class="timestamp-item">
1185
+ <div class="timestamp-label">Generated</div>
1186
+ <div class="timestamp-value" id="timestamp-generated">--</div>
1187
+ </div>
1188
+ <div class="timestamp-item">
1189
+ <div class="timestamp-label">Data From</div>
1190
+ <div class="timestamp-value" id="timestamp-data-from">--</div>
1191
+ </div>
1192
+ <div class="timestamp-item">
1193
+ <div class="timestamp-label">Capture</div>
1194
+ <div class="timestamp-value" id="timestamp-capture-path">--</div>
1195
+ </div>
1196
+ <div class="timestamp-item">
1197
+ <div class="timestamp-label">Capture Modified</div>
1198
+ <div class="timestamp-value" id="timestamp-capture-modified">--</div>
1199
+ </div>
1200
+ </div>
1201
+
1202
+ <div class="comparison-panel">
1203
+ <div class="comparison-header">
1204
+ <h2>Action Comparison</h2>
1205
+ <div class="metrics-summary" id="metrics-summary"></div>
1206
+ <div class="overlay-toggles" id="overlay-toggles"></div>
1207
+ </div>
1208
+ <div class="comparison-content">
1209
+ <div class="action-box human">
1210
+ <div class="action-label">Human Action</div>
1211
+ <div class="action-details" id="human-action"></div>
1212
+ </div>
1213
+ <div class="action-box predicted" id="predicted-box">
1214
+ <div class="action-label">Model Prediction</div>
1215
+ <div class="action-details" id="predicted-action"></div>
1216
+ </div>
1217
+ <div class="match-indicator" id="match-indicator"></div>
1218
+ </div>
1219
+ </div>
1220
+
1221
+ <div class="main-content" id="main-content">
1222
+ <div class="viewer-section">
1223
+ <div class="frame-container" id="frame-container">
1224
+ <img id="frame-image" src="" alt="Screenshot">
1225
+ <div id="image-placeholder" style="display:none;flex-direction:column;align-items:center;justify-content:center;min-height:300px;width:100%;"></div>
1226
+ </div>
1227
+ <div class="gallery-panel" id="gallery-panel">
1228
+ <div class="gallery-panel-header">
1229
+ <span style="font-size:0.9rem;font-weight:600;">Evaluation Gallery</span>
1230
+ <div class="gallery-panel-controls">
1231
+ <button class="gallery-maximize-btn" id="gallery-maximize-btn" title="Maximize gallery">⤢</button>
1232
+ <button class="gallery-collapse-btn" id="gallery-collapse-btn" title="Collapse">▼</button>
1233
+ </div>
1234
+ </div>
1235
+ <div class="gallery-panel-content" id="gallery-panel-content">
1236
+ <div class="gallery-filters-compact">
1237
+ <select class="gallery-filter-select" id="gallery-epoch-filter">
1238
+ <option value="all">All Epochs</option>
1239
+ </select>
1240
+ <select class="gallery-filter-select" id="gallery-status-filter">
1241
+ <option value="all">All</option>
1242
+ <option value="correct">Correct</option>
1243
+ <option value="incorrect">Incorrect</option>
1244
+ </select>
1245
+ <span class="control-hint" id="gallery-count">0 samples</span>
1246
+ <span class="control-hint" style="margin-left:auto;opacity:0.7;" title="S = Step (capture step index), E = Epoch (training epoch)">S=Step E=Epoch</span>
1247
+ </div>
1248
+ <div class="gallery-grid-compact" id="gallery-grid"></div>
1249
+ <div class="gallery-empty" id="gallery-empty" style="display:none;">
1250
+ No evaluations available.
1251
+ </div>
1252
+ </div>
1253
+ </div>
1254
+ </div>
1255
+ <div class="sidebar">
1256
+ <div class="playback-controls">
1257
+ <button class="playback-btn" id="rewind-btn" title="Rewind (Home)">⏮</button>
1258
+ <button class="playback-btn" id="prev-btn" title="Previous (←)">◀</button>
1259
+ <button class="playback-btn primary" id="play-btn" title="Play/Pause (Space)">▶ Play</button>
1260
+ <button class="playback-btn" id="next-btn" title="Next (→)">▶</button>
1261
+ <button class="playback-btn" id="end-btn" title="End (End)">⏭</button>
1262
+ <div class="speed-control">
1263
+ <label>Speed</label>
1264
+ <select id="speed-select">
1265
+ <option value="2000">0.5x</option>
1266
+ <option value="1000" selected>1x</option>
1267
+ <option value="500">2x</option>
1268
+ <option value="250">4x</option>
1269
+ </select>
1270
+ </div>
1271
+ <div class="progress-bar" id="progress-bar">
1272
+ <div class="progress" id="progress"></div>
1273
+ </div>
1274
+ <div class="timeline" id="timeline">
1275
+ <div class="timeline-segments" id="timeline-segments"></div>
1276
+ <div class="timeline-markers" id="timeline-markers"></div>
1277
+ <div class="timeline-playhead" id="timeline-playhead" style="left: 0%"></div>
1278
+ </div>
1279
+ </div>
1280
+ <div class="step-list" id="step-list">
1281
+ <div class="step-list-header">
1282
+ <h3>Steps</h3>
1283
+ <button class="copy-all-btn" id="copy-all-btn">Copy All</button>
1284
+ </div>
1285
+ <div id="step-list-items"></div>
1286
+ </div>
1287
+ <div class="transcript-panel" id="transcript-panel">
1288
+ <div class="transcript-header">
1289
+ <h2>Transcript</h2>
1290
+ <button class="transcript-follow-btn active" id="transcript-follow-btn" title="Auto-scroll to active segment">Follow</button>
1291
+ </div>
1292
+ <div class="transcript-content" id="transcript-content"></div>
1293
+ </div>
1294
+ <audio id="audio" style="display:none;"></audio>
1295
+ <div class="details-panel" id="details-panel">
1296
+ <div class="details-header">
1297
+ <span style="font-size:0.9rem;font-weight:600;">Step Details</span>
1298
+ <button class="copy-btn" id="copy-btn">Copy</button>
1299
+ </div>
1300
+ <div class="details-content" id="details-content"></div>
1301
+ </div>
1302
+ </div>
1303
+ </div>
1304
+
1305
+ <div class="gallery-maximized-overlay" id="gallery-maximized-overlay">
1306
+ <div class="gallery-maximized-content">
1307
+ <div class="gallery-maximized-header">
1308
+ <h2 style="margin:0;font-size:1.1rem;">Evaluation Gallery</h2>
1309
+ <div class="gallery-maximized-controls">
1310
+ <select class="gallery-filter-select" id="gallery-epoch-filter-max">
1311
+ <option value="all">All Epochs</option>
1312
+ </select>
1313
+ <select class="gallery-filter-select" id="gallery-status-filter-max">
1314
+ <option value="all">All</option>
1315
+ <option value="correct">Correct</option>
1316
+ <option value="incorrect">Incorrect</option>
1317
+ </select>
1318
+ <span class="control-hint" id="gallery-count-max">0 samples</span>
1319
+ <span class="control-hint" style="border-left:1px solid var(--border-color);padding-left:12px;" title="S = Step (capture step index), E = Epoch (training epoch)">S = Step, E = Epoch</span>
1320
+ <button class="gallery-close-btn" id="gallery-close-btn" title="Close">✕</button>
1321
+ </div>
1322
+ </div>
1323
+ <div class="gallery-grid-maximized" id="gallery-grid-max"></div>
1324
+ </div>
1325
+ </div>
1326
+ </div>
1327
+
1328
+ <script>
1329
+ const baseData = {base_data_json};
1330
+ const predictionsByCheckpoint = {predictions_json};
1331
+ const evaluations = {evaluations_json};
1332
+ const availableCaptures = {captures_json};
1333
+ const currentCaptureId = {current_capture_json};
1334
+ const captureModifiedTime = {capture_modified_time_json};
1335
+
1336
+ // Build evaluation map: step_idx -> [{{epoch, correct, distance}}]
1337
+ const evalByStep = {{}};
1338
+ evaluations.forEach(ev => {{
1339
+ const idx = ev.sample_idx;
1340
+ if (!evalByStep[idx]) evalByStep[idx] = [];
1341
+ evalByStep[idx].push({{
1342
+ epoch: ev.epoch,
1343
+ correct: ev.correct,
1344
+ distance: ev.distance
1345
+ }});
1346
+ }});
1347
+
1348
+ let currentIndex = 0;
1349
+ let currentCheckpoint = 'None';
1350
+ let showHumanOverlay = true;
1351
+ let showPredictedOverlay = true;
1352
+ let isPlaying = false;
1353
+ let playInterval = null;
1354
+ let playSpeed = 1000; // ms per step
1355
+
1356
+ // Cloud cost tracking
1357
+ const COST_RATES = {{
1358
+ 'gpu_1x_a10': 0.75, // Lambda Labs A10
1359
+ 'gpu_8x_a100': 1.29, // Lambda Labs A100 (per GPU)
1360
+ 'a10': 0.75, // Generic A10
1361
+ 'a100': 1.29, // Generic A100
1362
+ }};
1363
+
1364
+ function getHourlyRate(instanceType) {{
1365
+ if (!instanceType) return 0;
1366
+ // Try exact match first
1367
+ const lowerType = instanceType.toLowerCase();
1368
+ if (COST_RATES[lowerType]) {{
1369
+ return COST_RATES[lowerType];
1370
+ }}
1371
+ // Try partial match
1372
+ if (lowerType.includes('a100')) return COST_RATES['a100'];
1373
+ if (lowerType.includes('a10')) return COST_RATES['a10'];
1374
+ // Default to A10 rate
1375
+ return COST_RATES['a10'];
1376
+ }}
1377
+
1378
+ async function loadAndDisplayCosts() {{
1379
+ try {{
1380
+ const response = await fetch('training_log.json?t=' + Date.now());
1381
+ if (!response.ok) return;
1382
+
1383
+ const data = await response.json();
1384
+ const instanceType = data.instance_type || '';
1385
+
1386
+ // Only show costs for actual cloud training (not stub/local)
1387
+ if (!instanceType || instanceType === '' || instanceType === 'stub') {{
1388
+ document.getElementById('cost-panel').style.display = 'none';
1389
+ return;
1390
+ }}
1391
+
1392
+ const hourlyRate = getHourlyRate(instanceType);
1393
+ const elapsedTime = data.elapsed_time || 0;
1394
+ const elapsedHours = elapsedTime / 3600;
1395
+ const totalCost = elapsedHours * hourlyRate;
1396
+
1397
+ // Update display
1398
+ document.getElementById('cost-running').textContent = `$${{totalCost.toFixed(2)}}`;
1399
+ document.getElementById('cost-total').textContent = `$${{totalCost.toFixed(2)}}`;
1400
+ document.getElementById('cost-info').textContent = `${{instanceType}} @ $${{hourlyRate.toFixed(2)}}/hr`;
1401
+ document.getElementById('cost-panel').classList.add('visible');
1402
+ }} catch (e) {{
1403
+ // Silently fail if training_log.json not available
1404
+ console.log('Could not load training costs:', e);
1405
+ }}
1406
+ }}
1407
+
1408
+ async function loadAndDisplayTimestamps() {{
1409
+ try {{
1410
+ const response = await fetch('training_log.json?t=' + Date.now());
1411
+ if (!response.ok) throw new Error('Could not load training_log.json');
1412
+
1413
+ const data = await response.json();
1414
+
1415
+ // Format current timestamp (when viewer was generated)
1416
+ const now = new Date();
1417
+ const generatedTime = now.toLocaleString('en-US', {{
1418
+ year: 'numeric',
1419
+ month: 'short',
1420
+ day: 'numeric',
1421
+ hour: '2-digit',
1422
+ minute: '2-digit'
1423
+ }});
1424
+ document.getElementById('timestamp-generated').textContent = generatedTime;
1425
+
1426
+ // Format training log timestamp
1427
+ if (data.started_at) {{
1428
+ const startedAt = new Date(data.started_at);
1429
+ const dataFromTime = startedAt.toLocaleString('en-US', {{
1430
+ year: 'numeric',
1431
+ month: 'short',
1432
+ day: 'numeric',
1433
+ hour: '2-digit',
1434
+ minute: '2-digit'
1435
+ }});
1436
+ document.getElementById('timestamp-data-from').textContent = dataFromTime;
1437
+ }} else {{
1438
+ document.getElementById('timestamp-data-from').textContent = 'N/A';
1439
+ }}
1440
+
1441
+ // Display capture path (shortened if too long)
1442
+ if (data.capture_path) {{
1443
+ const capturePath = data.capture_path;
1444
+ const pathParts = capturePath.split('/');
1445
+ const captureName = pathParts[pathParts.length - 1];
1446
+ document.getElementById('timestamp-capture-path').textContent = captureName;
1447
+ document.getElementById('timestamp-capture-path').title = capturePath;
1448
+ }} else {{
1449
+ document.getElementById('timestamp-capture-path').textContent = currentCaptureId || 'N/A';
1450
+ }}
1451
+
1452
+ // Display capture modification time (passed from Python)
1453
+ if (captureModifiedTime) {{
1454
+ const modifiedAt = new Date(captureModifiedTime);
1455
+ const modifiedTime = modifiedAt.toLocaleString('en-US', {{
1456
+ year: 'numeric',
1457
+ month: 'short',
1458
+ day: 'numeric',
1459
+ hour: '2-digit',
1460
+ minute: '2-digit'
1461
+ }});
1462
+ document.getElementById('timestamp-capture-modified').textContent = modifiedTime;
1463
+ }} else {{
1464
+ document.getElementById('timestamp-capture-modified').textContent = 'N/A';
1465
+ }}
1466
+ }} catch (e) {{
1467
+ console.log('Could not load timestamps:', e);
1468
+ // Set fallback values
1469
+ document.getElementById('timestamp-generated').textContent = new Date().toLocaleString();
1470
+ document.getElementById('timestamp-data-from').textContent = 'N/A';
1471
+ document.getElementById('timestamp-capture-path').textContent = currentCaptureId || 'N/A';
1472
+ if (captureModifiedTime) {{
1473
+ const modifiedAt = new Date(captureModifiedTime);
1474
+ document.getElementById('timestamp-capture-modified').textContent = modifiedAt.toLocaleString();
1475
+ }} else {{
1476
+ document.getElementById('timestamp-capture-modified').textContent = 'N/A';
1477
+ }}
1478
+ }}
1479
+ }}
1480
+
1481
+ function getMergedData() {{
1482
+ const predictions = predictionsByCheckpoint[currentCheckpoint] || [];
1483
+ return baseData.map((base, i) => {{
1484
+ const pred = predictions[i] || {{}};
1485
+ return {{
1486
+ ...base,
1487
+ predicted_action: pred.predicted_action || null,
1488
+ match: pred.match !== undefined ? pred.match : null,
1489
+ }};
1490
+ }});
1491
+ }}
1492
+
1493
+ function parseModelOutput(rawOutput) {{
1494
+ // Parse model output for structured action commands
1495
+ let action = null;
1496
+ let thinking = '';
1497
+
1498
+ // Try to extract SoM actions: CLICK([N]), TYPE([N], "text"), TYPE("text")
1499
+ const clickSomMatch = rawOutput.match(/CLICK\\s*\\(\\s*\\[\\s*(\\d+)\\s*\\]\\s*\\)/);
1500
+ const typeSomMatch = rawOutput.match(/TYPE\\s*\\(\\s*\\[\\s*(\\d+)\\s*\\]\\s*,\\s*["']([^"']*)["']\\s*\\)/);
1501
+ const typeSimpleMatch = rawOutput.match(/TYPE\\s*\\(\\s*["']([^"']*)["']\\s*\\)/);
1502
+
1503
+ // Try coordinate-based: CLICK(x=0.5, y=0.5)
1504
+ const clickCoordMatch = rawOutput.match(/CLICK\\s*\\(\\s*x\\s*=\\s*([\\d.]+)\\s*,\\s*y\\s*=\\s*([\\d.]+)\\s*\\)/);
1505
+
1506
+ // Try to extract thinking/reasoning
1507
+ const thinkMatch = rawOutput.match(/(?:Thought|Thinking|Reasoning|Analysis):\\s*([\\s\\S]*?)(?:Action:|$)/i);
1508
+ const actionMatch = rawOutput.match(/Action:\\s*([^\\n]+)/i);
1509
+
1510
+ if (thinkMatch) thinking = thinkMatch[1].trim().substring(0, 150);
1511
+
1512
+ if (clickSomMatch) {{
1513
+ action = {{ type: 'click', element: `[${{clickSomMatch[1]}}]` }};
1514
+ }} else if (typeSomMatch) {{
1515
+ action = {{ type: 'type', element: `[${{typeSomMatch[1]}}]`, text: typeSomMatch[2] }};
1516
+ }} else if (typeSimpleMatch) {{
1517
+ action = {{ type: 'type', text: typeSimpleMatch[1] }};
1518
+ }} else if (clickCoordMatch) {{
1519
+ action = {{ type: 'click', x: parseFloat(clickCoordMatch[1]), y: parseFloat(clickCoordMatch[2]) }};
1520
+ }} else if (actionMatch) {{
1521
+ // Extract the action line for cleaner display
1522
+ action = {{ type: 'raw', text: actionMatch[1].trim() }};
1523
+ }}
1524
+
1525
+ // Generate HTML
1526
+ let html = '';
1527
+ if (action) {{
1528
+ if (action.type === 'click' && action.element) {{
1529
+ html = `<div style="font-weight:600;color:var(--accent);">CLICK(${{action.element}})</div>`;
1530
+ }} else if (action.type === 'click' && action.x !== undefined) {{
1531
+ html = `<div style="font-weight:600;color:var(--accent);">CLICK(x=${{action.x.toFixed(2)}}, y=${{action.y.toFixed(2)}})</div>`;
1532
+ }} else if (action.type === 'type') {{
1533
+ const elem = action.element ? `${{action.element}}, ` : '';
1534
+ html = `<div style="font-weight:600;color:var(--accent);">TYPE(${{elem}}"${{action.text}}")</div>`;
1535
+ }} else if (action.type === 'raw') {{
1536
+ html = `<div style="color:var(--accent);">${{action.text}}</div>`;
1537
+ }}
1538
+ if (thinking) {{
1539
+ html += `<div style="font-size:0.8rem;color:var(--text-muted);margin-top:4px;max-height:60px;overflow:hidden;">${{thinking}}...</div>`;
1540
+ }}
1541
+ }} else {{
1542
+ // No parseable action - show truncated raw output
1543
+ const truncated = rawOutput.substring(0, 200).replace(/\\n/g, ' ');
1544
+ html = `<div style="font-size:0.85rem;color:var(--text-muted);max-height:80px;overflow:hidden;">${{truncated}}${{rawOutput.length > 200 ? '...' : ''}}</div>`;
1545
+ }}
1546
+
1547
+ return {{ action, thinking, html }};
1548
+ }}
1549
+
1550
+ function initDropdowns() {{
1551
+ const captureSelect = document.getElementById('capture-select');
1552
+ const checkpointSelect = document.getElementById('checkpoint-select');
1553
+ const captureHint = document.getElementById('capture-hint');
1554
+
1555
+ captureSelect.innerHTML = '';
1556
+ availableCaptures.forEach(cap => {{
1557
+ const opt = document.createElement('option');
1558
+ opt.value = cap.id;
1559
+ opt.textContent = `${{cap.name}} (${{cap.steps}} steps)`;
1560
+ opt.selected = cap.id === currentCaptureId;
1561
+ captureSelect.appendChild(opt);
1562
+ }});
1563
+ captureHint.textContent = `(${{availableCaptures.length}} available)`;
1564
+
1565
+ checkpointSelect.innerHTML = '';
1566
+ const checkpointNames = Object.keys(predictionsByCheckpoint);
1567
+ checkpointNames.sort((a, b) => {{
1568
+ if (a === 'None') return -1;
1569
+ if (b === 'None') return 1;
1570
+ const aNum = parseInt(a.match(/\\d+/)?.[0] || '999');
1571
+ const bNum = parseInt(b.match(/\\d+/)?.[0] || '999');
1572
+ return aNum - bNum;
1573
+ }});
1574
+ checkpointNames.forEach(name => {{
1575
+ const opt = document.createElement('option');
1576
+ opt.value = name;
1577
+ opt.textContent = name === 'None' ? 'None (Capture Only)' : name;
1578
+ checkpointSelect.appendChild(opt);
1579
+ }});
1580
+ const latestCheckpoint = checkpointNames.filter(n => n !== 'None').pop();
1581
+ if (latestCheckpoint) {{
1582
+ checkpointSelect.value = latestCheckpoint;
1583
+ currentCheckpoint = latestCheckpoint;
1584
+ }}
1585
+ checkpointSelect.addEventListener('change', (e) => {{
1586
+ currentCheckpoint = e.target.value;
1587
+ updateMetrics();
1588
+ updateDisplay();
1589
+ }});
1590
+ }}
1591
+
1592
+ function computeMetrics() {{
1593
+ const data = getMergedData();
1594
+ let matches = 0, total = 0;
1595
+ data.forEach(d => {{
1596
+ if (d.match !== null) {{ total++; if (d.match) matches++; }}
1597
+ }});
1598
+ return {{
1599
+ accuracy: total > 0 ? (matches / total * 100).toFixed(1) : 'N/A',
1600
+ total: data.length,
1601
+ hasPredictions: total > 0,
1602
+ }};
1603
+ }}
1604
+
1605
+ function updateMetrics() {{
1606
+ const metricsEl = document.getElementById('metrics-summary');
1607
+ const metrics = computeMetrics();
1608
+ if (!metrics.hasPredictions) {{
1609
+ metricsEl.innerHTML = `<div class="metric-item"><span class="metric-label">Steps:</span><span class="metric-value">${{metrics.total}}</span></div>`;
1610
+ }} else {{
1611
+ metricsEl.innerHTML = `<div class="metric-item"><span class="metric-label">Accuracy:</span><span class="metric-value">${{metrics.accuracy}}%</span></div><div class="metric-item"><span class="metric-label">Steps:</span><span class="metric-value">${{metrics.total}}</span></div>`;
1612
+ }}
1613
+ }}
1614
+
1615
+ function updateDisplay() {{
1616
+ const data = getMergedData()[currentIndex];
1617
+ if (!data) return;
1618
+
1619
+ // Update image - handle both local and remote paths
1620
+ const imgEl = document.getElementById('frame-image');
1621
+ const placeholderEl = document.getElementById('image-placeholder');
1622
+
1623
+ // Check if image path is remote (Lambda Labs path)
1624
+ const imagePath = data.image_path || '';
1625
+ const isRemote = imagePath.startsWith('/home/ubuntu/') || imagePath.startsWith('/root/');
1626
+
1627
+ // Try local screenshots folder first
1628
+ const localPath = isRemote ? 'screenshots/' + imagePath.split('/').pop() : imagePath;
1629
+
1630
+ imgEl.src = localPath;
1631
+ imgEl.style.display = 'block';
1632
+ if (placeholderEl) placeholderEl.style.display = 'none';
1633
+
1634
+ imgEl.onerror = () => {{
1635
+ imgEl.style.display = 'none';
1636
+ if (placeholderEl) {{
1637
+ placeholderEl.style.display = 'flex';
1638
+ placeholderEl.innerHTML = `
1639
+ <div style="text-align:center;padding:40px;color:var(--text-muted);">
1640
+ <div style="font-size:2rem;margin-bottom:12px;">📷</div>
1641
+ <div style="margin-bottom:8px;color:var(--text-secondary);">Screenshots not downloaded</div>
1642
+ <div style="font-size:0.8rem;margin-bottom:12px;">
1643
+ Run: <code style="background:var(--bg-tertiary);padding:4px 8px;border-radius:4px;">uv run python -m openadapt_ml.cloud.lambda_labs rsync remote:/home/ubuntu/capture/screenshots/ training_output/screenshots/</code>
1644
+ </div>
1645
+ <div style="font-size:0.75rem;color:var(--text-muted);">Step ${{currentIndex + 1}} of ${{baseData.length}}</div>
1646
+ </div>
1647
+ `;
1648
+ }}
1649
+ }};
1650
+
1651
+ // Update human action
1652
+ const humanEl = document.getElementById('human-action');
1653
+ humanEl.innerHTML = `<div>Type: ${{data.human_action.type || 'unknown'}}</div>${{data.human_action.x !== null && data.human_action.x !== undefined ? `<div>Position: (${{(data.human_action.x * 100).toFixed(1)}}%, ${{(data.human_action.y * 100).toFixed(1)}}%)</div>` : ''}}${{data.human_action.text ? `<div>Text: ${{data.human_action.text}}</div>` : ''}}`;
1654
+
1655
+ // Update predicted action
1656
+ const predictedEl = document.getElementById('predicted-action');
1657
+ const predictedBox = document.getElementById('predicted-box');
1658
+ const hasPredictions = currentCheckpoint !== 'None';
1659
+ predictedBox.classList.toggle('disabled', !hasPredictions);
1660
+ if (!hasPredictions) {{
1661
+ predictedEl.innerHTML = '<em style="color:var(--text-muted);">Select a checkpoint</em>';
1662
+ }} else if (data.predicted_action) {{
1663
+ const pred = data.predicted_action;
1664
+ if (pred.x !== undefined) {{
1665
+ predictedEl.innerHTML = `<div>Type: ${{pred.type || 'click'}}</div><div>Position: (${{(pred.x * 100).toFixed(1)}}%, ${{(pred.y * 100).toFixed(1)}}%)</div>`;
1666
+ }} else {{
1667
+ // Parse raw_output for actions
1668
+ const rawOutput = pred.raw_output || JSON.stringify(pred);
1669
+ const parsed = parseModelOutput(rawOutput);
1670
+ predictedEl.innerHTML = parsed.html;
1671
+ }}
1672
+ }} else {{
1673
+ predictedEl.innerHTML = '<em style="color:var(--text-muted);">No prediction</em>';
1674
+ }}
1675
+
1676
+ // Update match indicator
1677
+ const matchEl = document.getElementById('match-indicator');
1678
+ if (!hasPredictions) {{
1679
+ matchEl.className = 'match-indicator pending'; matchEl.textContent = '—';
1680
+ }} else if (data.match === true) {{
1681
+ matchEl.className = 'match-indicator match'; matchEl.textContent = '✓ Match';
1682
+ }} else if (data.match === false) {{
1683
+ matchEl.className = 'match-indicator mismatch'; matchEl.textContent = '✗ Mismatch';
1684
+ }} else {{
1685
+ matchEl.className = 'match-indicator pending'; matchEl.textContent = '—';
1686
+ }}
1687
+
1688
+ // Update click overlays
1689
+ updateClickOverlays();
1690
+
1691
+ // Update step list active state
1692
+ document.querySelectorAll('.step-item').forEach((el, i) => {{
1693
+ el.classList.toggle('active', i === currentIndex);
1694
+ }});
1695
+
1696
+ // Update details panel
1697
+ updateDetailsPanel(data);
1698
+
1699
+ // Update progress bar
1700
+ updateProgressBar();
1701
+ }}
1702
+
1703
+ function updateDetailsPanel(data) {{
1704
+ const detailsEl = document.getElementById('details-content');
1705
+ const action = data.human_action;
1706
+
1707
+ // Build human action section
1708
+ let html = `
1709
+ <div style="font-weight:600;font-size:0.8rem;color:var(--accent);margin-bottom:8px;text-transform:uppercase;">Human Action</div>
1710
+ <div class="detail-row"><span class="detail-key">Step</span><span class="detail-value">${{currentIndex + 1}} of ${{baseData.length}}</span></div>
1711
+ <div class="detail-row"><span class="detail-key">Time</span><span class="detail-value">${{data.time ? data.time.toFixed(2) + 's' : '—'}}</span></div>
1712
+ <div class="detail-row"><span class="detail-key">Type</span><span class="detail-value">${{action.type}}</span></div>
1713
+ `;
1714
+ if (action.x !== null && action.x !== undefined) {{
1715
+ html += `<div class="detail-row"><span class="detail-key">Position</span><span class="detail-value">(${{(action.x * 100).toFixed(2)}}%, ${{(action.y * 100).toFixed(2)}}%)</span></div>`;
1716
+ }}
1717
+ if (action.text) {{
1718
+ html += `<div class="detail-row"><span class="detail-key">Text</span><span class="detail-value">"${{action.text}}"</span></div>`;
1719
+ }}
1720
+
1721
+ // Build prediction section if available
1722
+ if (data.predicted_action && currentCheckpoint !== 'None') {{
1723
+ const pred = data.predicted_action;
1724
+ html += `<div style="margin-top:12px;padding-top:12px;border-top:1px solid var(--border-color);">`;
1725
+ html += `<div style="font-weight:600;font-size:0.8rem;color:#a78bfa;margin-bottom:8px;text-transform:uppercase;display:flex;justify-content:space-between;">
1726
+ <span>Model Prediction</span>
1727
+ <span style="color:${{data.match === true ? '#34d399' : data.match === false ? '#ff5f5f' : 'var(--text-muted)'}};">${{data.match === true ? '✓ Match' : data.match === false ? '✗ Mismatch' : '—'}}</span>
1728
+ </div>`;
1729
+
1730
+ // Show predicted position if available
1731
+ if (pred.x !== undefined && pred.y !== undefined) {{
1732
+ html += `<div class="detail-row"><span class="detail-key">Type</span><span class="detail-value">${{pred.type || 'click'}}</span></div>`;
1733
+ html += `<div class="detail-row"><span class="detail-key">Position</span><span class="detail-value">(${{(pred.x * 100).toFixed(2)}}%, ${{(pred.y * 100).toFixed(2)}}%)</span></div>`;
1734
+ }}
1735
+
1736
+ // Show raw output (model reasoning)
1737
+ if (pred.raw_output) {{
1738
+ const rawOutput = pred.raw_output;
1739
+ html += `<div class="detail-row" style="flex-direction:column;margin-top:8px;">
1740
+ <span class="detail-key" style="margin-bottom:4px;">Model Output</span>
1741
+ <div class="detail-value" style="font-size:0.75rem;max-height:150px;overflow-y:auto;white-space:pre-wrap;word-break:break-word;background:var(--bg-tertiary);padding:8px;border-radius:4px;">${{rawOutput.replace(/</g, '&lt;').replace(/>/g, '&gt;')}}</div>
1742
+ </div>`;
1743
+ }} else {{
1744
+ // Show whatever fields are present
1745
+ const predStr = JSON.stringify(pred, null, 2);
1746
+ html += `<div class="detail-row" style="flex-direction:column;margin-top:8px;">
1747
+ <span class="detail-key" style="margin-bottom:4px;">Prediction Data</span>
1748
+ <div class="detail-value" style="font-size:0.75rem;max-height:100px;overflow-y:auto;white-space:pre;background:var(--bg-tertiary);padding:8px;border-radius:4px;">${{predStr}}</div>
1749
+ </div>`;
1750
+ }}
1751
+ html += `</div>`;
1752
+ }}
1753
+
1754
+ // Add evaluation history if available
1755
+ const stepEvals = evalByStep[currentIndex];
1756
+ if (stepEvals && stepEvals.length > 0) {{
1757
+ html += `<div style="margin-top:12px;padding-top:12px;border-top:1px solid var(--border-color);">`;
1758
+ html += `<div style="font-weight:600;font-size:0.8rem;color:var(--text-secondary);margin-bottom:8px;text-transform:uppercase;">Evaluation History</div>`;
1759
+
1760
+ // Sort by epoch
1761
+ const sorted = [...stepEvals].sort((a, b) => a.epoch - b.epoch);
1762
+ sorted.forEach(ev => {{
1763
+ const icon = ev.correct ? '✓' : '✗';
1764
+ const color = ev.correct ? '#34d399' : '#ff5f5f';
1765
+ const dist = ev.distance ? ev.distance.toFixed(2) + 'px' : '—';
1766
+ html += `<div class="detail-row">
1767
+ <span class="detail-key">Epoch ${{ev.epoch}}</span>
1768
+ <span class="detail-value" style="color:${{color}};">${{icon}} ${{dist}}</span>
1769
+ </div>`;
1770
+ }});
1771
+
1772
+ // Show improvement trend if multiple epochs
1773
+ if (sorted.length > 1) {{
1774
+ const first = sorted[0];
1775
+ const last = sorted[sorted.length - 1];
1776
+ if (first.distance && last.distance) {{
1777
+ const improvement = first.distance - last.distance;
1778
+ const pct = ((improvement / first.distance) * 100).toFixed(1);
1779
+ const improved = improvement > 0;
1780
+ html += `<div class="detail-row" style="margin-top:4px;padding-top:4px;border-top:1px dashed var(--border-color);">
1781
+ <span class="detail-key">Trend</span>
1782
+ <span class="detail-value" style="color:${{improved ? '#34d399' : '#ff5f5f'}};">
1783
+ ${{improved ? '↓' : '↑'}} ${{Math.abs(improvement).toFixed(1)}}px (${{improved ? '-' : '+'}}${{Math.abs(pct)}}%)
1784
+ </span>
1785
+ </div>`;
1786
+ }}
1787
+ }}
1788
+
1789
+ html += `</div>`;
1790
+ }}
1791
+
1792
+ detailsEl.innerHTML = html;
1793
+ }}
1794
+
1795
+ function setupCopyButton() {{
1796
+ document.getElementById('copy-btn').onclick = function() {{
1797
+ const data = getMergedData()[currentIndex];
1798
+ const text = JSON.stringify(data, null, 2);
1799
+ navigator.clipboard.writeText(text);
1800
+ this.textContent = 'Copied!';
1801
+ this.classList.add('copied');
1802
+ setTimeout(() => {{
1803
+ this.textContent = 'Copy';
1804
+ this.classList.remove('copied');
1805
+ }}, 1500);
1806
+ }};
1807
+ }}
1808
+
1809
+ function setupCopyAllButton() {{
1810
+ const btn = document.getElementById('copy-all-btn');
1811
+ if (!btn) return;
1812
+
1813
+ btn.onclick = function() {{
1814
+ const allData = getMergedData();
1815
+ const text = JSON.stringify(allData, null, 2);
1816
+ navigator.clipboard.writeText(text);
1817
+ this.textContent = 'Copied!';
1818
+ this.classList.add('copied');
1819
+ setTimeout(() => {{
1820
+ this.textContent = 'Copy All';
1821
+ this.classList.remove('copied');
1822
+ }}, 1500);
1823
+ }};
1824
+ }}
1825
+
1826
+ function updateClickOverlays() {{
1827
+ document.querySelectorAll('.click-marker').forEach(el => el.remove());
1828
+ const data = getMergedData()[currentIndex];
1829
+ if (!data) return;
1830
+ const container = document.getElementById('frame-container');
1831
+
1832
+ if (showHumanOverlay && data.human_action.x !== null && data.human_action.x !== undefined) {{
1833
+ const marker = document.createElement('div');
1834
+ marker.className = 'click-marker human';
1835
+ marker.style.left = (data.human_action.x * 100) + '%';
1836
+ marker.style.top = (data.human_action.y * 100) + '%';
1837
+ container.appendChild(marker);
1838
+ }}
1839
+ if (showPredictedOverlay && data.predicted_action && data.predicted_action.x !== undefined) {{
1840
+ const marker = document.createElement('div');
1841
+ marker.className = 'click-marker predicted';
1842
+ marker.style.left = (data.predicted_action.x * 100) + '%';
1843
+ marker.style.top = (data.predicted_action.y * 100) + '%';
1844
+ container.appendChild(marker);
1845
+ }}
1846
+ }}
1847
+
1848
+ function buildStepList() {{
1849
+ const listEl = document.getElementById('step-list-items');
1850
+ if (!listEl) return;
1851
+ listEl.innerHTML = '';
1852
+ const typeColors = {{
1853
+ click: '#ff5f5f',
1854
+ double_click: '#ff5f5f',
1855
+ type: '#34d399',
1856
+ scroll: '#a78bfa',
1857
+ drag: '#00d4aa',
1858
+ done: '#888',
1859
+ }};
1860
+ baseData.forEach((step, i) => {{
1861
+ const item = document.createElement('div');
1862
+ item.className = 'step-item' + (i === currentIndex ? ' active' : '');
1863
+ const action = step.human_action;
1864
+ const time = step.time ? step.time.toFixed(1) + 's' : '';
1865
+ const typeColor = typeColors[action.type] || 'var(--text-secondary)';
1866
+ const actionDetail = action.type === 'type' && action.text
1867
+ ? `"${{action.text.length > 15 ? action.text.slice(0,15) + '...' : action.text}}"`
1868
+ : (action.x !== null && action.x !== undefined ? `(${{(action.x*100).toFixed(0)}}%, ${{(action.y*100).toFixed(0)}}%)` : '');
1869
+
1870
+ // Build evaluation badges
1871
+ let evalBadgesHtml = '';
1872
+ const stepEvals = evalByStep[i];
1873
+ if (stepEvals && stepEvals.length > 0) {{
1874
+ const badges = stepEvals.map(ev => {{
1875
+ const cls = ev.correct ? 'correct' : 'incorrect';
1876
+ const icon = ev.correct ? '✓' : '✗';
1877
+ const dist = ev.distance ? ev.distance.toFixed(1) + 'px' : '';
1878
+ return `<span class="eval-badge ${{cls}}" title="Epoch ${{ev.epoch}}: ${{dist}}"><span class="epoch">E${{ev.epoch}}</span>${{icon}}</span>`;
1879
+ }}).join('');
1880
+ evalBadgesHtml = `<div class="eval-badges">${{badges}}</div>`;
1881
+ }}
1882
+
1883
+ item.innerHTML = `
1884
+ <div style="display:flex;align-items:center;gap:8px;">
1885
+ <span style="font-family:monospace;font-size:0.7rem;color:var(--text-muted);min-width:40px;">${{time}}</span>
1886
+ <span style="font-weight:600;color:${{typeColor}};text-transform:uppercase;font-size:0.75rem;">${{action.type}}</span>
1887
+ </div>
1888
+ <div style="font-size:0.8rem;color:var(--text-secondary);margin-top:2px;font-family:monospace;">${{actionDetail}}</div>
1889
+ ${{evalBadgesHtml}}
1890
+ `;
1891
+ item.onclick = () => {{ currentIndex = i; updateDisplay(); }};
1892
+ listEl.appendChild(item);
1893
+ }});
1894
+ }}
1895
+
1896
+ function setupOverlayToggles() {{
1897
+ const container = document.getElementById('overlay-toggles');
1898
+ container.innerHTML = `<button class="toggle-btn active" id="toggle-human" title="Toggle human overlay (H)">Human</button><button class="toggle-btn active" id="toggle-predicted" title="Toggle AI overlay (A)">AI</button>`;
1899
+ document.getElementById('toggle-human').onclick = function() {{
1900
+ showHumanOverlay = !showHumanOverlay;
1901
+ this.classList.toggle('active', showHumanOverlay);
1902
+ // Also dim the human action box
1903
+ const humanBox = document.querySelector('.action-box.human');
1904
+ if (humanBox) humanBox.style.opacity = showHumanOverlay ? '1' : '0.4';
1905
+ updateClickOverlays();
1906
+ }};
1907
+ document.getElementById('toggle-predicted').onclick = function() {{
1908
+ showPredictedOverlay = !showPredictedOverlay;
1909
+ this.classList.toggle('active', showPredictedOverlay);
1910
+ // Also dim the predicted action box
1911
+ const predictedBox = document.getElementById('predicted-box');
1912
+ if (predictedBox) predictedBox.style.opacity = showPredictedOverlay ? '1' : '0.4';
1913
+ updateClickOverlays();
1914
+ }};
1915
+ }}
1916
+
1917
+ function updateProgressBar() {{
1918
+ const progress = document.getElementById('progress');
1919
+ if (progress) {{
1920
+ const pct = (currentIndex / (baseData.length - 1)) * 100;
1921
+ progress.style.width = pct + '%';
1922
+ }}
1923
+ }}
1924
+
1925
+ function stopPlayback() {{
1926
+ isPlaying = false;
1927
+ if (playInterval) {{
1928
+ clearInterval(playInterval);
1929
+ playInterval = null;
1930
+ }}
1931
+ const playBtn = document.getElementById('play-btn');
1932
+ if (playBtn) {{
1933
+ playBtn.textContent = '▶ Play';
1934
+ playBtn.classList.remove('active');
1935
+ }}
1936
+ // Pause audio if playing
1937
+ if (audioElement && !audioElement.paused) {{
1938
+ audioElement.pause();
1939
+ }}
1940
+ }}
1941
+
1942
+ function startPlayback() {{
1943
+ isPlaying = true;
1944
+ const playBtn = document.getElementById('play-btn');
1945
+ if (playBtn) {{
1946
+ playBtn.textContent = '⏸ Pause';
1947
+ playBtn.classList.add('active');
1948
+ }}
1949
+ // Start audio if available
1950
+ if (audioElement && audioElement.src) {{
1951
+ audioElement.play().catch(e => console.log('Audio play failed:', e));
1952
+ }}
1953
+ playInterval = setInterval(() => {{
1954
+ if (currentIndex < baseData.length - 1) {{
1955
+ currentIndex++;
1956
+ updateDisplay();
1957
+ }} else {{
1958
+ stopPlayback();
1959
+ }}
1960
+ }}, playSpeed);
1961
+ }}
1962
+
1963
+ function togglePlayback() {{
1964
+ if (isPlaying) {{
1965
+ stopPlayback();
1966
+ }} else {{
1967
+ startPlayback();
1968
+ }}
1969
+ }}
1970
+
1971
+ function setupPlaybackControls() {{
1972
+ // Rewind
1973
+ document.getElementById('rewind-btn').onclick = () => {{
1974
+ stopPlayback();
1975
+ currentIndex = 0;
1976
+ updateDisplay();
1977
+ }};
1978
+
1979
+ // Previous
1980
+ document.getElementById('prev-btn').onclick = () => {{
1981
+ stopPlayback();
1982
+ if (currentIndex > 0) {{ currentIndex--; updateDisplay(); }}
1983
+ }};
1984
+
1985
+ // Play/Pause
1986
+ document.getElementById('play-btn').onclick = togglePlayback;
1987
+
1988
+ // Next
1989
+ document.getElementById('next-btn').onclick = () => {{
1990
+ stopPlayback();
1991
+ if (currentIndex < baseData.length - 1) {{ currentIndex++; updateDisplay(); }}
1992
+ }};
1993
+
1994
+ // End
1995
+ document.getElementById('end-btn').onclick = () => {{
1996
+ stopPlayback();
1997
+ currentIndex = baseData.length - 1;
1998
+ updateDisplay();
1999
+ }};
2000
+
2001
+ // Speed control
2002
+ document.getElementById('speed-select').onchange = (e) => {{
2003
+ playSpeed = parseInt(e.target.value);
2004
+ // Map step interval to audio playback rate: 2000ms=0.5x, 1000ms=1x, 500ms=2x, 250ms=4x
2005
+ const playbackRate = 1000 / playSpeed;
2006
+ if (audioElement) {{
2007
+ audioElement.playbackRate = playbackRate;
2008
+ }}
2009
+ if (isPlaying) {{
2010
+ stopPlayback();
2011
+ startPlayback();
2012
+ }}
2013
+ }};
2014
+
2015
+ // Progress bar click to seek
2016
+ document.getElementById('progress-bar').onclick = (e) => {{
2017
+ const rect = e.currentTarget.getBoundingClientRect();
2018
+ const pct = (e.clientX - rect.left) / rect.width;
2019
+ currentIndex = Math.round(pct * (baseData.length - 1));
2020
+ updateDisplay();
2021
+ }};
2022
+
2023
+ // Keyboard shortcuts
2024
+ document.addEventListener('keydown', (e) => {{
2025
+ // Ignore if focused on an input
2026
+ if (e.target.tagName === 'INPUT' || e.target.tagName === 'SELECT') return;
2027
+
2028
+ switch(e.key) {{
2029
+ case 'ArrowLeft':
2030
+ document.getElementById('prev-btn').click();
2031
+ break;
2032
+ case 'ArrowRight':
2033
+ document.getElementById('next-btn').click();
2034
+ break;
2035
+ case ' ': // Space
2036
+ e.preventDefault();
2037
+ togglePlayback();
2038
+ break;
2039
+ case 'Home':
2040
+ document.getElementById('rewind-btn').click();
2041
+ break;
2042
+ case 'End':
2043
+ document.getElementById('end-btn').click();
2044
+ break;
2045
+ case 'h':
2046
+ case 'H':
2047
+ document.getElementById('toggle-human').click();
2048
+ break;
2049
+ case 'a':
2050
+ case 'A':
2051
+ document.getElementById('toggle-predicted').click();
2052
+ break;
2053
+ }}
2054
+ }});
2055
+ }}
2056
+
2057
+ // Transcript/audio sync variables
2058
+ let transcriptSegments = [];
2059
+ let audioElement = null;
2060
+ let lastActiveSegmentIndex = -1;
2061
+ let autoScrollTranscript = true;
2062
+
2063
+ async function loadTranscript() {{
2064
+ // Try to load transcript.json
2065
+ try {{
2066
+ const response = await fetch('transcript.json?t=' + Date.now());
2067
+ if (response.ok) {{
2068
+ const data = await response.json();
2069
+ if (data.segments && data.segments.length > 0) {{
2070
+ transcriptSegments = data.segments;
2071
+ renderTranscript();
2072
+ setupAudioSync();
2073
+ return;
2074
+ }}
2075
+ }}
2076
+ }} catch (e) {{
2077
+ console.log('No transcript.json found');
2078
+ }}
2079
+
2080
+ // Check if any base data has transcript info
2081
+ const hasTranscript = baseData.some(d => d.transcript_text || d.audio_start !== undefined);
2082
+ if (!hasTranscript) {{
2083
+ document.getElementById('transcript-content').innerHTML = '<div class="transcript-empty">No transcript available</div>';
2084
+ return;
2085
+ }}
2086
+
2087
+ // Build segments from base data
2088
+ baseData.forEach((step, i) => {{
2089
+ if (step.transcript_text) {{
2090
+ transcriptSegments.push({{
2091
+ start: step.audio_start || step.time || 0,
2092
+ end: step.audio_end || (baseData[i + 1]?.time || step.time + 5),
2093
+ text: step.transcript_text,
2094
+ stepIndex: i
2095
+ }});
2096
+ }}
2097
+ }});
2098
+
2099
+ if (transcriptSegments.length > 0) {{
2100
+ renderTranscript();
2101
+ setupAudioSync();
2102
+ }} else {{
2103
+ document.getElementById('transcript-content').innerHTML = '<div class="transcript-empty">No transcript available</div>';
2104
+ }}
2105
+ }}
2106
+
2107
+ function renderTranscript() {{
2108
+ const container = document.getElementById('transcript-content');
2109
+ if (transcriptSegments.length === 0) {{
2110
+ container.innerHTML = '<div class="transcript-empty">No transcript available</div>';
2111
+ return;
2112
+ }}
2113
+
2114
+ container.innerHTML = transcriptSegments.map((seg, i) => {{
2115
+ const timeStr = formatTime(seg.start);
2116
+ return `<span class="transcript-segment" data-index="${{i}}" data-start="${{seg.start}}" data-end="${{seg.end}}">` +
2117
+ `<span class="transcript-time">${{timeStr}}</span>${{seg.text}} </span>`;
2118
+ }}).join('');
2119
+
2120
+ // Add click handlers for seek
2121
+ container.querySelectorAll('.transcript-segment').forEach(el => {{
2122
+ el.onclick = () => {{
2123
+ const start = parseFloat(el.dataset.start);
2124
+ seekAudio(start);
2125
+
2126
+ // Also jump to corresponding step if available
2127
+ const segIndex = parseInt(el.dataset.index);
2128
+ if (transcriptSegments[segIndex]?.stepIndex !== undefined) {{
2129
+ currentIndex = transcriptSegments[segIndex].stepIndex;
2130
+ updateDisplay();
2131
+ }}
2132
+ }};
2133
+ }});
2134
+ }}
2135
+
2136
+ function formatTime(seconds) {{
2137
+ const mins = Math.floor(seconds / 60);
2138
+ const secs = Math.floor(seconds % 60);
2139
+ return `${{mins}}:${{secs.toString().padStart(2, '0')}}`;
2140
+ }}
2141
+
2142
+ function seekAudio(time) {{
2143
+ if (!audioElement) {{
2144
+ audioElement = document.getElementById('audio');
2145
+ }}
2146
+ if (audioElement && audioElement.src) {{
2147
+ audioElement.currentTime = time;
2148
+ if (audioElement.paused) {{
2149
+ audioElement.play().catch(e => console.log('Audio play failed:', e));
2150
+ }}
2151
+ }}
2152
+ }}
2153
+
2154
+ function setupAudioSync() {{
2155
+ audioElement = document.getElementById('audio');
2156
+
2157
+ // Try to load audio file
2158
+ const audioSrc = 'audio.mp3';
2159
+ audioElement.src = audioSrc;
2160
+ audioElement.load();
2161
+
2162
+ // Auto-highlight during playback
2163
+ audioElement.ontimeupdate = () => {{
2164
+ const currentTime = audioElement.currentTime;
2165
+ highlightCurrentSegment(currentTime);
2166
+ }};
2167
+
2168
+ audioElement.onerror = () => {{
2169
+ console.log('Audio file not available');
2170
+ }};
2171
+
2172
+ // Setup follow toggle button
2173
+ const followBtn = document.getElementById('transcript-follow-btn');
2174
+ if (followBtn) {{
2175
+ followBtn.onclick = () => {{
2176
+ autoScrollTranscript = !autoScrollTranscript;
2177
+ followBtn.classList.toggle('active', autoScrollTranscript);
2178
+ }};
2179
+ }}
2180
+ }}
2181
+
2182
+ function highlightCurrentSegment(currentTime) {{
2183
+ const segments = document.querySelectorAll('.transcript-segment');
2184
+ let newActiveIndex = -1;
2185
+
2186
+ segments.forEach((el, i) => {{
2187
+ const start = parseFloat(el.dataset.start);
2188
+ const end = parseFloat(el.dataset.end);
2189
+ const isActive = currentTime >= start && currentTime < end;
2190
+ el.classList.toggle('active', isActive);
2191
+
2192
+ if (isActive) {{
2193
+ newActiveIndex = i;
2194
+ }}
2195
+ }});
2196
+
2197
+ // Only scroll when active segment changes (not on every timeupdate)
2198
+ if (newActiveIndex !== lastActiveSegmentIndex && newActiveIndex !== -1) {{
2199
+ lastActiveSegmentIndex = newActiveIndex;
2200
+ if (autoScrollTranscript) {{
2201
+ segments[newActiveIndex].scrollIntoView({{ behavior: 'smooth', block: 'nearest' }});
2202
+ }}
2203
+ }}
2204
+ }}
2205
+
2206
+ // Timeline visualizer
2207
+ let totalDuration = 0;
2208
+
2209
+ function renderTimeline() {{
2210
+ const timeline = document.getElementById('timeline');
2211
+ const segmentsContainer = document.getElementById('timeline-segments');
2212
+ const markersContainer = document.getElementById('timeline-markers');
2213
+
2214
+ if (!timeline || !segmentsContainer || !markersContainer) return;
2215
+
2216
+ // Calculate total duration from audio or last step
2217
+ totalDuration = audioElement?.duration || baseData[baseData.length - 1]?.time || 60;
2218
+
2219
+ // Clear existing
2220
+ segmentsContainer.innerHTML = '';
2221
+ markersContainer.innerHTML = '';
2222
+
2223
+ // Render transcript segments
2224
+ if (transcriptSegments.length > 0) {{
2225
+ transcriptSegments.forEach((seg, i) => {{
2226
+ const left = (seg.start / totalDuration) * 100;
2227
+ const width = Math.max(1, ((seg.end - seg.start) / totalDuration) * 100);
2228
+ const div = document.createElement('div');
2229
+ div.className = 'timeline-segment';
2230
+ div.style.width = width + '%';
2231
+ div.dataset.index = i;
2232
+ div.dataset.start = seg.start;
2233
+ div.dataset.end = seg.end;
2234
+
2235
+ // Tooltip with truncated text
2236
+ const tooltip = document.createElement('div');
2237
+ tooltip.className = 'timeline-segment-tooltip';
2238
+ tooltip.textContent = seg.text.length > 40 ? seg.text.slice(0, 40) + '...' : seg.text;
2239
+ div.appendChild(tooltip);
2240
+
2241
+ div.onclick = (e) => {{
2242
+ e.stopPropagation();
2243
+ seekAudio(seg.start);
2244
+ }};
2245
+ segmentsContainer.appendChild(div);
2246
+ }});
2247
+ }} else {{
2248
+ // No segments - fill with empty space
2249
+ segmentsContainer.innerHTML = '<div class="timeline-segment" style="width:100%;background:transparent;"></div>';
2250
+ }}
2251
+
2252
+ // Render action markers
2253
+ baseData.forEach((step, i) => {{
2254
+ const left = (step.time / totalDuration) * 100;
2255
+ const marker = document.createElement('div');
2256
+ marker.className = `timeline-marker ${{step.human_action?.type || 'unknown'}}`;
2257
+ marker.style.left = left + '%';
2258
+ marker.dataset.index = i;
2259
+ marker.title = `Step ${{i + 1}}: ${{step.human_action?.type || 'unknown'}} @ ${{formatTime(step.time)}}`;
2260
+ marker.onclick = (e) => {{
2261
+ e.stopPropagation();
2262
+ currentIndex = i;
2263
+ updateDisplay();
2264
+ if (step.time && audioElement) {{
2265
+ seekAudio(step.time);
2266
+ }}
2267
+ }};
2268
+ markersContainer.appendChild(marker);
2269
+ }});
2270
+
2271
+ // Timeline click to seek
2272
+ timeline.onclick = (e) => {{
2273
+ const rect = timeline.getBoundingClientRect();
2274
+ const pct = (e.clientX - rect.left) / rect.width;
2275
+ const time = pct * totalDuration;
2276
+ seekAudio(time);
2277
+
2278
+ // Find closest step
2279
+ let closest = 0;
2280
+ let minDist = Infinity;
2281
+ baseData.forEach((step, i) => {{
2282
+ const dist = Math.abs(step.time - time);
2283
+ if (dist < minDist) {{
2284
+ minDist = dist;
2285
+ closest = i;
2286
+ }}
2287
+ }});
2288
+ currentIndex = closest;
2289
+ updateDisplay();
2290
+ }};
2291
+ }}
2292
+
2293
+ function updateTimelinePlayhead() {{
2294
+ const playhead = document.getElementById('timeline-playhead');
2295
+ if (!playhead || !totalDuration) return;
2296
+
2297
+ const currentTime = audioElement?.currentTime || baseData[currentIndex]?.time || 0;
2298
+ const pct = (currentTime / totalDuration) * 100;
2299
+ playhead.style.left = pct + '%';
2300
+
2301
+ // Update active segment
2302
+ document.querySelectorAll('.timeline-segment').forEach(el => {{
2303
+ const start = parseFloat(el.dataset.start) || 0;
2304
+ const end = parseFloat(el.dataset.end) || 0;
2305
+ el.classList.toggle('active', currentTime >= start && currentTime < end);
2306
+ }});
2307
+
2308
+ // Update active marker
2309
+ document.querySelectorAll('.timeline-marker').forEach((el, i) => {{
2310
+ el.classList.toggle('active', i === currentIndex);
2311
+ }});
2312
+ }}
2313
+
2314
+ // Hook into audio timeupdate for playhead
2315
+ const originalHighlight = highlightCurrentSegment;
2316
+ highlightCurrentSegment = function(currentTime) {{
2317
+ originalHighlight(currentTime);
2318
+ updateTimelinePlayhead();
2319
+ }};
2320
+
2321
+ // Initialize
2322
+ initDropdowns();
2323
+ buildStepList();
2324
+ setupOverlayToggles();
2325
+ setupPlaybackControls();
2326
+ setupCopyButton();
2327
+ setupCopyAllButton();
2328
+ updateMetrics();
2329
+ updateDisplay();
2330
+ loadAndDisplayCosts();
2331
+ loadAndDisplayTimestamps();
2332
+ loadTranscript(); // Load transcript and setup audio sync
2333
+
2334
+ // Render timeline after transcript loads (needs segment data)
2335
+ setTimeout(() => {{
2336
+ renderTimeline();
2337
+ updateTimelinePlayhead();
2338
+ }}, 500);
2339
+
2340
+ // Gallery Panel Functions
2341
+ let currentGalleryEpoch = 'all';
2342
+ let currentGalleryStatus = 'all';
2343
+
2344
+ function setupGalleryPanel() {{
2345
+ const panel = document.getElementById('gallery-panel');
2346
+ const collapseBtn = document.getElementById('gallery-collapse-btn');
2347
+ const maximizeBtn = document.getElementById('gallery-maximize-btn');
2348
+ const overlay = document.getElementById('gallery-maximized-overlay');
2349
+ const closeBtn = document.getElementById('gallery-close-btn');
2350
+
2351
+ // Collapse/expand
2352
+ collapseBtn.onclick = function(e) {{
2353
+ e.stopPropagation();
2354
+ panel.classList.toggle('collapsed');
2355
+ }};
2356
+
2357
+ // Maximize
2358
+ maximizeBtn.onclick = function(e) {{
2359
+ e.stopPropagation();
2360
+ overlay.classList.add('active');
2361
+ renderGalleryMaximized();
2362
+ }};
2363
+
2364
+ // Close maximized
2365
+ closeBtn.onclick = function() {{
2366
+ overlay.classList.remove('active');
2367
+ }};
2368
+
2369
+ // Close on escape
2370
+ document.addEventListener('keydown', (e) => {{
2371
+ if (e.key === 'Escape' && overlay.classList.contains('active')) {{
2372
+ overlay.classList.remove('active');
2373
+ }}
2374
+ }});
2375
+
2376
+ // Close on overlay background click
2377
+ overlay.onclick = function(e) {{
2378
+ if (e.target === overlay) {{
2379
+ overlay.classList.remove('active');
2380
+ }}
2381
+ }};
2382
+
2383
+ // Setup compact filters
2384
+ document.getElementById('gallery-epoch-filter').onchange = function() {{
2385
+ currentGalleryEpoch = this.value;
2386
+ filterGallery('compact');
2387
+ }};
2388
+
2389
+ document.getElementById('gallery-status-filter').onchange = function() {{
2390
+ currentGalleryStatus = this.value;
2391
+ filterGallery('compact');
2392
+ }};
2393
+
2394
+ // Setup maximized filters
2395
+ document.getElementById('gallery-epoch-filter-max').onchange = function() {{
2396
+ currentGalleryEpoch = this.value;
2397
+ document.getElementById('gallery-epoch-filter').value = this.value;
2398
+ filterGallery('maximized');
2399
+ }};
2400
+
2401
+ document.getElementById('gallery-status-filter-max').onchange = function() {{
2402
+ currentGalleryStatus = this.value;
2403
+ document.getElementById('gallery-status-filter').value = this.value;
2404
+ filterGallery('maximized');
2405
+ }};
2406
+
2407
+ // Initial render
2408
+ renderGalleryCompact();
2409
+ }}
2410
+
2411
+ function buildGalleryCards(evaluations, compact = true) {{
2412
+ return evaluations.map((ev, i) => {{
2413
+ const statusClass = ev.correct ? 'correct' : 'incorrect';
2414
+ const statusText = ev.correct ? '✓' : '✗';
2415
+ const humanX = ((ev.human_action?.x || 0) * 100).toFixed(1);
2416
+ const humanY = ((ev.human_action?.y || 0) * 100).toFixed(1);
2417
+ const predX = ((ev.predicted_action?.x || 0) * 100).toFixed(1);
2418
+ const predY = ((ev.predicted_action?.y || 0) * 100).toFixed(1);
2419
+ const distance = ev.distance ? ev.distance.toFixed(3) : '—';
2420
+
2421
+ const stepData = baseData[ev.sample_idx];
2422
+ const imagePath = stepData?.screenshot_path || ev.image_path || 'screenshots/sample.png';
2423
+
2424
+ if (compact) {{
2425
+ return `
2426
+ <div class="gallery-card" data-epoch="${{ev.epoch}}" data-correct="${{ev.correct}}" data-step="${{ev.sample_idx}}">
2427
+ <div class="image-wrapper">
2428
+ <img src="${{imagePath}}" alt="Step ${{ev.sample_idx + 1}}" onerror="this.src='screenshots/sample.png'">
2429
+ <div class="overlay">
2430
+ <div class="marker human" style="left: ${{humanX}}%; top: ${{humanY}}%;">H</div>
2431
+ <div class="marker predicted" style="left: ${{predX}}%; top: ${{predY}}%;">AI</div>
2432
+ </div>
2433
+ </div>
2434
+ <div class="card-content">
2435
+ <div class="card-header">
2436
+ <span class="step-num" title="Step ${{ev.sample_idx + 1}}, Epoch ${{ev.epoch + 1}}">S${{ev.sample_idx + 1}} E${{ev.epoch + 1}}</span>
2437
+ <span class="status ${{statusClass}}">${{statusText}}</span>
2438
+ </div>
2439
+ </div>
2440
+ </div>
2441
+ `;
2442
+ }} else {{
2443
+ return `
2444
+ <div class="gallery-card" data-epoch="${{ev.epoch}}" data-correct="${{ev.correct}}" data-step="${{ev.sample_idx}}">
2445
+ <div class="image-wrapper">
2446
+ <img src="${{imagePath}}" alt="Step ${{ev.sample_idx + 1}}" onerror="this.src='screenshots/sample.png'">
2447
+ <div class="overlay">
2448
+ <div class="marker human" style="left: ${{humanX}}%; top: ${{humanY}}%;">H</div>
2449
+ <div class="marker predicted" style="left: ${{predX}}%; top: ${{predY}}%;">AI</div>
2450
+ </div>
2451
+ </div>
2452
+ <div class="card-content">
2453
+ <div class="card-header">
2454
+ <span class="step-num">Step ${{ev.sample_idx + 1}} | Epoch ${{ev.epoch + 1}}</span>
2455
+ <span class="status ${{statusClass}}">${{ev.correct ? '✓ Correct' : '✗ Incorrect'}}</span>
2456
+ </div>
2457
+ <div class="card-details">
2458
+ <div class="coord-row">
2459
+ <span class="coord-human">H: (${{humanX}}%, ${{humanY}}%)</span>
2460
+ <span class="coord-pred">AI: (${{predX}}%, ${{predY}}%)</span>
2461
+ </div>
2462
+ <div>Distance: ${{distance}}</div>
2463
+ </div>
2464
+ </div>
2465
+ </div>
2466
+ `;
2467
+ }}
2468
+ }}).join('');
2469
+ }}
2470
+
2471
+ function renderGalleryCompact() {{
2472
+ const grid = document.getElementById('gallery-grid');
2473
+ const emptyState = document.getElementById('gallery-empty');
2474
+ const epochFilter = document.getElementById('gallery-epoch-filter');
2475
+ const countEl = document.getElementById('gallery-count');
2476
+
2477
+ if (evaluations.length === 0) {{
2478
+ grid.style.display = 'none';
2479
+ emptyState.style.display = 'block';
2480
+ countEl.textContent = '0 samples';
2481
+ return;
2482
+ }}
2483
+
2484
+ grid.style.display = 'grid';
2485
+ emptyState.style.display = 'none';
2486
+
2487
+ // Populate epoch filter options
2488
+ const epochs = [...new Set(evaluations.map(e => e.epoch))].sort((a, b) => a - b);
2489
+ epochFilter.innerHTML = '<option value="all">All Epochs</option>' +
2490
+ epochs.map(ep => `<option value="${{ep}}">Epoch ${{ep + 1}}</option>`).join('');
2491
+
2492
+ grid.innerHTML = buildGalleryCards(evaluations, true);
2493
+ setupCardClickHandlers(grid, false);
2494
+ filterGallery('compact');
2495
+ }}
2496
+
2497
+ function renderGalleryMaximized() {{
2498
+ const grid = document.getElementById('gallery-grid-max');
2499
+ const epochFilter = document.getElementById('gallery-epoch-filter-max');
2500
+ const countEl = document.getElementById('gallery-count-max');
2501
+
2502
+ if (evaluations.length === 0) {{
2503
+ grid.innerHTML = '<div class="gallery-empty">No evaluations available.</div>';
2504
+ countEl.textContent = '0 samples';
2505
+ return;
2506
+ }}
2507
+
2508
+ // Populate epoch filter options
2509
+ const epochs = [...new Set(evaluations.map(e => e.epoch))].sort((a, b) => a - b);
2510
+ epochFilter.innerHTML = '<option value="all">All Epochs</option>' +
2511
+ epochs.map(ep => `<option value="${{ep}}">Epoch ${{ep + 1}}</option>`).join('');
2512
+
2513
+ // Sync filter values
2514
+ epochFilter.value = currentGalleryEpoch;
2515
+ document.getElementById('gallery-status-filter-max').value = currentGalleryStatus;
2516
+
2517
+ grid.innerHTML = buildGalleryCards(evaluations, false);
2518
+ setupCardClickHandlers(grid, true);
2519
+ filterGallery('maximized');
2520
+ }}
2521
+
2522
+ function setupCardClickHandlers(grid, closeOverlay) {{
2523
+ grid.querySelectorAll('.gallery-card').forEach(card => {{
2524
+ card.onclick = function() {{
2525
+ const stepIdx = parseInt(this.dataset.step);
2526
+ currentIndex = stepIdx;
2527
+ updateDisplay();
2528
+ if (closeOverlay) {{
2529
+ document.getElementById('gallery-maximized-overlay').classList.remove('active');
2530
+ }}
2531
+ }};
2532
+ }});
2533
+ }}
2534
+
2535
+ function filterGallery(mode) {{
2536
+ const isCompact = mode === 'compact';
2537
+ const grid = document.getElementById(isCompact ? 'gallery-grid' : 'gallery-grid-max');
2538
+ const countEl = document.getElementById(isCompact ? 'gallery-count' : 'gallery-count-max');
2539
+ const cards = grid.querySelectorAll('.gallery-card');
2540
+ let visible = 0;
2541
+
2542
+ cards.forEach(card => {{
2543
+ const epoch = card.dataset.epoch;
2544
+ const correct = card.dataset.correct === 'true';
2545
+
2546
+ const epochMatch = currentGalleryEpoch === 'all' || epoch === currentGalleryEpoch;
2547
+ const statusMatch = currentGalleryStatus === 'all' ||
2548
+ (currentGalleryStatus === 'correct' && correct) ||
2549
+ (currentGalleryStatus === 'incorrect' && !correct);
2550
+
2551
+ if (epochMatch && statusMatch) {{
2552
+ card.classList.remove('hidden');
2553
+ visible++;
2554
+ }} else {{
2555
+ card.classList.add('hidden');
2556
+ }}
2557
+ }});
2558
+
2559
+ countEl.textContent = `${{visible}} of ${{cards.length}} samples`;
2560
+ }}
2561
+
2562
+ // Initialize gallery panel
2563
+ setupGalleryPanel();
2564
+ </script>
2565
+ </body>
2566
+ </html>'''
2567
+
2568
+ output_path.write_text(html, encoding='utf-8')
2569
+ print(f"Generated unified viewer: {output_path}")
2570
+
2571
+
2572
def _enhance_comparison_to_unified_viewer(
    base_html_file: Path,
    predictions_by_checkpoint: dict[str, list[dict]],
    output_path: Path,
    capture_id: str = "unknown",
    goal: str = "Untitled",
) -> None:
    """Enhance an existing comparison HTML file into a unified viewer.

    DEPRECATED: This function uses script injection which is fragile.
    Use _generate_unified_viewer_from_extracted_data() instead for a
    standalone viewer that doesn't depend on the comparison.html structure.

    Takes the nice openadapt-capture viewer and adds:
    - Simplified nav (Training + Viewer only)
    - Checkpoint dropdown to switch between predictions
    - Training example dropdown (stub for future)

    Args:
        base_html_file: Existing comparison HTML containing a
            ``comparisonData = [...]`` assignment (``const`` or ``window.``).
        predictions_by_checkpoint: Mapping of checkpoint name to the list of
            per-step prediction dicts; embedded into the page as JSON.
        output_path: Where the enhanced HTML is written (UTF-8).
        capture_id: Identifier shown in the header and used as the option
            value of the example dropdown.
        goal: Human-readable capture name; truncated to 40 characters in the
            example dropdown.

    Raises:
        json.JSONDecodeError: If the embedded ``comparisonData`` array is not
            valid JSON.
    """
    import re
    from html import escape

    def _json_for_script(value) -> str:
        # "</" inside a JSON string would let "</script>" in model output end
        # the inline script early; "<\/" is an equivalent JS/JSON escape.
        return json.dumps(value).replace('</', '<\\/')

    # Read with the same encoding used for writing below.
    page = base_html_file.read_text(encoding='utf-8')

    # Locate the existing comparisonData (supports both const and window. prefix).
    # The array itself is parsed with a real JSON decoder instead of a
    # non-greedy regex, so a "];" inside a string value cannot truncate it.
    data_match = re.search(
        r'(?:const\s+|window\.)comparisonData\s*=\s*(?=\[)',
        page,
    )
    if not data_match:
        print(f"Could not find comparisonData in {base_html_file}")
        return

    base_comparison_data, _ = json.JSONDecoder().raw_decode(page, data_match.end())
    step_count = len(base_comparison_data)

    # JSON encode predictions and capture metadata for the inline script
    predictions_json = _json_for_script(predictions_by_checkpoint)
    captures_json = _json_for_script([{
        "id": capture_id,
        "name": goal,
        "steps": step_count,
    }])

    # 1. Build the unified header combining nav + controls.
    # Use shared header CSS and HTML for consistency with training dashboard.
    header_css = f'<style>{_get_shared_header_css()}</style>'

    # capture_id / goal are free text: escape them before placing in markup.
    safe_capture_id = escape(capture_id)
    goal_label = escape(goal[:40]) + ('...' if len(goal) > 40 else '')

    # Controls HTML for the viewer (example + checkpoint dropdowns)
    controls_html = f'''
    <div class="control-group">
        <span class="control-label">Example</span>
        <select id="capture-select">
            <option value="{safe_capture_id}">{goal_label} ({step_count})</option>
        </select>
    </div>
    <div class="control-group">
        <span class="control-label">Checkpoint</span>
        <select id="checkpoint-select"></select>
    </div>
    '''

    unified_header = header_css + _generate_shared_header_html(
        "viewer",
        controls_html=controls_html,
        meta_html=f"ID: {safe_capture_id}"
    )

    # Remove any old viewer-controls div if it exists (from previous runs)
    page = re.sub(
        r'<div class="viewer-controls"[^>]*>.*?</div>\s*(?=<)',
        '',
        page,
        flags=re.DOTALL
    )

    # 2. Replace the existing nav with the unified header.
    # A callable replacement is used so backslashes in the generated header
    # are inserted literally instead of being parsed as regex escapes.
    nav_replaced = False
    if re.search(r'<nav class="nav-bar"', page):
        page = re.sub(
            r'<nav class="nav-bar"[^>]*>.*?</nav>\s*',
            lambda _match: unified_header,
            page,
            flags=re.DOTALL
        )
        nav_replaced = True

    # Remove the old <header> element - unified header already contains all info
    page = re.sub(
        r'<header[^>]*>.*?</header>\s*',
        '',
        page,
        flags=re.DOTALL
    )

    # If no nav was found/replaced, insert unified header after <body>
    if not nav_replaced:
        page = re.sub(
            r'(<body[^>]*>)',
            lambda match: match.group(1) + '\n' + unified_header,
            page,
            count=1
        )

    # 3. Layer the multi-checkpoint system on top of the page's comparisonData.
    # Literal JS braces are doubled because this is an f-string; only
    # predictions_json and captures_json are interpolated.
    checkpoint_script = f'''
    <script>
    // Unified viewer: multi-checkpoint support
    // Bridge local comparisonData to window scope for cross-script access
    if (typeof comparisonData !== 'undefined' && typeof window.comparisonData === 'undefined') {{
        window.comparisonData = comparisonData;
    }}

    // Escape text before it is assigned to innerHTML (model output is untrusted)
    window.escapePredictionHtml = function(text) {{
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;');
    }};

    // Parse model output for SoM actions
    window.parseModelOutput = function(rawOutput) {{
        if (!rawOutput) return {{ html: '<em style="color:var(--text-muted);">No prediction</em>' }};

        // Try to extract SoM actions: CLICK([N]), TYPE([N], "text"), TYPE("text")
        const clickSomMatch = rawOutput.match(/CLICK\\s*\\(\\s*\\[\\s*(\\d+)\\s*\\]\\s*\\)/);
        const typeSomMatch = rawOutput.match(/TYPE\\s*\\(\\s*\\[\\s*(\\d+)\\s*\\]\\s*,\\s*["']([^"']*)["']\\s*\\)/);
        const typeSimpleMatch = rawOutput.match(/TYPE\\s*\\(\\s*["']([^"']*)["']\\s*\\)/);
        const clickCoordMatch = rawOutput.match(/CLICK\\s*\\(\\s*x\\s*=\\s*([\\d.]+)\\s*,\\s*y\\s*=\\s*([\\d.]+)\\s*\\)/);

        let html = '';

        if (clickSomMatch) {{
            html = `<div style="font-weight:600;color:#00d4aa;">CLICK([${{clickSomMatch[1]}}])</div>`;
        }} else if (typeSomMatch) {{
            html = `<div style="font-weight:600;color:#00d4aa;">TYPE([${{typeSomMatch[1]}}], "${{window.escapePredictionHtml(typeSomMatch[2])}}")</div>`;
        }} else if (typeSimpleMatch) {{
            html = `<div style="font-weight:600;color:#00d4aa;">TYPE("${{window.escapePredictionHtml(typeSimpleMatch[1])}}")</div>`;
        }} else if (clickCoordMatch) {{
            html = `<div style="font-weight:600;color:#00d4aa;">CLICK(x=${{clickCoordMatch[1]}}, y=${{clickCoordMatch[2]}})</div>`;
        }} else {{
            // No structured action - show truncated output
            const truncated = rawOutput.replace(/\\n/g, ' ').substring(0, 150);
            html = `<div style="font-size:0.85rem;color:var(--text-muted);max-height:60px;overflow:hidden;">${{window.escapePredictionHtml(truncated)}}${{rawOutput.length > 150 ? '...' : ''}}</div>`;
        }}

        return {{ html }};
    }};

    // Override prediction display in comparison viewer
    window.formatPrediction = function(pred) {{
        if (!pred) return '<em style="color:var(--text-muted);">No prediction</em>';
        if (pred.x !== undefined) {{
            return `<div>Type: ${{window.escapePredictionHtml(pred.type || 'click')}}</div><div>Position: (${{(pred.x * 100).toFixed(1)}}%, ${{(pred.y * 100).toFixed(1)}}%)</div>`;
        }}
        return window.parseModelOutput(pred.raw_output || JSON.stringify(pred)).html;
    }};

    // Use window. prefix for cross-script variable access
    window.predictionsByCheckpoint = {predictions_json};
    window.availableCaptures = {captures_json};
    window.currentCheckpoint = 'None';

    // Initialize checkpoint dropdown
    window.initCheckpointDropdown = function() {{
        const select = document.getElementById('checkpoint-select');
        if (!select) return;

        const checkpointNames = Object.keys(window.predictionsByCheckpoint);
        checkpointNames.sort((a, b) => {{
            if (a === 'None') return -1;
            if (b === 'None') return 1;
            const aNum = parseInt(a.match(/\\d+/)?.[0] || '999');
            const bNum = parseInt(b.match(/\\d+/)?.[0] || '999');
            return aNum - bNum;
        }});

        select.innerHTML = '';
        checkpointNames.forEach(name => {{
            const opt = document.createElement('option');
            opt.value = name;
            opt.textContent = name === 'None' ? 'None (Capture Only)' : name;
            select.appendChild(opt);
        }});

        // Default to latest epoch checkpoint (prefer actual trained checkpoints over Preview)
        const epochCheckpoints = checkpointNames.filter(n => n.toLowerCase().includes('epoch'));
        const latestCheckpoint = epochCheckpoints.length > 0
            ? epochCheckpoints.pop()
            : checkpointNames.filter(n => n !== 'None').pop();
        if (latestCheckpoint) {{
            select.value = latestCheckpoint;
            window.currentCheckpoint = latestCheckpoint;
            window.applyCheckpointPredictions(latestCheckpoint);
        }}

        select.addEventListener('change', (e) => {{
            window.currentCheckpoint = e.target.value;
            window.applyCheckpointPredictions(window.currentCheckpoint);
        }});
    }};

    // Apply predictions from selected checkpoint to comparisonData
    window.applyCheckpointPredictions = function(checkpointName) {{
        const predictions = window.predictionsByCheckpoint[checkpointName] || [];

        // Update comparisonData with new predictions (access from window)
        if (typeof window.comparisonData !== 'undefined') {{
            window.comparisonData.forEach((item, i) => {{
                const pred = predictions[i] || {{}};
                item.predicted_action = pred.predicted_action || null;
                item.match = pred.match !== undefined ? pred.match : null;
            }});
        }}

        // Refresh display if updateComparison exists (check both window and global scope)
        const idx = typeof window.currentIndex !== 'undefined' ? window.currentIndex :
                    (typeof currentIndex !== 'undefined' ? currentIndex : 0);
        if (typeof window.updateComparison === 'function') {{
            window.updateComparison(idx);
        }} else if (typeof updateComparison === 'function') {{
            updateComparison(idx);
        }}

        // Reformat prediction display after original updateComparison runs
        setTimeout(() => {{
            const predEl = document.getElementById('predicted-action') ||
                           document.querySelector('.action-box.predicted .action-details');
            if (predEl && window.comparisonData && window.comparisonData[idx]) {{
                const pred = window.comparisonData[idx].predicted_action;
                if (pred) {{
                    predEl.innerHTML = window.formatPrediction(pred);
                }}
            }}
        }}, 50);

        // Update metrics if setupMetricsSummary exists
        if (typeof window.setupMetricsSummary === 'function') {{
            window.setupMetricsSummary();
        }}
    }};

    // Initialize on load
    setTimeout(window.initCheckpointDropdown, 200);

    // Smart auto-scroll: scroll while playing, but stop if user scrolls up
    (function() {{
        let autoScrollEnabled = true;
        let lastScrollTop = 0;

        // Find the events list element
        const eventsList = document.querySelector('.events-list');
        if (!eventsList) return;

        // Detect user scroll - disable auto-scroll if scrolling up
        eventsList.addEventListener('scroll', function() {{
            const currentScrollTop = eventsList.scrollTop;

            // If user scrolled up, disable auto-scroll
            if (currentScrollTop < lastScrollTop - 10) {{
                autoScrollEnabled = false;
            }}

            // If user scrolled to bottom (within 50px), re-enable auto-scroll
            const isAtBottom = eventsList.scrollHeight - eventsList.scrollTop - eventsList.clientHeight < 50;
            if (isAtBottom) {{
                autoScrollEnabled = true;
            }}

            lastScrollTop = currentScrollTop;
        }});

        // Override scrollIntoView behavior for event items
        const originalScrollIntoView = Element.prototype.scrollIntoView;
        Element.prototype.scrollIntoView = function(options) {{
            // Only block scroll for event items when auto-scroll is disabled
            if (!autoScrollEnabled && this.classList && this.classList.contains('event-item')) {{
                return; // Skip scrollIntoView when user has scrolled up
            }}
            return originalScrollIntoView.call(this, options);
        }};

        // Add scroll lock indicator
        const indicator = document.createElement('div');
        indicator.id = 'scroll-lock-indicator';
        indicator.style.cssText = 'position:fixed;bottom:20px;right:20px;padding:8px 12px;background:var(--bg-tertiary);border:1px solid var(--border-color);border-radius:4px;font-size:0.75rem;color:var(--text-muted);opacity:0;transition:opacity 0.3s;pointer-events:none;z-index:1000;';
        indicator.textContent = '⏸ Auto-scroll paused (scroll to bottom to resume)';
        document.body.appendChild(indicator);

        // Show/hide indicator based on scroll state
        setInterval(() => {{
            indicator.style.opacity = autoScrollEnabled ? '0' : '1';
        }}, 200);
    }})();
    </script>
    '''

    # Insert checkpoint script before </body>
    page = page.replace('</body>', checkpoint_script + '</body>')

    # 4. Disable the old discoverDashboards that creates wrong nav
    page = page.replace(
        'discoverDashboards();',
        '// discoverDashboards disabled - using unified viewer nav'
    )

    # Write output
    output_path.write_text(page, encoding='utf-8')
    print(f"Generated unified viewer from {base_html_file.name}: {output_path}")
2884
+
2885
+
2886
+ def _add_static_nav_to_comparison(
2887
+ comparison_path: Path,
2888
+ output_dir: Path,
2889
+ nav_links: list[tuple[str, str]] | None = None,
2890
+ ) -> None:
2891
+ """Add or update static navigation in a comparison HTML file.
2892
+
2893
+ Also moves the Action Comparison panel to main-content (above screenshot) if needed.
2894
+
2895
+ Args:
2896
+ comparison_path: Path to the comparison HTML file
2897
+ output_dir: Directory containing all dashboard files
2898
+ nav_links: Pre-built list of (filename, label) tuples for consistency
2899
+ """
2900
+ import re
2901
+
2902
+ html = comparison_path.read_text()
2903
+
2904
+ # Move comparison panel to be a full-width sibling BEFORE main-content (not inside it)
2905
+ if '<div class="comparison-panel"' in html:
2906
+ # Check if panel is NOT already right before main-content
2907
+ if '<div class="comparison-panel"' in html and 'class="comparison-panel"' in html:
2908
+ # Check if it's in the wrong place (inside sidebar or main-content)
2909
+ in_sidebar = '<div class="sidebar">' in html and html.index('<div class="comparison-panel"') > html.index('<div class="sidebar">')
2910
+ in_main = '<div class="main-content">' in html and '<div class="main-content">\n' in html and '<div class="main-content">\n <div class="comparison-panel"' in html
2911
+
2912
+ if in_sidebar or in_main:
2913
+ # Extract comparison panel from wherever it is
2914
+ panel_match = re.search(
2915
+ r'(\s*<div class="comparison-panel"[^>]*>.*?</div>\s*</div>\s*</div>)',
2916
+ html,
2917
+ re.DOTALL
2918
+ )
2919
+ if panel_match:
2920
+ panel_html = panel_match.group(1)
2921
+ # Remove from current location
2922
+ html = html.replace(panel_html, '')
2923
+ # Insert as sibling BEFORE main-content
2924
+ html = html.replace(
2925
+ '<div class="main-content">',
2926
+ panel_html.strip() + '\n <div class="main-content">'
2927
+ )
2928
+ print(f" Moved Action Comparison above screenshot in {comparison_path.name}")
2929
+
2930
+ # Build nav links if not provided
2931
+ if nav_links is None:
2932
+ nav_links = _build_nav_links()
2933
+
2934
+ # Build nav HTML with active state for current file
2935
+ # NOTE: No "Dashboards:" label to match training dashboard nav
2936
+ current_file = comparison_path.name
2937
+ nav_html = '''
2938
+ <nav class="nav-bar" style="display:flex;gap:8px;padding:12px 16px;background:#12121a;border:1px solid rgba(255,255,255,0.06);border-radius:8px;margin-bottom:16px;flex-wrap:wrap;">
2939
+ '''
2940
+ for filename, label in nav_links:
2941
+ is_active = filename == current_file
2942
+ active_style = "background:#00d4aa;color:#0a0a0f;border-color:#00d4aa;font-weight:600;" if is_active else ""
2943
+ nav_html += f' <a href="{filename}" style="padding:8px 16px;border-radius:6px;font-size:0.8rem;text-decoration:none;color:#888;background:#1a1a24;border:1px solid rgba(255,255,255,0.06);{active_style}">{label}</a>\n'
2944
+ nav_html += ' </nav>\n'
2945
+
2946
+ # ALWAYS replace existing nav or add new one (for consistency)
2947
+ if '<nav class="nav-bar"' in html:
2948
+ # Replace existing nav
2949
+ html = re.sub(
2950
+ r'<nav class="nav-bar"[^>]*>.*?</nav>\s*',
2951
+ nav_html,
2952
+ html,
2953
+ flags=re.DOTALL
2954
+ )
2955
+ print(f" Updated navigation in {comparison_path.name}")
2956
+ elif '<div class="container">' in html:
2957
+ # Insert nav BEFORE the container, not inside it
2958
+ # This ensures the unified header is not affected by container padding
2959
+ html = html.replace(
2960
+ '<div class="container">',
2961
+ nav_html + '\n <div class="container">'
2962
+ )
2963
+ print(f" Added navigation to {comparison_path.name}")
2964
+ elif '<body>' in html:
2965
+ html = html.replace('<body>', '<body>\n' + nav_html)
2966
+ print(f" Added navigation to {comparison_path.name}")
2967
+
2968
+ comparison_path.write_text(html)
2969
+
2970
+