openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. openadapt_ml/benchmarks/__init__.py +8 -0
  2. openadapt_ml/benchmarks/agent.py +90 -11
  3. openadapt_ml/benchmarks/azure.py +35 -6
  4. openadapt_ml/benchmarks/cli.py +4449 -201
  5. openadapt_ml/benchmarks/live_tracker.py +180 -0
  6. openadapt_ml/benchmarks/runner.py +41 -4
  7. openadapt_ml/benchmarks/viewer.py +1219 -0
  8. openadapt_ml/benchmarks/vm_monitor.py +610 -0
  9. openadapt_ml/benchmarks/waa.py +61 -4
  10. openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
  11. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  12. openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
  13. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  14. openadapt_ml/benchmarks/waa_live.py +619 -0
  15. openadapt_ml/cloud/local.py +1555 -1
  16. openadapt_ml/cloud/ssh_tunnel.py +553 -0
  17. openadapt_ml/datasets/next_action.py +87 -68
  18. openadapt_ml/evals/grounding.py +26 -8
  19. openadapt_ml/evals/trajectory_matching.py +84 -36
  20. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  21. openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
  22. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  23. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  24. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  25. openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
  26. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  27. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  28. openadapt_ml/experiments/waa_demo/runner.py +717 -0
  29. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  30. openadapt_ml/export/__init__.py +9 -0
  31. openadapt_ml/export/__main__.py +6 -0
  32. openadapt_ml/export/cli.py +89 -0
  33. openadapt_ml/export/parquet.py +265 -0
  34. openadapt_ml/ingest/__init__.py +3 -4
  35. openadapt_ml/ingest/capture.py +89 -81
  36. openadapt_ml/ingest/loader.py +116 -68
  37. openadapt_ml/ingest/synthetic.py +221 -159
  38. openadapt_ml/retrieval/README.md +226 -0
  39. openadapt_ml/retrieval/USAGE.md +391 -0
  40. openadapt_ml/retrieval/__init__.py +91 -0
  41. openadapt_ml/retrieval/demo_retriever.py +817 -0
  42. openadapt_ml/retrieval/embeddings.py +629 -0
  43. openadapt_ml/retrieval/index.py +194 -0
  44. openadapt_ml/retrieval/retriever.py +160 -0
  45. openadapt_ml/runtime/policy.py +10 -10
  46. openadapt_ml/schema/__init__.py +104 -0
  47. openadapt_ml/schema/converters.py +541 -0
  48. openadapt_ml/schema/episode.py +457 -0
  49. openadapt_ml/scripts/compare.py +26 -16
  50. openadapt_ml/scripts/eval_policy.py +4 -5
  51. openadapt_ml/scripts/prepare_synthetic.py +14 -17
  52. openadapt_ml/scripts/train.py +81 -70
  53. openadapt_ml/training/benchmark_viewer.py +3225 -0
  54. openadapt_ml/training/trainer.py +120 -363
  55. openadapt_ml/training/trl_trainer.py +354 -0
  56. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
  57. openadapt_ml-0.2.0.dist-info/RECORD +86 -0
  58. openadapt_ml/schemas/__init__.py +0 -53
  59. openadapt_ml/schemas/sessions.py +0 -122
  60. openadapt_ml/schemas/validation.py +0 -252
  61. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  62. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
  63. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1219 @@
1
+ """Benchmark viewer HTML generation.
2
+
3
+ This module generates a standalone HTML viewer for benchmark results,
4
+ showing task list with pass/fail status, step-by-step replay of
5
+ benchmark executions, screenshots, actions, and reasoning at each step.
6
+
7
+ Usage:
8
+ from openadapt_ml.benchmarks.viewer import generate_benchmark_viewer
9
+
10
+ # Generate viewer from benchmark results directory
11
+ generate_benchmark_viewer(
12
+ benchmark_dir=Path("benchmark_results/waa_eval_20241214"),
13
+ output_path=Path("benchmark_results/waa_eval_20241214/benchmark.html"),
14
+ )
15
+
16
+ Directory structure expected:
17
+ benchmark_results/{run_name}/
18
+ |-- metadata.json # Benchmark config, models evaluated
19
+ |-- summary.json # Aggregate results
20
+ |-- tasks/
21
+ | |-- task_001/
22
+ | | |-- task.json # Task definition
23
+ | | |-- execution.json # Execution trace with steps
24
+ | | |-- screenshots/ # Step screenshots
25
+ | | |-- step_000.png
26
+ | | |-- step_001.png
27
+ | | |-- ...
28
+ | |-- task_002/
29
+ | | |-- ...
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ import base64
35
+ import json
36
+ import logging
37
+ from pathlib import Path
38
+ from typing import Any
39
+
40
+ from openadapt_ml.training.shared_ui import (
41
+ get_shared_header_css as _get_shared_header_css,
42
+ generate_shared_header_html as _generate_shared_header_html,
43
+ )
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
def load_benchmark_metadata(benchmark_dir: Path) -> dict[str, Any]:
    """Load benchmark metadata from metadata.json.

    Args:
        benchmark_dir: Path to benchmark run directory. A plain string is
            accepted as well and converted to a ``Path``.

    Returns:
        The parsed contents of ``metadata.json`` when the file exists.
        Otherwise a placeholder dictionary with ``benchmark_name`` and
        ``model_id`` set to ``"unknown"``, ``run_name`` set to the directory
        name, and ``created_at`` set to ``None``.
    """
    benchmark_dir = Path(benchmark_dir)
    metadata_path = benchmark_dir / "metadata.json"
    if metadata_path.exists():
        # JSON text is UTF-8; decoding with the locale default (e.g. cp1252
        # on Windows) would corrupt or reject non-ASCII run/model names.
        with open(metadata_path, encoding="utf-8") as f:
            return json.load(f)
    return {
        "benchmark_name": "unknown",
        "run_name": benchmark_dir.name,
        "model_id": "unknown",
        "created_at": None,
    }
67
+
68
+
69
def load_benchmark_summary(benchmark_dir: Path) -> dict[str, Any]:
    """Load benchmark summary from summary.json.

    Args:
        benchmark_dir: Path to benchmark run directory. A plain string is
            accepted as well and converted to a ``Path``.

    Returns:
        The parsed contents of ``summary.json`` when the file exists.
        Otherwise an all-zero summary (``num_tasks``, ``num_success``,
        ``success_rate``, ``avg_score``, ``avg_steps``) with an empty
        ``tasks`` list.
    """
    summary_path = Path(benchmark_dir) / "summary.json"
    if summary_path.exists():
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(summary_path, encoding="utf-8") as f:
            return json.load(f)
    return {
        "num_tasks": 0,
        "num_success": 0,
        "success_rate": 0.0,
        "avg_score": 0.0,
        "avg_steps": 0.0,
        "tasks": [],
    }
90
+
91
+
92
def load_task_results(benchmark_dir: Path) -> list[dict[str, Any]]:
    """Load all task results from benchmark run.

    Every sub-directory of ``benchmark_dir/tasks`` is treated as one task;
    non-directory entries are ignored. Tasks are returned in sorted
    directory order.

    Args:
        benchmark_dir: Path to benchmark run directory. A plain string is
            accepted as well and converted to a ``Path``.

    Returns:
        List of task dictionaries, each with the keys:
            task_dir: String path of the task directory.
            task_id: Name of the task directory.
            definition: Parsed ``task.json``, or a stub with the task id and
                an empty instruction when the file is missing.
            execution: Parsed ``execution.json``, or an empty, unsuccessful
                trace when the file is missing.
            screenshots: Sorted ``*.png`` paths under ``screenshots/``,
                relative to ``benchmark_dir`` and using forward slashes.
        An empty list is returned when there is no ``tasks`` directory.
    """
    benchmark_dir = Path(benchmark_dir)
    tasks_dir = benchmark_dir / "tasks"
    if not tasks_dir.exists():
        return []

    results: list[dict[str, Any]] = []
    for task_dir in sorted(tasks_dir.iterdir()):
        if not task_dir.is_dir():
            continue

        task_data: dict[str, Any] = {
            "task_dir": str(task_dir),
            "task_id": task_dir.name,
        }

        # Load task definition (JSON is UTF-8, so decode it explicitly).
        task_json = task_dir / "task.json"
        if task_json.exists():
            with open(task_json, encoding="utf-8") as f:
                task_data["definition"] = json.load(f)
        else:
            task_data["definition"] = {"task_id": task_dir.name, "instruction": ""}

        # Load execution trace
        execution_json = task_dir / "execution.json"
        if execution_json.exists():
            with open(execution_json, encoding="utf-8") as f:
                task_data["execution"] = json.load(f)
        else:
            task_data["execution"] = {"steps": [], "success": False, "num_steps": 0}

        # Load screenshot paths. These are serialized into the viewer's
        # JavaScript data, so emit forward slashes on every platform;
        # joining them back onto benchmark_dir still works on Windows.
        screenshots_dir = task_dir / "screenshots"
        if screenshots_dir.exists():
            task_data["screenshots"] = [
                p.relative_to(benchmark_dir).as_posix()
                for p in sorted(screenshots_dir.glob("*.png"))
            ]
        else:
            task_data["screenshots"] = []

        results.append(task_data)

    return results
143
+
144
+
145
+ def _encode_image_to_base64(image_path: Path) -> str | None:
146
+ """Encode image to base64 data URL for embedding in HTML.
147
+
148
+ Args:
149
+ image_path: Path to PNG image.
150
+
151
+ Returns:
152
+ Data URL string or None if image cannot be loaded.
153
+ """
154
+ try:
155
+ if image_path.exists():
156
+ with open(image_path, "rb") as f:
157
+ data = f.read()
158
+ return f"data:image/png;base64,{base64.b64encode(data).decode()}"
159
+ except Exception as e:
160
+ logger.warning(f"Failed to encode image {image_path}: {e}")
161
+ return None
162
+
163
+
164
+ def _get_domain_stats(tasks: list[dict[str, Any]]) -> dict[str, dict[str, int]]:
165
+ """Calculate per-domain statistics.
166
+
167
+ Args:
168
+ tasks: List of task result dictionaries.
169
+
170
+ Returns:
171
+ Dictionary mapping domain name to {total, success, fail} counts.
172
+ """
173
+ domain_stats: dict[str, dict[str, int]] = {}
174
+
175
+ for task in tasks:
176
+ domain = task.get("definition", {}).get("domain", "unknown")
177
+ success = task.get("execution", {}).get("success", False)
178
+
179
+ if domain not in domain_stats:
180
+ domain_stats[domain] = {"total": 0, "success": 0, "fail": 0}
181
+
182
+ domain_stats[domain]["total"] += 1
183
+ if success:
184
+ domain_stats[domain]["success"] += 1
185
+ else:
186
+ domain_stats[domain]["fail"] += 1
187
+
188
+ return domain_stats
189
+
190
+
191
def generate_benchmark_viewer(
    benchmark_dir: Path,
    output_path: Path | None = None,
    embed_screenshots: bool = False,
) -> Path:
    """Generate HTML viewer for benchmark results.

    Loads the run's metadata, summary, and task results, computes the
    per-domain statistics, renders the HTML, and writes it to disk. Parent
    directories of the output file are created when missing.

    Args:
        benchmark_dir: Path to benchmark run directory containing metadata.json,
            summary.json, and tasks/ subdirectory.
        output_path: Path for output HTML file. Defaults to benchmark_dir/benchmark.html.
        embed_screenshots: If True, embed screenshots as base64 data URLs.
            This creates a larger but fully standalone HTML file.

    Returns:
        Path to generated HTML file.
    """
    benchmark_dir = Path(benchmark_dir)
    if output_path is None:
        output_path = benchmark_dir / "benchmark.html"
    output_path = Path(output_path)

    # Load all data
    metadata = load_benchmark_metadata(benchmark_dir)
    summary = load_benchmark_summary(benchmark_dir)
    tasks = load_task_results(benchmark_dir)

    # Calculate domain statistics
    domain_stats = _get_domain_stats(tasks)

    # Generate HTML
    html = _generate_benchmark_viewer_html(
        metadata=metadata,
        summary=summary,
        tasks=tasks,
        domain_stats=domain_stats,
        benchmark_dir=benchmark_dir,
        embed_screenshots=embed_screenshots,
    )

    # Write output. The document declares <meta charset="UTF-8">, so the
    # bytes on disk must be UTF-8 regardless of the platform's locale
    # encoding; otherwise non-ASCII text is mis-rendered or fails to encode.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(html, encoding="utf-8")

    logger.info(f"Generated benchmark viewer: {output_path}")
    return output_path
237
+
238
+
239
+ def _generate_benchmark_viewer_html(
240
+ metadata: dict[str, Any],
241
+ summary: dict[str, Any],
242
+ tasks: list[dict[str, Any]],
243
+ domain_stats: dict[str, dict[str, int]],
244
+ benchmark_dir: Path,
245
+ embed_screenshots: bool = False,
246
+ ) -> str:
247
+ """Generate the HTML content for benchmark viewer.
248
+
249
+ Args:
250
+ metadata: Benchmark metadata.
251
+ summary: Summary statistics.
252
+ tasks: List of task result dictionaries.
253
+ domain_stats: Per-domain statistics.
254
+ benchmark_dir: Base directory for resolving relative paths.
255
+ embed_screenshots: If True, embed screenshots as base64.
256
+
257
+ Returns:
258
+ HTML string.
259
+ """
260
+ # Get shared header components
261
+ shared_header_css = _get_shared_header_css()
262
+ shared_header_html = _generate_shared_header_html("benchmarks")
263
+
264
+ # Serialize data for JavaScript
265
+ metadata_json = json.dumps(metadata)
266
+ summary_json = json.dumps(summary)
267
+ domain_stats_json = json.dumps(domain_stats)
268
+
269
+ # Process tasks for JavaScript - include execution steps and screenshot paths
270
+ tasks_for_js = []
271
+ for task in tasks:
272
+ task_js = {
273
+ "task_id": task.get("task_id"),
274
+ "definition": task.get("definition", {}),
275
+ "execution": task.get("execution", {}),
276
+ "screenshots": task.get("screenshots", []),
277
+ }
278
+
279
+ # Optionally embed screenshots as base64
280
+ if embed_screenshots:
281
+ embedded_screenshots = []
282
+ for screenshot_rel_path in task.get("screenshots", []):
283
+ screenshot_path = benchmark_dir / screenshot_rel_path
284
+ data_url = _encode_image_to_base64(screenshot_path)
285
+ embedded_screenshots.append(data_url or "")
286
+ task_js["embedded_screenshots"] = embedded_screenshots
287
+
288
+ tasks_for_js.append(task_js)
289
+
290
+ tasks_json = json.dumps(tasks_for_js)
291
+
292
+ # Calculate aggregate metrics
293
+ num_tasks = len(tasks)
294
+ num_success = sum(1 for t in tasks if t.get("execution", {}).get("success", False))
295
+ success_rate = (num_success / num_tasks * 100) if num_tasks > 0 else 0
296
+
297
+ html = f'''<!DOCTYPE html>
298
+ <html lang="en">
299
+ <head>
300
+ <meta charset="UTF-8">
301
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
302
+ <title>Benchmark Viewer - {metadata.get("run_name", "Unknown")}</title>
303
+ <style>
304
+ :root {{
305
+ --bg-primary: #0a0a0f;
306
+ --bg-secondary: #12121a;
307
+ --bg-tertiary: #1a1a24;
308
+ --border-color: rgba(255, 255, 255, 0.06);
309
+ --text-primary: #f0f0f0;
310
+ --text-secondary: #888;
311
+ --text-muted: #555;
312
+ --accent: #00d4aa;
313
+ --accent-dim: rgba(0, 212, 170, 0.15);
314
+ --success: #34d399;
315
+ --error: #ff5f5f;
316
+ --warning: #f59e0b;
317
+ }}
318
+ * {{ box-sizing: border-box; margin: 0; padding: 0; }}
319
+ body {{
320
+ font-family: "SF Pro Display", -apple-system, BlinkMacSystemFont, "Inter", sans-serif;
321
+ background: var(--bg-primary);
322
+ color: var(--text-primary);
323
+ min-height: 100vh;
324
+ line-height: 1.5;
325
+ }}
326
+ .container {{
327
+ max-width: 1600px;
328
+ margin: 0 auto;
329
+ padding: 24px;
330
+ }}
331
+ {shared_header_css}
332
+
333
+ /* Summary Panel */
334
+ .summary-panel {{
335
+ background: var(--bg-secondary);
336
+ border: 1px solid var(--border-color);
337
+ border-radius: 12px;
338
+ padding: 20px;
339
+ margin-bottom: 24px;
340
+ }}
341
+ .summary-header {{
342
+ display: flex;
343
+ justify-content: space-between;
344
+ align-items: center;
345
+ margin-bottom: 16px;
346
+ }}
347
+ .summary-header h2 {{
348
+ font-size: 1rem;
349
+ font-weight: 600;
350
+ }}
351
+ .summary-meta {{
352
+ font-size: 0.75rem;
353
+ color: var(--text-secondary);
354
+ font-family: "SF Mono", Monaco, monospace;
355
+ }}
356
+ .summary-stats {{
357
+ display: grid;
358
+ grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
359
+ gap: 16px;
360
+ margin-bottom: 16px;
361
+ }}
362
+ .stat-card {{
363
+ background: var(--bg-tertiary);
364
+ border-radius: 8px;
365
+ padding: 16px;
366
+ }}
367
+ .stat-card .stat-value {{
368
+ font-size: 1.8rem;
369
+ font-weight: 600;
370
+ font-family: "SF Mono", Monaco, monospace;
371
+ }}
372
+ .stat-card .stat-value.success {{ color: var(--success); }}
373
+ .stat-card .stat-value.error {{ color: var(--error); }}
374
+ .stat-card .stat-label {{
375
+ font-size: 0.7rem;
376
+ color: var(--text-muted);
377
+ text-transform: uppercase;
378
+ letter-spacing: 0.05em;
379
+ margin-top: 4px;
380
+ }}
381
+
382
+ /* Domain breakdown */
383
+ .domain-breakdown {{
384
+ display: flex;
385
+ flex-wrap: wrap;
386
+ gap: 8px;
387
+ }}
388
+ .domain-tag {{
389
+ display: inline-flex;
390
+ align-items: center;
391
+ gap: 6px;
392
+ padding: 6px 12px;
393
+ background: var(--bg-tertiary);
394
+ border-radius: 6px;
395
+ font-size: 0.75rem;
396
+ }}
397
+ .domain-tag .domain-name {{
398
+ color: var(--text-primary);
399
+ }}
400
+ .domain-tag .domain-stats {{
401
+ font-family: "SF Mono", Monaco, monospace;
402
+ color: var(--text-secondary);
403
+ }}
404
+
405
+ /* Filters */
406
+ .filter-bar {{
407
+ display: flex;
408
+ gap: 16px;
409
+ padding: 12px 16px;
410
+ background: var(--bg-secondary);
411
+ border: 1px solid var(--border-color);
412
+ border-radius: 8px;
413
+ margin-bottom: 16px;
414
+ flex-wrap: wrap;
415
+ align-items: center;
416
+ }}
417
+ .filter-group {{
418
+ display: flex;
419
+ align-items: center;
420
+ gap: 8px;
421
+ }}
422
+ .filter-label {{
423
+ font-size: 0.7rem;
424
+ color: var(--text-muted);
425
+ text-transform: uppercase;
426
+ letter-spacing: 0.05em;
427
+ }}
428
+ .filter-select {{
429
+ padding: 8px 32px 8px 12px;
430
+ border-radius: 8px;
431
+ font-size: 0.85rem;
432
+ background: var(--bg-tertiary);
433
+ color: var(--text-primary);
434
+ border: 1px solid var(--border-color);
435
+ cursor: pointer;
436
+ appearance: none;
437
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23888' d='M3 4.5L6 7.5L9 4.5'/%3E%3C/svg%3E");
438
+ background-repeat: no-repeat;
439
+ background-position: right 10px center;
440
+ transition: all 0.2s;
441
+ }}
442
+ .filter-select:hover {{ border-color: var(--accent); }}
443
+ .filter-count {{
444
+ font-size: 0.8rem;
445
+ color: var(--text-secondary);
446
+ margin-left: auto;
447
+ }}
448
+
449
+ /* Main Content Layout */
450
+ .main-content {{
451
+ display: grid;
452
+ grid-template-columns: 350px 1fr;
453
+ gap: 24px;
454
+ }}
455
+ @media (max-width: 1200px) {{
456
+ .main-content {{ grid-template-columns: 1fr; }}
457
+ }}
458
+
459
+ /* Task List */
460
+ .task-list {{
461
+ background: var(--bg-secondary);
462
+ border: 1px solid var(--border-color);
463
+ border-radius: 12px;
464
+ max-height: calc(100vh - 300px);
465
+ overflow-y: auto;
466
+ }}
467
+ .task-list-header {{
468
+ display: flex;
469
+ justify-content: space-between;
470
+ align-items: center;
471
+ padding: 14px 16px;
472
+ border-bottom: 1px solid var(--border-color);
473
+ position: sticky;
474
+ top: 0;
475
+ background: var(--bg-secondary);
476
+ z-index: 10;
477
+ }}
478
+ .task-list-header h3 {{
479
+ font-size: 0.9rem;
480
+ font-weight: 600;
481
+ }}
482
+ .task-item {{
483
+ padding: 12px 16px;
484
+ border-bottom: 1px solid var(--border-color);
485
+ cursor: pointer;
486
+ transition: background 0.2s;
487
+ }}
488
+ .task-item:hover {{ background: var(--bg-tertiary); }}
489
+ .task-item.active {{
490
+ background: var(--accent-dim);
491
+ border-left: 3px solid var(--accent);
492
+ }}
493
+ .task-item.hidden {{ display: none; }}
494
+ .task-item .task-header {{
495
+ display: flex;
496
+ justify-content: space-between;
497
+ align-items: center;
498
+ margin-bottom: 4px;
499
+ }}
500
+ .task-item .task-id {{
501
+ font-family: "SF Mono", Monaco, monospace;
502
+ font-size: 0.8rem;
503
+ font-weight: 600;
504
+ }}
505
+ .task-item .task-status {{
506
+ font-size: 0.7rem;
507
+ font-weight: 600;
508
+ padding: 2px 8px;
509
+ border-radius: 4px;
510
+ }}
511
+ .task-item .task-status.success {{
512
+ background: rgba(52, 211, 153, 0.2);
513
+ color: var(--success);
514
+ }}
515
+ .task-item .task-status.fail {{
516
+ background: rgba(255, 95, 95, 0.2);
517
+ color: var(--error);
518
+ }}
519
+ .task-item .task-info {{
520
+ font-size: 0.75rem;
521
+ color: var(--text-secondary);
522
+ }}
523
+ .task-item .task-domain {{
524
+ color: var(--accent);
525
+ }}
526
+
527
+ /* Task Detail Panel */
528
+ .task-detail {{
529
+ background: var(--bg-secondary);
530
+ border: 1px solid var(--border-color);
531
+ border-radius: 12px;
532
+ overflow: hidden;
533
+ }}
534
+ .task-detail-header {{
535
+ padding: 16px 20px;
536
+ border-bottom: 1px solid var(--border-color);
537
+ }}
538
+ .task-detail-header h2 {{
539
+ font-size: 1rem;
540
+ font-weight: 600;
541
+ margin-bottom: 8px;
542
+ }}
543
+ .task-detail-meta {{
544
+ font-size: 0.8rem;
545
+ color: var(--text-secondary);
546
+ line-height: 1.6;
547
+ }}
548
+ .task-detail-instruction {{
549
+ font-style: italic;
550
+ color: var(--text-primary);
551
+ margin-top: 8px;
552
+ padding: 10px;
553
+ background: var(--bg-tertiary);
554
+ border-radius: 6px;
555
+ font-size: 0.85rem;
556
+ }}
557
+
558
+ /* Step Viewer */
559
+ .step-viewer {{
560
+ display: grid;
561
+ grid-template-columns: 1fr 300px;
562
+ gap: 16px;
563
+ padding: 16px;
564
+ }}
565
+ @media (max-width: 900px) {{
566
+ .step-viewer {{ grid-template-columns: 1fr; }}
567
+ }}
568
+ .screenshot-container {{
569
+ position: relative;
570
+ background: #000;
571
+ border-radius: 8px;
572
+ overflow: hidden;
573
+ min-height: 400px;
574
+ display: flex;
575
+ align-items: center;
576
+ justify-content: center;
577
+ }}
578
+ .screenshot-container img {{
579
+ max-width: 100%;
580
+ max-height: 70vh;
581
+ object-fit: contain;
582
+ }}
583
+ .screenshot-placeholder {{
584
+ color: var(--text-muted);
585
+ font-size: 0.9rem;
586
+ }}
587
+ .click-marker {{
588
+ position: absolute;
589
+ width: 24px;
590
+ height: 24px;
591
+ border-radius: 50%;
592
+ transform: translate(-50%, -50%);
593
+ display: flex;
594
+ align-items: center;
595
+ justify-content: center;
596
+ font-size: 10px;
597
+ font-weight: bold;
598
+ pointer-events: none;
599
+ z-index: 100;
600
+ background: rgba(167, 139, 250, 0.4);
601
+ border: 2px solid #a78bfa;
602
+ color: #a78bfa;
603
+ }}
604
+
605
+ /* Step Controls */
606
+ .step-sidebar {{
607
+ display: flex;
608
+ flex-direction: column;
609
+ gap: 16px;
610
+ }}
611
+ .step-controls {{
612
+ display: flex;
613
+ gap: 8px;
614
+ flex-wrap: wrap;
615
+ align-items: center;
616
+ }}
617
+ .step-btn {{
618
+ padding: 8px 12px;
619
+ border: 1px solid var(--border-color);
620
+ background: var(--bg-tertiary);
621
+ color: var(--text-primary);
622
+ border-radius: 6px;
623
+ cursor: pointer;
624
+ font-size: 0.85rem;
625
+ min-width: 40px;
626
+ text-align: center;
627
+ transition: all 0.2s;
628
+ }}
629
+ .step-btn:hover {{ border-color: var(--accent); }}
630
+ .step-btn.primary {{ flex: 1; min-width: 60px; }}
631
+ .step-btn.active {{
632
+ background: var(--accent);
633
+ color: var(--bg-primary);
634
+ border-color: var(--accent);
635
+ }}
636
+ .step-progress {{
637
+ font-size: 0.8rem;
638
+ color: var(--text-secondary);
639
+ font-family: "SF Mono", Monaco, monospace;
640
+ }}
641
+
642
+ /* Step List */
643
+ .step-list {{
644
+ background: var(--bg-tertiary);
645
+ border-radius: 8px;
646
+ max-height: 300px;
647
+ overflow-y: auto;
648
+ }}
649
+ .step-list-item {{
650
+ padding: 10px 12px;
651
+ border-bottom: 1px solid var(--border-color);
652
+ cursor: pointer;
653
+ transition: background 0.2s;
654
+ font-size: 0.8rem;
655
+ }}
656
+ .step-list-item:hover {{ background: var(--bg-secondary); }}
657
+ .step-list-item.active {{
658
+ background: var(--accent-dim);
659
+ border-left: 2px solid var(--accent);
660
+ }}
661
+ .step-list-item .step-num {{
662
+ font-weight: 600;
663
+ color: var(--accent);
664
+ margin-right: 8px;
665
+ }}
666
+ .step-list-item .step-action {{
667
+ color: var(--text-secondary);
668
+ }}
669
+
670
+ /* Action Detail */
671
+ .action-detail {{
672
+ background: var(--bg-tertiary);
673
+ border-radius: 8px;
674
+ padding: 12px;
675
+ }}
676
+ .action-detail h4 {{
677
+ font-size: 0.8rem;
678
+ color: var(--text-muted);
679
+ text-transform: uppercase;
680
+ letter-spacing: 0.05em;
681
+ margin-bottom: 8px;
682
+ }}
683
+ .action-content {{
684
+ font-family: "SF Mono", Monaco, monospace;
685
+ font-size: 0.8rem;
686
+ color: var(--text-primary);
687
+ word-break: break-word;
688
+ }}
689
+ .reasoning-box {{
690
+ margin-top: 12px;
691
+ padding: 10px;
692
+ background: var(--bg-secondary);
693
+ border-radius: 6px;
694
+ font-size: 0.8rem;
695
+ color: var(--text-secondary);
696
+ line-height: 1.6;
697
+ max-height: 200px;
698
+ overflow-y: auto;
699
+ }}
700
+ .reasoning-box h4 {{
701
+ margin-bottom: 8px;
702
+ }}
703
+
704
+ /* Speed Control */
705
+ .speed-control {{
706
+ display: flex;
707
+ align-items: center;
708
+ gap: 6px;
709
+ margin-left: auto;
710
+ }}
711
+ .speed-control label {{
712
+ font-size: 0.7rem;
713
+ color: var(--text-muted);
714
+ text-transform: uppercase;
715
+ }}
716
+ .speed-control select {{
717
+ padding: 4px 8px;
718
+ border-radius: 4px;
719
+ background: var(--bg-tertiary);
720
+ color: var(--text-primary);
721
+ border: 1px solid var(--border-color);
722
+ font-size: 0.8rem;
723
+ cursor: pointer;
724
+ }}
725
+
726
+ /* Progress Bar */
727
+ .progress-bar {{
728
+ width: 100%;
729
+ height: 4px;
730
+ background: var(--bg-tertiary);
731
+ border-radius: 2px;
732
+ margin-top: 8px;
733
+ overflow: hidden;
734
+ cursor: pointer;
735
+ }}
736
+ .progress-bar .progress {{
737
+ height: 100%;
738
+ background: var(--accent);
739
+ transition: width 0.1s ease;
740
+ }}
741
+
742
+ /* No task selected state */
743
+ .no-task-selected {{
744
+ display: flex;
745
+ flex-direction: column;
746
+ align-items: center;
747
+ justify-content: center;
748
+ min-height: 400px;
749
+ color: var(--text-muted);
750
+ }}
751
+ .no-task-selected .icon {{
752
+ font-size: 3rem;
753
+ margin-bottom: 16px;
754
+ }}
755
+ .no-task-selected p {{
756
+ font-size: 0.9rem;
757
+ }}
758
+ </style>
759
+ </head>
760
+ <body>
761
+ {shared_header_html}
762
+
763
+ <div class="container">
764
+ <!-- Summary Panel -->
765
+ <div class="summary-panel">
766
+ <div class="summary-header">
767
+ <h2>Benchmark Results: {metadata.get("run_name", "Unknown")}</h2>
768
+ <div class="summary-meta">
769
+ <span>Model: {metadata.get("model_id", "unknown")}</span>
770
+ <span> | </span>
771
+ <span>Created: {metadata.get("created_at", "N/A")}</span>
772
+ </div>
773
+ </div>
774
+ <div class="summary-stats">
775
+ <div class="stat-card">
776
+ <div class="stat-value">{num_tasks}</div>
777
+ <div class="stat-label">Total Tasks</div>
778
+ </div>
779
+ <div class="stat-card">
780
+ <div class="stat-value success">{num_success}</div>
781
+ <div class="stat-label">Passed</div>
782
+ </div>
783
+ <div class="stat-card">
784
+ <div class="stat-value error">{num_tasks - num_success}</div>
785
+ <div class="stat-label">Failed</div>
786
+ </div>
787
+ <div class="stat-card">
788
+ <div class="stat-value {'success' if success_rate >= 50 else 'error'}">{success_rate:.1f}%</div>
789
+ <div class="stat-label">Success Rate</div>
790
+ </div>
791
+ </div>
792
+ <div class="domain-breakdown" id="domain-breakdown"></div>
793
+ </div>
794
+
795
+ <!-- Filters -->
796
+ <div class="filter-bar">
797
+ <div class="filter-group">
798
+ <span class="filter-label">Domain:</span>
799
+ <select class="filter-select" id="domain-filter">
800
+ <option value="all">All Domains</option>
801
+ </select>
802
+ </div>
803
+ <div class="filter-group">
804
+ <span class="filter-label">Status:</span>
805
+ <select class="filter-select" id="status-filter">
806
+ <option value="all">All</option>
807
+ <option value="success">Passed</option>
808
+ <option value="fail">Failed</option>
809
+ </select>
810
+ </div>
811
+ <span class="filter-count" id="filter-count">{num_tasks} tasks</span>
812
+ </div>
813
+
814
+ <!-- Main Content -->
815
+ <div class="main-content">
816
+ <!-- Task List -->
817
+ <div class="task-list">
818
+ <div class="task-list-header">
819
+ <h3>Tasks</h3>
820
+ </div>
821
+ <div id="task-list-items"></div>
822
+ </div>
823
+
824
+ <!-- Task Detail Panel -->
825
+ <div class="task-detail" id="task-detail">
826
+ <div class="no-task-selected" id="no-task-selected">
827
+ <div class="icon">+</div>
828
+ <p>Select a task from the list to view details</p>
829
+ </div>
830
+ <div id="task-detail-content" style="display:none;"></div>
831
+ </div>
832
+ </div>
833
+ </div>
834
+
835
+ <script>
836
+ // Data from Python
837
+ const metadata = {metadata_json};
838
+ const summary = {summary_json};
839
+ const domainStats = {domain_stats_json};
840
+ const tasks = {tasks_json};
841
+ const embedScreenshots = {'true' if embed_screenshots else 'false'};
842
+
843
+ let currentTaskIndex = -1;
844
+ let currentStepIndex = 0;
845
+ let isPlaying = false;
846
+ let playInterval = null;
847
+ let playSpeed = 1000;
848
+
849
+ // Initialize page
850
+ function init() {{
851
+ renderDomainBreakdown();
852
+ populateDomainFilter();
853
+ renderTaskList();
854
+ setupFilters();
855
+ }}
856
+
857
+ function renderDomainBreakdown() {{
858
+ const container = document.getElementById('domain-breakdown');
859
+ let html = '';
860
+ for (const [domain, stats] of Object.entries(domainStats)) {{
861
+ const rate = stats.total > 0 ? (stats.success / stats.total * 100).toFixed(0) : 0;
862
+ html += `
863
+ <div class="domain-tag">
864
+ <span class="domain-name">${{domain}}</span>
865
+ <span class="domain-stats">${{stats.success}}/${{stats.total}} (${{rate}}%)</span>
866
+ </div>
867
+ `;
868
+ }}
869
+ container.innerHTML = html;
870
+ }}
871
+
872
+ function populateDomainFilter() {{
873
+ const select = document.getElementById('domain-filter');
874
+ for (const domain of Object.keys(domainStats).sort()) {{
875
+ const option = document.createElement('option');
876
+ option.value = domain;
877
+ option.textContent = domain;
878
+ select.appendChild(option);
879
+ }}
880
+ }}
881
+
882
+ function renderTaskList() {{
883
+ const container = document.getElementById('task-list-items');
884
+ let html = '';
885
+ tasks.forEach((task, idx) => {{
886
+ const def = task.definition || {{}};
887
+ const exec = task.execution || {{}};
888
+ const success = exec.success || false;
889
+ const domain = def.domain || 'unknown';
890
+ const numSteps = exec.num_steps || 0;
891
+
892
+ html += `
893
+ <div class="task-item" data-idx="${{idx}}" data-domain="${{domain}}" data-status="${{success ? 'success' : 'fail'}}" onclick="selectTask(${{idx}})">
894
+ <div class="task-header">
895
+ <span class="task-id">${{task.task_id}}</span>
896
+ <span class="task-status ${{success ? 'success' : 'fail'}}">${{success ? 'PASS' : 'FAIL'}}</span>
897
+ </div>
898
+ <div class="task-info">
899
+ <span class="task-domain">${{domain}}</span>
900
+ <span> | ${{numSteps}} steps</span>
901
+ </div>
902
+ </div>
903
+ `;
904
+ }});
905
+ container.innerHTML = html;
906
+ }}
907
+
908
+ function setupFilters() {{
909
+ document.getElementById('domain-filter').addEventListener('change', filterTasks);
910
+ document.getElementById('status-filter').addEventListener('change', filterTasks);
911
+ }}
912
+
913
+ function filterTasks() {{
914
+ const domainFilter = document.getElementById('domain-filter').value;
915
+ const statusFilter = document.getElementById('status-filter').value;
916
+
917
+ let visibleCount = 0;
918
+ document.querySelectorAll('.task-item').forEach(item => {{
919
+ const domain = item.dataset.domain;
920
+ const status = item.dataset.status;
921
+
922
+ const matchDomain = domainFilter === 'all' || domain === domainFilter;
923
+ const matchStatus = statusFilter === 'all' || status === statusFilter;
924
+
925
+ if (matchDomain && matchStatus) {{
926
+ item.classList.remove('hidden');
927
+ visibleCount++;
928
+ }} else {{
929
+ item.classList.add('hidden');
930
+ }}
931
+ }});
932
+
933
+ document.getElementById('filter-count').textContent = `${{visibleCount}} tasks`;
934
+ }}
935
+
936
+ function selectTask(idx) {{
937
+ currentTaskIndex = idx;
938
+ currentStepIndex = 0;
939
+
940
+ // Update active state in list
941
+ document.querySelectorAll('.task-item').forEach((item, i) => {{
942
+ item.classList.toggle('active', parseInt(item.dataset.idx) === idx);
943
+ }});
944
+
945
+ // Show task detail
946
+ document.getElementById('no-task-selected').style.display = 'none';
947
+ document.getElementById('task-detail-content').style.display = 'block';
948
+
949
+ renderTaskDetail();
950
+ }}
951
+
952
+ function renderTaskDetail() {{
953
+ if (currentTaskIndex < 0) return;
954
+
955
+ const task = tasks[currentTaskIndex];
956
+ const def = task.definition || {{}};
957
+ const exec = task.execution || {{}};
958
+ const steps = exec.steps || [];
959
+ const success = exec.success || false;
960
+
961
+ const container = document.getElementById('task-detail-content');
962
+ container.innerHTML = `
963
+ <div class="task-detail-header">
964
+ <h2>${{task.task_id}} - <span style="color: ${{success ? 'var(--success)' : 'var(--error)'}}">${{success ? 'PASSED' : 'FAILED'}}</span></h2>
965
+ <div class="task-detail-meta">
966
+ Domain: <strong>${{def.domain || 'unknown'}}</strong> |
967
+ Steps: <strong>${{exec.num_steps || steps.length}}</strong> |
968
+ Time: <strong>${{(exec.total_time_seconds || 0).toFixed(1)}}s</strong>
969
+ ${{exec.error ? `<br>Error: <span style="color:var(--error)">${{exec.error}}</span>` : ''}}
970
+ </div>
971
+ <div class="task-detail-instruction">
972
+ ${{def.instruction || 'No instruction available'}}
973
+ </div>
974
+ </div>
975
+ <div class="step-viewer">
976
+ <div class="screenshot-container" id="screenshot-container">
977
+ ${{steps.length > 0 ? '<img id="screenshot-img" src="" alt="Step screenshot">' : '<span class="screenshot-placeholder">No screenshots available</span>'}}
978
+ </div>
979
+ <div class="step-sidebar">
980
+ <div class="step-controls">
981
+ <button class="step-btn" onclick="prevStep()">Prev</button>
982
+ <button class="step-btn primary" id="play-btn" onclick="togglePlay()">Play</button>
983
+ <button class="step-btn" onclick="nextStep()">Next</button>
984
+ <span class="step-progress" id="step-progress">0 / ${{steps.length}}</span>
985
+ <div class="speed-control">
986
+ <label>Speed</label>
987
+ <select id="speed-select" onchange="changeSpeed(this.value)">
988
+ <option value="2000">0.5x</option>
989
+ <option value="1000" selected>1x</option>
990
+ <option value="500">2x</option>
991
+ <option value="250">4x</option>
992
+ </select>
993
+ </div>
994
+ </div>
995
+ <div class="progress-bar" onclick="seekStep(event)">
996
+ <div class="progress" id="step-progress-bar" style="width: 0%"></div>
997
+ </div>
998
+ <div class="step-list" id="step-list"></div>
999
+ <div class="action-detail" id="action-detail">
1000
+ <h4>Action</h4>
1001
+ <div class="action-content" id="action-content">-</div>
1002
+ </div>
1003
+ <div class="reasoning-box" id="reasoning-box" style="display:none;">
1004
+ <h4>Reasoning</h4>
1005
+ <div id="reasoning-content"></div>
1006
+ </div>
1007
+ </div>
1008
+ </div>
1009
+ `;
1010
+
1011
+ renderStepList();
1012
+ if (steps.length > 0) {{
1013
+ updateStep();
1014
+ }}
1015
+ }}
1016
+
1017
+ function renderStepList() {{
1018
+ if (currentTaskIndex < 0) return;
1019
+
1020
+ const task = tasks[currentTaskIndex];
1021
+ const steps = task.execution?.steps || [];
1022
+ const container = document.getElementById('step-list');
1023
+
1024
+ let html = '';
1025
+ steps.forEach((step, idx) => {{
1026
+ const action = step.action || {{}};
1027
+ const actionType = action.type || 'unknown';
1028
+ html += `
1029
+ <div class="step-list-item ${{idx === currentStepIndex ? 'active' : ''}}" onclick="goToStep(${{idx}})">
1030
+ <span class="step-num">#${{idx}}</span>
1031
+ <span class="step-action">${{actionType.toUpperCase()}}</span>
1032
+ </div>
1033
+ `;
1034
+ }});
1035
+ container.innerHTML = html || '<div style="padding:12px;color:var(--text-muted);">No steps</div>';
1036
+ }}
1037
+
1038
+ function updateStep() {{
1039
+ if (currentTaskIndex < 0) return;
1040
+
1041
+ const task = tasks[currentTaskIndex];
1042
+ const steps = task.execution?.steps || [];
1043
+ const screenshots = task.screenshots || [];
1044
+
1045
+ if (steps.length === 0) return;
1046
+
1047
+ const step = steps[currentStepIndex] || {{}};
1048
+ const action = step.action || {{}};
1049
+
1050
+ // Update screenshot
1051
+ const img = document.getElementById('screenshot-img');
1052
+ if (img) {{
1053
+ if (embedScreenshots && task.embedded_screenshots && task.embedded_screenshots[currentStepIndex]) {{
1054
+ img.src = task.embedded_screenshots[currentStepIndex];
1055
+ }} else if (screenshots[currentStepIndex]) {{
1056
+ img.src = screenshots[currentStepIndex];
1057
+ }} else if (step.screenshot_path) {{
1058
+ img.src = step.screenshot_path;
1059
+ }} else {{
1060
+ img.src = '';
1061
+ }}
1062
+ }}
1063
+
1064
+ // Update click marker if action has coordinates
1065
+ const container = document.getElementById('screenshot-container');
1066
+ // Remove existing markers
1067
+ container.querySelectorAll('.click-marker').forEach(m => m.remove());
1068
+
1069
+ if (action.x !== null && action.y !== null && action.x !== undefined && action.y !== undefined) {{
1070
+ const marker = document.createElement('div');
1071
+ marker.className = 'click-marker';
1072
+ marker.style.left = `${{action.x * 100}}%`;
1073
+ marker.style.top = `${{action.y * 100}}%`;
1074
+ marker.textContent = 'AI';
1075
+ container.appendChild(marker);
1076
+ }}
1077
+
1078
+ // Update progress
1079
+ document.getElementById('step-progress').textContent = `${{currentStepIndex + 1}} / ${{steps.length}}`;
1080
+ const progressPct = steps.length > 1 ? (currentStepIndex / (steps.length - 1)) * 100 : 0;
1081
+ document.getElementById('step-progress-bar').style.width = `${{progressPct}}%`;
1082
+
1083
+ // Update action detail
1084
+ const actionContent = document.getElementById('action-content');
1085
+ let actionText = action.type ? action.type.toUpperCase() : 'unknown';
1086
+ if (action.x !== null && action.y !== null && action.x !== undefined && action.y !== undefined) {{
1087
+ actionText += ` (${{(action.x * 100).toFixed(1)}}%, ${{(action.y * 100).toFixed(1)}}%)`;
1088
+ }}
1089
+ if (action.text) {{
1090
+ actionText += ` "${{action.text}}"`;
1091
+ }}
1092
+ if (action.key) {{
1093
+ actionText += ` [${{action.key}}]`;
1094
+ }}
1095
+ actionContent.textContent = actionText;
1096
+
1097
+ // Update reasoning
1098
+ const reasoningBox = document.getElementById('reasoning-box');
1099
+ const reasoningContent = document.getElementById('reasoning-content');
1100
+ if (step.reasoning) {{
1101
+ reasoningBox.style.display = 'block';
1102
+ reasoningContent.textContent = step.reasoning;
1103
+ }} else {{
1104
+ reasoningBox.style.display = 'none';
1105
+ }}
1106
+
1107
+ // Update step list active state
1108
+ document.querySelectorAll('.step-list-item').forEach((item, idx) => {{
1109
+ item.classList.toggle('active', idx === currentStepIndex);
1110
+ }});
1111
+ }}
1112
+
1113
+ function prevStep() {{
1114
+ if (currentStepIndex > 0) {{
1115
+ currentStepIndex--;
1116
+ updateStep();
1117
+ }}
1118
+ }}
1119
+
1120
+ function nextStep() {{
1121
+ const task = tasks[currentTaskIndex];
1122
+ const steps = task?.execution?.steps || [];
1123
+ if (currentStepIndex < steps.length - 1) {{
1124
+ currentStepIndex++;
1125
+ updateStep();
1126
+ }} else if (isPlaying) {{
1127
+ stopPlay();
1128
+ }}
1129
+ }}
1130
+
1131
+ function goToStep(idx) {{
1132
+ currentStepIndex = idx;
1133
+ updateStep();
1134
+ }}
1135
+
1136
+ function seekStep(event) {{
1137
+ const task = tasks[currentTaskIndex];
1138
+ const steps = task?.execution?.steps || [];
1139
+ if (steps.length === 0) return;
1140
+
1141
+ const bar = event.currentTarget;
1142
+ const rect = bar.getBoundingClientRect();
1143
+ const pct = (event.clientX - rect.left) / rect.width;
1144
+ currentStepIndex = Math.floor(pct * steps.length);
1145
+ currentStepIndex = Math.max(0, Math.min(currentStepIndex, steps.length - 1));
1146
+ updateStep();
1147
+ }}
1148
+
1149
+ function togglePlay() {{
1150
+ if (isPlaying) {{
1151
+ stopPlay();
1152
+ }} else {{
1153
+ startPlay();
1154
+ }}
1155
+ }}
1156
+
1157
+ function startPlay() {{
1158
+ isPlaying = true;
1159
+ document.getElementById('play-btn').textContent = 'Pause';
1160
+ document.getElementById('play-btn').classList.add('active');
1161
+ playInterval = setInterval(nextStep, playSpeed);
1162
+ }}
1163
+
1164
+ function stopPlay() {{
1165
+ isPlaying = false;
1166
+ document.getElementById('play-btn').textContent = 'Play';
1167
+ document.getElementById('play-btn').classList.remove('active');
1168
+ if (playInterval) {{
1169
+ clearInterval(playInterval);
1170
+ playInterval = null;
1171
+ }}
1172
+ }}
1173
+
1174
+ function changeSpeed(value) {{
1175
+ playSpeed = parseInt(value);
1176
+ if (isPlaying) {{
1177
+ stopPlay();
1178
+ startPlay();
1179
+ }}
1180
+ }}
1181
+
1182
+ // Keyboard shortcuts
1183
+ document.addEventListener('keydown', (e) => {{
1184
+ if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
1185
+
1186
+ switch (e.key) {{
1187
+ case ' ':
1188
+ e.preventDefault();
1189
+ togglePlay();
1190
+ break;
1191
+ case 'ArrowLeft':
1192
+ e.preventDefault();
1193
+ prevStep();
1194
+ break;
1195
+ case 'ArrowRight':
1196
+ e.preventDefault();
1197
+ nextStep();
1198
+ break;
1199
+ case 'Home':
1200
+ e.preventDefault();
1201
+ goToStep(0);
1202
+ break;
1203
+ case 'End':
1204
+ e.preventDefault();
1205
+ const task = tasks[currentTaskIndex];
1206
+ const steps = task?.execution?.steps || [];
1207
+ goToStep(steps.length - 1);
1208
+ break;
1209
+ }}
1210
+ }});
1211
+
1212
+ // Initialize on load
1213
+ document.addEventListener('DOMContentLoaded', init);
1214
+ </script>
1215
+ </body>
1216
+ </html>
1217
+ '''
1218
+
1219
+ return html