openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -107
  8. openadapt_ml/benchmarks/agent.py +297 -374
  9. openadapt_ml/benchmarks/azure.py +62 -24
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1874 -751
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +1236 -0
  14. openadapt_ml/benchmarks/vm_monitor.py +1111 -0
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
  16. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  17. openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
  18. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  19. openadapt_ml/cloud/azure_inference.py +3 -5
  20. openadapt_ml/cloud/lambda_labs.py +722 -307
  21. openadapt_ml/cloud/local.py +3194 -89
  22. openadapt_ml/cloud/ssh_tunnel.py +595 -0
  23. openadapt_ml/datasets/next_action.py +125 -96
  24. openadapt_ml/evals/grounding.py +32 -9
  25. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  26. openadapt_ml/evals/trajectory_matching.py +120 -57
  27. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  28. openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
  29. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  30. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  31. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  32. openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
  33. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  34. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  35. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  36. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  37. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  38. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  39. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  40. openadapt_ml/experiments/waa_demo/runner.py +732 -0
  41. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  42. openadapt_ml/export/__init__.py +9 -0
  43. openadapt_ml/export/__main__.py +6 -0
  44. openadapt_ml/export/cli.py +89 -0
  45. openadapt_ml/export/parquet.py +277 -0
  46. openadapt_ml/grounding/detector.py +18 -14
  47. openadapt_ml/ingest/__init__.py +11 -10
  48. openadapt_ml/ingest/capture.py +97 -86
  49. openadapt_ml/ingest/loader.py +120 -69
  50. openadapt_ml/ingest/synthetic.py +344 -193
  51. openadapt_ml/models/api_adapter.py +14 -4
  52. openadapt_ml/models/base_adapter.py +10 -2
  53. openadapt_ml/models/providers/__init__.py +288 -0
  54. openadapt_ml/models/providers/anthropic.py +266 -0
  55. openadapt_ml/models/providers/base.py +299 -0
  56. openadapt_ml/models/providers/google.py +376 -0
  57. openadapt_ml/models/providers/openai.py +342 -0
  58. openadapt_ml/models/qwen_vl.py +46 -19
  59. openadapt_ml/perception/__init__.py +35 -0
  60. openadapt_ml/perception/integration.py +399 -0
  61. openadapt_ml/retrieval/README.md +226 -0
  62. openadapt_ml/retrieval/USAGE.md +391 -0
  63. openadapt_ml/retrieval/__init__.py +91 -0
  64. openadapt_ml/retrieval/demo_retriever.py +843 -0
  65. openadapt_ml/retrieval/embeddings.py +630 -0
  66. openadapt_ml/retrieval/index.py +194 -0
  67. openadapt_ml/retrieval/retriever.py +162 -0
  68. openadapt_ml/runtime/__init__.py +50 -0
  69. openadapt_ml/runtime/policy.py +27 -14
  70. openadapt_ml/runtime/safety_gate.py +471 -0
  71. openadapt_ml/schema/__init__.py +113 -0
  72. openadapt_ml/schema/converters.py +588 -0
  73. openadapt_ml/schema/episode.py +470 -0
  74. openadapt_ml/scripts/capture_screenshots.py +530 -0
  75. openadapt_ml/scripts/compare.py +102 -61
  76. openadapt_ml/scripts/demo_policy.py +4 -1
  77. openadapt_ml/scripts/eval_policy.py +19 -14
  78. openadapt_ml/scripts/make_gif.py +1 -1
  79. openadapt_ml/scripts/prepare_synthetic.py +16 -17
  80. openadapt_ml/scripts/train.py +98 -75
  81. openadapt_ml/segmentation/README.md +920 -0
  82. openadapt_ml/segmentation/__init__.py +97 -0
  83. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  84. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  85. openadapt_ml/segmentation/annotator.py +610 -0
  86. openadapt_ml/segmentation/cache.py +290 -0
  87. openadapt_ml/segmentation/cli.py +674 -0
  88. openadapt_ml/segmentation/deduplicator.py +656 -0
  89. openadapt_ml/segmentation/frame_describer.py +788 -0
  90. openadapt_ml/segmentation/pipeline.py +340 -0
  91. openadapt_ml/segmentation/schemas.py +622 -0
  92. openadapt_ml/segmentation/segment_extractor.py +634 -0
  93. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  94. openadapt_ml/training/benchmark_viewer.py +3255 -19
  95. openadapt_ml/training/shared_ui.py +7 -7
  96. openadapt_ml/training/stub_provider.py +57 -35
  97. openadapt_ml/training/trainer.py +255 -441
  98. openadapt_ml/training/trl_trainer.py +403 -0
  99. openadapt_ml/training/viewer.py +323 -108
  100. openadapt_ml/training/viewer_components.py +180 -0
  101. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
  102. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  103. openadapt_ml/benchmarks/base.py +0 -366
  104. openadapt_ml/benchmarks/data_collection.py +0 -432
  105. openadapt_ml/benchmarks/runner.py +0 -381
  106. openadapt_ml/benchmarks/waa.py +0 -704
  107. openadapt_ml/schemas/__init__.py +0 -53
  108. openadapt_ml/schemas/sessions.py +0 -122
  109. openadapt_ml/schemas/validation.py +0 -252
  110. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  111. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  112. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1100 @@
1
+ {
2
+ "timestamp": "2025-12-31T16:59:58.848298",
3
+ "provider": "anthropic",
4
+ "screenshot": "/Users/abrichr/oa/src/openadapt-capture/turn-off-nightshift/screenshots/capture_31807990_step_0.png",
5
+ "demo": "DEMONSTRATION:\nGoal: Turn off Night Shift in macOS System Settings\n\nStep 1:\n Screen: Desktop with application window visible\n Action: CLICK(0.01, 0.01)\n Result: Apple menu opened\n\nStep 2:\n Screen: Apple menu visible with options\n Action: CLICK on \"System Settings...\" menu item\n Result: System Settings application opened\n\nStep 3:\n Screen: System Settings window with sidebar\n Action: CLICK on \"Displays\" in the sidebar\n Result: Displays panel shown in main area\n\nStep 4:\n Screen: Displays panel showing display settings\n Action: CLICK on \"Night Shift...\" button\n Result: Night Shift popup/sheet appeared\n\nStep 5:\n Screen: Night Shift popup with Schedule dropdown\n Action: CLICK on Schedule dropdown, select \"Off\"\n Result: Night Shift schedule set to Off, Night Shift disabled\n\n---",
6
+ "n_test_cases": 45,
7
+ "summary": {
8
+ "zero_shot_accuracy": 0.4666666666666667,
9
+ "with_demo_accuracy": 1.0,
10
+ "control_accuracy": 0.5777777777777777,
11
+ "improvement": 0.5333333333333333
12
+ },
13
+ "correct_counts": {
14
+ "zero_shot": 21,
15
+ "with_demo": 45,
16
+ "control": 26
17
+ },
18
+ "test_cases": [
19
+ {
20
+ "test_case": {
21
+ "name": "displays_night_shift_on",
22
+ "task": "Turn ON Night Shift in macOS System Settings",
23
+ "category": "Displays"
24
+ },
25
+ "results": {
26
+ "zero_shot": {
27
+ "action": "CLICK(20, 8)",
28
+ "correct": true,
29
+ "error": null
30
+ },
31
+ "with_demo": {
32
+ "action": "CLICK(20, 8)",
33
+ "correct": true,
34
+ "error": null
35
+ },
36
+ "control": {
37
+ "action": "CLICK(20, 8)",
38
+ "correct": true,
39
+ "error": null
40
+ }
41
+ }
42
+ },
43
+ {
44
+ "test_case": {
45
+ "name": "displays_night_shift_off",
46
+ "task": "Turn OFF Night Shift in macOS System Settings",
47
+ "category": "Displays"
48
+ },
49
+ "results": {
50
+ "zero_shot": {
51
+ "action": "CLICK(1243, 8)",
52
+ "correct": false,
53
+ "error": null
54
+ },
55
+ "with_demo": {
56
+ "action": "CLICK(20, 8)",
57
+ "correct": true,
58
+ "error": null
59
+ },
60
+ "control": {
61
+ "action": "CLICK(1113, 8)",
62
+ "correct": false,
63
+ "error": null
64
+ }
65
+ }
66
+ },
67
+ {
68
+ "test_case": {
69
+ "name": "displays_night_shift_schedule",
70
+ "task": "Set Night Shift to turn on at sunset in System Settings",
71
+ "category": "Displays"
72
+ },
73
+ "results": {
74
+ "zero_shot": {
75
+ "action": "CLICK(20, 8)",
76
+ "correct": true,
77
+ "error": null
78
+ },
79
+ "with_demo": {
80
+ "action": "CLICK(20, 8)",
81
+ "correct": true,
82
+ "error": null
83
+ },
84
+ "control": {
85
+ "action": "CLICK(20, 8)",
86
+ "correct": true,
87
+ "error": null
88
+ }
89
+ }
90
+ },
91
+ {
92
+ "test_case": {
93
+ "name": "displays_night_shift_temp",
94
+ "task": "Adjust Night Shift color temperature to warmer setting",
95
+ "category": "Displays"
96
+ },
97
+ "results": {
98
+ "zero_shot": {
99
+ "action": "CLICK(1218, 8)",
100
+ "correct": false,
101
+ "error": null
102
+ },
103
+ "with_demo": {
104
+ "action": "CLICK(20, 8)",
105
+ "correct": true,
106
+ "error": null
107
+ },
108
+ "control": {
109
+ "action": "CLICK(1244, 8)",
110
+ "correct": false,
111
+ "error": null
112
+ }
113
+ }
114
+ },
115
+ {
116
+ "test_case": {
117
+ "name": "displays_true_tone_on",
118
+ "task": "Enable True Tone display in System Settings",
119
+ "category": "Displays"
120
+ },
121
+ "results": {
122
+ "zero_shot": {
123
+ "action": "CLICK(1193, 8)",
124
+ "correct": false,
125
+ "error": null
126
+ },
127
+ "with_demo": {
128
+ "action": "CLICK(20, 8)",
129
+ "correct": true,
130
+ "error": null
131
+ },
132
+ "control": {
133
+ "action": "CLICK(20, 8)",
134
+ "correct": true,
135
+ "error": null
136
+ }
137
+ }
138
+ },
139
+ {
140
+ "test_case": {
141
+ "name": "displays_resolution",
142
+ "task": "Change display resolution in System Settings",
143
+ "category": "Displays"
144
+ },
145
+ "results": {
146
+ "zero_shot": {
147
+ "action": "CLICK(1362, 8)",
148
+ "correct": false,
149
+ "error": null
150
+ },
151
+ "with_demo": {
152
+ "action": "CLICK(20, 8)",
153
+ "correct": true,
154
+ "error": null
155
+ },
156
+ "control": {
157
+ "action": "KEY(Cmd+space)",
158
+ "correct": false,
159
+ "error": null
160
+ }
161
+ }
162
+ },
163
+ {
164
+ "test_case": {
165
+ "name": "sound_output_device",
166
+ "task": "Change sound output device in System Settings",
167
+ "category": "Sound"
168
+ },
169
+ "results": {
170
+ "zero_shot": {
171
+ "action": "CLICK(1274, 8)",
172
+ "correct": false,
173
+ "error": null
174
+ },
175
+ "with_demo": {
176
+ "action": "CLICK(20, 8)",
177
+ "correct": true,
178
+ "error": null
179
+ },
180
+ "control": {
181
+ "action": "CLICK(728, 420)",
182
+ "correct": false,
183
+ "error": null
184
+ }
185
+ }
186
+ },
187
+ {
188
+ "test_case": {
189
+ "name": "sound_input_device",
190
+ "task": "Change microphone input device in System Settings",
191
+ "category": "Sound"
192
+ },
193
+ "results": {
194
+ "zero_shot": {
195
+ "action": "CLICK(1083, 8)",
196
+ "correct": false,
197
+ "error": null
198
+ },
199
+ "with_demo": {
200
+ "action": "CLICK(20, 8)",
201
+ "correct": true,
202
+ "error": null
203
+ },
204
+ "control": {
205
+ "action": "CLICK(1193, 8)",
206
+ "correct": false,
207
+ "error": null
208
+ }
209
+ }
210
+ },
211
+ {
212
+ "test_case": {
213
+ "name": "sound_alert_volume",
214
+ "task": "Adjust alert sound volume in System Settings",
215
+ "category": "Sound"
216
+ },
217
+ "results": {
218
+ "zero_shot": {
219
+ "action": "CLICK(1274, 8)",
220
+ "correct": false,
221
+ "error": null
222
+ },
223
+ "with_demo": {
224
+ "action": "CLICK(20, 8)",
225
+ "correct": true,
226
+ "error": null
227
+ },
228
+ "control": {
229
+ "action": "CLICK(1274, 8)",
230
+ "correct": false,
231
+ "error": null
232
+ }
233
+ }
234
+ },
235
+ {
236
+ "test_case": {
237
+ "name": "sound_startup_sound",
238
+ "task": "Disable startup sound in System Settings",
239
+ "category": "Sound"
240
+ },
241
+ "results": {
242
+ "zero_shot": {
243
+ "action": "CLICK(20, 8)",
244
+ "correct": true,
245
+ "error": null
246
+ },
247
+ "with_demo": {
248
+ "action": "CLICK(19, 8)",
249
+ "correct": true,
250
+ "error": null
251
+ },
252
+ "control": {
253
+ "action": "CLICK(20, 8)",
254
+ "correct": true,
255
+ "error": null
256
+ }
257
+ }
258
+ },
259
+ {
260
+ "test_case": {
261
+ "name": "sound_output_volume",
262
+ "task": "Set system volume to 50% in System Settings",
263
+ "category": "Sound"
264
+ },
265
+ "results": {
266
+ "zero_shot": {
267
+ "action": "CLICK(1335, 8)",
268
+ "correct": false,
269
+ "error": null
270
+ },
271
+ "with_demo": {
272
+ "action": "CLICK(20, 8)",
273
+ "correct": true,
274
+ "error": null
275
+ },
276
+ "control": {
277
+ "action": "CLICK(20, 8)",
278
+ "correct": true,
279
+ "error": null
280
+ }
281
+ }
282
+ },
283
+ {
284
+ "test_case": {
285
+ "name": "notifications_dnd",
286
+ "task": "Enable Do Not Disturb in System Settings",
287
+ "category": "Notifications"
288
+ },
289
+ "results": {
290
+ "zero_shot": {
291
+ "action": "CLICK(1219, 8)",
292
+ "correct": false,
293
+ "error": null
294
+ },
295
+ "with_demo": {
296
+ "action": "CLICK(20, 8)",
297
+ "correct": true,
298
+ "error": null
299
+ },
300
+ "control": {
301
+ "action": "CLICK(1218, 8)",
302
+ "correct": false,
303
+ "error": null
304
+ }
305
+ }
306
+ },
307
+ {
308
+ "test_case": {
309
+ "name": "notifications_app",
310
+ "task": "Disable notifications for Messages app in System Settings",
311
+ "category": "Notifications"
312
+ },
313
+ "results": {
314
+ "zero_shot": {
315
+ "action": "CLICK(20, 8)",
316
+ "correct": true,
317
+ "error": null
318
+ },
319
+ "with_demo": {
320
+ "action": "CLICK(20, 8)",
321
+ "correct": true,
322
+ "error": null
323
+ },
324
+ "control": {
325
+ "action": "CLICK(20, 8)",
326
+ "correct": true,
327
+ "error": null
328
+ }
329
+ }
330
+ },
331
+ {
332
+ "test_case": {
333
+ "name": "notifications_preview",
334
+ "task": "Hide notification previews when locked in System Settings",
335
+ "category": "Notifications"
336
+ },
337
+ "results": {
338
+ "zero_shot": {
339
+ "action": "KEY(Cmd+comma)",
340
+ "correct": false,
341
+ "error": null
342
+ },
343
+ "with_demo": {
344
+ "action": "CLICK(20, 8)",
345
+ "correct": true,
346
+ "error": null
347
+ },
348
+ "control": {
349
+ "action": "KEY(Cmd+Space)",
350
+ "correct": false,
351
+ "error": null
352
+ }
353
+ }
354
+ },
355
+ {
356
+ "test_case": {
357
+ "name": "notifications_sound",
358
+ "task": "Turn off notification sounds in System Settings",
359
+ "category": "Notifications"
360
+ },
361
+ "results": {
362
+ "zero_shot": {
363
+ "action": "CLICK(1168, 8)",
364
+ "correct": false,
365
+ "error": null
366
+ },
367
+ "with_demo": {
368
+ "action": "CLICK(20, 8)",
369
+ "correct": true,
370
+ "error": null
371
+ },
372
+ "control": {
373
+ "action": "CLICK(1194, 8)",
374
+ "correct": false,
375
+ "error": null
376
+ }
377
+ }
378
+ },
379
+ {
380
+ "test_case": {
381
+ "name": "focus_enable",
382
+ "task": "Enable Focus mode in System Settings",
383
+ "category": "Focus"
384
+ },
385
+ "results": {
386
+ "zero_shot": {
387
+ "action": "CLICK(20, 8)",
388
+ "correct": true,
389
+ "error": null
390
+ },
391
+ "with_demo": {
392
+ "action": "CLICK(20, 8)",
393
+ "correct": true,
394
+ "error": null
395
+ },
396
+ "control": {
397
+ "action": "CLICK(1218, 8)",
398
+ "correct": false,
399
+ "error": null
400
+ }
401
+ }
402
+ },
403
+ {
404
+ "test_case": {
405
+ "name": "focus_schedule",
406
+ "task": "Schedule Focus mode for work hours in System Settings",
407
+ "category": "Focus"
408
+ },
409
+ "results": {
410
+ "zero_shot": {
411
+ "action": "CLICK(20, 8)",
412
+ "correct": true,
413
+ "error": null
414
+ },
415
+ "with_demo": {
416
+ "action": "CLICK(20, 8)",
417
+ "correct": true,
418
+ "error": null
419
+ },
420
+ "control": {
421
+ "action": "CLICK(1218, 8)",
422
+ "correct": false,
423
+ "error": null
424
+ }
425
+ }
426
+ },
427
+ {
428
+ "test_case": {
429
+ "name": "focus_share",
430
+ "task": "Share Focus status across devices in System Settings",
431
+ "category": "Focus"
432
+ },
433
+ "results": {
434
+ "zero_shot": {
435
+ "action": "CLICK(1194, 8)",
436
+ "correct": false,
437
+ "error": null
438
+ },
439
+ "with_demo": {
440
+ "action": "CLICK(20, 8)",
441
+ "correct": true,
442
+ "error": null
443
+ },
444
+ "control": {
445
+ "action": "CLICK(20, 8)",
446
+ "correct": true,
447
+ "error": null
448
+ }
449
+ }
450
+ },
451
+ {
452
+ "test_case": {
453
+ "name": "general_about",
454
+ "task": "Open About This Mac in System Settings",
455
+ "category": "General"
456
+ },
457
+ "results": {
458
+ "zero_shot": {
459
+ "action": "CLICK(19, 8)",
460
+ "correct": true,
461
+ "error": null
462
+ },
463
+ "with_demo": {
464
+ "action": "CLICK(20, 8)",
465
+ "correct": true,
466
+ "error": null
467
+ },
468
+ "control": {
469
+ "action": "CLICK(20, 8)",
470
+ "correct": true,
471
+ "error": null
472
+ }
473
+ }
474
+ },
475
+ {
476
+ "test_case": {
477
+ "name": "general_update",
478
+ "task": "Check for software updates in System Settings",
479
+ "category": "General"
480
+ },
481
+ "results": {
482
+ "zero_shot": {
483
+ "action": "CLICK(20, 8)",
484
+ "correct": true,
485
+ "error": null
486
+ },
487
+ "with_demo": {
488
+ "action": "CLICK(19, 8)",
489
+ "correct": true,
490
+ "error": null
491
+ },
492
+ "control": {
493
+ "action": "CLICK(20, 8)",
494
+ "correct": true,
495
+ "error": null
496
+ }
497
+ }
498
+ },
499
+ {
500
+ "test_case": {
501
+ "name": "general_storage",
502
+ "task": "View storage usage in System Settings",
503
+ "category": "General"
504
+ },
505
+ "results": {
506
+ "zero_shot": {
507
+ "action": "I need to open System Settings to view storage usage. Let me click on the Apple menu in the top left.",
508
+ "correct": false,
509
+ "error": null
510
+ },
511
+ "with_demo": {
512
+ "action": "CLICK(20, 8)",
513
+ "correct": true,
514
+ "error": null
515
+ },
516
+ "control": {
517
+ "action": "CLICK(1335, 8)",
518
+ "correct": false,
519
+ "error": null
520
+ }
521
+ }
522
+ },
523
+ {
524
+ "test_case": {
525
+ "name": "general_login_items",
526
+ "task": "Manage login items in System Settings",
527
+ "category": "General"
528
+ },
529
+ "results": {
530
+ "zero_shot": {
531
+ "action": "CLICK(1194, 8)",
532
+ "correct": false,
533
+ "error": null
534
+ },
535
+ "with_demo": {
536
+ "action": "CLICK(20, 8)",
537
+ "correct": true,
538
+ "error": null
539
+ },
540
+ "control": {
541
+ "action": "CLICK(1193, 8)",
542
+ "correct": false,
543
+ "error": null
544
+ }
545
+ }
546
+ },
547
+ {
548
+ "test_case": {
549
+ "name": "privacy_location",
550
+ "task": "Disable location services in System Settings",
551
+ "category": "Privacy"
552
+ },
553
+ "results": {
554
+ "zero_shot": {
555
+ "action": "CLICK(1335, 8)",
556
+ "correct": false,
557
+ "error": null
558
+ },
559
+ "with_demo": {
560
+ "action": "CLICK(20, 8)",
561
+ "correct": true,
562
+ "error": null
563
+ },
564
+ "control": {
565
+ "action": "KEY(Cmd+space)",
566
+ "correct": false,
567
+ "error": null
568
+ }
569
+ }
570
+ },
571
+ {
572
+ "test_case": {
573
+ "name": "privacy_camera",
574
+ "task": "Check which apps have camera access in System Settings",
575
+ "category": "Privacy"
576
+ },
577
+ "results": {
578
+ "zero_shot": {
579
+ "action": "CLICK(20, 8)",
580
+ "correct": true,
581
+ "error": null
582
+ },
583
+ "with_demo": {
584
+ "action": "CLICK(20, 8)",
585
+ "correct": true,
586
+ "error": null
587
+ },
588
+ "control": {
589
+ "action": "CLICK(20, 8)",
590
+ "correct": true,
591
+ "error": null
592
+ }
593
+ }
594
+ },
595
+ {
596
+ "test_case": {
597
+ "name": "privacy_microphone",
598
+ "task": "Manage microphone permissions in System Settings",
599
+ "category": "Privacy"
600
+ },
601
+ "results": {
602
+ "zero_shot": {
603
+ "action": "CLICK(1193, 8)",
604
+ "correct": false,
605
+ "error": null
606
+ },
607
+ "with_demo": {
608
+ "action": "CLICK(20, 8)",
609
+ "correct": true,
610
+ "error": null
611
+ },
612
+ "control": {
613
+ "action": "CLICK(1303, 8)",
614
+ "correct": false,
615
+ "error": null
616
+ }
617
+ }
618
+ },
619
+ {
620
+ "test_case": {
621
+ "name": "security_filevault",
622
+ "task": "Check FileVault encryption status in System Settings",
623
+ "category": "Security"
624
+ },
625
+ "results": {
626
+ "zero_shot": {
627
+ "action": "CLICK(1194, 8)",
628
+ "correct": false,
629
+ "error": null
630
+ },
631
+ "with_demo": {
632
+ "action": "CLICK(19, 8)",
633
+ "correct": true,
634
+ "error": null
635
+ },
636
+ "control": {
637
+ "action": "CLICK(20, 8)",
638
+ "correct": true,
639
+ "error": null
640
+ }
641
+ }
642
+ },
643
+ {
644
+ "test_case": {
645
+ "name": "keyboard_shortcuts",
646
+ "task": "Customize keyboard shortcuts in System Settings",
647
+ "category": "Keyboard"
648
+ },
649
+ "results": {
650
+ "zero_shot": {
651
+ "action": "CLICK(20, 8)",
652
+ "correct": true,
653
+ "error": null
654
+ },
655
+ "with_demo": {
656
+ "action": "CLICK(19, 8)",
657
+ "correct": true,
658
+ "error": null
659
+ },
660
+ "control": {
661
+ "action": "KEY(Cmd+Space)",
662
+ "correct": false,
663
+ "error": null
664
+ }
665
+ }
666
+ },
667
+ {
668
+ "test_case": {
669
+ "name": "keyboard_input_sources",
670
+ "task": "Add a new input language in System Settings",
671
+ "category": "Keyboard"
672
+ },
673
+ "results": {
674
+ "zero_shot": {
675
+ "action": "CLICK(20, 8)",
676
+ "correct": true,
677
+ "error": null
678
+ },
679
+ "with_demo": {
680
+ "action": "CLICK(20, 8)",
681
+ "correct": true,
682
+ "error": null
683
+ },
684
+ "control": {
685
+ "action": "CLICK(19, 8)",
686
+ "correct": true,
687
+ "error": null
688
+ }
689
+ }
690
+ },
691
+ {
692
+ "test_case": {
693
+ "name": "keyboard_dictation",
694
+ "task": "Enable dictation in System Settings",
695
+ "category": "Keyboard"
696
+ },
697
+ "results": {
698
+ "zero_shot": {
699
+ "action": "CLICK(20, 8)",
700
+ "correct": true,
701
+ "error": null
702
+ },
703
+ "with_demo": {
704
+ "action": "CLICK(20, 8)",
705
+ "correct": true,
706
+ "error": null
707
+ },
708
+ "control": {
709
+ "action": "CLICK(19, 8)",
710
+ "correct": true,
711
+ "error": null
712
+ }
713
+ }
714
+ },
715
+ {
716
+ "test_case": {
717
+ "name": "trackpad_tap_click",
718
+ "task": "Enable tap to click on trackpad in System Settings",
719
+ "category": "Trackpad"
720
+ },
721
+ "results": {
722
+ "zero_shot": {
723
+ "action": "CLICK(1360, 8)",
724
+ "correct": false,
725
+ "error": null
726
+ },
727
+ "with_demo": {
728
+ "action": "CLICK(20, 8)",
729
+ "correct": true,
730
+ "error": null
731
+ },
732
+ "control": {
733
+ "action": "KEY(Return)",
734
+ "correct": false,
735
+ "error": null
736
+ }
737
+ }
738
+ },
739
+ {
740
+ "test_case": {
741
+ "name": "trackpad_scroll",
742
+ "task": "Change scroll direction in System Settings",
743
+ "category": "Trackpad"
744
+ },
745
+ "results": {
746
+ "zero_shot": {
747
+ "action": "CLICK(20, 8)",
748
+ "correct": true,
749
+ "error": null
750
+ },
751
+ "with_demo": {
752
+ "action": "CLICK(20, 8)",
753
+ "correct": true,
754
+ "error": null
755
+ },
756
+ "control": {
757
+ "action": "CLICK(20, 8)",
758
+ "correct": true,
759
+ "error": null
760
+ }
761
+ }
762
+ },
763
+ {
764
+ "test_case": {
765
+ "name": "mouse_speed",
766
+ "task": "Adjust mouse tracking speed in System Settings",
767
+ "category": "Mouse"
768
+ },
769
+ "results": {
770
+ "zero_shot": {
771
+ "action": "CLICK(20, 8)",
772
+ "correct": true,
773
+ "error": null
774
+ },
775
+ "with_demo": {
776
+ "action": "CLICK(20, 8)",
777
+ "correct": true,
778
+ "error": null
779
+ },
780
+ "control": {
781
+ "action": "CLICK(20, 8)",
782
+ "correct": true,
783
+ "error": null
784
+ }
785
+ }
786
+ },
787
+ {
788
+ "test_case": {
789
+ "name": "wifi_connect",
790
+ "task": "Connect to a WiFi network in System Settings",
791
+ "category": "Network"
792
+ },
793
+ "results": {
794
+ "zero_shot": {
795
+ "action": "CLICK(1362, 8)",
796
+ "correct": false,
797
+ "error": null
798
+ },
799
+ "with_demo": {
800
+ "action": "CLICK(20, 8)",
801
+ "correct": true,
802
+ "error": null
803
+ },
804
+ "control": {
805
+ "action": "CLICK(20, 8)",
806
+ "correct": true,
807
+ "error": null
808
+ }
809
+ }
810
+ },
811
+ {
812
+ "test_case": {
813
+ "name": "wifi_forget",
814
+ "task": "Forget a saved WiFi network in System Settings",
815
+ "category": "Network"
816
+ },
817
+ "results": {
818
+ "zero_shot": {
819
+ "action": "CLICK(1362, 8)",
820
+ "correct": false,
821
+ "error": null
822
+ },
823
+ "with_demo": {
824
+ "action": "CLICK(20, 8)",
825
+ "correct": true,
826
+ "error": null
827
+ },
828
+ "control": {
829
+ "action": "CLICK(20, 8)",
830
+ "correct": true,
831
+ "error": null
832
+ }
833
+ }
834
+ },
835
+ {
836
+ "test_case": {
837
+ "name": "network_dns",
838
+ "task": "Change DNS settings in System Settings",
839
+ "category": "Network"
840
+ },
841
+ "results": {
842
+ "zero_shot": {
843
+ "action": "CLICK(1193, 8)",
844
+ "correct": false,
845
+ "error": null
846
+ },
847
+ "with_demo": {
848
+ "action": "CLICK(20, 8)",
849
+ "correct": true,
850
+ "error": null
851
+ },
852
+ "control": {
853
+ "action": "CLICK(20, 8)",
854
+ "correct": true,
855
+ "error": null
856
+ }
857
+ }
858
+ },
859
+ {
860
+ "test_case": {
861
+ "name": "bluetooth_enable",
862
+ "task": "Turn on Bluetooth in System Settings",
863
+ "category": "Bluetooth"
864
+ },
865
+ "results": {
866
+ "zero_shot": {
867
+ "action": "CLICK(20, 8)",
868
+ "correct": true,
869
+ "error": null
870
+ },
871
+ "with_demo": {
872
+ "action": "CLICK(20, 8)",
873
+ "correct": true,
874
+ "error": null
875
+ },
876
+ "control": {
877
+ "action": "CLICK(20, 8)",
878
+ "correct": true,
879
+ "error": null
880
+ }
881
+ }
882
+ },
883
+ {
884
+ "test_case": {
885
+ "name": "bluetooth_pair",
886
+ "task": "Pair a new Bluetooth device in System Settings",
887
+ "category": "Bluetooth"
888
+ },
889
+ "results": {
890
+ "zero_shot": {
891
+ "action": "CLICK(20, 8)",
892
+ "correct": true,
893
+ "error": null
894
+ },
895
+ "with_demo": {
896
+ "action": "CLICK(20, 8)",
897
+ "correct": true,
898
+ "error": null
899
+ },
900
+ "control": {
901
+ "action": "CLICK(20, 8)",
902
+ "correct": true,
903
+ "error": null
904
+ }
905
+ }
906
+ },
907
+ {
908
+ "test_case": {
909
+ "name": "accessibility_zoom",
910
+ "task": "Enable zoom accessibility feature in System Settings",
911
+ "category": "Accessibility"
912
+ },
913
+ "results": {
914
+ "zero_shot": {
915
+ "action": "CLICK(1195, 8)",
916
+ "correct": false,
917
+ "error": null
918
+ },
919
+ "with_demo": {
920
+ "action": "CLICK(19, 8)",
921
+ "correct": true,
922
+ "error": null
923
+ },
924
+ "control": {
925
+ "action": "CLICK(20, 8)",
926
+ "correct": true,
927
+ "error": null
928
+ }
929
+ }
930
+ },
931
+ {
932
+ "test_case": {
933
+ "name": "accessibility_voiceover",
934
+ "task": "Turn on VoiceOver in System Settings",
935
+ "category": "Accessibility"
936
+ },
937
+ "results": {
938
+ "zero_shot": {
939
+ "action": "CLICK(20, 8)",
940
+ "correct": true,
941
+ "error": null
942
+ },
943
+ "with_demo": {
944
+ "action": "CLICK(20, 8)",
945
+ "correct": true,
946
+ "error": null
947
+ },
948
+ "control": {
949
+ "action": "CLICK(20, 8)",
950
+ "correct": true,
951
+ "error": null
952
+ }
953
+ }
954
+ },
955
+ {
956
+ "test_case": {
957
+ "name": "accessibility_display",
958
+ "task": "Increase contrast in System Settings",
959
+ "category": "Accessibility"
960
+ },
961
+ "results": {
962
+ "zero_shot": {
963
+ "action": "CLICK(1335, 8)",
964
+ "correct": false,
965
+ "error": null
966
+ },
967
+ "with_demo": {
968
+ "action": "CLICK(20, 8)",
969
+ "correct": true,
970
+ "error": null
971
+ },
972
+ "control": {
973
+ "action": "CLICK(20, 8)",
974
+ "correct": true,
975
+ "error": null
976
+ }
977
+ }
978
+ },
979
+ {
980
+ "test_case": {
981
+ "name": "battery_percentage",
982
+ "task": "Show battery percentage in menu bar via System Settings",
983
+ "category": "Battery"
984
+ },
985
+ "results": {
986
+ "zero_shot": {
987
+ "action": "CLICK(20, 8)",
988
+ "correct": true,
989
+ "error": null
990
+ },
991
+ "with_demo": {
992
+ "action": "CLICK(20, 8)",
993
+ "correct": true,
994
+ "error": null
995
+ },
996
+ "control": {
997
+ "action": "CLICK(20, 8)",
998
+ "correct": true,
999
+ "error": null
1000
+ }
1001
+ }
1002
+ },
1003
+ {
1004
+ "test_case": {
1005
+ "name": "battery_low_power",
1006
+ "task": "Enable Low Power Mode in System Settings",
1007
+ "category": "Battery"
1008
+ },
1009
+ "results": {
1010
+ "zero_shot": {
1011
+ "action": "CLICK(1304, 8)",
1012
+ "correct": false,
1013
+ "error": null
1014
+ },
1015
+ "with_demo": {
1016
+ "action": "CLICK(20, 8)",
1017
+ "correct": true,
1018
+ "error": null
1019
+ },
1020
+ "control": {
1021
+ "action": "CLICK(20, 8)",
1022
+ "correct": true,
1023
+ "error": null
1024
+ }
1025
+ }
1026
+ },
1027
+ {
1028
+ "test_case": {
1029
+ "name": "dock_size",
1030
+ "task": "Change Dock size in System Settings",
1031
+ "category": "Desktop & Dock"
1032
+ },
1033
+ "results": {
1034
+ "zero_shot": {
1035
+ "action": "CLICK(20, 8)",
1036
+ "correct": true,
1037
+ "error": null
1038
+ },
1039
+ "with_demo": {
1040
+ "action": "CLICK(20, 8)",
1041
+ "correct": true,
1042
+ "error": null
1043
+ },
1044
+ "control": {
1045
+ "action": "CLICK(1361, 8)",
1046
+ "correct": false,
1047
+ "error": null
1048
+ }
1049
+ }
1050
+ },
1051
+ {
1052
+ "test_case": {
1053
+ "name": "dock_autohide",
1054
+ "task": "Enable auto-hide for Dock in System Settings",
1055
+ "category": "Desktop & Dock"
1056
+ },
1057
+ "results": {
1058
+ "zero_shot": {
1059
+ "action": "CLICK(20, 8)",
1060
+ "correct": true,
1061
+ "error": null
1062
+ },
1063
+ "with_demo": {
1064
+ "action": "CLICK(20, 8)",
1065
+ "correct": true,
1066
+ "error": null
1067
+ },
1068
+ "control": {
1069
+ "action": "CLICK(20, 8)",
1070
+ "correct": true,
1071
+ "error": null
1072
+ }
1073
+ }
1074
+ },
1075
+ {
1076
+ "test_case": {
1077
+ "name": "desktop_wallpaper",
1078
+ "task": "Change desktop wallpaper in System Settings",
1079
+ "category": "Desktop & Dock"
1080
+ },
1081
+ "results": {
1082
+ "zero_shot": {
1083
+ "action": "CLICK(20, 8)",
1084
+ "correct": true,
1085
+ "error": null
1086
+ },
1087
+ "with_demo": {
1088
+ "action": "CLICK(20, 8)",
1089
+ "correct": true,
1090
+ "error": null
1091
+ },
1092
+ "control": {
1093
+ "action": "KEY(Cmd+space)",
1094
+ "correct": false,
1095
+ "error": null
1096
+ }
1097
+ }
1098
+ }
1099
+ ]
1100
+ }