openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. openadapt_ml/benchmarks/__init__.py +8 -0
  2. openadapt_ml/benchmarks/agent.py +90 -11
  3. openadapt_ml/benchmarks/azure.py +35 -6
  4. openadapt_ml/benchmarks/cli.py +4449 -201
  5. openadapt_ml/benchmarks/live_tracker.py +180 -0
  6. openadapt_ml/benchmarks/runner.py +41 -4
  7. openadapt_ml/benchmarks/viewer.py +1219 -0
  8. openadapt_ml/benchmarks/vm_monitor.py +610 -0
  9. openadapt_ml/benchmarks/waa.py +61 -4
  10. openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
  11. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  12. openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
  13. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  14. openadapt_ml/benchmarks/waa_live.py +619 -0
  15. openadapt_ml/cloud/local.py +1555 -1
  16. openadapt_ml/cloud/ssh_tunnel.py +553 -0
  17. openadapt_ml/datasets/next_action.py +87 -68
  18. openadapt_ml/evals/grounding.py +26 -8
  19. openadapt_ml/evals/trajectory_matching.py +84 -36
  20. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  21. openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
  22. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  23. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  24. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  25. openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
  26. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  27. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  28. openadapt_ml/experiments/waa_demo/runner.py +717 -0
  29. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  30. openadapt_ml/export/__init__.py +9 -0
  31. openadapt_ml/export/__main__.py +6 -0
  32. openadapt_ml/export/cli.py +89 -0
  33. openadapt_ml/export/parquet.py +265 -0
  34. openadapt_ml/ingest/__init__.py +3 -4
  35. openadapt_ml/ingest/capture.py +89 -81
  36. openadapt_ml/ingest/loader.py +116 -68
  37. openadapt_ml/ingest/synthetic.py +221 -159
  38. openadapt_ml/retrieval/README.md +226 -0
  39. openadapt_ml/retrieval/USAGE.md +391 -0
  40. openadapt_ml/retrieval/__init__.py +91 -0
  41. openadapt_ml/retrieval/demo_retriever.py +817 -0
  42. openadapt_ml/retrieval/embeddings.py +629 -0
  43. openadapt_ml/retrieval/index.py +194 -0
  44. openadapt_ml/retrieval/retriever.py +160 -0
  45. openadapt_ml/runtime/policy.py +10 -10
  46. openadapt_ml/schema/__init__.py +104 -0
  47. openadapt_ml/schema/converters.py +541 -0
  48. openadapt_ml/schema/episode.py +457 -0
  49. openadapt_ml/scripts/compare.py +26 -16
  50. openadapt_ml/scripts/eval_policy.py +4 -5
  51. openadapt_ml/scripts/prepare_synthetic.py +14 -17
  52. openadapt_ml/scripts/train.py +81 -70
  53. openadapt_ml/training/benchmark_viewer.py +3225 -0
  54. openadapt_ml/training/trainer.py +120 -363
  55. openadapt_ml/training/trl_trainer.py +354 -0
  56. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
  57. openadapt_ml-0.2.0.dist-info/RECORD +86 -0
  58. openadapt_ml/schemas/__init__.py +0 -53
  59. openadapt_ml/schemas/sessions.py +0 -122
  60. openadapt_ml/schemas/validation.py +0 -252
  61. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  62. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
  63. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -8,7 +8,7 @@ from typing import List, Optional, Tuple
8
8
 
9
9
  from PIL import Image, ImageDraw, ImageFont
10
10
 
11
- from openadapt_ml.schemas.sessions import Action, Episode, Observation, Session, Step
11
+ from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step
12
12
 
13
13
 
14
14
  IMG_WIDTH = 800
@@ -333,12 +333,12 @@ def _script_login_episode(
333
333
  """Create a scripted login episode with a fixed sequence of steps.
334
334
 
335
335
  Steps (6 total):
336
- - Step 0: blank login screen click username field.
337
- - Step 1: username field focused type username.
338
- - Step 2: username typed click password field.
339
- - Step 3: password field focused type password.
340
- - Step 4: password typed click login button.
341
- - Step 5: logged-in screen DONE.
336
+ - Step 0: blank login screen -> click username field.
337
+ - Step 1: username field focused -> type username.
338
+ - Step 2: username typed -> click password field.
339
+ - Step 3: password field focused -> type password.
340
+ - Step 4: password typed -> click login button.
341
+ - Step 5: logged-in screen -> DONE.
342
342
 
343
343
  Each step includes bounding boxes for clickable elements to support
344
344
  bbox-based click hit evaluation.
@@ -354,100 +354,120 @@ def _script_login_episode(
354
354
  password_bbox = _bbox_normalized(layout.password_box)
355
355
  login_bbox = _bbox_normalized(layout.login_button)
356
356
 
357
- # Step 0: blank login screen click username field
357
+ # Step 0: blank login screen -> click username field
358
358
  cx, cy = _center(layout.username_box)
359
359
  img0, _ = _draw_login_screen(layout=layout, jitter=False)
360
360
  img0_path = root / f"{episode_id}_step_0.png"
361
361
  _save_image(img0, img0_path)
362
- obs0 = Observation(image_path=str(img0_path))
362
+ obs0 = Observation(screenshot_path=str(img0_path))
363
363
  steps.append(
364
364
  Step(
365
- t=0.0,
365
+ step_index=0,
366
+ timestamp=0.0,
366
367
  observation=obs0,
367
- action=Action(type="click", x=cx, y=cy, bbox=username_bbox),
368
- thought="Focus the username field.",
368
+ action=Action(
369
+ type=ActionType.CLICK,
370
+ normalized_coordinates=(cx, cy),
371
+ raw={"bbox": username_bbox},
372
+ ),
373
+ reasoning="Focus the username field.",
369
374
  )
370
375
  )
371
376
 
372
- # Step 1: username field focused type username
377
+ # Step 1: username field focused -> type username
373
378
  img1, _ = _draw_login_screen(username="", layout=layout, jitter=False)
374
379
  img1_path = root / f"{episode_id}_step_1.png"
375
380
  _save_image(img1, img1_path)
376
- obs1 = Observation(image_path=str(img1_path))
381
+ obs1 = Observation(screenshot_path=str(img1_path))
377
382
  steps.append(
378
383
  Step(
379
- t=1.0,
384
+ step_index=1,
385
+ timestamp=1.0,
380
386
  observation=obs1,
381
- action=Action(type="type", text=username),
382
- thought="Type the username.",
387
+ action=Action(type=ActionType.TYPE, text=username),
388
+ reasoning="Type the username.",
383
389
  )
384
390
  )
385
391
 
386
- # Step 2: username typed click password field
392
+ # Step 2: username typed -> click password field
387
393
  cx_pw, cy_pw = _center(layout.password_box)
388
394
  img2, _ = _draw_login_screen(username=username, layout=layout, jitter=False)
389
395
  img2_path = root / f"{episode_id}_step_2.png"
390
396
  _save_image(img2, img2_path)
391
- obs2 = Observation(image_path=str(img2_path))
397
+ obs2 = Observation(screenshot_path=str(img2_path))
392
398
  steps.append(
393
399
  Step(
394
- t=2.0,
400
+ step_index=2,
401
+ timestamp=2.0,
395
402
  observation=obs2,
396
- action=Action(type="click", x=cx_pw, y=cy_pw, bbox=password_bbox),
397
- thought="Focus the password field.",
403
+ action=Action(
404
+ type=ActionType.CLICK,
405
+ normalized_coordinates=(cx_pw, cy_pw),
406
+ raw={"bbox": password_bbox},
407
+ ),
408
+ reasoning="Focus the password field.",
398
409
  )
399
410
  )
400
411
 
401
- # Step 3: password field focused type password
412
+ # Step 3: password field focused -> type password
402
413
  img3, _ = _draw_login_screen(username=username, layout=layout, jitter=False)
403
414
  img3_path = root / f"{episode_id}_step_3.png"
404
415
  _save_image(img3, img3_path)
405
- obs3 = Observation(image_path=str(img3_path))
416
+ obs3 = Observation(screenshot_path=str(img3_path))
406
417
  steps.append(
407
418
  Step(
408
- t=3.0,
419
+ step_index=3,
420
+ timestamp=3.0,
409
421
  observation=obs3,
410
- action=Action(type="type", text=password),
411
- thought="Type the password.",
422
+ action=Action(type=ActionType.TYPE, text=password),
423
+ reasoning="Type the password.",
412
424
  )
413
425
  )
414
426
 
415
- # Step 4: password typed click login button
427
+ # Step 4: password typed -> click login button
416
428
  cx_btn, cy_btn = _center(layout.login_button)
417
429
  img4, _ = _draw_login_screen(username=username, password=password, layout=layout, jitter=False)
418
430
  img4_path = root / f"{episode_id}_step_4.png"
419
431
  _save_image(img4, img4_path)
420
- obs4 = Observation(image_path=str(img4_path))
432
+ obs4 = Observation(screenshot_path=str(img4_path))
421
433
  steps.append(
422
434
  Step(
423
- t=4.0,
435
+ step_index=4,
436
+ timestamp=4.0,
424
437
  observation=obs4,
425
- action=Action(type="click", x=cx_btn, y=cy_btn, bbox=login_bbox),
426
- thought="Submit the login form.",
438
+ action=Action(
439
+ type=ActionType.CLICK,
440
+ normalized_coordinates=(cx_btn, cy_btn),
441
+ raw={"bbox": login_bbox},
442
+ ),
443
+ reasoning="Submit the login form.",
427
444
  )
428
445
  )
429
446
 
430
- # Step 5: logged-in screen DONE
447
+ # Step 5: logged-in screen -> DONE
431
448
  img5 = _draw_logged_in_screen(username=username)
432
449
  img5_path = root / f"{episode_id}_step_5.png"
433
450
  _save_image(img5, img5_path)
434
- obs5 = Observation(image_path=str(img5_path))
451
+ obs5 = Observation(screenshot_path=str(img5_path))
435
452
  steps.append(
436
453
  Step(
437
- t=5.0,
454
+ step_index=5,
455
+ timestamp=5.0,
438
456
  observation=obs5,
439
- action=Action(type="done"),
440
- thought="Login successful; workflow complete.",
457
+ action=Action(type=ActionType.DONE),
458
+ reasoning="Login successful; workflow complete.",
441
459
  )
442
460
  )
443
461
 
444
462
  episode = Episode(
445
- id=episode_id,
446
- goal=f"Log in with username '{username}' and password '{password}'",
463
+ episode_id=episode_id,
464
+ instruction=f"Log in with username '{username}' and password '{password}'",
447
465
  steps=steps,
448
- summary="Successful login via username and password.",
449
466
  success=True,
450
- workflow_id="login_basic",
467
+ metadata={
468
+ "summary": "Successful login via username and password.",
469
+ "workflow_id": "login_basic",
470
+ },
451
471
  )
452
472
 
453
473
  return episode
@@ -467,12 +487,12 @@ def _script_login_episode_som(
467
487
  for click actions.
468
488
 
469
489
  Steps (6 total):
470
- - Step 0: SoM login screen click element [1] (username field)
471
- - Step 1: username field focused type username
472
- - Step 2: username typed click element [2] (password field)
473
- - Step 3: password field focused type password
474
- - Step 4: password typed click element [3] (login button)
475
- - Step 5: logged-in screen DONE
490
+ - Step 0: SoM login screen -> click element [1] (username field)
491
+ - Step 1: username field focused -> type username
492
+ - Step 2: username typed -> click element [2] (password field)
493
+ - Step 3: password field focused -> type password
494
+ - Step 4: password typed -> click element [3] (login button)
495
+ - Step 5: logged-in screen -> DONE
476
496
  """
477
497
 
478
498
  steps: List[Step] = []
@@ -492,81 +512,89 @@ def _script_login_episode_som(
492
512
  (SOM_LOGIN_BUTTON, layout.login_button),
493
513
  ]
494
514
 
495
- # Step 0: SoM login screen click username field [1]
515
+ # Step 0: SoM login screen -> click username field [1]
496
516
  cx, cy = _center(layout.username_box)
497
517
  img0, _ = _draw_login_screen(layout=layout, jitter=False)
498
518
  img0_som = _overlay_som_marks(img0, som_elements)
499
519
  img0_path = root / f"{episode_id}_step_0.png"
500
520
  _save_image(img0_som, img0_path)
501
- obs0 = Observation(image_path=str(img0_path))
521
+ obs0 = Observation(screenshot_path=str(img0_path))
502
522
  steps.append(
503
523
  Step(
504
- t=0.0,
524
+ step_index=0,
525
+ timestamp=0.0,
505
526
  observation=obs0,
506
527
  action=Action(
507
- type="click",
508
- x=cx,
509
- y=cy,
510
- bbox=username_bbox,
511
- element_index=SOM_USERNAME_FIELD,
528
+ type=ActionType.CLICK,
529
+ normalized_coordinates=(cx, cy),
530
+ raw={"bbox": username_bbox, "element_index": SOM_USERNAME_FIELD},
512
531
  ),
513
- thought="Focus the username field by clicking element [1].",
532
+ reasoning="Focus the username field by clicking element [1].",
514
533
  )
515
534
  )
516
535
 
517
- # Step 1: username field focused type username into element [1]
536
+ # Step 1: username field focused -> type username into element [1]
518
537
  img1, _ = _draw_login_screen(username="", layout=layout, jitter=False)
519
538
  img1_som = _overlay_som_marks(img1, som_elements)
520
539
  img1_path = root / f"{episode_id}_step_1.png"
521
540
  _save_image(img1_som, img1_path)
522
- obs1 = Observation(image_path=str(img1_path))
541
+ obs1 = Observation(screenshot_path=str(img1_path))
523
542
  steps.append(
524
543
  Step(
525
- t=1.0,
544
+ step_index=1,
545
+ timestamp=1.0,
526
546
  observation=obs1,
527
- action=Action(type="type", text=username, element_index=SOM_USERNAME_FIELD),
528
- thought="Type the username into element [1].",
547
+ action=Action(
548
+ type=ActionType.TYPE,
549
+ text=username,
550
+ raw={"element_index": SOM_USERNAME_FIELD},
551
+ ),
552
+ reasoning="Type the username into element [1].",
529
553
  )
530
554
  )
531
555
 
532
- # Step 2: username typed click password field [2]
556
+ # Step 2: username typed -> click password field [2]
533
557
  cx_pw, cy_pw = _center(layout.password_box)
534
558
  img2, _ = _draw_login_screen(username=username, layout=layout, jitter=False)
535
559
  img2_som = _overlay_som_marks(img2, som_elements)
536
560
  img2_path = root / f"{episode_id}_step_2.png"
537
561
  _save_image(img2_som, img2_path)
538
- obs2 = Observation(image_path=str(img2_path))
562
+ obs2 = Observation(screenshot_path=str(img2_path))
539
563
  steps.append(
540
564
  Step(
541
- t=2.0,
565
+ step_index=2,
566
+ timestamp=2.0,
542
567
  observation=obs2,
543
568
  action=Action(
544
- type="click",
545
- x=cx_pw,
546
- y=cy_pw,
547
- bbox=password_bbox,
548
- element_index=SOM_PASSWORD_FIELD,
569
+ type=ActionType.CLICK,
570
+ normalized_coordinates=(cx_pw, cy_pw),
571
+ raw={"bbox": password_bbox, "element_index": SOM_PASSWORD_FIELD},
549
572
  ),
550
- thought="Focus the password field by clicking element [2].",
573
+ reasoning="Focus the password field by clicking element [2].",
551
574
  )
552
575
  )
553
576
 
554
- # Step 3: password field focused type password into element [2]
577
+ # Step 3: password field focused -> type password into element [2]
555
578
  img3, _ = _draw_login_screen(username=username, layout=layout, jitter=False)
556
579
  img3_som = _overlay_som_marks(img3, som_elements)
557
580
  img3_path = root / f"{episode_id}_step_3.png"
558
581
  _save_image(img3_som, img3_path)
559
- obs3 = Observation(image_path=str(img3_path))
582
+ obs3 = Observation(screenshot_path=str(img3_path))
560
583
  steps.append(
561
584
  Step(
562
- t=3.0,
585
+ step_index=3,
586
+ timestamp=3.0,
563
587
  observation=obs3,
564
- action=Action(type="type", text=password, element_index=SOM_PASSWORD_FIELD),
565
- thought="Type the password into element [2].",
588
+ action=Action(
589
+ type=ActionType.TYPE,
590
+ text=password,
591
+ raw={"element_index": SOM_PASSWORD_FIELD},
592
+ ),
593
+ reasoning="Type the password into element [2].",
566
594
  )
567
595
  )
568
596
 
569
- # Step 4: password typed click login button [3]
597
+ # Step 4: password typed -> click login button [3]
570
598
  cx_btn, cy_btn = _center(layout.login_button)
571
599
  img4, _ = _draw_login_screen(
572
600
  username=username, password=password, layout=layout, jitter=False
@@ -574,43 +602,45 @@ def _script_login_episode_som(
574
602
  img4_som = _overlay_som_marks(img4, som_elements)
575
603
  img4_path = root / f"{episode_id}_step_4.png"
576
604
  _save_image(img4_som, img4_path)
577
- obs4 = Observation(image_path=str(img4_path))
605
+ obs4 = Observation(screenshot_path=str(img4_path))
578
606
  steps.append(
579
607
  Step(
580
- t=4.0,
608
+ step_index=4,
609
+ timestamp=4.0,
581
610
  observation=obs4,
582
611
  action=Action(
583
- type="click",
584
- x=cx_btn,
585
- y=cy_btn,
586
- bbox=login_bbox,
587
- element_index=SOM_LOGIN_BUTTON,
612
+ type=ActionType.CLICK,
613
+ normalized_coordinates=(cx_btn, cy_btn),
614
+ raw={"bbox": login_bbox, "element_index": SOM_LOGIN_BUTTON},
588
615
  ),
589
- thought="Submit the login form by clicking element [3].",
616
+ reasoning="Submit the login form by clicking element [3].",
590
617
  )
591
618
  )
592
619
 
593
- # Step 5: logged-in screen DONE (no SoM needed)
620
+ # Step 5: logged-in screen -> DONE (no SoM needed)
594
621
  img5 = _draw_logged_in_screen(username=username)
595
622
  img5_path = root / f"{episode_id}_step_5.png"
596
623
  _save_image(img5, img5_path)
597
- obs5 = Observation(image_path=str(img5_path))
624
+ obs5 = Observation(screenshot_path=str(img5_path))
598
625
  steps.append(
599
626
  Step(
600
- t=5.0,
627
+ step_index=5,
628
+ timestamp=5.0,
601
629
  observation=obs5,
602
- action=Action(type="done"),
603
- thought="Login successful; workflow complete.",
630
+ action=Action(type=ActionType.DONE),
631
+ reasoning="Login successful; workflow complete.",
604
632
  )
605
633
  )
606
634
 
607
635
  episode = Episode(
608
- id=episode_id,
609
- goal=f"Log in with username '{username}' and password '{password}'",
636
+ episode_id=episode_id,
637
+ instruction=f"Log in with username '{username}' and password '{password}'",
610
638
  steps=steps,
611
- summary="Successful login via username and password (SoM mode).",
612
639
  success=True,
613
- workflow_id="login_basic_som",
640
+ metadata={
641
+ "summary": "Successful login via username and password (SoM mode).",
642
+ "workflow_id": "login_basic_som",
643
+ },
614
644
  )
615
645
 
616
646
  return episode
@@ -822,10 +852,15 @@ def _script_registration_episode(
822
852
  img_path = root / f"{episode_id}_step_{step_idx}.png"
823
853
  _save_image(img, img_path)
824
854
  steps.append(Step(
825
- t=float(step_idx),
826
- observation=Observation(image_path=str(img_path)),
827
- action=Action(type="click", x=cx, y=cy, bbox=bbox, element_index=elem_idx),
828
- thought=f"Focus the {field_name.replace('_', ' ')} field.",
855
+ step_index=step_idx,
856
+ timestamp=float(step_idx),
857
+ observation=Observation(screenshot_path=str(img_path)),
858
+ action=Action(
859
+ type=ActionType.CLICK,
860
+ normalized_coordinates=(cx, cy),
861
+ raw={"bbox": bbox, "element_index": elem_idx},
862
+ ),
863
+ reasoning=f"Focus the {field_name.replace('_', ' ')} field.",
829
864
  ))
830
865
  step_idx += 1
831
866
 
@@ -842,10 +877,15 @@ def _script_registration_episode(
842
877
  img2_path = root / f"{episode_id}_step_{step_idx}.png"
843
878
  _save_image(img2, img2_path)
844
879
  steps.append(Step(
845
- t=float(step_idx),
846
- observation=Observation(image_path=str(img2_path)),
847
- action=Action(type="type", text=value, element_index=elem_idx),
848
- thought=f"Type the {field_name.replace('_', ' ')}.",
880
+ step_index=step_idx,
881
+ timestamp=float(step_idx),
882
+ observation=Observation(screenshot_path=str(img2_path)),
883
+ action=Action(
884
+ type=ActionType.TYPE,
885
+ text=value,
886
+ raw={"element_index": elem_idx},
887
+ ),
888
+ reasoning=f"Type the {field_name.replace('_', ' ')}.",
849
889
  ))
850
890
  current_values[field_name] = value
851
891
  step_idx += 1
@@ -865,10 +905,15 @@ def _script_registration_episode(
865
905
  img_path = root / f"{episode_id}_step_{step_idx}.png"
866
906
  _save_image(img, img_path)
867
907
  steps.append(Step(
868
- t=float(step_idx),
869
- observation=Observation(image_path=str(img_path)),
870
- action=Action(type="click", x=cx, y=cy, bbox=bbox, element_index=SOM_REGISTER_BUTTON),
871
- thought="Submit the registration form.",
908
+ step_index=step_idx,
909
+ timestamp=float(step_idx),
910
+ observation=Observation(screenshot_path=str(img_path)),
911
+ action=Action(
912
+ type=ActionType.CLICK,
913
+ normalized_coordinates=(cx, cy),
914
+ raw={"bbox": bbox, "element_index": SOM_REGISTER_BUTTON},
915
+ ),
916
+ reasoning="Submit the registration form.",
872
917
  ))
873
918
  step_idx += 1
874
919
 
@@ -877,19 +922,22 @@ def _script_registration_episode(
877
922
  img_done_path = root / f"{episode_id}_step_{step_idx}.png"
878
923
  _save_image(img_done, img_done_path)
879
924
  steps.append(Step(
880
- t=float(step_idx),
881
- observation=Observation(image_path=str(img_done_path)),
882
- action=Action(type="done"),
883
- thought="Registration successful; workflow complete.",
925
+ step_index=step_idx,
926
+ timestamp=float(step_idx),
927
+ observation=Observation(screenshot_path=str(img_done_path)),
928
+ action=Action(type=ActionType.DONE),
929
+ reasoning="Registration successful; workflow complete.",
884
930
  ))
885
931
 
886
932
  return Episode(
887
- id=episode_id,
888
- goal=f"Register with first name '{first_name}', last name '{last_name}', email '{email}', and password",
933
+ episode_id=episode_id,
934
+ instruction=f"Register with first name '{first_name}', last name '{last_name}', email '{email}', and password",
889
935
  steps=steps,
890
- summary="Successful registration.",
891
936
  success=True,
892
- workflow_id="registration",
937
+ metadata={
938
+ "summary": "Successful registration.",
939
+ "workflow_id": "registration",
940
+ },
893
941
  )
894
942
 
895
943
 
@@ -943,10 +991,15 @@ def _script_registration_episode_som(
943
991
  img_path = root / f"{episode_id}_step_{step_idx}.png"
944
992
  _save_image(img_som, img_path)
945
993
  steps.append(Step(
946
- t=float(step_idx),
947
- observation=Observation(image_path=str(img_path)),
948
- action=Action(type="click", x=cx, y=cy, bbox=bbox, element_index=elem_idx),
949
- thought=f"Focus element [{elem_idx}] ({field_name.replace('_', ' ')} field).",
994
+ step_index=step_idx,
995
+ timestamp=float(step_idx),
996
+ observation=Observation(screenshot_path=str(img_path)),
997
+ action=Action(
998
+ type=ActionType.CLICK,
999
+ normalized_coordinates=(cx, cy),
1000
+ raw={"bbox": bbox, "element_index": elem_idx},
1001
+ ),
1002
+ reasoning=f"Focus element [{elem_idx}] ({field_name.replace('_', ' ')} field).",
950
1003
  ))
951
1004
  step_idx += 1
952
1005
 
@@ -964,10 +1017,15 @@ def _script_registration_episode_som(
964
1017
  img2_path = root / f"{episode_id}_step_{step_idx}.png"
965
1018
  _save_image(img2_som, img2_path)
966
1019
  steps.append(Step(
967
- t=float(step_idx),
968
- observation=Observation(image_path=str(img2_path)),
969
- action=Action(type="type", text=value, element_index=elem_idx),
970
- thought=f"Type into element [{elem_idx}].",
1020
+ step_index=step_idx,
1021
+ timestamp=float(step_idx),
1022
+ observation=Observation(screenshot_path=str(img2_path)),
1023
+ action=Action(
1024
+ type=ActionType.TYPE,
1025
+ text=value,
1026
+ raw={"element_index": elem_idx},
1027
+ ),
1028
+ reasoning=f"Type into element [{elem_idx}].",
971
1029
  ))
972
1030
  current_values[field_name] = value
973
1031
  step_idx += 1
@@ -988,10 +1046,15 @@ def _script_registration_episode_som(
988
1046
  img_path = root / f"{episode_id}_step_{step_idx}.png"
989
1047
  _save_image(img_som, img_path)
990
1048
  steps.append(Step(
991
- t=float(step_idx),
992
- observation=Observation(image_path=str(img_path)),
993
- action=Action(type="click", x=cx, y=cy, bbox=bbox, element_index=SOM_REGISTER_BUTTON),
994
- thought=f"Click element [{SOM_REGISTER_BUTTON}] to submit registration.",
1049
+ step_index=step_idx,
1050
+ timestamp=float(step_idx),
1051
+ observation=Observation(screenshot_path=str(img_path)),
1052
+ action=Action(
1053
+ type=ActionType.CLICK,
1054
+ normalized_coordinates=(cx, cy),
1055
+ raw={"bbox": bbox, "element_index": SOM_REGISTER_BUTTON},
1056
+ ),
1057
+ reasoning=f"Click element [{SOM_REGISTER_BUTTON}] to submit registration.",
995
1058
  ))
996
1059
  step_idx += 1
997
1060
 
@@ -1000,37 +1063,40 @@ def _script_registration_episode_som(
1000
1063
  img_done_path = root / f"{episode_id}_step_{step_idx}.png"
1001
1064
  _save_image(img_done, img_done_path)
1002
1065
  steps.append(Step(
1003
- t=float(step_idx),
1004
- observation=Observation(image_path=str(img_done_path)),
1005
- action=Action(type="done"),
1006
- thought="Registration successful; workflow complete.",
1066
+ step_index=step_idx,
1067
+ timestamp=float(step_idx),
1068
+ observation=Observation(screenshot_path=str(img_done_path)),
1069
+ action=Action(type=ActionType.DONE),
1070
+ reasoning="Registration successful; workflow complete.",
1007
1071
  ))
1008
1072
 
1009
1073
  return Episode(
1010
- id=episode_id,
1011
- goal=f"Register with first name '{first_name}', last name '{last_name}', email '{email}', and password",
1074
+ episode_id=episode_id,
1075
+ instruction=f"Register with first name '{first_name}', last name '{last_name}', email '{email}', and password",
1012
1076
  steps=steps,
1013
- summary="Successful registration (SoM mode).",
1014
1077
  success=True,
1015
- workflow_id="registration_som",
1078
+ metadata={
1079
+ "summary": "Successful registration (SoM mode).",
1080
+ "workflow_id": "registration_som",
1081
+ },
1016
1082
  )
1017
1083
 
1018
1084
 
1019
- def generate_synthetic_sessions(
1020
- num_sessions: int = 10,
1085
+ def generate_synthetic_episodes(
1086
+ num_episodes: int = 10,
1021
1087
  seed: int | None = None,
1022
1088
  output_dir: str | os.PathLike[str] | None = None,
1023
1089
  jitter: bool = True,
1024
1090
  use_som: bool = False,
1025
1091
  scenario: str = "login",
1026
- ) -> List[Session]:
1027
- """Generate a list of synthetic Sessions with semantic UI episodes.
1092
+ ) -> List[Episode]:
1093
+ """Generate a list of synthetic Episodes with semantic UI episodes.
1028
1094
 
1029
- Each Session contains a single Episode. Images for all steps are written
1030
- to `output_dir`.
1095
+ Each Episode contains steps for a complete UI workflow. Images for all
1096
+ steps are written to `output_dir`.
1031
1097
 
1032
1098
  Args:
1033
- num_sessions: Number of sessions to generate.
1099
+ num_episodes: Number of episodes to generate.
1034
1100
  seed: Random seed for reproducibility.
1035
1101
  output_dir: Directory to write images to.
1036
1102
  jitter: Whether to apply slight position jitter to UI elements.
@@ -1040,6 +1106,9 @@ def generate_synthetic_sessions(
1040
1106
  scenario: Type of UI scenario to generate. Options:
1041
1107
  - "login": Simple login form (6 steps, 3 elements)
1042
1108
  - "registration": Registration form (12 steps, 6 elements)
1109
+
1110
+ Returns:
1111
+ List of Episode objects.
1043
1112
  """
1044
1113
 
1045
1114
  if seed is not None:
@@ -1051,28 +1120,28 @@ def generate_synthetic_sessions(
1051
1120
  else:
1052
1121
  output_root = Path(output_dir)
1053
1122
 
1054
- sessions: List[Session] = []
1123
+ episodes: List[Episode] = []
1055
1124
 
1056
- for i in range(num_sessions):
1057
- session_id = f"session_{i:04d}"
1058
- session_dir = output_root / session_id
1125
+ for i in range(num_episodes):
1126
+ episode_id = f"episode_{i:04d}"
1127
+ episode_dir = output_root / episode_id
1059
1128
 
1060
1129
  if scenario == "login":
1061
- episode_id = f"{session_id}_login"
1130
+ episode_id_full = f"{episode_id}_login"
1062
1131
  username = f"user{i}"
1063
1132
  password = f"pass{i}123"
1064
1133
 
1065
1134
  if use_som:
1066
1135
  episode = _script_login_episode_som(
1067
- session_dir, episode_id, username, password, jitter=jitter
1136
+ episode_dir, episode_id_full, username, password, jitter=jitter
1068
1137
  )
1069
1138
  else:
1070
1139
  episode = _script_login_episode(
1071
- session_dir, episode_id, username, password, jitter=jitter
1140
+ episode_dir, episode_id_full, username, password, jitter=jitter
1072
1141
  )
1073
1142
 
1074
1143
  elif scenario == "registration":
1075
- episode_id = f"{session_id}_registration"
1144
+ episode_id_full = f"{episode_id}_registration"
1076
1145
  first_name = f"John{i}"
1077
1146
  last_name = f"Doe{i}"
1078
1147
  email = f"john{i}@example.com"
@@ -1080,23 +1149,16 @@ def generate_synthetic_sessions(
1080
1149
 
1081
1150
  if use_som:
1082
1151
  episode = _script_registration_episode_som(
1083
- session_dir, episode_id, first_name, last_name, email, password, jitter=jitter
1152
+ episode_dir, episode_id_full, first_name, last_name, email, password, jitter=jitter
1084
1153
  )
1085
1154
  else:
1086
1155
  episode = _script_registration_episode(
1087
- session_dir, episode_id, first_name, last_name, email, password, jitter=jitter
1156
+ episode_dir, episode_id_full, first_name, last_name, email, password, jitter=jitter
1088
1157
  )
1089
1158
 
1090
1159
  else:
1091
1160
  raise ValueError(f"Unknown scenario: {scenario}. Options: login, registration")
1092
1161
 
1093
- session = Session(
1094
- id=session_id,
1095
- episodes=[episode],
1096
- meta={"scenario": scenario, "use_som": use_som},
1097
- )
1098
- sessions.append(session)
1099
-
1100
- return sessions
1101
-
1162
+ episodes.append(episode)
1102
1163
 
1164
+ return episodes