openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/benchmarks/__init__.py +8 -0
- openadapt_ml/benchmarks/agent.py +90 -11
- openadapt_ml/benchmarks/azure.py +35 -6
- openadapt_ml/benchmarks/cli.py +4449 -201
- openadapt_ml/benchmarks/live_tracker.py +180 -0
- openadapt_ml/benchmarks/runner.py +41 -4
- openadapt_ml/benchmarks/viewer.py +1219 -0
- openadapt_ml/benchmarks/vm_monitor.py +610 -0
- openadapt_ml/benchmarks/waa.py +61 -4
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/benchmarks/waa_live.py +619 -0
- openadapt_ml/cloud/local.py +1555 -1
- openadapt_ml/cloud/ssh_tunnel.py +553 -0
- openadapt_ml/datasets/next_action.py +87 -68
- openadapt_ml/evals/grounding.py +26 -8
- openadapt_ml/evals/trajectory_matching.py +84 -36
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +717 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +265 -0
- openadapt_ml/ingest/__init__.py +3 -4
- openadapt_ml/ingest/capture.py +89 -81
- openadapt_ml/ingest/loader.py +116 -68
- openadapt_ml/ingest/synthetic.py +221 -159
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +817 -0
- openadapt_ml/retrieval/embeddings.py +629 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +160 -0
- openadapt_ml/runtime/policy.py +10 -10
- openadapt_ml/schema/__init__.py +104 -0
- openadapt_ml/schema/converters.py +541 -0
- openadapt_ml/schema/episode.py +457 -0
- openadapt_ml/scripts/compare.py +26 -16
- openadapt_ml/scripts/eval_policy.py +4 -5
- openadapt_ml/scripts/prepare_synthetic.py +14 -17
- openadapt_ml/scripts/train.py +81 -70
- openadapt_ml/training/benchmark_viewer.py +3225 -0
- openadapt_ml/training/trainer.py +120 -363
- openadapt_ml/training/trl_trainer.py +354 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
- openadapt_ml-0.2.0.dist-info/RECORD +86 -0
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/ingest/synthetic.py
CHANGED
|
@@ -8,7 +8,7 @@ from typing import List, Optional, Tuple
|
|
|
8
8
|
|
|
9
9
|
from PIL import Image, ImageDraw, ImageFont
|
|
10
10
|
|
|
11
|
-
from openadapt_ml.
|
|
11
|
+
from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
IMG_WIDTH = 800
|
|
@@ -333,12 +333,12 @@ def _script_login_episode(
|
|
|
333
333
|
"""Create a scripted login episode with a fixed sequence of steps.
|
|
334
334
|
|
|
335
335
|
Steps (6 total):
|
|
336
|
-
- Step 0: blank login screen
|
|
337
|
-
- Step 1: username field focused
|
|
338
|
-
- Step 2: username typed
|
|
339
|
-
- Step 3: password field focused
|
|
340
|
-
- Step 4: password typed
|
|
341
|
-
- Step 5: logged-in screen
|
|
336
|
+
- Step 0: blank login screen -> click username field.
|
|
337
|
+
- Step 1: username field focused -> type username.
|
|
338
|
+
- Step 2: username typed -> click password field.
|
|
339
|
+
- Step 3: password field focused -> type password.
|
|
340
|
+
- Step 4: password typed -> click login button.
|
|
341
|
+
- Step 5: logged-in screen -> DONE.
|
|
342
342
|
|
|
343
343
|
Each step includes bounding boxes for clickable elements to support
|
|
344
344
|
bbox-based click hit evaluation.
|
|
@@ -354,100 +354,120 @@ def _script_login_episode(
|
|
|
354
354
|
password_bbox = _bbox_normalized(layout.password_box)
|
|
355
355
|
login_bbox = _bbox_normalized(layout.login_button)
|
|
356
356
|
|
|
357
|
-
# Step 0: blank login screen
|
|
357
|
+
# Step 0: blank login screen -> click username field
|
|
358
358
|
cx, cy = _center(layout.username_box)
|
|
359
359
|
img0, _ = _draw_login_screen(layout=layout, jitter=False)
|
|
360
360
|
img0_path = root / f"{episode_id}_step_0.png"
|
|
361
361
|
_save_image(img0, img0_path)
|
|
362
|
-
obs0 = Observation(
|
|
362
|
+
obs0 = Observation(screenshot_path=str(img0_path))
|
|
363
363
|
steps.append(
|
|
364
364
|
Step(
|
|
365
|
-
|
|
365
|
+
step_index=0,
|
|
366
|
+
timestamp=0.0,
|
|
366
367
|
observation=obs0,
|
|
367
|
-
action=Action(
|
|
368
|
-
|
|
368
|
+
action=Action(
|
|
369
|
+
type=ActionType.CLICK,
|
|
370
|
+
normalized_coordinates=(cx, cy),
|
|
371
|
+
raw={"bbox": username_bbox},
|
|
372
|
+
),
|
|
373
|
+
reasoning="Focus the username field.",
|
|
369
374
|
)
|
|
370
375
|
)
|
|
371
376
|
|
|
372
|
-
# Step 1: username field focused
|
|
377
|
+
# Step 1: username field focused -> type username
|
|
373
378
|
img1, _ = _draw_login_screen(username="", layout=layout, jitter=False)
|
|
374
379
|
img1_path = root / f"{episode_id}_step_1.png"
|
|
375
380
|
_save_image(img1, img1_path)
|
|
376
|
-
obs1 = Observation(
|
|
381
|
+
obs1 = Observation(screenshot_path=str(img1_path))
|
|
377
382
|
steps.append(
|
|
378
383
|
Step(
|
|
379
|
-
|
|
384
|
+
step_index=1,
|
|
385
|
+
timestamp=1.0,
|
|
380
386
|
observation=obs1,
|
|
381
|
-
action=Action(type=
|
|
382
|
-
|
|
387
|
+
action=Action(type=ActionType.TYPE, text=username),
|
|
388
|
+
reasoning="Type the username.",
|
|
383
389
|
)
|
|
384
390
|
)
|
|
385
391
|
|
|
386
|
-
# Step 2: username typed
|
|
392
|
+
# Step 2: username typed -> click password field
|
|
387
393
|
cx_pw, cy_pw = _center(layout.password_box)
|
|
388
394
|
img2, _ = _draw_login_screen(username=username, layout=layout, jitter=False)
|
|
389
395
|
img2_path = root / f"{episode_id}_step_2.png"
|
|
390
396
|
_save_image(img2, img2_path)
|
|
391
|
-
obs2 = Observation(
|
|
397
|
+
obs2 = Observation(screenshot_path=str(img2_path))
|
|
392
398
|
steps.append(
|
|
393
399
|
Step(
|
|
394
|
-
|
|
400
|
+
step_index=2,
|
|
401
|
+
timestamp=2.0,
|
|
395
402
|
observation=obs2,
|
|
396
|
-
action=Action(
|
|
397
|
-
|
|
403
|
+
action=Action(
|
|
404
|
+
type=ActionType.CLICK,
|
|
405
|
+
normalized_coordinates=(cx_pw, cy_pw),
|
|
406
|
+
raw={"bbox": password_bbox},
|
|
407
|
+
),
|
|
408
|
+
reasoning="Focus the password field.",
|
|
398
409
|
)
|
|
399
410
|
)
|
|
400
411
|
|
|
401
|
-
# Step 3: password field focused
|
|
412
|
+
# Step 3: password field focused -> type password
|
|
402
413
|
img3, _ = _draw_login_screen(username=username, layout=layout, jitter=False)
|
|
403
414
|
img3_path = root / f"{episode_id}_step_3.png"
|
|
404
415
|
_save_image(img3, img3_path)
|
|
405
|
-
obs3 = Observation(
|
|
416
|
+
obs3 = Observation(screenshot_path=str(img3_path))
|
|
406
417
|
steps.append(
|
|
407
418
|
Step(
|
|
408
|
-
|
|
419
|
+
step_index=3,
|
|
420
|
+
timestamp=3.0,
|
|
409
421
|
observation=obs3,
|
|
410
|
-
action=Action(type=
|
|
411
|
-
|
|
422
|
+
action=Action(type=ActionType.TYPE, text=password),
|
|
423
|
+
reasoning="Type the password.",
|
|
412
424
|
)
|
|
413
425
|
)
|
|
414
426
|
|
|
415
|
-
# Step 4: password typed
|
|
427
|
+
# Step 4: password typed -> click login button
|
|
416
428
|
cx_btn, cy_btn = _center(layout.login_button)
|
|
417
429
|
img4, _ = _draw_login_screen(username=username, password=password, layout=layout, jitter=False)
|
|
418
430
|
img4_path = root / f"{episode_id}_step_4.png"
|
|
419
431
|
_save_image(img4, img4_path)
|
|
420
|
-
obs4 = Observation(
|
|
432
|
+
obs4 = Observation(screenshot_path=str(img4_path))
|
|
421
433
|
steps.append(
|
|
422
434
|
Step(
|
|
423
|
-
|
|
435
|
+
step_index=4,
|
|
436
|
+
timestamp=4.0,
|
|
424
437
|
observation=obs4,
|
|
425
|
-
action=Action(
|
|
426
|
-
|
|
438
|
+
action=Action(
|
|
439
|
+
type=ActionType.CLICK,
|
|
440
|
+
normalized_coordinates=(cx_btn, cy_btn),
|
|
441
|
+
raw={"bbox": login_bbox},
|
|
442
|
+
),
|
|
443
|
+
reasoning="Submit the login form.",
|
|
427
444
|
)
|
|
428
445
|
)
|
|
429
446
|
|
|
430
|
-
# Step 5: logged-in screen
|
|
447
|
+
# Step 5: logged-in screen -> DONE
|
|
431
448
|
img5 = _draw_logged_in_screen(username=username)
|
|
432
449
|
img5_path = root / f"{episode_id}_step_5.png"
|
|
433
450
|
_save_image(img5, img5_path)
|
|
434
|
-
obs5 = Observation(
|
|
451
|
+
obs5 = Observation(screenshot_path=str(img5_path))
|
|
435
452
|
steps.append(
|
|
436
453
|
Step(
|
|
437
|
-
|
|
454
|
+
step_index=5,
|
|
455
|
+
timestamp=5.0,
|
|
438
456
|
observation=obs5,
|
|
439
|
-
action=Action(type=
|
|
440
|
-
|
|
457
|
+
action=Action(type=ActionType.DONE),
|
|
458
|
+
reasoning="Login successful; workflow complete.",
|
|
441
459
|
)
|
|
442
460
|
)
|
|
443
461
|
|
|
444
462
|
episode = Episode(
|
|
445
|
-
|
|
446
|
-
|
|
463
|
+
episode_id=episode_id,
|
|
464
|
+
instruction=f"Log in with username '{username}' and password '{password}'",
|
|
447
465
|
steps=steps,
|
|
448
|
-
summary="Successful login via username and password.",
|
|
449
466
|
success=True,
|
|
450
|
-
|
|
467
|
+
metadata={
|
|
468
|
+
"summary": "Successful login via username and password.",
|
|
469
|
+
"workflow_id": "login_basic",
|
|
470
|
+
},
|
|
451
471
|
)
|
|
452
472
|
|
|
453
473
|
return episode
|
|
@@ -467,12 +487,12 @@ def _script_login_episode_som(
|
|
|
467
487
|
for click actions.
|
|
468
488
|
|
|
469
489
|
Steps (6 total):
|
|
470
|
-
- Step 0: SoM login screen
|
|
471
|
-
- Step 1: username field focused
|
|
472
|
-
- Step 2: username typed
|
|
473
|
-
- Step 3: password field focused
|
|
474
|
-
- Step 4: password typed
|
|
475
|
-
- Step 5: logged-in screen
|
|
490
|
+
- Step 0: SoM login screen -> click element [1] (username field)
|
|
491
|
+
- Step 1: username field focused -> type username
|
|
492
|
+
- Step 2: username typed -> click element [2] (password field)
|
|
493
|
+
- Step 3: password field focused -> type password
|
|
494
|
+
- Step 4: password typed -> click element [3] (login button)
|
|
495
|
+
- Step 5: logged-in screen -> DONE
|
|
476
496
|
"""
|
|
477
497
|
|
|
478
498
|
steps: List[Step] = []
|
|
@@ -492,81 +512,89 @@ def _script_login_episode_som(
|
|
|
492
512
|
(SOM_LOGIN_BUTTON, layout.login_button),
|
|
493
513
|
]
|
|
494
514
|
|
|
495
|
-
# Step 0: SoM login screen
|
|
515
|
+
# Step 0: SoM login screen -> click username field [1]
|
|
496
516
|
cx, cy = _center(layout.username_box)
|
|
497
517
|
img0, _ = _draw_login_screen(layout=layout, jitter=False)
|
|
498
518
|
img0_som = _overlay_som_marks(img0, som_elements)
|
|
499
519
|
img0_path = root / f"{episode_id}_step_0.png"
|
|
500
520
|
_save_image(img0_som, img0_path)
|
|
501
|
-
obs0 = Observation(
|
|
521
|
+
obs0 = Observation(screenshot_path=str(img0_path))
|
|
502
522
|
steps.append(
|
|
503
523
|
Step(
|
|
504
|
-
|
|
524
|
+
step_index=0,
|
|
525
|
+
timestamp=0.0,
|
|
505
526
|
observation=obs0,
|
|
506
527
|
action=Action(
|
|
507
|
-
type=
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
bbox=username_bbox,
|
|
511
|
-
element_index=SOM_USERNAME_FIELD,
|
|
528
|
+
type=ActionType.CLICK,
|
|
529
|
+
normalized_coordinates=(cx, cy),
|
|
530
|
+
raw={"bbox": username_bbox, "element_index": SOM_USERNAME_FIELD},
|
|
512
531
|
),
|
|
513
|
-
|
|
532
|
+
reasoning="Focus the username field by clicking element [1].",
|
|
514
533
|
)
|
|
515
534
|
)
|
|
516
535
|
|
|
517
|
-
# Step 1: username field focused
|
|
536
|
+
# Step 1: username field focused -> type username into element [1]
|
|
518
537
|
img1, _ = _draw_login_screen(username="", layout=layout, jitter=False)
|
|
519
538
|
img1_som = _overlay_som_marks(img1, som_elements)
|
|
520
539
|
img1_path = root / f"{episode_id}_step_1.png"
|
|
521
540
|
_save_image(img1_som, img1_path)
|
|
522
|
-
obs1 = Observation(
|
|
541
|
+
obs1 = Observation(screenshot_path=str(img1_path))
|
|
523
542
|
steps.append(
|
|
524
543
|
Step(
|
|
525
|
-
|
|
544
|
+
step_index=1,
|
|
545
|
+
timestamp=1.0,
|
|
526
546
|
observation=obs1,
|
|
527
|
-
action=Action(
|
|
528
|
-
|
|
547
|
+
action=Action(
|
|
548
|
+
type=ActionType.TYPE,
|
|
549
|
+
text=username,
|
|
550
|
+
raw={"element_index": SOM_USERNAME_FIELD},
|
|
551
|
+
),
|
|
552
|
+
reasoning="Type the username into element [1].",
|
|
529
553
|
)
|
|
530
554
|
)
|
|
531
555
|
|
|
532
|
-
# Step 2: username typed
|
|
556
|
+
# Step 2: username typed -> click password field [2]
|
|
533
557
|
cx_pw, cy_pw = _center(layout.password_box)
|
|
534
558
|
img2, _ = _draw_login_screen(username=username, layout=layout, jitter=False)
|
|
535
559
|
img2_som = _overlay_som_marks(img2, som_elements)
|
|
536
560
|
img2_path = root / f"{episode_id}_step_2.png"
|
|
537
561
|
_save_image(img2_som, img2_path)
|
|
538
|
-
obs2 = Observation(
|
|
562
|
+
obs2 = Observation(screenshot_path=str(img2_path))
|
|
539
563
|
steps.append(
|
|
540
564
|
Step(
|
|
541
|
-
|
|
565
|
+
step_index=2,
|
|
566
|
+
timestamp=2.0,
|
|
542
567
|
observation=obs2,
|
|
543
568
|
action=Action(
|
|
544
|
-
type=
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
bbox=password_bbox,
|
|
548
|
-
element_index=SOM_PASSWORD_FIELD,
|
|
569
|
+
type=ActionType.CLICK,
|
|
570
|
+
normalized_coordinates=(cx_pw, cy_pw),
|
|
571
|
+
raw={"bbox": password_bbox, "element_index": SOM_PASSWORD_FIELD},
|
|
549
572
|
),
|
|
550
|
-
|
|
573
|
+
reasoning="Focus the password field by clicking element [2].",
|
|
551
574
|
)
|
|
552
575
|
)
|
|
553
576
|
|
|
554
|
-
# Step 3: password field focused
|
|
577
|
+
# Step 3: password field focused -> type password into element [2]
|
|
555
578
|
img3, _ = _draw_login_screen(username=username, layout=layout, jitter=False)
|
|
556
579
|
img3_som = _overlay_som_marks(img3, som_elements)
|
|
557
580
|
img3_path = root / f"{episode_id}_step_3.png"
|
|
558
581
|
_save_image(img3_som, img3_path)
|
|
559
|
-
obs3 = Observation(
|
|
582
|
+
obs3 = Observation(screenshot_path=str(img3_path))
|
|
560
583
|
steps.append(
|
|
561
584
|
Step(
|
|
562
|
-
|
|
585
|
+
step_index=3,
|
|
586
|
+
timestamp=3.0,
|
|
563
587
|
observation=obs3,
|
|
564
|
-
action=Action(
|
|
565
|
-
|
|
588
|
+
action=Action(
|
|
589
|
+
type=ActionType.TYPE,
|
|
590
|
+
text=password,
|
|
591
|
+
raw={"element_index": SOM_PASSWORD_FIELD},
|
|
592
|
+
),
|
|
593
|
+
reasoning="Type the password into element [2].",
|
|
566
594
|
)
|
|
567
595
|
)
|
|
568
596
|
|
|
569
|
-
# Step 4: password typed
|
|
597
|
+
# Step 4: password typed -> click login button [3]
|
|
570
598
|
cx_btn, cy_btn = _center(layout.login_button)
|
|
571
599
|
img4, _ = _draw_login_screen(
|
|
572
600
|
username=username, password=password, layout=layout, jitter=False
|
|
@@ -574,43 +602,45 @@ def _script_login_episode_som(
|
|
|
574
602
|
img4_som = _overlay_som_marks(img4, som_elements)
|
|
575
603
|
img4_path = root / f"{episode_id}_step_4.png"
|
|
576
604
|
_save_image(img4_som, img4_path)
|
|
577
|
-
obs4 = Observation(
|
|
605
|
+
obs4 = Observation(screenshot_path=str(img4_path))
|
|
578
606
|
steps.append(
|
|
579
607
|
Step(
|
|
580
|
-
|
|
608
|
+
step_index=4,
|
|
609
|
+
timestamp=4.0,
|
|
581
610
|
observation=obs4,
|
|
582
611
|
action=Action(
|
|
583
|
-
type=
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
bbox=login_bbox,
|
|
587
|
-
element_index=SOM_LOGIN_BUTTON,
|
|
612
|
+
type=ActionType.CLICK,
|
|
613
|
+
normalized_coordinates=(cx_btn, cy_btn),
|
|
614
|
+
raw={"bbox": login_bbox, "element_index": SOM_LOGIN_BUTTON},
|
|
588
615
|
),
|
|
589
|
-
|
|
616
|
+
reasoning="Submit the login form by clicking element [3].",
|
|
590
617
|
)
|
|
591
618
|
)
|
|
592
619
|
|
|
593
|
-
# Step 5: logged-in screen
|
|
620
|
+
# Step 5: logged-in screen -> DONE (no SoM needed)
|
|
594
621
|
img5 = _draw_logged_in_screen(username=username)
|
|
595
622
|
img5_path = root / f"{episode_id}_step_5.png"
|
|
596
623
|
_save_image(img5, img5_path)
|
|
597
|
-
obs5 = Observation(
|
|
624
|
+
obs5 = Observation(screenshot_path=str(img5_path))
|
|
598
625
|
steps.append(
|
|
599
626
|
Step(
|
|
600
|
-
|
|
627
|
+
step_index=5,
|
|
628
|
+
timestamp=5.0,
|
|
601
629
|
observation=obs5,
|
|
602
|
-
action=Action(type=
|
|
603
|
-
|
|
630
|
+
action=Action(type=ActionType.DONE),
|
|
631
|
+
reasoning="Login successful; workflow complete.",
|
|
604
632
|
)
|
|
605
633
|
)
|
|
606
634
|
|
|
607
635
|
episode = Episode(
|
|
608
|
-
|
|
609
|
-
|
|
636
|
+
episode_id=episode_id,
|
|
637
|
+
instruction=f"Log in with username '{username}' and password '{password}'",
|
|
610
638
|
steps=steps,
|
|
611
|
-
summary="Successful login via username and password (SoM mode).",
|
|
612
639
|
success=True,
|
|
613
|
-
|
|
640
|
+
metadata={
|
|
641
|
+
"summary": "Successful login via username and password (SoM mode).",
|
|
642
|
+
"workflow_id": "login_basic_som",
|
|
643
|
+
},
|
|
614
644
|
)
|
|
615
645
|
|
|
616
646
|
return episode
|
|
@@ -822,10 +852,15 @@ def _script_registration_episode(
|
|
|
822
852
|
img_path = root / f"{episode_id}_step_{step_idx}.png"
|
|
823
853
|
_save_image(img, img_path)
|
|
824
854
|
steps.append(Step(
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
855
|
+
step_index=step_idx,
|
|
856
|
+
timestamp=float(step_idx),
|
|
857
|
+
observation=Observation(screenshot_path=str(img_path)),
|
|
858
|
+
action=Action(
|
|
859
|
+
type=ActionType.CLICK,
|
|
860
|
+
normalized_coordinates=(cx, cy),
|
|
861
|
+
raw={"bbox": bbox, "element_index": elem_idx},
|
|
862
|
+
),
|
|
863
|
+
reasoning=f"Focus the {field_name.replace('_', ' ')} field.",
|
|
829
864
|
))
|
|
830
865
|
step_idx += 1
|
|
831
866
|
|
|
@@ -842,10 +877,15 @@ def _script_registration_episode(
|
|
|
842
877
|
img2_path = root / f"{episode_id}_step_{step_idx}.png"
|
|
843
878
|
_save_image(img2, img2_path)
|
|
844
879
|
steps.append(Step(
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
880
|
+
step_index=step_idx,
|
|
881
|
+
timestamp=float(step_idx),
|
|
882
|
+
observation=Observation(screenshot_path=str(img2_path)),
|
|
883
|
+
action=Action(
|
|
884
|
+
type=ActionType.TYPE,
|
|
885
|
+
text=value,
|
|
886
|
+
raw={"element_index": elem_idx},
|
|
887
|
+
),
|
|
888
|
+
reasoning=f"Type the {field_name.replace('_', ' ')}.",
|
|
849
889
|
))
|
|
850
890
|
current_values[field_name] = value
|
|
851
891
|
step_idx += 1
|
|
@@ -865,10 +905,15 @@ def _script_registration_episode(
|
|
|
865
905
|
img_path = root / f"{episode_id}_step_{step_idx}.png"
|
|
866
906
|
_save_image(img, img_path)
|
|
867
907
|
steps.append(Step(
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
908
|
+
step_index=step_idx,
|
|
909
|
+
timestamp=float(step_idx),
|
|
910
|
+
observation=Observation(screenshot_path=str(img_path)),
|
|
911
|
+
action=Action(
|
|
912
|
+
type=ActionType.CLICK,
|
|
913
|
+
normalized_coordinates=(cx, cy),
|
|
914
|
+
raw={"bbox": bbox, "element_index": SOM_REGISTER_BUTTON},
|
|
915
|
+
),
|
|
916
|
+
reasoning="Submit the registration form.",
|
|
872
917
|
))
|
|
873
918
|
step_idx += 1
|
|
874
919
|
|
|
@@ -877,19 +922,22 @@ def _script_registration_episode(
|
|
|
877
922
|
img_done_path = root / f"{episode_id}_step_{step_idx}.png"
|
|
878
923
|
_save_image(img_done, img_done_path)
|
|
879
924
|
steps.append(Step(
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
925
|
+
step_index=step_idx,
|
|
926
|
+
timestamp=float(step_idx),
|
|
927
|
+
observation=Observation(screenshot_path=str(img_done_path)),
|
|
928
|
+
action=Action(type=ActionType.DONE),
|
|
929
|
+
reasoning="Registration successful; workflow complete.",
|
|
884
930
|
))
|
|
885
931
|
|
|
886
932
|
return Episode(
|
|
887
|
-
|
|
888
|
-
|
|
933
|
+
episode_id=episode_id,
|
|
934
|
+
instruction=f"Register with first name '{first_name}', last name '{last_name}', email '{email}', and password",
|
|
889
935
|
steps=steps,
|
|
890
|
-
summary="Successful registration.",
|
|
891
936
|
success=True,
|
|
892
|
-
|
|
937
|
+
metadata={
|
|
938
|
+
"summary": "Successful registration.",
|
|
939
|
+
"workflow_id": "registration",
|
|
940
|
+
},
|
|
893
941
|
)
|
|
894
942
|
|
|
895
943
|
|
|
@@ -943,10 +991,15 @@ def _script_registration_episode_som(
|
|
|
943
991
|
img_path = root / f"{episode_id}_step_{step_idx}.png"
|
|
944
992
|
_save_image(img_som, img_path)
|
|
945
993
|
steps.append(Step(
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
994
|
+
step_index=step_idx,
|
|
995
|
+
timestamp=float(step_idx),
|
|
996
|
+
observation=Observation(screenshot_path=str(img_path)),
|
|
997
|
+
action=Action(
|
|
998
|
+
type=ActionType.CLICK,
|
|
999
|
+
normalized_coordinates=(cx, cy),
|
|
1000
|
+
raw={"bbox": bbox, "element_index": elem_idx},
|
|
1001
|
+
),
|
|
1002
|
+
reasoning=f"Focus element [{elem_idx}] ({field_name.replace('_', ' ')} field).",
|
|
950
1003
|
))
|
|
951
1004
|
step_idx += 1
|
|
952
1005
|
|
|
@@ -964,10 +1017,15 @@ def _script_registration_episode_som(
|
|
|
964
1017
|
img2_path = root / f"{episode_id}_step_{step_idx}.png"
|
|
965
1018
|
_save_image(img2_som, img2_path)
|
|
966
1019
|
steps.append(Step(
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
1020
|
+
step_index=step_idx,
|
|
1021
|
+
timestamp=float(step_idx),
|
|
1022
|
+
observation=Observation(screenshot_path=str(img2_path)),
|
|
1023
|
+
action=Action(
|
|
1024
|
+
type=ActionType.TYPE,
|
|
1025
|
+
text=value,
|
|
1026
|
+
raw={"element_index": elem_idx},
|
|
1027
|
+
),
|
|
1028
|
+
reasoning=f"Type into element [{elem_idx}].",
|
|
971
1029
|
))
|
|
972
1030
|
current_values[field_name] = value
|
|
973
1031
|
step_idx += 1
|
|
@@ -988,10 +1046,15 @@ def _script_registration_episode_som(
|
|
|
988
1046
|
img_path = root / f"{episode_id}_step_{step_idx}.png"
|
|
989
1047
|
_save_image(img_som, img_path)
|
|
990
1048
|
steps.append(Step(
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1049
|
+
step_index=step_idx,
|
|
1050
|
+
timestamp=float(step_idx),
|
|
1051
|
+
observation=Observation(screenshot_path=str(img_path)),
|
|
1052
|
+
action=Action(
|
|
1053
|
+
type=ActionType.CLICK,
|
|
1054
|
+
normalized_coordinates=(cx, cy),
|
|
1055
|
+
raw={"bbox": bbox, "element_index": SOM_REGISTER_BUTTON},
|
|
1056
|
+
),
|
|
1057
|
+
reasoning=f"Click element [{SOM_REGISTER_BUTTON}] to submit registration.",
|
|
995
1058
|
))
|
|
996
1059
|
step_idx += 1
|
|
997
1060
|
|
|
@@ -1000,37 +1063,40 @@ def _script_registration_episode_som(
|
|
|
1000
1063
|
img_done_path = root / f"{episode_id}_step_{step_idx}.png"
|
|
1001
1064
|
_save_image(img_done, img_done_path)
|
|
1002
1065
|
steps.append(Step(
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1066
|
+
step_index=step_idx,
|
|
1067
|
+
timestamp=float(step_idx),
|
|
1068
|
+
observation=Observation(screenshot_path=str(img_done_path)),
|
|
1069
|
+
action=Action(type=ActionType.DONE),
|
|
1070
|
+
reasoning="Registration successful; workflow complete.",
|
|
1007
1071
|
))
|
|
1008
1072
|
|
|
1009
1073
|
return Episode(
|
|
1010
|
-
|
|
1011
|
-
|
|
1074
|
+
episode_id=episode_id,
|
|
1075
|
+
instruction=f"Register with first name '{first_name}', last name '{last_name}', email '{email}', and password",
|
|
1012
1076
|
steps=steps,
|
|
1013
|
-
summary="Successful registration (SoM mode).",
|
|
1014
1077
|
success=True,
|
|
1015
|
-
|
|
1078
|
+
metadata={
|
|
1079
|
+
"summary": "Successful registration (SoM mode).",
|
|
1080
|
+
"workflow_id": "registration_som",
|
|
1081
|
+
},
|
|
1016
1082
|
)
|
|
1017
1083
|
|
|
1018
1084
|
|
|
1019
|
-
def
|
|
1020
|
-
|
|
1085
|
+
def generate_synthetic_episodes(
|
|
1086
|
+
num_episodes: int = 10,
|
|
1021
1087
|
seed: int | None = None,
|
|
1022
1088
|
output_dir: str | os.PathLike[str] | None = None,
|
|
1023
1089
|
jitter: bool = True,
|
|
1024
1090
|
use_som: bool = False,
|
|
1025
1091
|
scenario: str = "login",
|
|
1026
|
-
) -> List[
|
|
1027
|
-
"""Generate a list of synthetic
|
|
1092
|
+
) -> List[Episode]:
|
|
1093
|
+
"""Generate a list of synthetic Episodes with semantic UI episodes.
|
|
1028
1094
|
|
|
1029
|
-
Each
|
|
1030
|
-
to `output_dir`.
|
|
1095
|
+
Each Episode contains steps for a complete UI workflow. Images for all
|
|
1096
|
+
steps are written to `output_dir`.
|
|
1031
1097
|
|
|
1032
1098
|
Args:
|
|
1033
|
-
|
|
1099
|
+
num_episodes: Number of episodes to generate.
|
|
1034
1100
|
seed: Random seed for reproducibility.
|
|
1035
1101
|
output_dir: Directory to write images to.
|
|
1036
1102
|
jitter: Whether to apply slight position jitter to UI elements.
|
|
@@ -1040,6 +1106,9 @@ def generate_synthetic_sessions(
|
|
|
1040
1106
|
scenario: Type of UI scenario to generate. Options:
|
|
1041
1107
|
- "login": Simple login form (6 steps, 3 elements)
|
|
1042
1108
|
- "registration": Registration form (12 steps, 6 elements)
|
|
1109
|
+
|
|
1110
|
+
Returns:
|
|
1111
|
+
List of Episode objects.
|
|
1043
1112
|
"""
|
|
1044
1113
|
|
|
1045
1114
|
if seed is not None:
|
|
@@ -1051,28 +1120,28 @@ def generate_synthetic_sessions(
|
|
|
1051
1120
|
else:
|
|
1052
1121
|
output_root = Path(output_dir)
|
|
1053
1122
|
|
|
1054
|
-
|
|
1123
|
+
episodes: List[Episode] = []
|
|
1055
1124
|
|
|
1056
|
-
for i in range(
|
|
1057
|
-
|
|
1058
|
-
|
|
1125
|
+
for i in range(num_episodes):
|
|
1126
|
+
episode_id = f"episode_{i:04d}"
|
|
1127
|
+
episode_dir = output_root / episode_id
|
|
1059
1128
|
|
|
1060
1129
|
if scenario == "login":
|
|
1061
|
-
|
|
1130
|
+
episode_id_full = f"{episode_id}_login"
|
|
1062
1131
|
username = f"user{i}"
|
|
1063
1132
|
password = f"pass{i}123"
|
|
1064
1133
|
|
|
1065
1134
|
if use_som:
|
|
1066
1135
|
episode = _script_login_episode_som(
|
|
1067
|
-
|
|
1136
|
+
episode_dir, episode_id_full, username, password, jitter=jitter
|
|
1068
1137
|
)
|
|
1069
1138
|
else:
|
|
1070
1139
|
episode = _script_login_episode(
|
|
1071
|
-
|
|
1140
|
+
episode_dir, episode_id_full, username, password, jitter=jitter
|
|
1072
1141
|
)
|
|
1073
1142
|
|
|
1074
1143
|
elif scenario == "registration":
|
|
1075
|
-
|
|
1144
|
+
episode_id_full = f"{episode_id}_registration"
|
|
1076
1145
|
first_name = f"John{i}"
|
|
1077
1146
|
last_name = f"Doe{i}"
|
|
1078
1147
|
email = f"john{i}@example.com"
|
|
@@ -1080,23 +1149,16 @@ def generate_synthetic_sessions(
|
|
|
1080
1149
|
|
|
1081
1150
|
if use_som:
|
|
1082
1151
|
episode = _script_registration_episode_som(
|
|
1083
|
-
|
|
1152
|
+
episode_dir, episode_id_full, first_name, last_name, email, password, jitter=jitter
|
|
1084
1153
|
)
|
|
1085
1154
|
else:
|
|
1086
1155
|
episode = _script_registration_episode(
|
|
1087
|
-
|
|
1156
|
+
episode_dir, episode_id_full, first_name, last_name, email, password, jitter=jitter
|
|
1088
1157
|
)
|
|
1089
1158
|
|
|
1090
1159
|
else:
|
|
1091
1160
|
raise ValueError(f"Unknown scenario: {scenario}. Options: login, registration")
|
|
1092
1161
|
|
|
1093
|
-
|
|
1094
|
-
id=session_id,
|
|
1095
|
-
episodes=[episode],
|
|
1096
|
-
meta={"scenario": scenario, "use_som": use_som},
|
|
1097
|
-
)
|
|
1098
|
-
sessions.append(session)
|
|
1099
|
-
|
|
1100
|
-
return sessions
|
|
1101
|
-
|
|
1162
|
+
episodes.append(episode)
|
|
1102
1163
|
|
|
1164
|
+
return episodes
|