openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
Files changed (112)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -107
  8. openadapt_ml/benchmarks/agent.py +297 -374
  9. openadapt_ml/benchmarks/azure.py +62 -24
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1874 -751
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +1236 -0
  14. openadapt_ml/benchmarks/vm_monitor.py +1111 -0
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
  16. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  17. openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
  18. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  19. openadapt_ml/cloud/azure_inference.py +3 -5
  20. openadapt_ml/cloud/lambda_labs.py +722 -307
  21. openadapt_ml/cloud/local.py +3194 -89
  22. openadapt_ml/cloud/ssh_tunnel.py +595 -0
  23. openadapt_ml/datasets/next_action.py +125 -96
  24. openadapt_ml/evals/grounding.py +32 -9
  25. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  26. openadapt_ml/evals/trajectory_matching.py +120 -57
  27. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  28. openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
  29. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  30. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  31. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  32. openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
  33. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  34. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  35. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  36. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  37. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  38. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  39. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  40. openadapt_ml/experiments/waa_demo/runner.py +732 -0
  41. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  42. openadapt_ml/export/__init__.py +9 -0
  43. openadapt_ml/export/__main__.py +6 -0
  44. openadapt_ml/export/cli.py +89 -0
  45. openadapt_ml/export/parquet.py +277 -0
  46. openadapt_ml/grounding/detector.py +18 -14
  47. openadapt_ml/ingest/__init__.py +11 -10
  48. openadapt_ml/ingest/capture.py +97 -86
  49. openadapt_ml/ingest/loader.py +120 -69
  50. openadapt_ml/ingest/synthetic.py +344 -193
  51. openadapt_ml/models/api_adapter.py +14 -4
  52. openadapt_ml/models/base_adapter.py +10 -2
  53. openadapt_ml/models/providers/__init__.py +288 -0
  54. openadapt_ml/models/providers/anthropic.py +266 -0
  55. openadapt_ml/models/providers/base.py +299 -0
  56. openadapt_ml/models/providers/google.py +376 -0
  57. openadapt_ml/models/providers/openai.py +342 -0
  58. openadapt_ml/models/qwen_vl.py +46 -19
  59. openadapt_ml/perception/__init__.py +35 -0
  60. openadapt_ml/perception/integration.py +399 -0
  61. openadapt_ml/retrieval/README.md +226 -0
  62. openadapt_ml/retrieval/USAGE.md +391 -0
  63. openadapt_ml/retrieval/__init__.py +91 -0
  64. openadapt_ml/retrieval/demo_retriever.py +843 -0
  65. openadapt_ml/retrieval/embeddings.py +630 -0
  66. openadapt_ml/retrieval/index.py +194 -0
  67. openadapt_ml/retrieval/retriever.py +162 -0
  68. openadapt_ml/runtime/__init__.py +50 -0
  69. openadapt_ml/runtime/policy.py +27 -14
  70. openadapt_ml/runtime/safety_gate.py +471 -0
  71. openadapt_ml/schema/__init__.py +113 -0
  72. openadapt_ml/schema/converters.py +588 -0
  73. openadapt_ml/schema/episode.py +470 -0
  74. openadapt_ml/scripts/capture_screenshots.py +530 -0
  75. openadapt_ml/scripts/compare.py +102 -61
  76. openadapt_ml/scripts/demo_policy.py +4 -1
  77. openadapt_ml/scripts/eval_policy.py +19 -14
  78. openadapt_ml/scripts/make_gif.py +1 -1
  79. openadapt_ml/scripts/prepare_synthetic.py +16 -17
  80. openadapt_ml/scripts/train.py +98 -75
  81. openadapt_ml/segmentation/README.md +920 -0
  82. openadapt_ml/segmentation/__init__.py +97 -0
  83. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  84. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  85. openadapt_ml/segmentation/annotator.py +610 -0
  86. openadapt_ml/segmentation/cache.py +290 -0
  87. openadapt_ml/segmentation/cli.py +674 -0
  88. openadapt_ml/segmentation/deduplicator.py +656 -0
  89. openadapt_ml/segmentation/frame_describer.py +788 -0
  90. openadapt_ml/segmentation/pipeline.py +340 -0
  91. openadapt_ml/segmentation/schemas.py +622 -0
  92. openadapt_ml/segmentation/segment_extractor.py +634 -0
  93. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  94. openadapt_ml/training/benchmark_viewer.py +3255 -19
  95. openadapt_ml/training/shared_ui.py +7 -7
  96. openadapt_ml/training/stub_provider.py +57 -35
  97. openadapt_ml/training/trainer.py +255 -441
  98. openadapt_ml/training/trl_trainer.py +403 -0
  99. openadapt_ml/training/viewer.py +323 -108
  100. openadapt_ml/training/viewer_components.py +180 -0
  101. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
  102. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  103. openadapt_ml/benchmarks/base.py +0 -366
  104. openadapt_ml/benchmarks/data_collection.py +0 -432
  105. openadapt_ml/benchmarks/runner.py +0 -381
  106. openadapt_ml/benchmarks/waa.py +0 -704
  107. openadapt_ml/schemas/__init__.py +0 -53
  108. openadapt_ml/schemas/sessions.py +0 -122
  109. openadapt_ml/schemas/validation.py +0 -252
  110. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  111. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  112. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
openadapt_ml-0.2.1.dist-info/RECORD
@@ -0,0 +1,116 @@
+ openadapt_ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ openadapt_ml/config.py,sha256=eH5WTKRPkkidjkNb25Wn_dUAizUQTsVPMYLDq_ekPJQ,1865
+ openadapt_ml/baselines/__init__.py,sha256=psXRe8N-TTum0Io09miXQdPx4wxEFnmqjbamK6M-KaE,3137
+ openadapt_ml/baselines/adapter.py,sha256=sfJuHfrr-o2jWwFEBKyujXvhzbZ9jHeO92CcqcQIysM,5975
+ openadapt_ml/baselines/cli.py,sha256=T0LN2XcJcL-YingNxLoU3LCjDsjIoGyjFPX3xdPmfY4,7868
+ openadapt_ml/baselines/config.py,sha256=R2mf-H687l6njQK4Ugyr7QMwjlk5vfUhgvsFDtW4ljY,14085
+ openadapt_ml/baselines/parser.py,sha256=xMRFoqXdOYvO0bV92vGoUJ1Jewv_HzTcIjBg97SALb4,31191
+ openadapt_ml/baselines/prompts.py,sha256=w7N2jrfmf-FYHfuv1vGT7QtXMF1Ahuy3uAsfjPoo6Ho,24453
+ openadapt_ml/benchmarks/__init__.py,sha256=FaEGc7pRM-eLUXEEpJXcIckwkIWKhfaDkaxGM9VC4Os,877
+ openadapt_ml/benchmarks/agent.py,sha256=8UcS9skCy6l18fGYaYt0JzJmYSGNB_WxDWhApbM7QH0,26940
+ openadapt_ml/benchmarks/azure.py,sha256=dCrxi90X5NmFNMTT-2WG4AF3-IOO4zQs7yPpnqR-jLc,28238
+ openadapt_ml/benchmarks/azure_ops_tracker.py,sha256=NOW21LPagOWIThSCIotI5cBvve92dtIktRIDLuyJ2CI,19309
+ openadapt_ml/benchmarks/cli.py,sha256=t4cIGN68GdphCX0AGkWJa_M6D4oUO_M0rfJDzD_POGA,62730
+ openadapt_ml/benchmarks/trace_export.py,sha256=Zx-pryEuLe734YHY8MgJsNdj3I3TcTY61OQ9iurgGB0,21746
+ openadapt_ml/benchmarks/viewer.py,sha256=Jztt_IoDW1u0WjPqlikfR8dunYzj66xCx0bMDDzJHQ8,41586
+ openadapt_ml/benchmarks/vm_monitor.py,sha256=FzmRrzqm0sZTcydfqMtRefBLfTr4fjoyWCxdHLovUj0,35733
+ openadapt_ml/benchmarks/waa_deploy/Dockerfile,sha256=F4GzVUoAUHvGlTFj-gGIPSlncG-JIz1_JyeaHvTnLpA,10853
+ openadapt_ml/benchmarks/waa_deploy/__init__.py,sha256=KV71HrrgETytfY0i4vFSi-yM0KjoQP2hd9Bl03cZ9yc,320
+ openadapt_ml/benchmarks/waa_deploy/api_agent.py,sha256=A5ZFhtBTKz0Q1GarNV51JhkEJwAgJfm9tK4CTJ1UEnE,20040
+ openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat,sha256=YxgrSWh76zLijlpxEpulWf9To2JtJ-yR42lr2WyTXiY,1496
+ openadapt_ml/cloud/__init__.py,sha256=XYrvxivJeZ8qYnuGod5kodMlm3iT2OK2GAApO3CNB0c,133
+ openadapt_ml/cloud/azure_inference.py,sha256=2EQ9fCGJA2hzH7mEcSXVK1U2mO4PwBrQHxUCsGCDJVM,15696
+ openadapt_ml/cloud/lambda_labs.py,sha256=NGjVHjpY2nO8F9dHHFi_CVjY1nAwk5kOiguOSSYZkCw,107872
+ openadapt_ml/cloud/local.py,sha256=s3grqrpTkcT73tIoDt-HJonoCRwx0zBL7M8vSNYL3nU,166047
+ openadapt_ml/cloud/ssh_tunnel.py,sha256=PTcyl9cDmbszTJbOD2MAj21AWXqoJt-2Iz26HAPdxRE,21740
+ openadapt_ml/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ openadapt_ml/datasets/next_action.py,sha256=2u1fRtOQzYdWZ6N_ebmOtPqWL9LnDun5STGuLmkcxrE,21252
+ openadapt_ml/evals/__init__.py,sha256=Kx7bSvPHwmoGVI3q1wS_lC17W2S32YHj0459JRqu6Ow,573
+ openadapt_ml/evals/grounding.py,sha256=tqjNWdinbj190vt_mPdc_w452z0AwgR81FDhkmKjDvs,8542
+ openadapt_ml/evals/plot_eval_metrics.py,sha256=GO-zhYpUJijb4Hp6fI-8lBR3TgjIDpCaOC4iQ-v9VO0,5259
+ openadapt_ml/evals/trajectory_matching.py,sha256=eiWPjMZAAWFmdDwQHVM689I98Kw5ensnAJrfEo_QZZ4,20764
+ openadapt_ml/experiments/demo_prompt/__init__.py,sha256=dwS0bI53jXMzHE-DPhb_mhmPdoqSZRIcNbV79wt8KPM,454
+ openadapt_ml/experiments/demo_prompt/format_demo.py,sha256=baXgqR-oJG9_hanlDPcPYKdMFLw2KSxJ5ERvl3FAwZ8,6691
+ openadapt_ml/experiments/demo_prompt/run_experiment.py,sha256=q_8k6WJeR50vrhS_jPmx_TkbPsk1OrTkwUrRkT_PqLM,16574
+ openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json,sha256=08oryOF126toTQDN9xciodavvfsaWNnXuBs0aULwpfI,5326
+ openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json,sha256=u03VgYTQia_HzilzNjxdGLpUSdbo4SzmHqI-GXlvurg,26915
+ openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json,sha256=FA1JgXXLor6on3lHlfJdNSuKzBca30ggH8IWSJEmmfA,11517
+ openadapt_ml/experiments/representation_shootout/__init__.py,sha256=flQ8VdNGZ-Nc5sDofSPpGh65O9Iytwk-9DsOVmTMcHE,1874
+ openadapt_ml/experiments/representation_shootout/conditions.py,sha256=77AUh2U1t1ZGTHNLoLUO0WDp_17hLp3uZMUkTb2JYow,22866
+ openadapt_ml/experiments/representation_shootout/config.py,sha256=df29SNCSPYXsUK2aYQhvRpb9DtmKtoGmCDlp1rWafZU,13043
+ openadapt_ml/experiments/representation_shootout/evaluator.py,sha256=ijza5ky-r0CVBfw-amyztX_122N5ZRZsy_rCzMWLELw,22947
+ openadapt_ml/experiments/representation_shootout/runner.py,sha256=gvZkq3Opl_6i5pjc-danV-Q7PLLBdwMmnrwVxQ5fNlI,23413
+ openadapt_ml/experiments/waa_demo/__init__.py,sha256=9M8iLxO9GWAw-FIB-0tzsqaweLcO5EVP1Sc5BoK16iU,363
+ openadapt_ml/experiments/waa_demo/demos.py,sha256=UwO0EYy8wUEggaBaI_cXuYe_jwSB1hx3ZtPf-z9bhjc,13796
+ openadapt_ml/experiments/waa_demo/runner.py,sha256=qe0iP6bvI65-FPpw6--yGZ83ASKKtTGlEL7EoO24MiM,24399
+ openadapt_ml/experiments/waa_demo/tasks.py,sha256=jw1QwbOt8xmWBW2lmBWcJzKBXssjv_e0j49MlC2rVJY,5425
+ openadapt_ml/export/__init__.py,sha256=mKehKHOio4jGcK-3r0-pb446GdKMPs0O9hAu4S0_R7s,266
+ openadapt_ml/export/__main__.py,sha256=0ObtWcdzf6p7gPwhNlCKpNm2FIhmusdYNkuk8tyt77U,149
+ openadapt_ml/export/cli.py,sha256=goTKNq9cOO9wsdNluLMH_-f9kdWShH3FPP8sCZ6KaPI,2331
+ openadapt_ml/export/parquet.py,sha256=oQfyRAhUUS-EpiOWa7S7s8HaHibhfTSoNPYfKb2HDrA,9897
+ openadapt_ml/grounding/__init__.py,sha256=uMvcALFRXmKD6PHhqLZ24Y6zhRUs46_PnWYqiqJP5cM,1412
+ openadapt_ml/grounding/base.py,sha256=mnjT25nxltZCD0VBzgIgj2kuCcB4sgXBN97MBaW5P6c,7688
+ openadapt_ml/grounding/detector.py,sha256=gu-clpHfHNsbeeVwuM54yxF23lAUPThOZStnOdq8-Es,19890
+ openadapt_ml/ingest/__init__.py,sha256=P1Z9-rEBZC8wviMlmu6Fgc-R_83Ku7scVDs5YRejMVE,1481
+ openadapt_ml/ingest/capture.py,sha256=hiUTbvGGF90KMUNof-z58azywNcHi-xJxQwHYIRb_4Q,10342
+ openadapt_ml/ingest/loader.py,sha256=T3gE4EP-SYXmpeOFCK-VCku1lvzfa6AbPJ5hMBDdAVc,9866
+ openadapt_ml/ingest/synthetic.py,sha256=ZX3eoMS08fpCFas07kYKUIMb7Bkp1zsaI9Bnuhis-I8,40482
+ openadapt_ml/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ openadapt_ml/models/api_adapter.py,sha256=G7S847hmYl6AQ8_4oTP5aMV4YhYBN_k-Sj3RVo1w9Bs,6679
+ openadapt_ml/models/base_adapter.py,sha256=vvasd-WhFxQNc2utqnXC_YMHABouXXNF4SwK9CUxpOc,2113
+ openadapt_ml/models/dummy_adapter.py,sha256=h4Zu-rjWgtG1r8jRtcsrX-FZm8iImrhrTQ7TsLfjE8A,1581
+ openadapt_ml/models/qwen_vl.py,sha256=0QcYqTraUMzT6W6No5aomeat9aUxpRqHai9aCX9-6dM,17652
+ openadapt_ml/models/providers/__init__.py,sha256=SBd4ZDSi3tYf7DBxTUOFw66Qr15idjuk45TM0bGAKOQ,8216
+ openadapt_ml/models/providers/anthropic.py,sha256=iUZceqn4yrD0s0FlMhgcENze4_AgJJ5u8du_1Ke7qy8,8348
+ openadapt_ml/models/providers/base.py,sha256=iGVEYSdzP9w3WRE7LM_vbG9ESXWKoJ5qSLx-ZB2ZcOw,8178
+ openadapt_ml/models/providers/google.py,sha256=OGXTaQwWonPy-3kLrBC8wLgIQytPtPDQiDkVqsUf70Y,12081
+ openadapt_ml/models/providers/openai.py,sha256=1LJJoWxkVNouowebs_N7iI4i8iSCHAjKvPOSAovC3p0,10487
+ openadapt_ml/perception/__init__.py,sha256=goR5qA_O7jvO8-gK5XPwib9TsqYfWIbljcmXao8mzRw,847
+ openadapt_ml/perception/integration.py,sha256=F9X4ysYn2RdFN6Wh3rXt5kl-cq0tf_6KdFViz2lAAnA,13296
+ openadapt_ml/retrieval/README.md,sha256=j4gXhTo6yH-5cuw4ER4174V-U6TQakOVT6Hj4kj7B0I,5696
+ openadapt_ml/retrieval/USAGE.md,sha256=XDIrX-94Z5nC-wvnBY5yF5gTqUYixxCC3wwUFvQx5YM,9278
+ openadapt_ml/retrieval/__init__.py,sha256=xocb84riKLUCezUioKssFRhAQsnvexh4W932o368_qg,2726
+ openadapt_ml/retrieval/demo_retriever.py,sha256=C4pLZ0HaJGkZ9H3_pQdeQcaQOOAU_YzGukx79WaFyZI,29493
+ openadapt_ml/retrieval/embeddings.py,sha256=B2tQq4VwN166H-P3s1kvOrhVlLvi4SAfXsMoxhXV8HE,19239
+ openadapt_ml/retrieval/index.py,sha256=UBFnSxp5T5eKt2txFcd0FytKCw1qxONZfxnFJVrduRQ,5710
+ openadapt_ml/retrieval/retriever.py,sha256=nDWeVLpfsHWWp2TE9dI_w56FebKI5bNXZPsh79BiQos,4609
+ openadapt_ml/runtime/__init__.py,sha256=Lpu29HgUvAVPW_dkRHRkk18BeMHlwZD-gQ3dZYNGzGo,1294
+ openadapt_ml/runtime/policy.py,sha256=RUB4AqObz-1FPpEZNQ-XUsmxro2RkgTJDAztzM2B_oE,6839
+ openadapt_ml/runtime/safety_gate.py,sha256=qODkdgx4HB2t-NL_iGPQbDZ_9hR2SZso_nTQ6KuMSfo,16729
+ openadapt_ml/schema/__init__.py,sha256=bL6Mi0z-qBV3cw-rfEaVP-sfzzZOvywO0H9IPWtSdN8,3097
+ openadapt_ml/schema/converters.py,sha256=ftXPNngd27PkALQyqukMsGpHdpB2sWBOlVz69rGyNkM,19157
+ openadapt_ml/schema/episode.py,sha256=2WFCdnIkcCRodvJiR6irmBaGhKzMB5XEQzS6iQJk2gY,15501
+ openadapt_ml/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ openadapt_ml/scripts/capture_screenshots.py,sha256=qVxMuIBuH8jT035tQqjfqLRm6OkLhwRvoGBooPGm09E,15283
+ openadapt_ml/scripts/compare.py,sha256=rPe_fQrHyINCbqkO9FXtjRsbQGwzlppEnmCK5YfGjgg,57316
+ openadapt_ml/scripts/demo_policy.py,sha256=luALfaJhPIh5aFHXdubSMCzPJk04dNzwfdwcCJeNhUk,2120
+ openadapt_ml/scripts/eval_policy.py,sha256=DHfQOIyO-mvrsA_zzlVEVwfsX1mLedFVsYvEpgITB5k,10397
+ openadapt_ml/scripts/make_gif.py,sha256=fg6jX2BwW4CIVLfWvq3WHjDE5H7tbRYnhZOHSIxhGWo,4433
+ openadapt_ml/scripts/prepare_synthetic.py,sha256=2luW436IejDDR2re73yUhtF5Zjf9McAqi2I0z4zs_CE,1123
+ openadapt_ml/scripts/run_qwen_login_benchmark.py,sha256=NWIhCAFSX5pYKFRCec7RkrYtzvz2LNMqhDfXcKxlagM,5655
+ openadapt_ml/scripts/train.py,sha256=1clpSt07fiMl1sMW57DO5Gco6LV8Oz2_SNGyiwRqcrQ,6759
+ openadapt_ml/segmentation/README.md,sha256=lBb3bkOh5Je2Ba_3MMuhB0QUeY44zLROA_S5JTbf7So,26055
+ openadapt_ml/segmentation/__init__.py,sha256=iodD7_Is5oLZm4oZttQd_CnLDofzRGbIhkCV1sgBjEU,2567
+ openadapt_ml/segmentation/annotator.py,sha256=d50yyxk3HTY1vP_-WXl5aLdmFk3EkdmoMiTqlkIsL78,21648
+ openadapt_ml/segmentation/cache.py,sha256=--1M4aoDdWOUYPBIfEnPdNkn9kfoECESs9JwNq5B_NQ,8696
+ openadapt_ml/segmentation/cli.py,sha256=L3YbxqTKAE797RYoZj5mxB9s50F7onAnGjbp4GbN-1M,24187
+ openadapt_ml/segmentation/deduplicator.py,sha256=aniwrp9IpcrMKfZh6Rx0Ihj_pu_4LepU_RT_eYiRGHI,22826
+ openadapt_ml/segmentation/frame_describer.py,sha256=LhgrdEsQ_tMQE7GiwECXWVY0WozEC6Z1Pr8CRTqtINI,26963
+ openadapt_ml/segmentation/pipeline.py,sha256=3ztfKt8xZ011xGL5rIIW6dmOyfJp7Af32XdgXuN-QYc,11890
+ openadapt_ml/segmentation/schemas.py,sha256=HKf5ImcXRpjM1hB6vJ825OJjUF5QDA5ITSJwcZklscM,19989
+ openadapt_ml/segmentation/segment_extractor.py,sha256=R2fI-sD5JsDE9Y3o4o9rpmldrq-TkRsmGrUJEg413vY,22213
+ openadapt_ml/segmentation/adapters/__init__.py,sha256=QOLlVvzjmZJP0fhtEZ4yXCvnJSNbWHZP8pV56Zs_5e4,171
+ openadapt_ml/segmentation/adapters/capture_adapter.py,sha256=8QDe9pyiRr3e2xuvvYowlNgBsz-NI3xepFL1ZFIAeAU,14353
+ openadapt_ml/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ openadapt_ml/training/azure_ops_viewer.py,sha256=B_Tlye1Z86xdTwc99nGtPqQzcJoDYOU4n7bQI0L-D0E,37679
+ openadapt_ml/training/benchmark_viewer.py,sha256=CVQa7c3rts_-a-xarwZ9EneR2i0G0n66f4RiK6wKcjg,174367
+ openadapt_ml/training/shared_ui.py,sha256=Ghjpx02k_HuZsyyOnZo6LD08ZIHD5ze7fU4centDMNY,4823
+ openadapt_ml/training/stub_provider.py,sha256=wyK4ApK88CCzgjZGl0jkNUrUfuSY-axA_XZMYzeeUpg,10932
+ openadapt_ml/training/trainer.py,sha256=yGK79alY9Z0xGRQ2r9EaiWbzGlmE5WZJQL_2TWgc8jU,91358
+ openadapt_ml/training/trl_trainer.py,sha256=AL1KFWXMub4vWE2w8eoAoQbSgm2fXO82CIqXULLYwVo,13223
+ openadapt_ml/training/viewer.py,sha256=rXpREFbDK_tsu719VUej6iXrgnB8eNP0SEuvB9NUUhA,128104
+ openadapt_ml/training/viewer_components.py,sha256=XilaX7r4YXFMT1QkooNnPWqR14SpsiTf7YbrN_g-Lq0,5478
+ openadapt_ml-0.2.1.dist-info/METADATA,sha256=4mdYMpWiRht3LWtCTXU7I1qTdjl70bKiLdnzpTKJaFI,36696
+ openadapt_ml-0.2.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ openadapt_ml-0.2.1.dist-info/licenses/LICENSE,sha256=2E5UY67RVLedJuNnwGudkAMtfM3LZNUcHgmaL89TAfw,1068
+ openadapt_ml-0.2.1.dist-info/RECORD,,
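
Each line added above is a standard wheel RECORD entry: a CSV row of the installed path, the file hash as "sha256=" followed by the unpadded urlsafe-base64 digest, and the size in bytes. The RECORD file cannot hash itself, so its own entry leaves the last two fields empty (hence the trailing ",,"). As a minimal sketch of how such entries can be checked against an unpacked wheel on disk (verify_record is a hypothetical helper, not part of this package):

import base64
import csv
import hashlib
from pathlib import Path

def verify_record(root: Path, record: Path) -> list[str]:
    """Return paths whose hash or size does not match their RECORD entry."""
    failures = []
    with record.open(newline="") as f:
        for path, hash_spec, size in csv.reader(f):
            if not hash_spec:  # RECORD's own row has empty hash and size
                continue
            algo, _, expected = hash_spec.partition("=")
            data = (root / path).read_bytes()
            digest = base64.urlsafe_b64encode(
                hashlib.new(algo, data).digest()
            ).rstrip(b"=").decode()
            if digest != expected or int(size) != len(data):
                failures.append(path)
    return failures
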
openadapt_ml/benchmarks/base.py
@@ -1,366 +0,0 @@
- """Base classes for benchmark integration.
-
- This module provides the core abstractions for integrating GUI agent benchmarks
- into openadapt-ml. It supports both interactive environments (WAA, OSWorld) and
- static trajectory datasets (Mind2Web).
-
- Example:
-     from openadapt_ml.benchmarks import WAAAdapter, evaluate_agent_on_benchmark
-
-     adapter = WAAAdapter(waa_repo_path="/path/to/WAA")
-     results = evaluate_agent_on_benchmark(agent, adapter, max_steps=50)
- """
-
- from __future__ import annotations
-
- from abc import ABC, abstractmethod
- from dataclasses import dataclass, field
- from typing import TYPE_CHECKING, Any, Iterator
-
- if TYPE_CHECKING:
-     pass
-
-
- @dataclass
- class BenchmarkTask:
-     """Canonical task representation.
-
-     Attributes:
-         task_id: Unique identifier for the task.
-         instruction: Natural language task instruction.
-         domain: Task domain ("web", "desktop", "mobile").
-         initial_state_ref: Reference to initial state (VM snapshot, URL, etc.).
-         time_limit_steps: Maximum steps allowed for the task.
-         raw_config: Original benchmark config (lossless preservation).
-         evaluation_spec: Benchmark-native evaluation specification.
-     """
-
-     task_id: str
-     instruction: str
-     domain: str  # "web", "desktop", "mobile"
-
-     # Environment setup
-     initial_state_ref: str | None = None  # VM snapshot, storage_state, start URL
-     time_limit_steps: int | None = None
-
-     # Preserve original config losslessly
-     raw_config: dict[str, Any] = field(default_factory=dict)
-
-     # Evaluation spec (benchmark-native)
-     evaluation_spec: dict[str, Any] | None = None
-
-
- @dataclass
- class BenchmarkObservation:
-     """Canonical observation at each step.
-
-     Supports multiple observation modalities:
-     - Visual: screenshots with viewport info
-     - Structured UI: accessibility tree (UIA/AXTree/DOM)
-     - Context: URL, window title, focused element
-
-     Attributes:
-         screenshot: PNG image bytes.
-         screenshot_path: Path to saved screenshot.
-         viewport: (width, height) of the viewport.
-         accessibility_tree: Platform-specific UI tree (UIA/AXTree/DOM).
-         dom_html: Raw HTML for web tasks.
-         url: Current URL for web tasks.
-         window_title: Active window title for desktop tasks.
-         focused_element: Currently focused UI element.
-         raw_observation: Original benchmark observation (lossless).
-     """
-
-     # Visual
-     screenshot: bytes | None = None  # PNG image bytes
-     screenshot_path: str | None = None
-     viewport: tuple[int, int] | None = None  # (width, height)
-
-     # Structured UI (format varies by platform)
-     accessibility_tree: dict | None = None  # UIA (Windows), AXTree (macOS), DOM (web)
-     dom_html: str | None = None  # Raw HTML for web
-
-     # Context
-     url: str | None = None  # For web tasks
-     window_title: str | None = None  # For desktop tasks
-     app_name: str | None = None  # Active application
-     focused_element: dict | None = None  # {node_id, bbox, text}
-
-     # Raw benchmark-specific data (lossless)
-     raw_observation: dict[str, Any] | None = None
-
-
- @dataclass
- class BenchmarkAction:
-     """Canonical action representation.
-
-     Supports multiple action types with both coordinate-based and element-based
-     grounding. The "grounding-first" approach stores both when available.
-
-     Attributes:
-         type: Action type ("click", "type", "scroll", "key", "drag", "answer", "done").
-         x: X coordinate (normalized [0,1] or pixels).
-         y: Y coordinate (normalized [0,1] or pixels).
-         target_node_id: Element ID from accessibility tree.
-         target_bbox: Element bounding box.
-         target_role: Element role (button, textfield, etc.).
-         target_name: Element accessible name.
-         text: Text to type (for "type" action).
-         key: Single key (for "key" action, e.g., "Enter", "Tab").
-         modifiers: Key modifiers (["ctrl", "shift", "alt"]).
-         scroll_direction: Scroll direction ("up", "down", "left", "right").
-         scroll_amount: Scroll amount (pixels or normalized).
-         end_x: Drag end X coordinate.
-         end_y: Drag end Y coordinate.
-         answer: Answer string (for benchmarks that score by answer).
-         raw_action: Original benchmark action (lossless).
-     """
-
-     type: str  # "click", "type", "scroll", "key", "drag", "answer", "done"
-
-     # Pointer actions - coordinates
-     x: float | None = None  # Normalized [0,1] or pixel
-     y: float | None = None
-
-     # Element grounding (when available)
-     target_node_id: str | None = None  # DOM/AX/UIA node ID
-     target_bbox: tuple[float, float, float, float] | None = None
-     target_role: str | None = None  # "button", "textfield", etc.
-     target_name: str | None = None  # Accessible name
-
-     # Keyboard actions
-     text: str | None = None  # For "type" action - text to type
-     key: str | None = None  # For "key" action - single key
-     modifiers: list[str] | None = None  # ["ctrl", "shift", "alt"]
-
-     # Scroll actions
-     scroll_direction: str | None = None  # "up", "down", "left", "right"
-     scroll_amount: float | None = None  # Pixels or normalized
-
-     # Drag actions
-     end_x: float | None = None
-     end_y: float | None = None
-
-     # Answer action (some benchmarks score by final answer)
-     answer: str | None = None
-
-     # Raw benchmark-specific format (lossless)
-     raw_action: dict[str, Any] | None = None
-
-
- @dataclass
- class BenchmarkResult:
-     """Result of a single task evaluation.
-
-     Attributes:
-         task_id: ID of the evaluated task.
-         success: Whether the task was completed successfully.
-         score: Score between 0.0 and 1.0.
-         steps: List of (observation, action) pairs from the trajectory.
-         num_steps: Number of steps taken.
-         error: Error message if task failed due to error.
-         reason: Explanation of success/failure.
-         total_time_seconds: Total time taken for the task.
-     """
-
-     task_id: str
-     success: bool
-     score: float  # 0.0 to 1.0
-
-     # Trajectory
-     steps: list[tuple[BenchmarkObservation, BenchmarkAction]] = field(
-         default_factory=list
-     )
-     num_steps: int = 0
-
-     # Diagnostics
-     error: str | None = None
-     reason: str | None = None  # Why success/fail
-
-     # Timing
-     total_time_seconds: float = 0.0
-
-
- @dataclass
- class UIElement:
-     """Normalized UI element for cross-platform use.
-
-     Provides a common representation for UI elements across platforms
-     (Windows UIA, macOS AXTree, web DOM).
-
-     Attributes:
-         node_id: Unique identifier for the element.
-         role: Element role (button, textfield, link, etc.).
-         name: Accessible name/label.
-         bbox: Bounding box (normalized [0,1] or pixels).
-         text: Text content.
-         value: Current value (for inputs).
-         children: Child elements.
-         attributes: Additional platform-specific attributes.
-     """
-
-     node_id: str
-     role: str  # "button", "textfield", "link", etc.
-     name: str | None = None  # Accessible name/label
-     bbox: tuple[float, float, float, float] | None = None  # (x1, y1, x2, y2)
-     text: str | None = None  # Text content
-     value: str | None = None  # Current value (for inputs)
-     children: list[UIElement] | None = None
-     attributes: dict[str, Any] | None = None  # Platform-specific
-
-
- class BenchmarkAdapter(ABC):
-     """Abstract interface for benchmark integration.
-
-     Subclasses implement this interface to integrate specific benchmarks
-     (WAA, OSWorld, WebArena, etc.) with openadapt-ml.
-
-     Two types of adapters:
-     - Interactive: Run environment, step through tasks (WAA, OSWorld)
-     - Static: Load trajectories for offline training/eval (Mind2Web)
-     """
-
-     @property
-     @abstractmethod
-     def name(self) -> str:
-         """Benchmark name (e.g., 'waa', 'osworld', 'webarena')."""
-         pass
-
-     @property
-     @abstractmethod
-     def benchmark_type(self) -> str:
-         """Benchmark type: 'interactive' or 'static'."""
-         pass
-
-     @property
-     def supports_parallel(self) -> bool:
-         """Whether the adapter supports parallel task execution."""
-         return False
-
-     @abstractmethod
-     def list_tasks(self, domain: str | None = None) -> list[BenchmarkTask]:
-         """List available tasks, optionally filtered by domain.
-
-         Args:
-             domain: Optional domain filter (e.g., "browser", "office").
-
-         Returns:
-             List of BenchmarkTask objects.
-         """
-         pass
-
-     @abstractmethod
-     def load_task(self, task_id: str) -> BenchmarkTask:
-         """Load a specific task by ID.
-
-         Args:
-             task_id: Task identifier.
-
-         Returns:
-             BenchmarkTask object.
-
-         Raises:
-             KeyError: If task_id not found.
-         """
-         pass
-
-     @abstractmethod
-     def reset(self, task: BenchmarkTask) -> BenchmarkObservation:
-         """Reset environment to task's initial state.
-
-         Args:
-             task: Task to initialize.
-
-         Returns:
-             Initial observation.
-         """
-         pass
-
-     @abstractmethod
-     def step(
-         self, action: BenchmarkAction
-     ) -> tuple[BenchmarkObservation, bool, dict[str, Any]]:
-         """Execute action and return new observation.
-
-         Args:
-             action: Action to execute.
-
-         Returns:
-             Tuple of (observation, done, info).
-         """
-         pass
-
-     @abstractmethod
-     def evaluate(self, task: BenchmarkTask) -> BenchmarkResult:
-         """Run benchmark's native evaluation on current state.
-
-         Args:
-             task: Task to evaluate.
-
-         Returns:
-             BenchmarkResult with success/score.
-         """
-         pass
-
-     def close(self) -> None:
-         """Clean up resources (VMs, browser, etc.)."""
-         pass
-
-     def __enter__(self) -> BenchmarkAdapter:
-         """Context manager entry."""
-         return self
-
-     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
-         """Context manager exit."""
-         self.close()
-
-
- class StaticDatasetAdapter(BenchmarkAdapter):
-     """Base for static trajectory datasets (Mind2Web, demos).
-
-     Static adapters load pre-recorded trajectories for offline training
-     or evaluation, rather than running an interactive environment.
-     """
-
-     @property
-     def benchmark_type(self) -> str:
-         """Static datasets are not interactive."""
-         return "static"
-
-     @abstractmethod
-     def load_trajectories(
-         self, split: str = "test"
-     ) -> Iterator[tuple[BenchmarkTask, list[tuple[BenchmarkObservation, BenchmarkAction]]]]:
-         """Iterate over expert trajectories.
-
-         Args:
-             split: Dataset split ("train", "val", "test").
-
-         Yields:
-             Tuples of (task, trajectory) where trajectory is a list of
-             (observation, action) pairs.
-         """
-         pass
-
-     def reset(self, task: BenchmarkTask) -> BenchmarkObservation:
-         """Not supported for static datasets."""
-         raise NotImplementedError(
-             "Static datasets don't support interactive reset. "
-             "Use load_trajectories() instead."
-         )
-
-     def step(
-         self, action: BenchmarkAction
-     ) -> tuple[BenchmarkObservation, bool, dict[str, Any]]:
-         """Not supported for static datasets."""
-         raise NotImplementedError(
-             "Static datasets don't support interactive stepping. "
-             "Use load_trajectories() instead."
-         )
-
-     def evaluate(self, task: BenchmarkTask) -> BenchmarkResult:
-         """Not supported for static datasets."""
-         raise NotImplementedError(
-             "Static datasets don't support execution-based evaluation. "
-             "Use offline metrics instead."
-         )
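
For context on what this removal takes away: a minimal sketch of a static adapter written against the 0.1.0 interface above, assuming trajectories stored one JSON object per line. JSONLDemoAdapter and the record layout are illustrative, not part of either release:

from __future__ import annotations

import json
from typing import Iterator

from openadapt_ml.benchmarks.base import (  # module removed in 0.2.1
    BenchmarkAction,
    BenchmarkObservation,
    BenchmarkTask,
    StaticDatasetAdapter,
)

class JSONLDemoAdapter(StaticDatasetAdapter):
    """Loads pre-recorded trajectories from a JSONL file (sketch only)."""

    def __init__(self, path: str) -> None:
        self.path = path

    @property
    def name(self) -> str:
        return "jsonl_demos"

    def list_tasks(self, domain: str | None = None) -> list[BenchmarkTask]:
        tasks = [task for task, _ in self.load_trajectories()]
        return [t for t in tasks if domain is None or t.domain == domain]

    def load_task(self, task_id: str) -> BenchmarkTask:
        for task, _ in self.load_trajectories():
            if task.task_id == task_id:
                return task
        raise KeyError(task_id)

    def load_trajectories(
        self, split: str = "test"
    ) -> Iterator[tuple[BenchmarkTask, list[tuple[BenchmarkObservation, BenchmarkAction]]]]:
        with open(self.path) as f:
            for line in f:
                rec = json.loads(line)
                task = BenchmarkTask(
                    task_id=rec["task_id"],
                    instruction=rec["instruction"],
                    domain=rec.get("domain", "web"),
                )
                steps = [
                    (
                        BenchmarkObservation(url=step.get("url")),
                        BenchmarkAction(type=step["type"], x=step.get("x"), y=step.get("y")),
                    )
                    for step in rec["steps"]
                ]
                yield task, steps

Because StaticDatasetAdapter already overrides benchmark_type and raises on reset/step/evaluate, a subclass only needs name, list_tasks, load_task, and load_trajectories.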