openadapt-ml: openadapt_ml-0.1.0-py3-none-any.whl → openadapt_ml-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -107
  8. openadapt_ml/benchmarks/agent.py +297 -374
  9. openadapt_ml/benchmarks/azure.py +62 -24
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1874 -751
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +1236 -0
  14. openadapt_ml/benchmarks/vm_monitor.py +1111 -0
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
  16. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  17. openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
  18. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  19. openadapt_ml/cloud/azure_inference.py +3 -5
  20. openadapt_ml/cloud/lambda_labs.py +722 -307
  21. openadapt_ml/cloud/local.py +3194 -89
  22. openadapt_ml/cloud/ssh_tunnel.py +595 -0
  23. openadapt_ml/datasets/next_action.py +125 -96
  24. openadapt_ml/evals/grounding.py +32 -9
  25. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  26. openadapt_ml/evals/trajectory_matching.py +120 -57
  27. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  28. openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
  29. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  30. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  31. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  32. openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
  33. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  34. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  35. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  36. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  37. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  38. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  39. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  40. openadapt_ml/experiments/waa_demo/runner.py +732 -0
  41. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  42. openadapt_ml/export/__init__.py +9 -0
  43. openadapt_ml/export/__main__.py +6 -0
  44. openadapt_ml/export/cli.py +89 -0
  45. openadapt_ml/export/parquet.py +277 -0
  46. openadapt_ml/grounding/detector.py +18 -14
  47. openadapt_ml/ingest/__init__.py +11 -10
  48. openadapt_ml/ingest/capture.py +97 -86
  49. openadapt_ml/ingest/loader.py +120 -69
  50. openadapt_ml/ingest/synthetic.py +344 -193
  51. openadapt_ml/models/api_adapter.py +14 -4
  52. openadapt_ml/models/base_adapter.py +10 -2
  53. openadapt_ml/models/providers/__init__.py +288 -0
  54. openadapt_ml/models/providers/anthropic.py +266 -0
  55. openadapt_ml/models/providers/base.py +299 -0
  56. openadapt_ml/models/providers/google.py +376 -0
  57. openadapt_ml/models/providers/openai.py +342 -0
  58. openadapt_ml/models/qwen_vl.py +46 -19
  59. openadapt_ml/perception/__init__.py +35 -0
  60. openadapt_ml/perception/integration.py +399 -0
  61. openadapt_ml/retrieval/README.md +226 -0
  62. openadapt_ml/retrieval/USAGE.md +391 -0
  63. openadapt_ml/retrieval/__init__.py +91 -0
  64. openadapt_ml/retrieval/demo_retriever.py +843 -0
  65. openadapt_ml/retrieval/embeddings.py +630 -0
  66. openadapt_ml/retrieval/index.py +194 -0
  67. openadapt_ml/retrieval/retriever.py +162 -0
  68. openadapt_ml/runtime/__init__.py +50 -0
  69. openadapt_ml/runtime/policy.py +27 -14
  70. openadapt_ml/runtime/safety_gate.py +471 -0
  71. openadapt_ml/schema/__init__.py +113 -0
  72. openadapt_ml/schema/converters.py +588 -0
  73. openadapt_ml/schema/episode.py +470 -0
  74. openadapt_ml/scripts/capture_screenshots.py +530 -0
  75. openadapt_ml/scripts/compare.py +102 -61
  76. openadapt_ml/scripts/demo_policy.py +4 -1
  77. openadapt_ml/scripts/eval_policy.py +19 -14
  78. openadapt_ml/scripts/make_gif.py +1 -1
  79. openadapt_ml/scripts/prepare_synthetic.py +16 -17
  80. openadapt_ml/scripts/train.py +98 -75
  81. openadapt_ml/segmentation/README.md +920 -0
  82. openadapt_ml/segmentation/__init__.py +97 -0
  83. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  84. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  85. openadapt_ml/segmentation/annotator.py +610 -0
  86. openadapt_ml/segmentation/cache.py +290 -0
  87. openadapt_ml/segmentation/cli.py +674 -0
  88. openadapt_ml/segmentation/deduplicator.py +656 -0
  89. openadapt_ml/segmentation/frame_describer.py +788 -0
  90. openadapt_ml/segmentation/pipeline.py +340 -0
  91. openadapt_ml/segmentation/schemas.py +622 -0
  92. openadapt_ml/segmentation/segment_extractor.py +634 -0
  93. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  94. openadapt_ml/training/benchmark_viewer.py +3255 -19
  95. openadapt_ml/training/shared_ui.py +7 -7
  96. openadapt_ml/training/stub_provider.py +57 -35
  97. openadapt_ml/training/trainer.py +255 -441
  98. openadapt_ml/training/trl_trainer.py +403 -0
  99. openadapt_ml/training/viewer.py +323 -108
  100. openadapt_ml/training/viewer_components.py +180 -0
  101. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
  102. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  103. openadapt_ml/benchmarks/base.py +0 -366
  104. openadapt_ml/benchmarks/data_collection.py +0 -432
  105. openadapt_ml/benchmarks/runner.py +0 -381
  106. openadapt_ml/benchmarks/waa.py +0 -704
  107. openadapt_ml/schemas/__init__.py +0 -53
  108. openadapt_ml/schemas/sessions.py +0 -122
  109. openadapt_ml/schemas/validation.py +0 -252
  110. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  111. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  112. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,125 +1,31 @@
1
1
  """Benchmark integration for openadapt-ml.
2
2
 
3
- This module provides interfaces and utilities for evaluating GUI agents
4
- on standardized benchmarks like Windows Agent Arena (WAA), OSWorld,
5
- WebArena, and others.
3
+ This module provides ML-specific agents for benchmark evaluation.
4
+ These agents wrap openadapt-ml internals (trained policies, API adapters).
6
5
 
7
- Core classes:
8
- - BenchmarkAdapter: Abstract interface for benchmark integration
9
- - BenchmarkAgent: Abstract interface for agents to be evaluated
10
- - BenchmarkTask, BenchmarkObservation, BenchmarkAction: Data classes
11
-
12
- Agent implementations:
13
- - PolicyAgent: Wraps openadapt-ml AgentPolicy
14
- - APIBenchmarkAgent: Uses hosted VLM APIs (Claude, GPT-5.1)
15
- - ScriptedAgent: Follows predefined action sequence
16
- - RandomAgent: Takes random actions (baseline)
17
-
18
- Evaluation:
19
- - evaluate_agent_on_benchmark: Run agent on benchmark tasks
20
- - compute_metrics: Compute aggregate metrics from results
21
-
22
- Example:
6
+ For benchmark infrastructure (adapters, runners, viewers), use openadapt-evals:
23
7
  ```python
24
- from openadapt_ml.benchmarks import (
25
- BenchmarkAdapter,
26
- PolicyAgent,
27
- APIBenchmarkAgent,
8
+ from openadapt_evals import (
9
+ WAAMockAdapter,
10
+ WAALiveAdapter,
28
11
  evaluate_agent_on_benchmark,
29
- compute_metrics,
30
12
  )
31
-
32
- # Create adapter for specific benchmark
33
- adapter = WAAAdapter(waa_repo_path="/path/to/WAA")
34
-
35
- # Wrap policy as benchmark agent
36
- agent = PolicyAgent(policy)
37
-
38
- # Or use API-backed agent for baselines
39
- agent = APIBenchmarkAgent(provider="anthropic") # Claude
40
- agent = APIBenchmarkAgent(provider="openai") # GPT-5.1
41
-
42
- # Run evaluation
43
- results = evaluate_agent_on_benchmark(agent, adapter, max_steps=50)
44
-
45
- # Compute metrics
46
- metrics = compute_metrics(results)
47
- print(f"Success rate: {metrics['success_rate']:.1%}")
48
13
  ```
14
+
15
+ ML-specific agents (only available in openadapt-ml):
16
+ - PolicyAgent: Wraps openadapt_ml.runtime.policy.AgentPolicy
17
+ - APIBenchmarkAgent: Uses openadapt_ml.models.api_adapter.ApiVLMAdapter
18
+ - UnifiedBaselineAgent: Uses openadapt_ml.baselines adapters
49
19
  """
50
20
 
51
21
  from openadapt_ml.benchmarks.agent import (
52
22
  APIBenchmarkAgent,
53
- BenchmarkAgent,
54
23
  PolicyAgent,
55
- RandomAgent,
56
- ScriptedAgent,
57
- )
58
- from openadapt_ml.benchmarks.base import (
59
- BenchmarkAction,
60
- BenchmarkAdapter,
61
- BenchmarkObservation,
62
- BenchmarkResult,
63
- BenchmarkTask,
64
- StaticDatasetAdapter,
65
- UIElement,
24
+ UnifiedBaselineAgent,
66
25
  )
67
- from openadapt_ml.benchmarks.runner import (
68
- EvaluationConfig,
69
- compute_domain_metrics,
70
- compute_metrics,
71
- evaluate_agent_on_benchmark,
72
- )
73
- from openadapt_ml.benchmarks.waa import WAAAdapter, WAAConfig, WAAMockAdapter
74
-
75
- # Azure orchestration (lazy import to avoid requiring azure-ai-ml)
76
- def _get_azure_classes():
77
- from openadapt_ml.benchmarks.azure import (
78
- AzureConfig,
79
- AzureWAAOrchestrator,
80
- estimate_cost,
81
- )
82
- return AzureConfig, AzureWAAOrchestrator, estimate_cost
83
-
84
26
 
85
27
  __all__ = [
86
- # Base classes
87
- "BenchmarkAdapter",
88
- "BenchmarkTask",
89
- "BenchmarkObservation",
90
- "BenchmarkAction",
91
- "BenchmarkResult",
92
- "StaticDatasetAdapter",
93
- "UIElement",
94
- # Agents
95
- "BenchmarkAgent",
96
28
  "PolicyAgent",
97
29
  "APIBenchmarkAgent",
98
- "ScriptedAgent",
99
- "RandomAgent",
100
- # Evaluation
101
- "EvaluationConfig",
102
- "evaluate_agent_on_benchmark",
103
- "compute_metrics",
104
- "compute_domain_metrics",
105
- # WAA
106
- "WAAAdapter",
107
- "WAAConfig",
108
- "WAAMockAdapter",
109
- # Azure (lazy-loaded)
110
- "AzureConfig",
111
- "AzureWAAOrchestrator",
112
- "estimate_cost",
30
+ "UnifiedBaselineAgent",
113
31
  ]
114
-
115
-
116
- # Lazy loading for Azure classes (avoids requiring azure-ai-ml for basic usage)
117
- def __getattr__(name: str):
118
- if name in ("AzureConfig", "AzureWAAOrchestrator", "estimate_cost"):
119
- from openadapt_ml.benchmarks.azure import (
120
- AzureConfig,
121
- AzureWAAOrchestrator,
122
- estimate_cost,
123
- )
124
- return {"AzureConfig": AzureConfig, "AzureWAAOrchestrator": AzureWAAOrchestrator, "estimate_cost": estimate_cost}[name]
125
- raise AttributeError(f"module {__name__!r} has no attribute {name!r}")