openadapt-ml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. openadapt_ml/__init__.py +0 -0
  2. openadapt_ml/benchmarks/__init__.py +125 -0
  3. openadapt_ml/benchmarks/agent.py +825 -0
  4. openadapt_ml/benchmarks/azure.py +761 -0
  5. openadapt_ml/benchmarks/base.py +366 -0
  6. openadapt_ml/benchmarks/cli.py +884 -0
  7. openadapt_ml/benchmarks/data_collection.py +432 -0
  8. openadapt_ml/benchmarks/runner.py +381 -0
  9. openadapt_ml/benchmarks/waa.py +704 -0
  10. openadapt_ml/cloud/__init__.py +5 -0
  11. openadapt_ml/cloud/azure_inference.py +441 -0
  12. openadapt_ml/cloud/lambda_labs.py +2445 -0
  13. openadapt_ml/cloud/local.py +790 -0
  14. openadapt_ml/config.py +56 -0
  15. openadapt_ml/datasets/__init__.py +0 -0
  16. openadapt_ml/datasets/next_action.py +507 -0
  17. openadapt_ml/evals/__init__.py +23 -0
  18. openadapt_ml/evals/grounding.py +241 -0
  19. openadapt_ml/evals/plot_eval_metrics.py +174 -0
  20. openadapt_ml/evals/trajectory_matching.py +486 -0
  21. openadapt_ml/grounding/__init__.py +45 -0
  22. openadapt_ml/grounding/base.py +236 -0
  23. openadapt_ml/grounding/detector.py +570 -0
  24. openadapt_ml/ingest/__init__.py +43 -0
  25. openadapt_ml/ingest/capture.py +312 -0
  26. openadapt_ml/ingest/loader.py +232 -0
  27. openadapt_ml/ingest/synthetic.py +1102 -0
  28. openadapt_ml/models/__init__.py +0 -0
  29. openadapt_ml/models/api_adapter.py +171 -0
  30. openadapt_ml/models/base_adapter.py +59 -0
  31. openadapt_ml/models/dummy_adapter.py +42 -0
  32. openadapt_ml/models/qwen_vl.py +426 -0
  33. openadapt_ml/runtime/__init__.py +0 -0
  34. openadapt_ml/runtime/policy.py +182 -0
  35. openadapt_ml/schemas/__init__.py +53 -0
  36. openadapt_ml/schemas/sessions.py +122 -0
  37. openadapt_ml/schemas/validation.py +252 -0
  38. openadapt_ml/scripts/__init__.py +0 -0
  39. openadapt_ml/scripts/compare.py +1490 -0
  40. openadapt_ml/scripts/demo_policy.py +62 -0
  41. openadapt_ml/scripts/eval_policy.py +287 -0
  42. openadapt_ml/scripts/make_gif.py +153 -0
  43. openadapt_ml/scripts/prepare_synthetic.py +43 -0
  44. openadapt_ml/scripts/run_qwen_login_benchmark.py +192 -0
  45. openadapt_ml/scripts/train.py +174 -0
  46. openadapt_ml/training/__init__.py +0 -0
  47. openadapt_ml/training/benchmark_viewer.py +1538 -0
  48. openadapt_ml/training/shared_ui.py +157 -0
  49. openadapt_ml/training/stub_provider.py +276 -0
  50. openadapt_ml/training/trainer.py +2446 -0
  51. openadapt_ml/training/viewer.py +2970 -0
  52. openadapt_ml-0.1.0.dist-info/METADATA +818 -0
  53. openadapt_ml-0.1.0.dist-info/RECORD +55 -0
  54. openadapt_ml-0.1.0.dist-info/WHEEL +4 -0
  55. openadapt_ml-0.1.0.dist-info/licenses/LICENSE +21 -0
File without changes
@@ -0,0 +1,125 @@
1
+ """Benchmark integration for openadapt-ml.
2
+
3
+ This module provides interfaces and utilities for evaluating GUI agents
4
+ on standardized benchmarks like Windows Agent Arena (WAA), OSWorld,
5
+ WebArena, and others.
6
+
7
+ Core classes:
8
+ - BenchmarkAdapter: Abstract interface for benchmark integration
9
+ - BenchmarkAgent: Abstract interface for agents to be evaluated
10
+ - BenchmarkTask, BenchmarkObservation, BenchmarkAction: Data classes
11
+
12
+ Agent implementations:
13
+ - PolicyAgent: Wraps openadapt-ml AgentPolicy
14
+ - APIBenchmarkAgent: Uses hosted VLM APIs (Claude, GPT-5.1)
15
+ - ScriptedAgent: Follows predefined action sequence
16
+ - RandomAgent: Takes random actions (baseline)
17
+
18
+ Evaluation:
19
+ - evaluate_agent_on_benchmark: Run agent on benchmark tasks
20
+ - compute_metrics: Compute aggregate metrics from results
21
+
22
+ Example:
23
+ ```python
24
+ from openadapt_ml.benchmarks import (
25
+ WAAAdapter,
26
+ PolicyAgent,
27
+ APIBenchmarkAgent,
28
+ evaluate_agent_on_benchmark,
29
+ compute_metrics,
30
+ )
31
+
32
+ # Create adapter for specific benchmark
33
+ adapter = WAAAdapter(waa_repo_path="/path/to/WAA")
34
+
35
+ # Wrap policy as benchmark agent
36
+ agent = PolicyAgent(policy)
37
+
38
+ # Or use API-backed agent for baselines
39
+ agent = APIBenchmarkAgent(provider="anthropic") # Claude
40
+ agent = APIBenchmarkAgent(provider="openai") # GPT-5.1
41
+
42
+ # Run evaluation
43
+ results = evaluate_agent_on_benchmark(agent, adapter, max_steps=50)
44
+
45
+ # Compute metrics
46
+ metrics = compute_metrics(results)
47
+ print(f"Success rate: {metrics['success_rate']:.1%}")
48
+ ```
49
+ """
50
+
51
+ from openadapt_ml.benchmarks.agent import (
52
+ APIBenchmarkAgent,
53
+ BenchmarkAgent,
54
+ PolicyAgent,
55
+ RandomAgent,
56
+ ScriptedAgent,
57
+ )
58
+ from openadapt_ml.benchmarks.base import (
59
+ BenchmarkAction,
60
+ BenchmarkAdapter,
61
+ BenchmarkObservation,
62
+ BenchmarkResult,
63
+ BenchmarkTask,
64
+ StaticDatasetAdapter,
65
+ UIElement,
66
+ )
67
+ from openadapt_ml.benchmarks.runner import (
68
+ EvaluationConfig,
69
+ compute_domain_metrics,
70
+ compute_metrics,
71
+ evaluate_agent_on_benchmark,
72
+ )
73
+ from openadapt_ml.benchmarks.waa import WAAAdapter, WAAConfig, WAAMockAdapter
74
+
75
+ # Azure orchestration (lazy import to avoid requiring azure-ai-ml)
76
def _get_azure_classes():
    """Import the Azure orchestration objects on demand and return them.

    The import statements live inside the function body, so
    ``openadapt_ml.benchmarks.azure`` is only imported when this helper is
    actually called, not when the package itself is imported.

    Returns:
        The 3-tuple ``(AzureConfig, AzureWAAOrchestrator, estimate_cost)``,
        in that order.
    """
    from openadapt_ml.benchmarks.azure import AzureConfig as config_cls
    from openadapt_ml.benchmarks.azure import AzureWAAOrchestrator as orchestrator_cls
    from openadapt_ml.benchmarks.azure import estimate_cost as cost_fn

    azure_exports = (config_cls, orchestrator_cls, cost_fn)
    return azure_exports
83
+
84
+
85
+ __all__ = [
86
+ # Base classes
87
+ "BenchmarkAdapter",
88
+ "BenchmarkTask",
89
+ "BenchmarkObservation",
90
+ "BenchmarkAction",
91
+ "BenchmarkResult",
92
+ "StaticDatasetAdapter",
93
+ "UIElement",
94
+ # Agents
95
+ "BenchmarkAgent",
96
+ "PolicyAgent",
97
+ "APIBenchmarkAgent",
98
+ "ScriptedAgent",
99
+ "RandomAgent",
100
+ # Evaluation
101
+ "EvaluationConfig",
102
+ "evaluate_agent_on_benchmark",
103
+ "compute_metrics",
104
+ "compute_domain_metrics",
105
+ # WAA
106
+ "WAAAdapter",
107
+ "WAAConfig",
108
+ "WAAMockAdapter",
109
+ # Azure (lazy-loaded)
110
+ "AzureConfig",
111
+ "AzureWAAOrchestrator",
112
+ "estimate_cost",
113
+ ]
114
+
115
+
116
+ # Lazy loading for Azure classes (avoids requiring azure-ai-ml for basic usage)
117
def __getattr__(name: str):
    """Resolve the lazily exported Azure names on first attribute access.

    Python calls a module-level ``__getattr__`` only after normal attribute
    lookup on the module has failed (PEP 562). That lets this package expose
    ``AzureConfig``, ``AzureWAAOrchestrator`` and ``estimate_cost`` without
    importing ``openadapt_ml.benchmarks.azure`` until one of them is requested.

    Args:
        name: The attribute name that was not found on this module.

    Returns:
        The object named ``name`` from ``openadapt_ml.benchmarks.azure``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    lazy_names = ("AzureConfig", "AzureWAAOrchestrator", "estimate_cost")

    # Guard clause: anything that is not a lazy Azure export is a genuine miss.
    if name not in lazy_names:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from openadapt_ml.benchmarks.azure import (
        AzureConfig,
        AzureWAAOrchestrator,
        estimate_cost,
    )

    resolved = dict(
        zip(lazy_names, (AzureConfig, AzureWAAOrchestrator, estimate_cost))
    )

    # Cache all three on the module so later lookups succeed through normal
    # attribute access and never re-enter this hook.
    globals().update(resolved)
    return resolved[name]