openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- openadapt_ml/benchmarks/__init__.py +8 -0
- openadapt_ml/benchmarks/agent.py +90 -11
- openadapt_ml/benchmarks/azure.py +35 -6
- openadapt_ml/benchmarks/cli.py +4449 -201
- openadapt_ml/benchmarks/live_tracker.py +180 -0
- openadapt_ml/benchmarks/runner.py +41 -4
- openadapt_ml/benchmarks/viewer.py +1219 -0
- openadapt_ml/benchmarks/vm_monitor.py +610 -0
- openadapt_ml/benchmarks/waa.py +61 -4
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/benchmarks/waa_live.py +619 -0
- openadapt_ml/cloud/local.py +1555 -1
- openadapt_ml/cloud/ssh_tunnel.py +553 -0
- openadapt_ml/datasets/next_action.py +87 -68
- openadapt_ml/evals/grounding.py +26 -8
- openadapt_ml/evals/trajectory_matching.py +84 -36
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +717 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +265 -0
- openadapt_ml/ingest/__init__.py +3 -4
- openadapt_ml/ingest/capture.py +89 -81
- openadapt_ml/ingest/loader.py +116 -68
- openadapt_ml/ingest/synthetic.py +221 -159
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +817 -0
- openadapt_ml/retrieval/embeddings.py +629 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +160 -0
- openadapt_ml/runtime/policy.py +10 -10
- openadapt_ml/schema/__init__.py +104 -0
- openadapt_ml/schema/converters.py +541 -0
- openadapt_ml/schema/episode.py +457 -0
- openadapt_ml/scripts/compare.py +26 -16
- openadapt_ml/scripts/eval_policy.py +4 -5
- openadapt_ml/scripts/prepare_synthetic.py +14 -17
- openadapt_ml/scripts/train.py +81 -70
- openadapt_ml/training/benchmark_viewer.py +3225 -0
- openadapt_ml/training/trainer.py +120 -363
- openadapt_ml/training/trl_trainer.py +354 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
- openadapt_ml-0.2.0.dist-info/RECORD +86 -0
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
{openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openadapt-ml
-Version: 0.1.0
+Version: 0.2.0
 Summary: Model-agnostic, domain-agnostic ML engine for GUI automation agents
 Project-URL: Homepage, https://github.com/OpenAdaptAI/openadapt-ml
 Project-URL: Repository, https://github.com/OpenAdaptAI/openadapt-ml
@@ -43,15 +43,29 @@ Requires-Dist: pytest>=9.0.0; extra == 'dev'
 Requires-Dist: ruff>=0.1.0; extra == 'dev'
 Provides-Extra: lambda-labs
 Requires-Dist: requests>=2.28.0; extra == 'lambda-labs'
+Provides-Extra: parquet
+Requires-Dist: pyarrow>=14.0.0; extra == 'parquet'
+Provides-Extra: training
+Requires-Dist: datasets>=2.18.0; extra == 'training'
+Requires-Dist: trl>=0.12.0; extra == 'training'
 Description-Content-Type: text/markdown
 
 # OpenAdapt-ML
 
+[](https://badge.fury.io/py/openadapt-ml)
 [](https://opensource.org/licenses/MIT)
 [](https://www.python.org/)
 
 OpenAdapt-ML is a **model-agnostic, domain-agnostic ML engine** for GUI
-automation agents.
+automation agents. It sits above **TRL + Unsloth** (which we use directly for training performance) and provides the GUI-specific layer:
+
+- **Episode semantics**: Step/action/observation alignment, screenshot-action coupling, termination handling
+- **Demo-conditioned inference**: Retrieval-augmented prompting (validated: 33% → 100% first-action accuracy)
+- **Benchmark adapters**: WAA today, OSWorld/WebArena planned
+- **VLM adapters**: Adapters for leading state-of-the-art open-source GUI-agent models
+- **Training pipeline**: TRL + Unsloth integration for 2x faster training with 50% less VRAM
+
+OpenAdapt-ML is **not** a training framework, optimizer, hardware orchestrator, or experiment manager. We use TRL/Unsloth, Lambda Labs/Azure, and W&B/MLflow for those.
 
 It provides:
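
To make the "episode semantics" bullet concrete: 0.2.0 replaces the old `openadapt_ml/schemas/` package with `openadapt_ml/schema/` (see the file list above and the deleted `schemas/__init__.py` at the end of this diff). A minimal sketch, assuming the new package keeps the core types the old docstring lists (`Action`, `Observation`, `Step`, `Episode`); the field names and constructor arguments below are illustrative, not confirmed:

```python
# Illustrative only: type names come from the deleted schemas/__init__.py
# docstring; field names and constructor signatures in 0.2.0 are assumptions.
from openadapt_ml.schema import Action, Episode, Observation, Step

episode = Episode(
    goal="Log in to the demo app",
    steps=[
        Step(
            observation=Observation(screenshot_path="frames/000.png"),
            action=Action(type="click", x=0.42, y=0.31),
        ),
        Step(
            observation=Observation(screenshot_path="frames/001.png"),
            action=Action(type="done"),  # explicit termination handling
        ),
    ],
)
```
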
@@ -59,24 +73,41 @@ It provides:
 - **Synthetic semantic UI generation** for bootstrapping datasets.
 - **Dataset builders** that turn episodes into next-action SFT samples.
 - **VLM adapters** (Qwen3-VL, Qwen2.5-VL) using Hugging Face + PEFT.
--
+- **SFT training via TRL** with Unsloth optimizations for efficient fine-tuning.
 - A simple **runtime policy** API that predicts the next GUI action.
 
 The design is described in detail in [`docs/design.md`](docs/design.md).
 
 ---
 
-## 1.
+## 1. Installation
 
-### 1.1
+### 1.1 From PyPI (recommended)
 
-
+```bash
+# Install the package
+uv add openadapt-ml
+
+# For training with TRL (recommended for fine-tuning)
+uv add openadapt-ml[training]
+
+# For API-backed VLMs (Claude, GPT)
+uv add openadapt-ml[api]
+```
+
+### 1.2 From source (development)
 
 ```bash
+git clone https://github.com/OpenAdaptAI/openadapt-ml.git
+cd openadapt-ml
 uv sync
 ```
 
-
+---
+
+## 2. Quickstart
+
+### 2.1 Run a small demo policy
 
 Run a fast, model-free smoke test:
@@ -84,7 +115,7 @@ Run a fast, model-free smoke test:
 uv run python -m openadapt_ml.scripts.demo_policy --backend dummy
 ```
 
-###
+### 2.2 Run the synthetic login benchmark (end-to-end)
 
 On a machine with a suitable GPU, you can reproduce the Qwen3-VL synthetic
 login benchmark (train → eval base/FT → plot) with a single command:
 
@@ -138,7 +169,7 @@ For complete documentation including training setup, evaluation metrics, SoM mod
 
 ---
 
-##
+## 3. Repository Structure
 
 Key modules:
 
@@ -159,9 +190,9 @@ Key modules:
 - `openadapt_ml/models/dummy_adapter.py`
   - Tiny fake adapter used to validate training and runtime flows without
     loading a real VLM.
-- `openadapt_ml/training/
-  -
-
+- `openadapt_ml/training/trl_trainer.py`
+  - TRL-based SFT training (`train_with_trl`) with Unsloth optimizations
+    for 2x faster training and 50% less VRAM.
 - `openadapt_ml/runtime/policy.py`
   - `AgentPolicy` that formats inputs for a VLM and parses textual actions
     like `CLICK(x=..., y=...)` and `DONE()` into structured `Action`s.
@@ -184,12 +215,12 @@ Configs and docs:
 
 ---
 
-##
+## 4. Environment Setup
 
 OpenAdapt-ML targets **Python 3.12** and uses [`uv`](https://github.com/astral-sh/uv)
 for dependency management.
 
-###
+### 4.1 Install and sync
 
 From the repository root:
 
@@ -202,7 +233,7 @@ uv sync
 This will create a virtual environment (e.g. `.venv/`) and install all
 packages declared in `pyproject.toml`.
 
-###
+### 4.2 Working inside the environment
 
 Use `uv run` to execute Python modules and scripts with the synced
 environment:
@@ -215,12 +246,12 @@ You can also run `pytest` or other tools via `uv run`.
 
 ---
 
-##
+## 5. Synthetic Data & Datasets
 
 The v1 pipeline is validated on **synthetic, semantic UIs**, starting with a
 simple login flow.
 
-###
+### 5.1 Synthetic scenarios
 
 OpenAdapt-ML includes synthetic UI generators for structured GUI automation benchmarks.
 Currently two scenarios are supported:
@@ -255,7 +286,7 @@ A more complex registration form with first name, last name, email, password, co
 | Episode Success Rate | **100%** |
 | Episodes / Steps | 32 / 384 |
 
-###
+### 5.2 Generating synthetic data
 
 Synthetic data is generated on the fly by `generate_synthetic_sessions` in
 `openadapt_ml/ingest/synthetic.py` and used internally by the training
@@ -286,7 +317,7 @@ Each session contains episodes with:
 - An observation (screenshot path).
 - An action (e.g. `CLICK`, `TYPE`, `DONE`).
 
-###
+### 5.3 Next-action SFT samples
 
 Episodes are converted into SFT-style samples by
 `build_next_action_sft_samples` in `openadapt_ml/datasets/next_action.py`.
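
Putting §5.2 and §5.3 together, a hedged sketch of the sample-building path (neither function's signature is shown in this diff, so the argument lists below are assumptions):

```python
# Illustrative sketch combining the two documented entry points.
from openadapt_ml.datasets.next_action import build_next_action_sft_samples
from openadapt_ml.ingest.synthetic import generate_synthetic_sessions

sessions = generate_synthetic_sessions()  # synthetic login episodes
episodes = [ep for s in sessions for ep in s.episodes]

# Each sample pairs (goal + screenshot so far) with the next action as target.
samples = build_next_action_sft_samples(episodes)
print(len(samples), samples[0])
```
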
@@ -312,21 +343,20 @@ and its invariants, see `docs/design.md` §7.4.
 
 ---
 
-##
+## 6. Training
 
-Training
-
+Training uses **TRL (Transformer Reinforcement Learning)** with **Unsloth** optimizations
+for efficient VLM fine-tuning. This provides 2x faster training with 50% less VRAM compared
+to standard approaches.
 
-The training
+The training pipeline:
 
-1. Loads
-2.
-3.
-4.
-5. Instantiates a VLM adapter (e.g. `QwenVLAdapter`).
-6. Runs `train_supervised` over the dataset.
+1. Loads episodes from synthetic data or real recordings.
+2. Converts to TRL-compatible SFT format with images and chat messages.
+3. Fine-tunes using SFTTrainer with LoRA adapters.
+4. Generates checkpoints and training logs for visualization.
 
-###
+### 6.1 Qwen3-VL synthetic training
 
 Config: `configs/qwen3vl_synthetic.yaml`
 
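
Steps 2-3 of that pipeline map onto TRL's standard SFT entry points. A minimal sketch of the shape using the public `trl`, `peft`, and `datasets` APIs; this is not the package's own `train_with_trl` implementation (whose internals this diff does not show), the model id is a placeholder, and images are omitted for brevity:

```python
from datasets import Dataset
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

# Step 2: episodes converted to chat-style SFT rows.
train_rows = [{"messages": [
    {"role": "user", "content": "Goal: log in. What is the next action?"},
    {"role": "assistant", "content": "CLICK(x=0.42, y=0.31)"},
]}]

# Step 3: SFTTrainer with LoRA adapters.
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-0.5B-Instruct",  # placeholder checkpoint
    args=SFTConfig(output_dir="training_output", num_train_epochs=1),
    train_dataset=Dataset.from_list(train_rows),
    peft_config=LoraConfig(r=16, lora_alpha=32, target_modules="all-linear"),
)
trainer.train()  # Step 4: checkpoints and logs land under output_dir
```
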
@@ -353,7 +383,7 @@ This will:
 - Run a single-epoch supervised fine-tuning loop.
 - Print loss values as training progresses.
 
-###
+### 6.2 Qwen2.5-VL synthetic training
 
 Config: `configs/qwen2_5vl_synthetic.yaml`
 
@@ -378,7 +408,7 @@ format expected by the Qwen2.5-VL processor.
 > Note: Both configs are sized for **small synthetic smoke runs**, not
 > large-scale production training.
 
-###
+### 6.3 Qwen3-VL synthetic login benchmark (hero example)
 
 OpenAdapt-ML ships a **synthetic login** benchmark backed by Qwen3-VL,
 used to compare **base vs LoRA-fine-tuned** models on a hardened synthetic
@@ -413,7 +443,7 @@ It exposes step-level performance metrics, which let us visually answer the ques
 3. **Precision matters**: Fine-tuned models have excellent click precision (85-100% hit rate, <0.05 coord error) while API models struggle with the action format
 4. **Size vs specialization**: The fine-tuned 2B model outperforms the general-purpose Claude Sonnet 4.5, showing that domain-specific fine-tuning trumps raw model size
 
-###
+### 6.4 Set-of-Marks (SoM) Mode: 100% Accuracy
 
 With **Set-of-Marks** visual prompting, fine-tuned Qwen3-VL-2B achieves **100% accuracy** on both login (6-step) and registration (12-step) scenarios:
 
@@ -452,11 +482,11 @@ For the full SoM investigation report, see [`experiments/qwen_login/SOM_INVESTIG
 
 ---
 
-##
+## 7. Grounding Module
 
 OpenAdapt-ML includes a **grounding module** for locating UI elements on screenshots using natural language descriptions. This enables policy/grounding separation where the policy decides *what* to do and the grounder finds *where* to do it.
 
-###
+### 7.1 GeminiGrounder Demo
 
 The `GeminiGrounder` uses Google's Gemini vision API to locate UI elements:
 
@@ -475,7 +505,7 @@ if candidates:
     print(f"Found at {best.centroid} with {best.confidence:.0%} confidence")
 ```
 
-###
+### 7.2 Set-of-Marks (SoM) Support
 
 The grounding module includes functions for extracting all UI elements and overlaying numbered labels (Set-of-Marks):
 
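
As an illustration of the overlay idea (not the module's actual implementation, which this diff does not show), a minimal Set-of-Marks drawing sketch with Pillow:

```python
# Minimal SoM sketch: draw numbered labels on known element bboxes.
from PIL import Image, ImageDraw

def overlay_marks(image_path: str, bboxes: list[tuple[int, int, int, int]]) -> Image.Image:
    """Return a copy of the screenshot with numbered boxes drawn on it."""
    img = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(img)
    for i, (x0, y0, x1, y1) in enumerate(bboxes, start=1):
        draw.rectangle((x0, y0, x1, y1), outline="red", width=2)
        draw.text((x0 + 2, y0 + 2), str(i), fill="red")
    return img

# The model can then emit index-based actions, e.g. CLICK(element=3).
```
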
@@ -497,7 +527,7 @@ This enables element-based actions using indices instead of coordinates:
 
 See `docs/gemini_grounding.md` for full documentation and `examples/test_gemini_grounding.py` for a complete example.
 
-###
+### 7.3 Available Grounders
 
 | Grounder | Description | Latency | Use Case |
 |----------|-------------|---------|----------|
@@ -505,7 +535,7 @@ See `docs/gemini_grounding.md` for full documentation and `examples/test_gemini_
 | `OracleGrounder` | Ground-truth bboxes | ~0ms | Evaluation |
 | `DetectorGrounder` | Generic wrapper with backend selection | varies | Flexible |
 
-###
+### 7.4 Grounding Evaluation
 
 The `openadapt_ml.evals.grounding` module provides metrics for evaluating grounding accuracy:
 
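
The two quantities quoted in §6.3 (click hit rate and coordinate error) are simple to state. A sketch of the idea, not necessarily the module's exact definitions:

```python
# Hit rate: predicted click lands inside the ground-truth bbox.
# Coord error: distance from the click to the bbox center, in 0-1 screen units.
import math

Point = tuple[float, float]
BBox = tuple[float, float, float, float]  # x0, y0, x1, y1

def hit(pred: Point, bbox: BBox) -> bool:
    x, y = pred
    x0, y0, x1, y1 = bbox
    return x0 <= x <= x1 and y0 <= y <= y1

def coord_error(pred: Point, bbox: BBox) -> float:
    center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
    return math.dist(pred, center)
```
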
@@ -523,7 +553,7 @@ print(metrics)
 
 ---
 
-##
+## 8. VLM Adapters
 
 All VLM backends implement the shared `BaseVLMAdapter` interface in
 `openadapt_ml/models/base_adapter.py` (prepare inputs, compute loss, generate
@@ -542,7 +572,7 @@ Current adapters include:
 For full adapter internals and training-time vs runtime behavior, see
 `docs/design.md` §8.
 
-###
+### 8.1 API-backed adapters
 
 To use the API-backed adapter from Python, you can configure API keys via `.env`
 file, environment variables, or pass them explicitly:
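
For the `.env` route, the standard `python-dotenv` pattern applies. A small sketch; the exact variable names the adapter reads are not shown in this diff, so the conventional `ANTHROPIC_API_KEY`/`OPENAI_API_KEY` names below are assumptions:

```python
# Load API keys from a local .env file before constructing an API-backed adapter.
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # reads .env from the current working directory
anthropic_key = os.environ.get("ANTHROPIC_API_KEY")  # assumed variable name
openai_key = os.environ.get("OPENAI_API_KEY")        # assumed variable name
```
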
@@ -565,12 +595,12 @@ The existing CLI scripts `scripts/demo_policy.py` and
 
 ---
 
-##
+## 9. Runtime Policy & Demos
 
 The runtime policy is implemented in `openadapt_ml/runtime/policy.py` as
 `AgentPolicy`.
 
-###
+### 9.1 AgentPolicy
 
 `AgentPolicy` is initialized with a VLM adapter (dummy or real). Given an
 SFT-style sample, it:
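
The parsing half of that loop amounts to a small grammar over action strings. A hedged regex sketch of the `CLICK(x=..., y=...)` / `DONE()` format (illustrative; `openadapt_ml/runtime/policy.py` may implement this differently):

```python
# Sketch: parse the textual action format into a structured dict.
import re

CLICK_RE = re.compile(r"CLICK\(x=(?P<x>[\d.]+),\s*y=(?P<y>[\d.]+)\)")

def parse_action(text: str) -> dict:
    if text.strip() == "DONE()":
        return {"type": "done"}
    if m := CLICK_RE.search(text):
        return {"type": "click", "x": float(m.group("x")), "y": float(m.group("y"))}
    raise ValueError(f"unrecognized action: {text!r}")

assert parse_action("CLICK(x=0.42, y=0.31)") == {"type": "click", "x": 0.42, "y": 0.31}
```
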
@@ -581,7 +611,7 @@ SFT-style sample, it:
   - `DONE()`
 3. Returns a structured `Action` plus an optional free-form `thought`.
 
-###
+### 9.2 Demo script
 
 `openadapt_ml/scripts/demo_policy.py` demonstrates how to use
 `AgentPolicy` with different backends.
@@ -613,7 +643,7 @@ Each invocation will:
 
 ---
 
-##
+## 10. Testing
 
 Basic tests are provided under `tests/`.
 
@@ -623,26 +653,26 @@ Run the test suite with:
 uv run pytest
 ```
 
-
+Key test files:
 
-- `tests/test_training_dummy.py`
-
+- `tests/test_training_dummy.py` - Tests TRL training configuration and sample conversion
+- `tests/test_local_cli.py` - Tests local training CLI commands (status, check, viewer)
 
 ---
 
-##
+## 11. Training on Real Data
 
 OpenAdapt-ML supports training on real GUI recordings from two sources:
 1. **openadapt-capture** - New lightweight recording format
 2. **OpenAdapt database** - Original OpenAdapt recordings (legacy)
 
-###
+### 11.1 Training on openadapt-capture recordings
 
 [openadapt-capture](https://github.com/OpenAdaptAI/openadapt-capture) is a lightweight GUI recording tool.
 
 ```bash
 # Install openadapt-capture
-uv
+uv add openadapt-capture
 
 # Record a workflow (e.g., turning off Night Shift)
 openadapt-capture record --output ~/captures/turn-off-nightshift
@@ -656,7 +686,7 @@ uv run python -m openadapt_ml.scripts.train \
 
 The goal is automatically derived from the directory name (e.g., `"Turn off nightshift"`).
 
-###
+### 11.2 Compare human vs AI predictions
 
 ```bash
 uv run python -m openadapt_ml.scripts.compare \
@@ -673,11 +703,11 @@ The comparison viewer shows:
 
 ---
 
-##
+## 12. Local Training (CUDA / Apple Silicon)
 
 Train locally on your own GPU. Auto-detects CUDA or Apple Silicon (MPS).
 
-###
+### 12.1 Quick start
 
 ```bash
 # Train on a capture (auto-detects device and config)
@@ -686,7 +716,7 @@ uv run python -m openadapt_ml.cloud.local train \
   --open  # Opens dashboard in browser
 ```
 
-###
+### 12.2 Training workflow
 
 ```bash
 # Check device and training status
@@ -713,11 +743,11 @@ uv run python -m openadapt_ml.cloud.local compare \
 
 ---
 
-##
+## 13. Cloud GPU Training (Lambda Labs)
 
 For faster training on powerful GPUs, use Lambda Labs. Full documentation: [`docs/cloud_gpu_training.md`](docs/cloud_gpu_training.md).
 
-###
+### 13.1 Quick start
 
 ```bash
 # Set API key
@@ -729,7 +759,7 @@ uv run python -m openadapt_ml.cloud.lambda_labs train \
   --goal "Turn off Night Shift in System Settings"
 ```
 
-###
+### 13.2 Manual workflow
 
 ```bash
 # List available instances and pricing
@@ -751,7 +781,7 @@ uv run python -m openadapt_ml.cloud.lambda_labs download <instance_id>
 uv run python -m openadapt_ml.cloud.lambda_labs terminate <instance_id>
 ```
 
-###
+### 13.3 Training visualization
 
 The training process generates:
 - **`training_output/dashboard.html`** - Real-time training dashboard with loss curves
@@ -790,9 +820,15 @@ uv run python -m openadapt_ml.cloud.local serve --port 8080 --open
 - `Home` / `End` - First/last frame
 - `O` - Toggle click overlay
 
+**Benchmark Viewer:**
+
+
+
+*View benchmark evaluation results with task-level filtering, success/failure status, and run comparison. Shows Claude achieving 30% on mock evaluation tasks (simulated environment for testing the pipeline - real WAA evaluation requires Windows VMs).*
+
 ---
 
-##
+## 14. Limitations & Notes
 
 - **Apple Silicon / bitsandbytes**:
   - Example configs are sized for CPU / Apple Silicon development runs; see
@@ -805,12 +841,18 @@
 - **Evaluation**:
   - v1 focuses on smoke tests and qualitative behavior on synthetic data.
     More formal evaluation scripts and metrics are planned.
+- **Windows Agent Arena (WAA) on Azure**:
+  - WAA requires nested virtualization (Windows VM inside Docker via QEMU)
+  - Azure ML managed compute does not support nested virtualization
+  - For real WAA evaluation, use dedicated VMs with Dv3/Ev3 series or run locally
+  - Mock evaluation (`test-mock`) validates the pipeline without Windows VMs
+  - See `CLAUDE.md` for detailed workarounds and infrastructure setup
 
 For deeper architectural details, see [`docs/design.md`](docs/design.md).
 
 ---
 
-##
+## 15. Roadmap
 
 For the up-to-date, prioritized roadmap (including concrete implementation
 targets and agent-executable acceptance criteria), see
openadapt_ml-0.2.0.dist-info/RECORD ADDED

@@ -0,0 +1,86 @@
+openadapt_ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+openadapt_ml/config.py,sha256=eH5WTKRPkkidjkNb25Wn_dUAizUQTsVPMYLDq_ekPJQ,1865
+openadapt_ml/benchmarks/__init__.py,sha256=RRAoXm_al-DgStCKCycnC60_iZo52mLmUu5nN5IPfxY,3855
+openadapt_ml/benchmarks/agent.py,sha256=kuq-dWqWJH8ogjklAFdnbG_6zVzHtd6Ab2rPxRjvHtU,29724
+openadapt_ml/benchmarks/azure.py,sha256=AI5ZdsNxmQ197ymOj68lTLAtl-eByuguhACqv7UZhIg,28145
+openadapt_ml/benchmarks/base.py,sha256=d7T_zMlMPlN0beDWkpzOvOHYQO6QnsePLQ45iKbi66Y,11667
+openadapt_ml/benchmarks/cli.py,sha256=ElBZDcmDOpA0uAoHYKRx3fLAJ5v8dH4R9SxfhmwTZWw,204898
+openadapt_ml/benchmarks/data_collection.py,sha256=EYOsYnFQifF3MXD0TZxznd-HbODiovnGDtxGjSMpO-Y,14652
+openadapt_ml/benchmarks/live_tracker.py,sha256=1SukwgRYbAzSMzHOhPZOSgZ58L44CYtX-KOfAzyAJZw,5130
+openadapt_ml/benchmarks/runner.py,sha256=a52GasPKEnNgevxilQAI1z8FnzwWddLiDY60NYWxpZk,13616
+openadapt_ml/benchmarks/viewer.py,sha256=Mjbt67gfnSw29rNhU4JYL0cdAc-UZ3QpxmG6GhKnD68,41111
+openadapt_ml/benchmarks/vm_monitor.py,sha256=NKhLF66hcBqxIDHk-iPKn31AfzpXyE-9IkBTYX2k70I,20330
+openadapt_ml/benchmarks/waa.py,sha256=V_RtDfOLW2jD8MN-M9YJYvt67NQCXrUkVxOtcNYVLHo,27008
+openadapt_ml/benchmarks/waa_live.py,sha256=U92KaBWCpJmwqSURJeG5biEolTyBCvyCikWRz05IV-A,22982
+openadapt_ml/benchmarks/waa_deploy/Dockerfile,sha256=m9Nsei1v_m0jF1HWc7yCMz-_wD3rHHv9H9HBC2lFVSE,10969
+openadapt_ml/benchmarks/waa_deploy/__init__.py,sha256=KV71HrrgETytfY0i4vFSi-yM0KjoQP2hd9Bl03cZ9yc,320
+openadapt_ml/benchmarks/waa_deploy/api_agent.py,sha256=dbazhRxc554901LFiVuj6sMmOgoHtTKl8XIAiIJrFWU,20024
+openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat,sha256=YxgrSWh76zLijlpxEpulWf9To2JtJ-yR42lr2WyTXiY,1496
+openadapt_ml/cloud/__init__.py,sha256=XYrvxivJeZ8qYnuGod5kodMlm3iT2OK2GAApO3CNB0c,133
+openadapt_ml/cloud/azure_inference.py,sha256=lqkG86Dn6x2Rp7Y-X9tK_8mn0QfRvaDQNRHg-kJkQyc,15736
+openadapt_ml/cloud/lambda_labs.py,sha256=jZ700gNR8zxO2qGa7yqByKXgb4-18zN30YQdDZ3Fbuk,102206
+openadapt_ml/cloud/local.py,sha256=AX7gxgkwGwOJ1ogp-rQefKDyK3MDNMWA9W2qkwaV66g,104782
+openadapt_ml/cloud/ssh_tunnel.py,sha256=q4VEKT4cEU0-hAP1t9fL2jRur0FMRxg8ZV6j1IQV45k,20824
+openadapt_ml/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+openadapt_ml/datasets/next_action.py,sha256=eNNwwdRE-pJa1DCgEIr9eTipZ7pDAEDVxkc9UPObJLs,21219
+openadapt_ml/evals/__init__.py,sha256=Kx7bSvPHwmoGVI3q1wS_lC17W2S32YHj0459JRqu6Ow,573
+openadapt_ml/evals/grounding.py,sha256=un4bWrhqwrJ4O9QqF40NfnhKeOMntGdCJcwXz6ZzNJ4,8447
+openadapt_ml/evals/plot_eval_metrics.py,sha256=gsikQ3MSUY7Pw61D8lB286q0MPpBL9E05UFHPrawViw,5237
+openadapt_ml/evals/trajectory_matching.py,sha256=p40wDDlD0AyiY6vsgkcp6FBDISyKKQKycsrEz7uGF8Y,20616
+openadapt_ml/experiments/demo_prompt/__init__.py,sha256=dwS0bI53jXMzHE-DPhb_mhmPdoqSZRIcNbV79wt8KPM,454
+openadapt_ml/experiments/demo_prompt/format_demo.py,sha256=rIXcjiYhI2YuLDUjY4iTxPp4ZmtvKibQQYTCfEn4lZs,6553
+openadapt_ml/experiments/demo_prompt/run_experiment.py,sha256=uchhadnqxauBXxlalTh7wXPLOOXk7NBY1mwXiN7rpHI,16309
+openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json,sha256=08oryOF126toTQDN9xciodavvfsaWNnXuBs0aULwpfI,5326
+openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json,sha256=u03VgYTQia_HzilzNjxdGLpUSdbo4SzmHqI-GXlvurg,26915
+openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json,sha256=FA1JgXXLor6on3lHlfJdNSuKzBca30ggH8IWSJEmmfA,11517
+openadapt_ml/experiments/waa_demo/__init__.py,sha256=9M8iLxO9GWAw-FIB-0tzsqaweLcO5EVP1Sc5BoK16iU,363
+openadapt_ml/experiments/waa_demo/demos.py,sha256=UwO0EYy8wUEggaBaI_cXuYe_jwSB1hx3ZtPf-z9bhjc,13796
+openadapt_ml/experiments/waa_demo/runner.py,sha256=OxgQhZIqhYeGDYmAcQLEsFh3B053rnuPL0ZEIoXz0bI,24327
+openadapt_ml/experiments/waa_demo/tasks.py,sha256=jw1QwbOt8xmWBW2lmBWcJzKBXssjv_e0j49MlC2rVJY,5425
+openadapt_ml/export/__init__.py,sha256=mKehKHOio4jGcK-3r0-pb446GdKMPs0O9hAu4S0_R7s,266
+openadapt_ml/export/__main__.py,sha256=0ObtWcdzf6p7gPwhNlCKpNm2FIhmusdYNkuk8tyt77U,149
+openadapt_ml/export/cli.py,sha256=goTKNq9cOO9wsdNluLMH_-f9kdWShH3FPP8sCZ6KaPI,2331
+openadapt_ml/export/parquet.py,sha256=1BGHJKJc302trn64mUbuhxYMBPNvH80QW5f8GM63njk,9664
+openadapt_ml/grounding/__init__.py,sha256=uMvcALFRXmKD6PHhqLZ24Y6zhRUs46_PnWYqiqJP5cM,1412
+openadapt_ml/grounding/base.py,sha256=mnjT25nxltZCD0VBzgIgj2kuCcB4sgXBN97MBaW5P6c,7688
+openadapt_ml/grounding/detector.py,sha256=z-6Y_jnUNnhviUjKv6okjJ0r13DmBiNZMzNJo0rTlBY,19786
+openadapt_ml/ingest/__init__.py,sha256=7YASU-pOXtjalcRJ3WCbGuEWne0aVZtypsfcEmp-yFE,1437
+openadapt_ml/ingest/capture.py,sha256=SR22U6M7hU_QAsCYAG4JaxGFHpLZg96k_SJ-tkXAgbs,10218
+openadapt_ml/ingest/loader.py,sha256=PPBFMA9d7oc2bjXgrdPCZQBnv3MvoDFQNJvXeI90_j8,9865
+openadapt_ml/ingest/synthetic.py,sha256=DSUyqbLxHtJjELitTP2C_3bv3-E0UW7P-RFAO9W8XFs,39302
+openadapt_ml/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+openadapt_ml/models/api_adapter.py,sha256=9EfQrXuFYIT-ea-wmGzJlM4thgVVDIRZnqqS_gL-PbU,6541
+openadapt_ml/models/base_adapter.py,sha256=AG40BhdDORwUNYYg3DM1wsErX57aEJrkK0tyn0tEKhI,2050
+openadapt_ml/models/dummy_adapter.py,sha256=h4Zu-rjWgtG1r8jRtcsrX-FZm8iImrhrTQ7TsLfjE8A,1581
+openadapt_ml/models/qwen_vl.py,sha256=sIJUtDRXAcz9zh3uRWOMYVOxdWIXlcM3vazdNOAsY_U,17239
+openadapt_ml/retrieval/README.md,sha256=j4gXhTo6yH-5cuw4ER4174V-U6TQakOVT6Hj4kj7B0I,5696
+openadapt_ml/retrieval/USAGE.md,sha256=XDIrX-94Z5nC-wvnBY5yF5gTqUYixxCC3wwUFvQx5YM,9278
+openadapt_ml/retrieval/__init__.py,sha256=xocb84riKLUCezUioKssFRhAQsnvexh4W932o368_qg,2726
+openadapt_ml/retrieval/demo_retriever.py,sha256=fYPLKzlG7yGHfV-F-TECBbxkMyP1fqdqPzTJ7G0oXYU,29184
+openadapt_ml/retrieval/embeddings.py,sha256=W4Bqo48Ds4BI9zZg1awkSi9p5kplRRALEjEgVh-jbsY,19239
+openadapt_ml/retrieval/index.py,sha256=UBFnSxp5T5eKt2txFcd0FytKCw1qxONZfxnFJVrduRQ,5710
+openadapt_ml/retrieval/retriever.py,sha256=idJcz4pUHgPHuZvX3VIUmO8Vs-iw4_1w6UUypynRfVY,4579
+openadapt_ml/runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+openadapt_ml/runtime/policy.py,sha256=M-OyhaE5gWh7e3KJ0Ip_YavlfRaEO7S4UKNjvqH_qsg,6724
+openadapt_ml/schema/__init__.py,sha256=W1Rx58WjFpUE2D1hdujD6tkxr5m8U3nE4JBMwdw7kLc,2787
+openadapt_ml/schema/converters.py,sha256=3qK1z8EATFaDi1M0w9T7PLiRtJu6OtQM7JG7qbE4EKU,18392
+openadapt_ml/schema/episode.py,sha256=_QQ34V39DYLaOx5GnH4mKHoXteekqRSq1C2aJe_Y_5Y,15399
+openadapt_ml/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+openadapt_ml/scripts/compare.py,sha256=IUi3lQfOV8qm1JMvPDAnk3UqT7vPYNjLGeN8qda1wXc,56921
+openadapt_ml/scripts/demo_policy.py,sha256=NqB1akCKT5dlLER5ToBpF15QYaWnbXtRgWBvrOb6HJc,2105
+openadapt_ml/scripts/eval_policy.py,sha256=Tk1QMNlkfcyHToAqdTgdLkJyEGyVLXMcE5EVmJb89Ng,10381
+openadapt_ml/scripts/make_gif.py,sha256=H9fevBZFH31_7vs-OROfg9A2U6eboWXUNjyU2-XLMqw,4439
+openadapt_ml/scripts/prepare_synthetic.py,sha256=RbnY3QiH_RFk2du3awXFn90jxJQNUkhWlei2NSQ1aUs,1109
+openadapt_ml/scripts/run_qwen_login_benchmark.py,sha256=NWIhCAFSX5pYKFRCec7RkrYtzvz2LNMqhDfXcKxlagM,5655
+openadapt_ml/scripts/train.py,sha256=BrDJxiZx1S8igpNi6hC287kh51qU5RtPcdVq1yxEJkQ,6685
+openadapt_ml/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+openadapt_ml/training/benchmark_viewer.py,sha256=iD56K467n0qS-nrcxxGZtABQs6qswB2je5Uj4xEacNI,174238
+openadapt_ml/training/shared_ui.py,sha256=7ZdBDo-__pHUMiuvSot-waYvpinAullRXQTsiPoruBw,4823
+openadapt_ml/training/stub_provider.py,sha256=fw2b0EzsCDDFhTZQIKR49BEFmSEN1d4g9rhFihzErWY,10508
+openadapt_ml/training/trainer.py,sha256=K4LMDe-GxnGZB9gt0IGMU67DcqtcggaaQIbJp4GYcYc,90713
+openadapt_ml/training/trl_trainer.py,sha256=9NQnNt2MDLUUUA_V8f2PwsDLeHnb2fOnmxol24-PyjU,11355
+openadapt_ml/training/viewer.py,sha256=7uA2SHW1Uh7v65s8sOSaYuAVUlyp6GsCOGy6YA0hnBQ,121132
+openadapt_ml-0.2.0.dist-info/METADATA,sha256=-pGXAMvRsWMHCdjsj3GzALIcAr0fPaMvEaUv8jkGYfA,28839
+openadapt_ml-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+openadapt_ml-0.2.0.dist-info/licenses/LICENSE,sha256=2E5UY67RVLedJuNnwGudkAMtfM3LZNUcHgmaL89TAfw,1068
+openadapt_ml-0.2.0.dist-info/RECORD,,
openadapt_ml/schemas/__init__.py DELETED
@@ -1,53 +0,0 @@
|
|
|
1
|
-
"""Schema definitions and validation for openadapt-ml.
|
|
2
|
-
|
|
3
|
-
Core data structures:
|
|
4
|
-
- Action: A single GUI action (click, type, scroll, etc.)
|
|
5
|
-
- Observation: GUI state observation (screenshot, accessibility tree, etc.)
|
|
6
|
-
- Step: One timestep containing observation + action
|
|
7
|
-
- Episode: A single task attempt / workflow instance
|
|
8
|
-
- Session: Container for multiple episodes
|
|
9
|
-
|
|
10
|
-
Validation:
|
|
11
|
-
- validate_episode(): Validate an Episode object
|
|
12
|
-
- validate_session(): Validate a Session object
|
|
13
|
-
- validate_episodes(): Validate a list of Episodes
|
|
14
|
-
- ValidationError: Raised on schema violations
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
from openadapt_ml.schemas.sessions import (
|
|
18
|
-
Action,
|
|
19
|
-
ActionType,
|
|
20
|
-
Episode,
|
|
21
|
-
Observation,
|
|
22
|
-
Session,
|
|
23
|
-
Step,
|
|
24
|
-
)
|
|
25
|
-
from openadapt_ml.schemas.validation import (
|
|
26
|
-
ValidationError,
|
|
27
|
-
summarize_episodes,
|
|
28
|
-
validate_action,
|
|
29
|
-
validate_episode,
|
|
30
|
-
validate_episodes,
|
|
31
|
-
validate_observation,
|
|
32
|
-
validate_session,
|
|
33
|
-
validate_step,
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
__all__ = [
|
|
37
|
-
# Core types
|
|
38
|
-
"Action",
|
|
39
|
-
"ActionType",
|
|
40
|
-
"Episode",
|
|
41
|
-
"Observation",
|
|
42
|
-
"Session",
|
|
43
|
-
"Step",
|
|
44
|
-
# Validation
|
|
45
|
-
"ValidationError",
|
|
46
|
-
"validate_action",
|
|
47
|
-
"validate_episode",
|
|
48
|
-
"validate_episodes",
|
|
49
|
-
"validate_observation",
|
|
50
|
-
"validate_session",
|
|
51
|
-
"validate_step",
|
|
52
|
-
"summarize_episodes",
|
|
53
|
-
]
|