openadapt-ml 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.2.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/experiments/waa_demo/runner.py
CHANGED

@@ -26,7 +26,6 @@ import sys
 from typing import TYPE_CHECKING, Any
 
 from openadapt_ml.experiments.waa_demo.demos import (
-    DEMOS,
     format_demo_for_prompt,
     get_complete_demos,
     get_demo,
@@ -34,14 +33,16 @@ from openadapt_ml.experiments.waa_demo.demos import (
 )
 from openadapt_ml.experiments.waa_demo.tasks import (
     TASKS,
-    WATask,
-    get_manual_tasks,
     get_recorded_tasks,
     get_task,
 )
 
 if TYPE_CHECKING:
-    from
+    from openadapt_evals import (
+        BenchmarkAction,
+        BenchmarkObservation,
+        BenchmarkTask,
+    )
 
 logger = logging.getLogger(__name__)
 
@@ -73,7 +74,9 @@ def cmd_list(args: argparse.Namespace) -> int:
     print()
     print("Tasks needing recorded demos on Windows:")
     for task in get_recorded_tasks():
-        print(
+        print(
+            f" - #{list(TASKS.keys())[list(TASKS.values()).index(task)]}: {task.instruction}"
+        )
 
     return 0
 
@@ -122,7 +125,9 @@ def cmd_prompt(args: argparse.Namespace) -> int:
     else:
         print(f"Task: {task.instruction}")
         print()
-        print(
+        print(
+            "Analyze the screenshot and provide the next action to complete this task."
+        )
     if demo and "[PLACEHOLDER" in demo:
         print()
         print("[Note: Demo not available - this would be zero-shot]")
@@ -208,6 +213,7 @@ Think step by step, then output the action on a new line starting with "ACTION:"
         """Lazily initialize the API adapter."""
         if self._adapter is None:
             from openadapt_ml.models.api_adapter import ApiVLMAdapter
+
             self._adapter = ApiVLMAdapter(
                 provider=self.provider,
                 api_key=self.api_key,
@@ -261,7 +267,7 @@ Think step by step, then output the action on a new line starting with "ACTION:"
         Returns:
             BenchmarkAction parsed from VLM response
         """
-        from
+        from openadapt_evals import BenchmarkAction
 
         adapter = self._get_adapter()
 
@@ -325,7 +331,9 @@ Think step by step, then output the action on a new line starting with "ACTION:"
             history_str = self._format_history(history)
             content_parts.append(f"Previous actions:\n{history_str}")
 
-        content_parts.append(
+        content_parts.append(
+            "\nAnalyze the current screenshot and provide the next action."
+        )
 
         sample: dict[str, Any] = {
             "messages": [
@@ -401,7 +409,7 @@ Think step by step, then output the action on a new line starting with "ACTION:"
         Uses the same parsing logic as APIBenchmarkAgent.
         """
         import re
-        from
+        from openadapt_evals import BenchmarkAction
 
         raw_action = {"response": response}
 
@@ -457,7 +465,9 @@ Think step by step, then output the action on a new line starting with "ACTION:"
             r"TYPE\s*\(\s*[\"'](.+?)[\"']\s*\)", action_line, re.IGNORECASE
         )
         if type_match:
-            return BenchmarkAction(
+            return BenchmarkAction(
+                type="type", text=type_match.group(1), raw_action=raw_action
+            )
 
         # Parse KEY
         key_match = re.match(r"KEY\s*\(\s*(.+?)\s*\)", action_line, re.IGNORECASE)
@@ -502,11 +512,12 @@ def cmd_run(args: argparse.Namespace) -> int:
     This integrates with the benchmarks infrastructure to run either
     zero-shot or demo-conditioned evaluation on WAA tasks.
     """
-    from
+    from openadapt_evals import (
+        EvaluationConfig,
         WAAMockAdapter,
         compute_metrics,
+        evaluate_agent_on_benchmark,
     )
-    from openadapt_ml.benchmarks.runner import EvaluationConfig, evaluate_agent_on_benchmark
 
     print("WAA Demo-Conditioned Experiment Runner")
     print("=" * 80)
@@ -539,7 +550,7 @@ def cmd_run(args: argparse.Namespace) -> int:
     print(f"Running {len(task_ids)} tasks with complete demos")
 
     # Check for mock mode or real WAA
-    use_mock = getattr(args,
+    use_mock = getattr(args, "mock", False)
 
     if use_mock:
         print("Using mock adapter (no Windows required)")
@@ -599,7 +610,11 @@ def cmd_run(args: argparse.Namespace) -> int:
     except Exception as e:
         print(f"Error during evaluation: {e}")
         if "API key" in str(e) or "api_key" in str(e).lower():
-            key_name =
+            key_name = (
+                "ANTHROPIC_API_KEY"
+                if args.provider == "anthropic"
+                else "OPENAI_API_KEY"
+            )
             print(f"\nMake sure {key_name} is set in your environment or .env file.")
             return 1
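Note: the dominant change in this runner is a dependency migration. Benchmark types (`BenchmarkAction`, `BenchmarkObservation`, `BenchmarkTask`) and the evaluation entry points now come from the external `openadapt_evals` package, replacing the removed `openadapt_ml.benchmarks.runner`, `base`, and `waa` modules (see the deletions in the file list above). A minimal sketch of the new-style wiring, with assumptions flagged in comments:

```python
# Sketch only. The imports are confirmed by this diff; EvaluationConfig's
# fields and evaluate_agent_on_benchmark's argument order are assumptions.
from openadapt_evals import (
    EvaluationConfig,
    WAAMockAdapter,
    compute_metrics,
    evaluate_agent_on_benchmark,
)

adapter = WAAMockAdapter()   # mock adapter: "no Windows required"
config = EvaluationConfig()  # field values not shown in this diff
agent = ...                  # e.g. the APIBenchmarkAgent defined in this module
results = evaluate_agent_on_benchmark(agent, adapter, config)  # order assumed
metrics = compute_metrics(results)
```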
openadapt_ml/export/parquet.py
CHANGED

@@ -7,7 +7,6 @@ Episode JSON remains the canonical representation.
 from __future__ import annotations
 
 import json
-from pathlib import Path
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -73,7 +72,11 @@ def to_parquet(
             # Extract action type value (enum -> string)
             action_type = None
             if step.action:
-                action_type =
+                action_type = (
+                    step.action.type.value
+                    if hasattr(step.action.type, "value")
+                    else step.action.type
+                )
 
             row = {
                 "episode_id": episode.episode_id,
@@ -84,8 +87,12 @@
                 "action_type": action_type,
                 "x": x,
                 "y": y,
-                "end_x": step.action.normalized_end[0]
-
+                "end_x": step.action.normalized_end[0]
+                if step.action and step.action.normalized_end
+                else None,
+                "end_y": step.action.normalized_end[1]
+                if step.action and step.action.normalized_end
+                else None,
                 "text": getattr(step.action, "text", None) if step.action else None,
                 "key": getattr(step.action, "key", None) if step.action else None,
                 "scroll_direction": (
@@ -131,33 +138,37 @@ def _write_summary(episodes: list[Episode], output_path: str) -> None:
     for episode in episodes:
         first_t = episode.steps[0].timestamp if episode.steps else None
         last_t = episode.steps[-1].timestamp if episode.steps else None
-        duration = (
+        duration = (
+            (last_t - first_t) if first_t is not None and last_t is not None else None
+        )
 
         # Extract action type values (enum -> string)
         first_action_type = None
         last_action_type = None
         if episode.steps and episode.steps[0].action:
             t = episode.steps[0].action.type
-            first_action_type = t.value if hasattr(t,
+            first_action_type = t.value if hasattr(t, "value") else t
         if episode.steps and episode.steps[-1].action:
             t = episode.steps[-1].action.type
-            last_action_type = t.value if hasattr(t,
-
-        summary_rows.append(
-
-
-
-
-
-
-
-
-
-
-
-
-
+            last_action_type = t.value if hasattr(t, "value") else t
+
+        summary_rows.append(
+            {
+                "episode_id": episode.episode_id,
+                "instruction": episode.instruction,
+                "task_id": getattr(episode, "task_id", None),
+                "step_count": len(episode.steps),
+                "duration": duration,
+                "success": getattr(episode, "success", None),
+                "first_action_type": first_action_type,
+                "last_action_type": last_action_type,
+                "metadata": (
+                    json.dumps(episode.metadata)
+                    if hasattr(episode, "metadata") and episode.metadata
+                    else None
+                ),
+            }
+        )
 
     summary_table = pa.Table.from_pylist(summary_rows)
     summary_path = str(output_path).replace(".parquet", "_summary.parquet")
@@ -255,7 +266,8 @@ def from_parquet(parquet_path: str) -> list[Episode]:
 
         episode = Episode(
             episode_id=str(episode_id),
-            instruction=group.iloc[0].get("instruction")
+            instruction=group.iloc[0].get("instruction")
+            or group.iloc[0].get("goal", ""),
             steps=steps,
             task_id=group.iloc[0].get("task_id"),
             metadata=metadata,
openadapt_ml/grounding/detector.py
CHANGED

@@ -20,7 +20,7 @@ from openadapt_ml.config import settings
 from openadapt_ml.grounding.base import GroundingModule, RegionCandidate
 
 if TYPE_CHECKING:
-    from PIL import Image
+    from PIL import Image
 
 
 class GeminiGrounder(GroundingModule):
@@ -104,7 +104,7 @@ class GeminiGrounder(GroundingModule):
 
         # Try to parse JSON from the response
         # Look for JSON array or object in the response
-        json_match = re.search(r
+        json_match = re.search(r"\[[\s\S]*\]|\{[\s\S]*\}", response_text)
         if not json_match:
             return candidates
 
@@ -340,11 +340,11 @@ Example output format:
             response_text = response.text
 
             # Try to extract JSON array from response
-            json_match = re.search(r
+            json_match = re.search(r"\[[\s\S]*\]", response_text)
             if not json_match:
                 # Maybe it's just a plain array
-                if response_text.strip().startswith(
-                    json_match = re.match(r
+                if response_text.strip().startswith("["):
+                    json_match = re.match(r".*", response_text)
                 else:
                     return []
 
@@ -369,13 +369,18 @@ Example output format:
                     max(0, min(1, y2 / screenshot.height)),
                 ]
 
-                normalized_elements.append(
-
-
-
-
-
-
+                normalized_elements.append(
+                    {
+                        "id": elem.get("id", len(normalized_elements) + 1),
+                        "label": elem.get(
+                            "label",
+                            f"Element {elem.get('id', len(normalized_elements) + 1)}",
+                        ),
+                        "bbox": norm_bbox,
+                        "type": elem.get("type", "other"),
+                        "text": elem.get("text", ""),
+                    }
+                )
 
             return normalized_elements
 
@@ -549,8 +554,7 @@ class DetectorGrounder(GroundingModule):
             self._backend = GeminiGrounder(**kwargs)
         elif backend == "omniparser":
             raise NotImplementedError(
-                "OmniParser backend not yet implemented. "
-                "Use backend='gemini' for now."
+                "OmniParser backend not yet implemented. Use backend='gemini' for now."
             )
         else:
             raise ValueError(f"Unknown backend: {backend}")
openadapt_ml/ingest/__init__.py
CHANGED

@@ -27,16 +27,18 @@ __all__ = [
 
 # Conditionally export capture functions if openadapt-capture is installed
 try:
-    from openadapt_ml.ingest.capture import (
+    from openadapt_ml.ingest.capture import (  # noqa: F401
         capture_to_episode,
         capture_to_session,
         load_captures_as_sessions,
     )
 
-    __all__.extend(
-
-
-
-
+    __all__.extend(
+        [
+            "capture_to_episode",
+            "capture_to_session",
+            "load_captures_as_sessions",
+        ]
+    )
 except ImportError:
     pass
openadapt_ml/ingest/capture.py
CHANGED

@@ -6,7 +6,6 @@ and convert them to the Episode/Step format used by openadapt-ml for training.
 
 from __future__ import annotations
 
-import uuid
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -101,7 +100,7 @@ def capture_to_episode(
     """
     try:
         from openadapt_capture import Capture
-        from openadapt_capture.events import (
+        from openadapt_capture.events import (  # noqa: F401
            EventType,
            KeyTypeEvent,
            MouseClickEvent,
@@ -135,7 +134,9 @@ def capture_to_episode(
     dir_name = capture_path.name
     if dir_name and dir_name != "capture":
         # Convert kebab-case/snake_case to readable text
-        instruction =
+        instruction = (
+            dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
+        )
     else:
         instruction = "Complete the recorded workflow"
 
@@ -155,9 +156,7 @@
         screenshot_path = _save_screenshot(screenshot, output_dir, episode_id, idx)
 
         # Normalize coordinates
-        norm_coords = _normalize_coords(
-            action.x, action.y, screen_width, screen_height
-        )
+        norm_coords = _normalize_coords(action.x, action.y, screen_width, screen_height)
 
         # Map event type to openadapt-ml ActionType
         event_type = action.type
@@ -174,15 +173,15 @@
         if isinstance(action.event, MouseDragEvent):
             end_x = action.event.x + action.event.dx
             end_y = action.event.y + action.event.dy
-            norm_end = _normalize_coords(
-
+            norm_end = _normalize_coords(end_x, end_y, screen_width, screen_height)
+            ml_action = ml_action.model_copy(
+                update={
+                    "normalized_end": norm_end,
+                    "raw": {
+                        "button": action.event.button,
+                    },
+                }
             )
-            ml_action = ml_action.model_copy(update={
-                "normalized_end": norm_end,
-                "raw": {
-                    "button": action.event.button,
-                },
-            })
 
         # Handle scroll events
         if isinstance(action.event, MouseScrollEvent):
@@ -197,13 +196,15 @@
             elif action.event.dx < 0:
                 scroll_direction = "left"
 
-            ml_action = ml_action.model_copy(
-
-
-            "
-
-
-
+            ml_action = ml_action.model_copy(
+                update={
+                    "scroll_direction": scroll_direction,
+                    "raw": {
+                        "dx": action.event.dx,
+                        "dy": action.event.dy,
+                    },
+                }
+            )
 
         # Handle keyboard events - include key names for special keys
         if action.keys:
@@ -227,7 +228,9 @@
         last_step = steps[-1]
         done_step = Step(
             step_index=len(steps),
-            observation=Observation(
+            observation=Observation(
+                screenshot_path=last_step.observation.screenshot_path
+            ),
             action=Action(type=ActionType.DONE),
             reasoning="Workflow complete.",
             timestamp=(last_step.timestamp or 0) + 0.1,
openadapt_ml/ingest/loader.py
CHANGED

@@ -8,9 +8,8 @@ from __future__ import annotations
 
 import json
 from pathlib import Path
-from typing import Any, Dict, List,
+from typing import Any, Dict, List, Union
 
-from pydantic import ValidationError
 
 from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step
 
@@ -143,7 +142,8 @@ def _dict_to_episode(data: Dict[str, Any], validate: bool = True) -> Episode:
         # Parse observation
         obs_data = step_data.get("observation", {})
         observation = Observation(
-            screenshot_path=obs_data.get("screenshot_path")
+            screenshot_path=obs_data.get("screenshot_path")
+            or obs_data.get("image_path"),
             raw=obs_data.get("raw") or obs_data.get("meta"),
             a11y_tree=obs_data.get("a11y_tree") or obs_data.get("accessibility_tree"),
             dom=obs_data.get("dom") or obs_data.get("dom_html"),
@@ -169,7 +169,10 @@ def _dict_to_episode(data: Dict[str, Any], validate: bool = True) -> Episode:
         normalized_end = None
         if action_data.get("normalized_end"):
             normalized_end = tuple(action_data["normalized_end"])
-        elif
+        elif (
+            action_data.get("end_x") is not None
+            and action_data.get("end_y") is not None
+        ):
             normalized_end = (action_data["end_x"], action_data["end_y"])
 
         action = Action(