openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/benchmarks/__init__.py +8 -0
- openadapt_ml/benchmarks/agent.py +90 -11
- openadapt_ml/benchmarks/azure.py +35 -6
- openadapt_ml/benchmarks/cli.py +4449 -201
- openadapt_ml/benchmarks/live_tracker.py +180 -0
- openadapt_ml/benchmarks/runner.py +41 -4
- openadapt_ml/benchmarks/viewer.py +1219 -0
- openadapt_ml/benchmarks/vm_monitor.py +610 -0
- openadapt_ml/benchmarks/waa.py +61 -4
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/benchmarks/waa_live.py +619 -0
- openadapt_ml/cloud/local.py +1555 -1
- openadapt_ml/cloud/ssh_tunnel.py +553 -0
- openadapt_ml/datasets/next_action.py +87 -68
- openadapt_ml/evals/grounding.py +26 -8
- openadapt_ml/evals/trajectory_matching.py +84 -36
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +717 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +265 -0
- openadapt_ml/ingest/__init__.py +3 -4
- openadapt_ml/ingest/capture.py +89 -81
- openadapt_ml/ingest/loader.py +116 -68
- openadapt_ml/ingest/synthetic.py +221 -159
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +817 -0
- openadapt_ml/retrieval/embeddings.py +629 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +160 -0
- openadapt_ml/runtime/policy.py +10 -10
- openadapt_ml/schema/__init__.py +104 -0
- openadapt_ml/schema/converters.py +541 -0
- openadapt_ml/schema/episode.py +457 -0
- openadapt_ml/scripts/compare.py +26 -16
- openadapt_ml/scripts/eval_policy.py +4 -5
- openadapt_ml/scripts/prepare_synthetic.py +14 -17
- openadapt_ml/scripts/train.py +81 -70
- openadapt_ml/training/benchmark_viewer.py +3225 -0
- openadapt_ml/training/trainer.py +120 -363
- openadapt_ml/training/trl_trainer.py +354 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
- openadapt_ml-0.2.0.dist-info/RECORD +86 -0
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Demo index for storing and retrieving demonstrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from openadapt_ml.retrieval.embeddings import TextEmbedder
|
|
9
|
+
from openadapt_ml.schema import Episode
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class DemoMetadata:
    """One indexed demonstration plus the features used to retrieve it.

    Bundles the recorded episode with optional descriptive fields and the
    text embedding that is filled in when the index is built.
    """

    # The recorded demonstration itself.
    episode: Episode
    # Application name associated with the demonstration, if known.
    app_name: Optional[str] = None
    # Domain associated with the demonstration, if known.
    domain: Optional[str] = None
    # Free-form extra information; each instance gets its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Computed at index time; stays empty until then.
    text_embedding: Dict[str, float] = field(default_factory=dict)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DemoIndex:
    """Index for demonstrations.

    Stores episodes together with their metadata and text embeddings.
    Typical usage is ``add``/``add_many`` followed by one ``build`` call;
    adding a demo afterwards marks the index as not fitted again.
    """

    def __init__(self) -> None:
        """Initialize an empty, unfitted demo index."""
        self.demos: List[DemoMetadata] = []
        self.embedder = TextEmbedder()
        self._is_fitted = False

    def _extract_app_name(self, episode: Episode) -> Optional[str]:
        """Extract app name from episode steps.

        Args:
            episode: Episode to extract from.

        Returns:
            The first non-empty ``observation.app_name`` found while walking
            the steps in order, or None if no step provides one.
        """
        for step in episode.steps:
            if step.observation and step.observation.app_name:
                return step.observation.app_name
        return None

    def _extract_domain(self, episode: Episode) -> Optional[str]:
        """Extract the host name from the first usable observation URL.

        Only URLs containing ``"://"`` are considered. The host is taken
        from the parsed URL, so ports, credentials, paths, query strings and
        fragments never end up in the result, and the value is lower-cased.
        A leading ``"www."`` is removed. URLs without a host (for example
        ``file:///...``) are skipped instead of yielding an empty domain.

        Args:
            episode: Episode to extract from.

        Returns:
            Domain if found, None otherwise.
        """
        # Function-scope import: the file's import block is left untouched.
        from urllib.parse import urlsplit

        for step in episode.steps:
            observation = step.observation
            if not observation or not observation.url:
                continue

            url = observation.url
            if "://" not in url:
                continue

            try:
                host = urlsplit(url).hostname
            except ValueError:
                # Malformed authority (e.g. broken IPv6 literal): try next step.
                continue
            if not host:
                continue

            # Remove www. prefix
            if host.startswith("www."):
                host = host[4:]
            if host:
                return host

        return None

    def add(
        self,
        episode: Episode,
        app_name: Optional[str] = None,
        domain: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Add an episode to the index.

        Args:
            episode: Episode to add.
            app_name: Optional app name (auto-extracted if not provided).
            domain: Optional domain (auto-extracted if not provided).
            metadata: Additional metadata for the episode.
        """
        # Auto-extract app_name and domain if not provided
        if app_name is None:
            app_name = self._extract_app_name(episode)
        if domain is None:
            domain = self._extract_domain(episode)

        self.demos.append(
            DemoMetadata(
                episode=episode,
                app_name=app_name,
                domain=domain,
                metadata=metadata or {},
            )
        )
        # New data invalidates previously computed embeddings.
        self._is_fitted = False

    def add_many(self, episodes: List[Episode]) -> None:
        """Add multiple episodes to the index.

        Each episode is added with auto-extracted app name and domain.

        Args:
            episodes: List of episodes to add.
        """
        for episode in episodes:
            self.add(episode)

    def build(self) -> None:
        """Build the index by computing embeddings.

        Fits the embedder on every demo's instruction text and then stores
        an embedding on each demo. This must be called after adding all
        demos and before retrieval. With no demos this is a no-op and the
        index stays unfitted.
        """
        if not self.demos:
            return

        # Fit embedder on all instruction texts
        self.embedder.fit([demo.episode.instruction for demo in self.demos])

        # Compute embeddings for each demo
        for demo in self.demos:
            demo.text_embedding = self.embedder.embed(demo.episode.instruction)

        self._is_fitted = True

    def is_empty(self) -> bool:
        """Check if the index is empty.

        Returns:
            True if no demos have been added.
        """
        return not self.demos

    def is_fitted(self) -> bool:
        """Check if the index has been built.

        Returns:
            True if build() completed and no demo was added since.
        """
        return self._is_fitted

    def get_all_demos(self) -> List[DemoMetadata]:
        """Get all demos in the index.

        Returns:
            The index's own list of DemoMetadata objects (not a copy).
        """
        return self.demos

    def get_apps(self) -> List[str]:
        """Get list of unique app names in the index.

        Returns:
            Sorted list of app names (excluding None).
        """
        return sorted({demo.app_name for demo in self.demos if demo.app_name is not None})

    def get_domains(self) -> List[str]:
        """Get list of unique domains in the index.

        Returns:
            Sorted list of domains (excluding None).
        """
        return sorted({demo.domain for demo in self.demos if demo.domain is not None})

    def __len__(self) -> int:
        """Return number of demos in the index.

        Returns:
            Number of demos.
        """
        return len(self.demos)

    def __repr__(self) -> str:
        """String representation of the index.

        Returns:
            String with the demo count and fitted status.
        """
        status = "fitted" if self._is_fitted else "not fitted"
        return f"DemoIndex({len(self.demos)} demos, {status})"
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Demo retriever for finding similar demonstrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
|
|
8
|
+
from openadapt_ml.retrieval.index import DemoIndex, DemoMetadata
|
|
9
|
+
from openadapt_ml.schema import Episode
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class RetrievalResult:
    """Scored outcome of matching one demo against a query.

    Attributes:
        demo: Metadata of the demo that was scored.
        score: Overall retrieval score; larger means more relevant.
        text_score: Part of the score that comes from text similarity.
        domain_bonus: Bonus that was added for an app/domain match.
    """

    demo: DemoMetadata
    score: float
    text_score: float
    domain_bonus: float
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DemoRetriever:
    """Retrieves top-K similar demonstrations from an index.

    Uses text similarity (TF-IDF cosine) with optional domain match bonus.
    """

    def __init__(
        self,
        index: DemoIndex,
        domain_bonus: float = 0.2,
    ) -> None:
        """Initialize the retriever.

        Args:
            index: DemoIndex to retrieve from.
            domain_bonus: Bonus score for domain match (default: 0.2).

        Raises:
            ValueError: If index is empty or not fitted.
        """
        if index.is_empty():
            raise ValueError("Cannot create retriever from empty index")
        if not index.is_fitted():
            raise ValueError("Index must be built before retrieval (call index.build())")

        self.index = index
        self.domain_bonus = domain_bonus

    def _score_demo(
        self,
        query_embedding,
        demo: DemoMetadata,
        app_context: Optional[str] = None,
    ) -> RetrievalResult:
        """Score one demo against an already-embedded query.

        Args:
            query_embedding: Embedding of the task text, as produced by the
                index's embedder.
            demo: Demo metadata to score.
            app_context: Optional app/domain context for bonus. None and the
                empty string both mean "no context" and never earn a bonus.

        Returns:
            RetrievalResult with computed scores.
        """
        text_score = self.index.embedder.cosine_similarity(
            query_embedding,
            demo.text_embedding,
        )

        bonus = 0.0
        # An empty context is a substring of every name, so it must not match.
        if app_context:
            needle = app_context.lower()
            app_match = bool(demo.app_name) and needle in demo.app_name.lower()
            domain_match = bool(demo.domain) and needle in demo.domain.lower()
            if app_match or domain_match:
                bonus = self.domain_bonus

        # Final score is text similarity + bonus
        return RetrievalResult(
            demo=demo,
            score=text_score + bonus,
            text_score=text_score,
            domain_bonus=bonus,
        )

    def _compute_score(
        self,
        task: str,
        demo: DemoMetadata,
        app_context: Optional[str] = None,
    ) -> RetrievalResult:
        """Compute retrieval score for a demo.

        Embeds ``task`` and delegates to ``_score_demo``. Bulk retrieval
        embeds the task once and calls ``_score_demo`` directly.

        Args:
            task: Task description to match against.
            demo: Demo metadata to score.
            app_context: Optional app/domain context for bonus.

        Returns:
            RetrievalResult with computed scores.
        """
        query_embedding = self.index.embedder.embed(task)
        return self._score_demo(query_embedding, demo, app_context)

    def retrieve(
        self,
        task: str,
        app_context: Optional[str] = None,
        top_k: int = 3,
    ) -> List[Episode]:
        """Retrieve top-K most similar demos.

        Args:
            task: Task description to find demos for.
            app_context: Optional app/domain context (e.g., "Chrome", "github.com").
            top_k: Number of demos to retrieve.

        Returns:
            List of Episode objects, ordered by relevance (most similar first).
        """
        ranked = self.retrieve_with_scores(task, app_context=app_context, top_k=top_k)
        return [result.demo.episode for result in ranked]

    def retrieve_with_scores(
        self,
        task: str,
        app_context: Optional[str] = None,
        top_k: int = 3,
    ) -> List[RetrievalResult]:
        """Retrieve top-K demos with their scores.

        Args:
            task: Task description to find demos for.
            app_context: Optional app/domain context.
            top_k: Number of demos to retrieve. Values of zero or less
                return an empty list.

        Returns:
            List of RetrievalResult objects with scores, best first. Demos
            with equal scores keep their index order.
        """
        if top_k <= 0 or self.index.is_empty():
            return []

        # Embed the query once; it is the same for every demo.
        query_embedding = self.index.embedder.embed(task)
        results = [
            self._score_demo(query_embedding, demo, app_context)
            for demo in self.index.get_all_demos()
        ]

        # Sort by score (descending)
        results.sort(key=lambda r: r.score, reverse=True)

        return results[:top_k]
|
openadapt_ml/runtime/policy.py
CHANGED
|
@@ -8,11 +8,11 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
8
8
|
from PIL import Image
|
|
9
9
|
|
|
10
10
|
from openadapt_ml.models.base_adapter import BaseVLMAdapter
|
|
11
|
-
from openadapt_ml.
|
|
11
|
+
from openadapt_ml.schema import Action, ActionType, UIElement
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
# Coordinate-based DSL patterns
|
|
15
|
-
_CLICK_RE = re.compile(r"CLICK\(x=([0-9]*\.?[0-9]+),\s*y=([0-9]*\.?[0-9]+)\)")
|
|
15
|
+
_CLICK_RE = re.compile(r"CLICK\(x=(-?[0-9]*\.?[0-9]+),\s*y=(-?[0-9]*\.?[0-9]+)\)")
|
|
16
16
|
_TYPE_RE = re.compile(r'TYPE\(text="([^"\\]*(?:\\.[^"\\]*)*)"\)')
|
|
17
17
|
_WAIT_RE = re.compile(r"\bWAIT\s*\(\s*\)")
|
|
18
18
|
_DONE_RE = re.compile(r"\bDONE\s*\(\s*\)")
|
|
@@ -119,7 +119,7 @@ class AgentPolicy:
|
|
|
119
119
|
m = _CLICK_SOM_RE.search(text)
|
|
120
120
|
if m:
|
|
121
121
|
idx = int(m.group(1))
|
|
122
|
-
return Action(type=
|
|
122
|
+
return Action(type=ActionType.CLICK, element=UIElement(element_id=str(idx)))
|
|
123
123
|
|
|
124
124
|
# TYPE([N], "text")
|
|
125
125
|
m = _TYPE_SOM_RE.search(text)
|
|
@@ -127,14 +127,14 @@ class AgentPolicy:
|
|
|
127
127
|
idx = int(m.group(1))
|
|
128
128
|
raw_text = m.group(2)
|
|
129
129
|
unescaped = raw_text.replace('\\"', '"').replace("\\\\", "\\")
|
|
130
|
-
return Action(type=
|
|
130
|
+
return Action(type=ActionType.TYPE, text=unescaped, element=UIElement(element_id=str(idx)))
|
|
131
131
|
|
|
132
132
|
# TYPE("text") - SoM style without index
|
|
133
133
|
m = _TYPE_SOM_SIMPLE_RE.search(text)
|
|
134
134
|
if m:
|
|
135
135
|
raw_text = m.group(1)
|
|
136
136
|
unescaped = raw_text.replace('\\"', '"').replace("\\\\", "\\")
|
|
137
|
-
return Action(type=
|
|
137
|
+
return Action(type=ActionType.TYPE, text=unescaped)
|
|
138
138
|
|
|
139
139
|
# Coordinate-based patterns
|
|
140
140
|
# CLICK(x=..., y=...)
|
|
@@ -145,7 +145,7 @@ class AgentPolicy:
|
|
|
145
145
|
# Clamp to [0, 1]
|
|
146
146
|
x = max(0.0, min(1.0, x))
|
|
147
147
|
y = max(0.0, min(1.0, y))
|
|
148
|
-
return Action(type=
|
|
148
|
+
return Action(type=ActionType.CLICK, normalized_coordinates=(x, y))
|
|
149
149
|
|
|
150
150
|
# TYPE(text="...")
|
|
151
151
|
m = _TYPE_RE.search(text)
|
|
@@ -153,18 +153,18 @@ class AgentPolicy:
|
|
|
153
153
|
# Unescape the text content
|
|
154
154
|
raw_text = m.group(1)
|
|
155
155
|
unescaped = raw_text.replace('\\"', '"').replace("\\\\", "\\")
|
|
156
|
-
return Action(type=
|
|
156
|
+
return Action(type=ActionType.TYPE, text=unescaped)
|
|
157
157
|
|
|
158
158
|
# WAIT()
|
|
159
159
|
if _WAIT_RE.search(text):
|
|
160
|
-
return Action(type=
|
|
160
|
+
return Action(type=ActionType.WAIT)
|
|
161
161
|
|
|
162
162
|
# DONE()
|
|
163
163
|
if _DONE_RE.search(text):
|
|
164
|
-
return Action(type=
|
|
164
|
+
return Action(type=ActionType.DONE)
|
|
165
165
|
|
|
166
166
|
# Fallback
|
|
167
|
-
return Action(type=
|
|
167
|
+
return Action(type=ActionType.FAIL, raw={"text": text})
|
|
168
168
|
|
|
169
169
|
def predict_action_from_sample(
|
|
170
170
|
self, sample: Dict[str, Any], max_new_tokens: int = 150
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Episode Schema - Canonical format for GUI trajectory data.
|
|
3
|
+
|
|
4
|
+
A standardized contract for representing GUI automation episodes, enabling
|
|
5
|
+
interoperability across training pipelines, benchmarks, and recording tools.
|
|
6
|
+
|
|
7
|
+
Installation:
|
|
8
|
+
pip install openadapt-ml
|
|
9
|
+
# or: uv add openadapt-ml
|
|
10
|
+
|
|
11
|
+
Basic Usage:
|
|
12
|
+
from openadapt_ml.schema import Episode, Step, Action, Observation, ActionType, load_episode, save_episode, validate_episode
|
|
13
|
+
|
|
14
|
+
# Create an episode
|
|
15
|
+
episode = Episode(
|
|
16
|
+
episode_id="demo_001",
|
|
17
|
+
instruction="Open Notepad and type Hello World",
|
|
18
|
+
steps=[
|
|
19
|
+
Step(
|
|
20
|
+
step_index=0,
|
|
21
|
+
observation=Observation(screenshot_path="step_0.png"),
|
|
22
|
+
action=Action(type=ActionType.CLICK, coordinates={"x": 100, "y": 200}),
|
|
23
|
+
),
|
|
24
|
+
Step(
|
|
25
|
+
step_index=1,
|
|
26
|
+
observation=Observation(screenshot_path="step_1.png"),
|
|
27
|
+
action=Action(type=ActionType.TYPE, text="Hello World"),
|
|
28
|
+
),
|
|
29
|
+
],
|
|
30
|
+
success=True,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Save/load JSON
|
|
34
|
+
save_episode(episode, "episode.json")
|
|
35
|
+
episode = load_episode("episode.json")
|
|
36
|
+
|
|
37
|
+
# Validate external data
|
|
38
|
+
is_valid, error = validate_episode({"episode_id": "x", ...})
|
|
39
|
+
|
|
40
|
+
Coordinate Systems:
|
|
41
|
+
# Pixel coordinates (absolute)
|
|
42
|
+
Action(type=ActionType.CLICK, coordinates={"x": 512, "y": 384})
|
|
43
|
+
|
|
44
|
+
# Normalized coordinates (0.0-1.0, resolution-independent)
|
|
45
|
+
Action(type=ActionType.CLICK, normalized_coordinates=(0.5, 0.375))
|
|
46
|
+
|
|
47
|
+
# Both can coexist - use whichever fits your pipeline
|
|
48
|
+
|
|
49
|
+
Converting from Other Formats:
|
|
50
|
+
from openadapt_ml.schema.converters import from_waa_trajectory, to_waa_trajectory
|
|
51
|
+
|
|
52
|
+
# Convert Windows Agent Arena format
|
|
53
|
+
episode = from_waa_trajectory(trajectory_list, task_info_dict)
|
|
54
|
+
|
|
55
|
+
# Convert back
|
|
56
|
+
trajectory, task_info = to_waa_trajectory(episode)
|
|
57
|
+
|
|
58
|
+
JSON Schema Export:
|
|
59
|
+
# For external validation tools (e.g., JSON Schema validators, TypeScript codegen)
|
|
60
|
+
export_json_schema("episode.schema.json")
|
|
61
|
+
|
|
62
|
+
See Also:
|
|
63
|
+
- docs/schema/episode.schema.json - Full JSON Schema
|
|
64
|
+
- openadapt_ml.schema.episode - Model definitions
|
|
65
|
+
- openadapt_ml.schema.converters - Format converters
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
from openadapt_ml.schema.episode import (
|
|
69
|
+
SCHEMA_VERSION,
|
|
70
|
+
Episode,
|
|
71
|
+
Step,
|
|
72
|
+
Action,
|
|
73
|
+
Observation,
|
|
74
|
+
ActionType,
|
|
75
|
+
BenchmarkSource,
|
|
76
|
+
Coordinates,
|
|
77
|
+
BoundingBox,
|
|
78
|
+
UIElement,
|
|
79
|
+
validate_episode,
|
|
80
|
+
load_episode,
|
|
81
|
+
save_episode,
|
|
82
|
+
export_json_schema,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
__all__ = [
|
|
86
|
+
# Version
|
|
87
|
+
"SCHEMA_VERSION",
|
|
88
|
+
# Core models
|
|
89
|
+
"Episode",
|
|
90
|
+
"Step",
|
|
91
|
+
"Action",
|
|
92
|
+
"Observation",
|
|
93
|
+
# Supporting models
|
|
94
|
+
"ActionType",
|
|
95
|
+
"BenchmarkSource",
|
|
96
|
+
"Coordinates",
|
|
97
|
+
"BoundingBox",
|
|
98
|
+
"UIElement",
|
|
99
|
+
# Utilities
|
|
100
|
+
"validate_episode",
|
|
101
|
+
"load_episode",
|
|
102
|
+
"save_episode",
|
|
103
|
+
"export_json_schema",
|
|
104
|
+
]
|