openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/segmentation/cache.py

@@ -0,0 +1,290 @@
+"""Caching utilities for segmentation pipeline.
+
+This module provides caching functionality to avoid re-processing
+recordings and to speed up iterative development.
+"""
+
+import hashlib
+import json
+import logging
+import shutil
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any, Optional, TypeVar
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T")
+
+
+class CacheManager:
+    """Manages cached artifacts for the segmentation pipeline.
+
+    Provides a simple file-based cache with optional TTL (time-to-live)
+    and size limits.
+
+    Example:
+        >>> cache = CacheManager()
+        >>> cache.set("key", {"data": "value"})
+        >>> data = cache.get("key")
+        >>> cache.clear()
+    """
+
+    def __init__(
+        self,
+        cache_dir: Optional[Path] = None,
+        ttl_hours: Optional[int] = None,
+        max_size_mb: Optional[int] = None,
+    ):
+        """Initialize the cache manager.
+
+        Args:
+            cache_dir: Directory for cache files. Defaults to ~/.openadapt/cache/segmentation
+            ttl_hours: Time-to-live in hours. None for no expiration.
+            max_size_mb: Maximum cache size in MB. None for no limit.
+        """
+        self.cache_dir = (
+            cache_dir or Path.home() / ".openadapt" / "cache" / "segmentation"
+        )
+        self.ttl_hours = ttl_hours
+        self.max_size_mb = max_size_mb
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+    def _key_to_path(self, key: str) -> Path:
+        """Convert cache key to file path."""
+        # Hash long keys
+        if len(key) > 100:
+            key = hashlib.md5(key.encode()).hexdigest()
+        # Sanitize key for filesystem
+        safe_key = "".join(c if c.isalnum() or c in "-_." else "_" for c in key)
+        return self.cache_dir / f"{safe_key}.json"
+
+    def get(self, key: str) -> Optional[Any]:
+        """Get a value from the cache.
+
+        Args:
+            key: Cache key.
+
+        Returns:
+            Cached value or None if not found/expired.
+        """
+        path = self._key_to_path(key)
+        if not path.exists():
+            return None
+
+        try:
+            data = json.loads(path.read_text())
+
+            # Check TTL
+            if self.ttl_hours is not None:
+                cached_at = datetime.fromisoformat(data.get("_cached_at", "1970-01-01"))
+                if datetime.now() - cached_at > timedelta(hours=self.ttl_hours):
+                    path.unlink()
+                    return None
+
+            return data.get("value")
+
+        except (json.JSONDecodeError, KeyError) as e:
+            logger.warning(f"Invalid cache entry for {key}: {e}")
+            path.unlink(missing_ok=True)
+            return None
+
+    def set(self, key: str, value: Any) -> None:
+        """Set a value in the cache.
+
+        Args:
+            key: Cache key.
+            value: Value to cache (must be JSON serializable).
+        """
+        # Enforce size limit
+        if self.max_size_mb is not None:
+            self._enforce_size_limit()
+
+        path = self._key_to_path(key)
+        data = {
+            "value": value,
+            "_cached_at": datetime.now().isoformat(),
+        }
+
+        try:
+            path.write_text(json.dumps(data))
+        except (TypeError, OSError) as e:
+            logger.warning(f"Failed to cache {key}: {e}")
+
+    def delete(self, key: str) -> bool:
+        """Delete a cache entry.
+
+        Args:
+            key: Cache key.
+
+        Returns:
+            True if entry was deleted, False if not found.
+        """
+        path = self._key_to_path(key)
+        if path.exists():
+            path.unlink()
+            return True
+        return False
+
+    def clear(self, pattern: Optional[str] = None) -> int:
+        """Clear cache entries.
+
+        Args:
+            pattern: Optional glob pattern to match keys.
+
+        Returns:
+            Number of entries cleared.
+        """
+        count = 0
+        glob_pattern = f"*{pattern}*.json" if pattern else "*.json"
+
+        for path in self.cache_dir.glob(glob_pattern):
+            path.unlink()
+            count += 1
+
+        return count
+
+    def _enforce_size_limit(self) -> None:
+        """Remove oldest entries if cache exceeds size limit."""
+        if self.max_size_mb is None:
+            return
+
+        # Calculate current size
+        total_size = sum(f.stat().st_size for f in self.cache_dir.glob("*.json"))
+        max_bytes = self.max_size_mb * 1024 * 1024
+
+        if total_size <= max_bytes:
+            return
+
+        # Sort by modification time (oldest first)
+        files = sorted(
+            self.cache_dir.glob("*.json"),
+            key=lambda f: f.stat().st_mtime,
+        )
+
+        # Remove oldest until under limit
+        for path in files:
+            if total_size <= max_bytes:
+                break
+            total_size -= path.stat().st_size
+            path.unlink()
+            logger.debug(f"Evicted cache entry: {path.name}")
+
+    def stats(self) -> dict:
+        """Get cache statistics.
+
+        Returns:
+            Dict with cache stats (count, size, oldest, newest).
+        """
+        files = list(self.cache_dir.glob("*.json"))
+        if not files:
+            return {
+                "count": 0,
+                "size_mb": 0,
+                "oldest": None,
+                "newest": None,
+            }
+
+        mtimes = [f.stat().st_mtime for f in files]
+        total_size = sum(f.stat().st_size for f in files)
+
+        return {
+            "count": len(files),
+            "size_mb": total_size / (1024 * 1024),
+            "oldest": datetime.fromtimestamp(min(mtimes)).isoformat(),
+            "newest": datetime.fromtimestamp(max(mtimes)).isoformat(),
+        }
+
+
+class RecordingCache:
+    """Cache for processed recording artifacts.
+
+    Provides specialized caching for:
+    - Frame descriptions (Stage 1)
+    - Episode extractions (Stage 2)
+    - Embeddings (Stage 3)
+    """
+
+    def __init__(self, cache_dir: Optional[Path] = None):
+        """Initialize recording cache.
+
+        Args:
+            cache_dir: Base cache directory.
+        """
+        base_dir = cache_dir or Path.home() / ".openadapt" / "cache" / "segmentation"
+        self.descriptions_cache = CacheManager(base_dir / "descriptions")
+        self.extractions_cache = CacheManager(base_dir / "extractions")
+        self.embeddings_dir = base_dir / "embeddings"
+        self.embeddings_dir.mkdir(parents=True, exist_ok=True)
+
+    def get_description(self, recording_id: str, frame_hash: str) -> Optional[dict]:
+        """Get cached frame description."""
+        key = f"{recording_id}_{frame_hash}"
+        return self.descriptions_cache.get(key)
+
+    def set_description(
+        self, recording_id: str, frame_hash: str, description: dict
+    ) -> None:
+        """Cache frame description."""
+        key = f"{recording_id}_{frame_hash}"
+        self.descriptions_cache.set(key, description)
+
+    def get_extraction(self, recording_id: str, model: str) -> Optional[dict]:
+        """Get cached episode extraction."""
+        key = f"{recording_id}_{model}"
+        return self.extractions_cache.get(key)
+
+    def set_extraction(self, recording_id: str, model: str, extraction: dict) -> None:
+        """Cache episode extraction."""
+        key = f"{recording_id}_{model}"
+        self.extractions_cache.set(key, extraction)
+
+    def clear_recording(self, recording_id: str) -> int:
+        """Clear all cache entries for a recording."""
+        count = self.descriptions_cache.clear(recording_id)
+        count += self.extractions_cache.clear(recording_id)
+
+        # Clear embeddings
+        for path in self.embeddings_dir.glob(f"{recording_id}*"):
+            path.unlink()
+            count += 1
+
+        return count
+
+    def clear_all(self) -> int:
+        """Clear entire cache."""
+        count = self.descriptions_cache.clear()
+        count += self.extractions_cache.clear()
+
+        if self.embeddings_dir.exists():
+            shutil.rmtree(self.embeddings_dir)
+            self.embeddings_dir.mkdir()
+
+        return count
+
+
+# Default cache instance
+_default_cache: Optional[RecordingCache] = None
+
+
+def get_cache() -> RecordingCache:
+    """Get the default cache instance."""
+    global _default_cache
+    if _default_cache is None:
+        _default_cache = RecordingCache()
+    return _default_cache
+
+
+def clear_cache(recording_id: Optional[str] = None) -> int:
+    """Clear cache entries.
+
+    Args:
+        recording_id: If specified, only clear cache for this recording.
+
+    Returns:
+        Number of entries cleared.
+    """
+    cache = get_cache()
+    if recording_id:
+        return cache.clear_recording(recording_id)
+    return cache.clear_all()
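For orientation, here is a minimal usage sketch of the new module (not part of the diff), assuming it is importable as openadapt_ml.segmentation.cache per the file path above; the recording id, frame hash, model name, and values are illustrative only:

# Usage sketch for CacheManager / RecordingCache; ids and values are hypothetical.
import tempfile
from pathlib import Path

from openadapt_ml.segmentation.cache import CacheManager, RecordingCache

tmp = Path(tempfile.mkdtemp())  # keep the example out of ~/.openadapt

# Low-level cache: JSON-on-disk entries with optional TTL and size cap.
cache = CacheManager(cache_dir=tmp / "generic", ttl_hours=24, max_size_mb=50)
cache.set("frame_0001", {"elements": ["button", "textbox"]})
print(cache.get("frame_0001"))  # {'elements': ['button', 'textbox']}
print(cache.stats())            # count, size_mb, oldest, newest

# Higher-level wrapper: one sub-cache per pipeline stage, keyed by
# recording id + frame hash (descriptions) or recording id + model (extractions).
rec_cache = RecordingCache(cache_dir=tmp / "recordings")
rec_cache.set_description("rec_42", "abc123", {"summary": "login screen"})
rec_cache.set_extraction("rec_42", "gpt-4o", {"episodes": []})
print(rec_cache.get_description("rec_42", "abc123"))

# Invalidate everything for one recording, e.g. after re-capturing it.
print(rec_cache.clear_recording("rec_42"))  # 2

The module-level get_cache()/clear_cache() helpers wrap a process-wide RecordingCache singleton, so pipeline stages can share one cache without passing an instance around.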