openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -107
  8. openadapt_ml/benchmarks/agent.py +297 -374
  9. openadapt_ml/benchmarks/azure.py +62 -24
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1874 -751
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +1236 -0
  14. openadapt_ml/benchmarks/vm_monitor.py +1111 -0
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
  16. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  17. openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
  18. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  19. openadapt_ml/cloud/azure_inference.py +3 -5
  20. openadapt_ml/cloud/lambda_labs.py +722 -307
  21. openadapt_ml/cloud/local.py +3194 -89
  22. openadapt_ml/cloud/ssh_tunnel.py +595 -0
  23. openadapt_ml/datasets/next_action.py +125 -96
  24. openadapt_ml/evals/grounding.py +32 -9
  25. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  26. openadapt_ml/evals/trajectory_matching.py +120 -57
  27. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  28. openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
  29. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  30. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  31. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  32. openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
  33. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  34. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  35. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  36. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  37. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  38. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  39. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  40. openadapt_ml/experiments/waa_demo/runner.py +732 -0
  41. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  42. openadapt_ml/export/__init__.py +9 -0
  43. openadapt_ml/export/__main__.py +6 -0
  44. openadapt_ml/export/cli.py +89 -0
  45. openadapt_ml/export/parquet.py +277 -0
  46. openadapt_ml/grounding/detector.py +18 -14
  47. openadapt_ml/ingest/__init__.py +11 -10
  48. openadapt_ml/ingest/capture.py +97 -86
  49. openadapt_ml/ingest/loader.py +120 -69
  50. openadapt_ml/ingest/synthetic.py +344 -193
  51. openadapt_ml/models/api_adapter.py +14 -4
  52. openadapt_ml/models/base_adapter.py +10 -2
  53. openadapt_ml/models/providers/__init__.py +288 -0
  54. openadapt_ml/models/providers/anthropic.py +266 -0
  55. openadapt_ml/models/providers/base.py +299 -0
  56. openadapt_ml/models/providers/google.py +376 -0
  57. openadapt_ml/models/providers/openai.py +342 -0
  58. openadapt_ml/models/qwen_vl.py +46 -19
  59. openadapt_ml/perception/__init__.py +35 -0
  60. openadapt_ml/perception/integration.py +399 -0
  61. openadapt_ml/retrieval/README.md +226 -0
  62. openadapt_ml/retrieval/USAGE.md +391 -0
  63. openadapt_ml/retrieval/__init__.py +91 -0
  64. openadapt_ml/retrieval/demo_retriever.py +843 -0
  65. openadapt_ml/retrieval/embeddings.py +630 -0
  66. openadapt_ml/retrieval/index.py +194 -0
  67. openadapt_ml/retrieval/retriever.py +162 -0
  68. openadapt_ml/runtime/__init__.py +50 -0
  69. openadapt_ml/runtime/policy.py +27 -14
  70. openadapt_ml/runtime/safety_gate.py +471 -0
  71. openadapt_ml/schema/__init__.py +113 -0
  72. openadapt_ml/schema/converters.py +588 -0
  73. openadapt_ml/schema/episode.py +470 -0
  74. openadapt_ml/scripts/capture_screenshots.py +530 -0
  75. openadapt_ml/scripts/compare.py +102 -61
  76. openadapt_ml/scripts/demo_policy.py +4 -1
  77. openadapt_ml/scripts/eval_policy.py +19 -14
  78. openadapt_ml/scripts/make_gif.py +1 -1
  79. openadapt_ml/scripts/prepare_synthetic.py +16 -17
  80. openadapt_ml/scripts/train.py +98 -75
  81. openadapt_ml/segmentation/README.md +920 -0
  82. openadapt_ml/segmentation/__init__.py +97 -0
  83. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  84. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  85. openadapt_ml/segmentation/annotator.py +610 -0
  86. openadapt_ml/segmentation/cache.py +290 -0
  87. openadapt_ml/segmentation/cli.py +674 -0
  88. openadapt_ml/segmentation/deduplicator.py +656 -0
  89. openadapt_ml/segmentation/frame_describer.py +788 -0
  90. openadapt_ml/segmentation/pipeline.py +340 -0
  91. openadapt_ml/segmentation/schemas.py +622 -0
  92. openadapt_ml/segmentation/segment_extractor.py +634 -0
  93. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  94. openadapt_ml/training/benchmark_viewer.py +3255 -19
  95. openadapt_ml/training/shared_ui.py +7 -7
  96. openadapt_ml/training/stub_provider.py +57 -35
  97. openadapt_ml/training/trainer.py +255 -441
  98. openadapt_ml/training/trl_trainer.py +403 -0
  99. openadapt_ml/training/viewer.py +323 -108
  100. openadapt_ml/training/viewer_components.py +180 -0
  101. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
  102. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  103. openadapt_ml/benchmarks/base.py +0 -366
  104. openadapt_ml/benchmarks/data_collection.py +0 -432
  105. openadapt_ml/benchmarks/runner.py +0 -381
  106. openadapt_ml/benchmarks/waa.py +0 -704
  107. openadapt_ml/schemas/__init__.py +0 -53
  108. openadapt_ml/schemas/sessions.py +0 -122
  109. openadapt_ml/schemas/validation.py +0 -252
  110. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  111. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  112. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,290 @@
1
+ """Caching utilities for segmentation pipeline.
2
+
3
+ This module provides caching functionality to avoid re-processing
4
+ recordings and to speed up iterative development.
5
+ """
6
+
7
+ import hashlib
8
+ import json
9
+ import logging
10
+ import shutil
11
+ from datetime import datetime, timedelta
12
+ from pathlib import Path
13
+ from typing import Any, Optional, TypeVar
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ T = TypeVar("T")
18
+
19
+
20
class CacheManager:
    """Manages cached artifacts for the segmentation pipeline.

    Provides a simple file-based cache with optional TTL (time-to-live)
    and size limits. Each entry is a JSON file holding an envelope dict
    with the cached ``value`` and a ``_cached_at`` ISO timestamp.

    Example:
        >>> cache = CacheManager()
        >>> cache.set("key", {"data": "value"})
        >>> data = cache.get("key")
        >>> cache.clear()
    """

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        ttl_hours: Optional[int] = None,
        max_size_mb: Optional[int] = None,
    ):
        """Initialize the cache manager.

        Args:
            cache_dir: Directory for cache files. Defaults to
                ~/.openadapt/cache/segmentation.
            ttl_hours: Time-to-live in hours. None for no expiration.
            max_size_mb: Maximum cache size in MB. None for no limit.
        """
        self.cache_dir = (
            cache_dir or Path.home() / ".openadapt" / "cache" / "segmentation"
        )
        self.ttl_hours = ttl_hours
        self.max_size_mb = max_size_mb
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _key_to_path(self, key: str) -> Path:
        """Convert a cache key to its backing file path.

        Keys longer than 100 characters are replaced by their MD5 hex digest;
        remaining keys are sanitized character-by-character for the filesystem.
        NOTE(review): distinct keys can collide after sanitization
        (e.g. "a/b" vs "a_b") — acceptable for a best-effort cache.
        """
        if len(key) > 100:
            key = hashlib.md5(key.encode()).hexdigest()
        safe_key = "".join(c if c.isalnum() or c in "-_." else "_" for c in key)
        return self.cache_dir / f"{safe_key}.json"

    def get(self, key: str) -> Optional[Any]:
        """Get a value from the cache.

        Args:
            key: Cache key.

        Returns:
            Cached value, or None if not found, expired, or corrupt.
        """
        path = self._key_to_path(key)
        if not path.exists():
            return None

        try:
            data = json.loads(path.read_text())
            # A valid-JSON file that is not our envelope dict (e.g. a
            # truncated or foreign file) must be treated as corrupt rather
            # than raising AttributeError on .get below.
            if not isinstance(data, dict):
                raise ValueError(
                    f"expected JSON object, got {type(data).__name__}"
                )

            # Check TTL. A malformed "_cached_at" raises ValueError/TypeError
            # from fromisoformat, handled below as a corrupt entry.
            if self.ttl_hours is not None:
                cached_at = datetime.fromisoformat(
                    data.get("_cached_at", "1970-01-01")
                )
                if datetime.now() - cached_at > timedelta(hours=self.ttl_hours):
                    path.unlink()
                    return None

            return data.get("value")

        except (json.JSONDecodeError, KeyError, ValueError, TypeError) as e:
            logger.warning(f"Invalid cache entry for {key}: {e}")
            path.unlink(missing_ok=True)
            return None

    def set(self, key: str, value: Any) -> None:
        """Set a value in the cache.

        Args:
            key: Cache key.
            value: Value to cache (must be JSON serializable).
        """
        path = self._key_to_path(key)
        data = {
            "value": value,
            "_cached_at": datetime.now().isoformat(),
        }

        try:
            path.write_text(json.dumps(data))
        except (TypeError, OSError) as e:
            logger.warning(f"Failed to cache {key}: {e}")

        # Enforce the limit AFTER writing so the new entry counts toward it;
        # the previous pre-write check allowed the cache to exceed
        # max_size_mb by one entry after every set().
        if self.max_size_mb is not None:
            self._enforce_size_limit()

    def delete(self, key: str) -> bool:
        """Delete a cache entry.

        Args:
            key: Cache key.

        Returns:
            True if entry was deleted, False if not found.
        """
        path = self._key_to_path(key)
        if path.exists():
            path.unlink()
            return True
        return False

    def clear(self, pattern: Optional[str] = None) -> int:
        """Clear cache entries.

        Args:
            pattern: Optional substring to match keys (wrapped in a glob).

        Returns:
            Number of entries cleared.
        """
        count = 0
        glob_pattern = f"*{pattern}*.json" if pattern else "*.json"

        for path in self.cache_dir.glob(glob_pattern):
            path.unlink()
            count += 1

        return count

    def _enforce_size_limit(self) -> None:
        """Remove oldest entries (by mtime) until cache is under the limit."""
        if self.max_size_mb is None:
            return

        # Calculate current size
        total_size = sum(f.stat().st_size for f in self.cache_dir.glob("*.json"))
        max_bytes = self.max_size_mb * 1024 * 1024

        if total_size <= max_bytes:
            return

        # Sort by modification time (oldest first)
        files = sorted(
            self.cache_dir.glob("*.json"),
            key=lambda f: f.stat().st_mtime,
        )

        # Remove oldest until under limit
        for path in files:
            if total_size <= max_bytes:
                break
            total_size -= path.stat().st_size
            path.unlink()
            logger.debug(f"Evicted cache entry: {path.name}")

    def stats(self) -> dict:
        """Get cache statistics.

        Returns:
            Dict with cache stats (count, size_mb, oldest, newest), where
            oldest/newest are ISO timestamps of file mtimes (None if empty).
        """
        files = list(self.cache_dir.glob("*.json"))
        if not files:
            return {
                "count": 0,
                "size_mb": 0,
                "oldest": None,
                "newest": None,
            }

        mtimes = [f.stat().st_mtime for f in files]
        total_size = sum(f.stat().st_size for f in files)

        return {
            "count": len(files),
            "size_mb": total_size / (1024 * 1024),
            "oldest": datetime.fromtimestamp(min(mtimes)).isoformat(),
            "newest": datetime.fromtimestamp(max(mtimes)).isoformat(),
        }
197
+
198
+
199
class RecordingCache:
    """Cache for processed recording artifacts.

    Provides specialized caching for:
    - Frame descriptions (Stage 1)
    - Episode extractions (Stage 2)
    - Embeddings (Stage 3)
    """

    def __init__(self, cache_dir: Optional[Path] = None):
        """Initialize recording cache.

        Args:
            cache_dir: Base cache directory. Defaults to
                ~/.openadapt/cache/segmentation.
        """
        root = cache_dir or Path.home() / ".openadapt" / "cache" / "segmentation"
        self.descriptions_cache = CacheManager(root / "descriptions")
        self.extractions_cache = CacheManager(root / "extractions")
        self.embeddings_dir = root / "embeddings"
        self.embeddings_dir.mkdir(parents=True, exist_ok=True)

    def get_description(self, recording_id: str, frame_hash: str) -> Optional[dict]:
        """Return the cached frame description, or None if absent."""
        return self.descriptions_cache.get(f"{recording_id}_{frame_hash}")

    def set_description(
        self, recording_id: str, frame_hash: str, description: dict
    ) -> None:
        """Store a frame description keyed by recording and frame hash."""
        self.descriptions_cache.set(f"{recording_id}_{frame_hash}", description)

    def get_extraction(self, recording_id: str, model: str) -> Optional[dict]:
        """Return the cached episode extraction, or None if absent."""
        return self.extractions_cache.get(f"{recording_id}_{model}")

    def set_extraction(self, recording_id: str, model: str, extraction: dict) -> None:
        """Store an episode extraction keyed by recording and model."""
        self.extractions_cache.set(f"{recording_id}_{model}", extraction)

    def clear_recording(self, recording_id: str) -> int:
        """Remove every cached artifact belonging to one recording.

        Returns:
            Total number of entries (descriptions, extractions, embedding
            files) removed.
        """
        removed = self.descriptions_cache.clear(recording_id)
        removed += self.extractions_cache.clear(recording_id)

        # Embedding files are named with the recording id as a prefix.
        embedding_files = list(self.embeddings_dir.glob(f"{recording_id}*"))
        for emb_path in embedding_files:
            emb_path.unlink()

        return removed + len(embedding_files)

    def clear_all(self) -> int:
        """Wipe the entire cache (descriptions, extractions, embeddings).

        Returns:
            Number of description/extraction entries removed (embedding
            files are deleted wholesale and not counted, matching the
            original behavior).
        """
        removed = self.descriptions_cache.clear() + self.extractions_cache.clear()

        if self.embeddings_dir.exists():
            shutil.rmtree(self.embeddings_dir)
            self.embeddings_dir.mkdir()

        return removed
264
+
265
+
266
# Process-wide default cache instance, created lazily by get_cache().
_default_cache: Optional[RecordingCache] = None


def get_cache() -> RecordingCache:
    """Return the shared RecordingCache, creating it on first use."""
    global _default_cache
    cache = _default_cache
    if cache is None:
        cache = RecordingCache()
        _default_cache = cache
    return cache
276
+
277
+
278
def clear_cache(recording_id: Optional[str] = None) -> int:
    """Clear cache entries.

    Args:
        recording_id: If specified, only clear cache for this recording;
            otherwise the entire cache is cleared.

    Returns:
        Number of entries cleared.
    """
    target = get_cache()
    return (
        target.clear_recording(recording_id)
        if recording_id
        else target.clear_all()
    )