openadapt-ml 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.2.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/segmentation/pipeline.py
@@ -0,0 +1,340 @@
+"""End-to-end segmentation pipeline.
+
+This module provides a unified interface for running the complete
+three-stage segmentation pipeline for episode extraction.
+"""
+
+import logging
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional, Union
+
+from openadapt_ml.segmentation.schemas import (
+    ActionTranscript,
+    EpisodeExtractionResult,
+    EpisodeLibrary,
+)
+from openadapt_ml.segmentation.frame_describer import FrameDescriber
+from openadapt_ml.segmentation.segment_extractor import SegmentExtractor
+from openadapt_ml.segmentation.deduplicator import WorkflowDeduplicator
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PipelineConfig:
+    """Configuration for the segmentation pipeline."""
+
+    # Stage 1: Frame description
+    vlm_model: str = "gemini-2.0-flash"
+    vlm_batch_size: int = 10
+
+    # Stage 2: Episode extraction
+    llm_model: str = "gpt-4o"
+    use_few_shot: bool = True
+    hierarchical: bool = False
+    min_segment_duration: float = 2.0
+    max_segment_duration: float = 300.0
+
+    # Stage 3: Deduplication
+    similarity_threshold: float = 0.85
+    embedding_model: str = "text-embedding-3-large"
+    merge_strategy: str = "centroid"
+    use_local_embeddings: bool = False
+
+    # General
+    cache_enabled: bool = True
+    cache_dir: Optional[Path] = None
+    verbose: bool = False
+
+
+@dataclass
+class PipelineResult:
+    """Result of running the segmentation pipeline."""
+
+    # Per-recording outputs
+    transcripts: dict[str, ActionTranscript] = field(default_factory=dict)
+    extractions: dict[str, EpisodeExtractionResult] = field(default_factory=dict)
+
+    # Combined output
+    library: Optional[EpisodeLibrary] = None
+
+    # Metadata
+    config: Optional[PipelineConfig] = None
+    recordings_processed: int = 0
+    total_episodes_extracted: int = 0
+    unique_episodes: int = 0
+    processing_time_seconds: float = 0.0
+
+
+class SegmentationPipeline:
+    """Complete workflow segmentation pipeline.
+
+    Orchestrates all three stages to process recordings into
+    a deduplicated episode library.
+
+    Example:
+        >>> pipeline = SegmentationPipeline()
+        >>> result = pipeline.run(
+        ...     recordings=["recording1/", "recording2/"],
+        ...     output_dir="segments/",
+        ... )
+        >>> print(f"Extracted {result.unique_episodes} unique workflows")
+        >>> result.library.to_dict()
+    """
+
+    def __init__(
+        self,
+        config: Optional[PipelineConfig] = None,
+    ) -> None:
+        """Initialize the pipeline.
+
+        Args:
+            config: Pipeline configuration. Uses defaults if not specified.
+        """
+        self.config = config or PipelineConfig()
+        self._describer: Optional[FrameDescriber] = None
+        self._extractor: Optional[SegmentExtractor] = None
+        self._deduplicator: Optional[WorkflowDeduplicator] = None
+
+    @property
+    def describer(self) -> FrameDescriber:
+        """Lazy-load frame describer."""
+        if self._describer is None:
+            self._describer = FrameDescriber(
+                model=self.config.vlm_model,
+                batch_size=self.config.vlm_batch_size,
+                cache_enabled=self.config.cache_enabled,
+                cache_dir=self.config.cache_dir,
+            )
+        return self._describer
+
+    @property
+    def extractor(self) -> SegmentExtractor:
+        """Lazy-load segment extractor."""
+        if self._extractor is None:
+            self._extractor = SegmentExtractor(
+                model=self.config.llm_model,
+                use_few_shot=self.config.use_few_shot,
+                hierarchical=self.config.hierarchical,
+                min_segment_duration=self.config.min_segment_duration,
+                max_segment_duration=self.config.max_segment_duration,
+            )
+        return self._extractor
+
+    @property
+    def deduplicator(self) -> WorkflowDeduplicator:
+        """Lazy-load deduplicator."""
+        if self._deduplicator is None:
+            self._deduplicator = WorkflowDeduplicator(
+                threshold=self.config.similarity_threshold,
+                embedding_model=self.config.embedding_model,
+                merge_strategy=self.config.merge_strategy,
+                use_local_embeddings=self.config.use_local_embeddings,
+            )
+        return self._deduplicator
+
+    def run(
+        self,
+        recordings: list[Union[str, Path]],
+        output_dir: Optional[Union[str, Path]] = None,
+        existing_library: Optional[EpisodeLibrary] = None,
+        progress_callback: Optional[callable] = None,
+    ) -> PipelineResult:
+        """Run the complete pipeline on a set of recordings.
+
+        Args:
+            recordings: List of recording paths to process.
+            output_dir: Directory to save intermediate and final outputs.
+            existing_library: Existing library to merge with.
+            progress_callback: Optional callback(stage, current, total).
+
+        Returns:
+            PipelineResult with all outputs.
+        """
+        start_time = time.time()
+        result = PipelineResult(config=self.config)
+
+        if output_dir:
+            output_dir = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Stage 1: Generate descriptions for each recording
+        logger.info(f"Stage 1: Processing {len(recordings)} recordings")
+        for i, recording_path in enumerate(recordings):
+            recording_path = Path(recording_path)
+            recording_id = recording_path.name
+
+            if progress_callback:
+                progress_callback("describe", i + 1, len(recordings))
+
+            logger.info(f"  Describing: {recording_id}")
+            transcript = self.run_stage1(recording_path)
+            result.transcripts[recording_id] = transcript
+
+            # Save intermediate result
+            if output_dir:
+                transcript_path = output_dir / f"{recording_id}_transcript.json"
+                transcript_path.write_text(transcript.model_dump_json(indent=2))
+
+        # Stage 2: Extract episodes from each transcript
+        logger.info("Stage 2: Extracting episodes")
+        extraction_results = []
+        for i, (recording_id, transcript) in enumerate(result.transcripts.items()):
+            if progress_callback:
+                progress_callback("extract", i + 1, len(result.transcripts))
+
+            logger.info(f"  Extracting: {recording_id}")
+            extraction = self.run_stage2(transcript)
+            result.extractions[recording_id] = extraction
+            extraction_results.append(extraction)
+
+            # Save intermediate result
+            if output_dir:
+                extraction_path = output_dir / f"{recording_id}_episodes.json"
+                extraction_path.write_text(extraction.model_dump_json(indent=2))
+
+        # Stage 3: Deduplicate across all recordings
+        logger.info("Stage 3: Deduplicating episodes")
+        if progress_callback:
+            progress_callback("deduplicate", 1, 1)
+
+        result.library = self.run_stage3(extraction_results, existing_library)
+
+        # Save final result
+        if output_dir:
+            library_path = output_dir / "episode_library.json"
+            library_path.write_text(result.library.model_dump_json(indent=2))
+
+        # Calculate statistics
+        result.recordings_processed = len(recordings)
+        result.total_episodes_extracted = sum(
+            len(ext.episodes) for ext in extraction_results
+        )
+        result.unique_episodes = result.library.unique_episode_count
+        result.processing_time_seconds = time.time() - start_time
+
+        logger.info(
+            f"Pipeline complete: {result.unique_episodes} unique episodes "
+            f"from {result.total_episodes_extracted} total "
+            f"({result.library.deduplication_ratio:.1%} duplicates)"
+        )
+
+        return result
+
+    def run_stage1(
+        self,
+        recording: Union[str, Path],
+    ) -> ActionTranscript:
+        """Run only Stage 1 (frame description).
+
+        Useful for inspecting intermediate outputs or debugging.
+
+        Args:
+            recording: Recording path.
+
+        Returns:
+            ActionTranscript for this recording.
+        """
+        return self.describer.describe_recording(recording)
+
+    def run_stage2(
+        self,
+        transcript: ActionTranscript,
+    ) -> EpisodeExtractionResult:
+        """Run only Stage 2 (episode extraction).
+
+        Args:
+            transcript: ActionTranscript from Stage 1.
+
+        Returns:
+            EpisodeExtractionResult for this transcript.
+        """
+        return self.extractor.extract_segments(transcript)
+
+    def run_stage3(
+        self,
+        extractions: list[EpisodeExtractionResult],
+        existing_library: Optional[EpisodeLibrary] = None,
+    ) -> EpisodeLibrary:
+        """Run only Stage 3 (deduplication).
+
+        Args:
+            extractions: List of extraction results from Stage 2.
+            existing_library: Existing library to merge with.
+
+        Returns:
+            Deduplicated EpisodeLibrary.
+        """
+        return self.deduplicator.deduplicate(extractions, existing_library)
+
+    def resume(
+        self,
+        output_dir: Union[str, Path],
+        recordings: Optional[list[Union[str, Path]]] = None,
+    ) -> PipelineResult:
+        """Resume a previously interrupted pipeline run.
+
+        Loads cached intermediate results and continues from where it stopped.
+
+        Args:
+            output_dir: Directory with previous run's outputs.
+            recordings: Additional recordings to process (optional).
+
+        Returns:
+            PipelineResult with combined outputs.
+        """
+        import json
+
+        output_dir = Path(output_dir)
+        result = PipelineResult(config=self.config)
+
+        # Load existing transcripts
+        for transcript_file in output_dir.glob("*_transcript.json"):
+            data = json.loads(transcript_file.read_text())
+            transcript = ActionTranscript.model_validate(data)
+            result.transcripts[transcript.recording_id] = transcript
+
+        # Load existing extractions
+        for extraction_file in output_dir.glob("*_episodes.json"):
+            data = json.loads(extraction_file.read_text())
+            extraction = EpisodeExtractionResult.model_validate(data)
+            result.extractions[extraction.recording_id] = extraction
+
+        # Load existing library if present
+        library_path = output_dir / "episode_library.json"
+        existing_library = None
+        if library_path.exists():
+            data = json.loads(library_path.read_text())
+            existing_library = EpisodeLibrary.model_validate(data)
+
+        # Process new recordings if provided
+        if recordings:
+            new_recordings = [
+                r for r in recordings if Path(r).name not in result.transcripts
+            ]
+            if new_recordings:
+                new_result = self.run(
+                    new_recordings,
+                    output_dir=output_dir,
+                    existing_library=existing_library,
+                )
+                # Merge results
+                result.transcripts.update(new_result.transcripts)
+                result.extractions.update(new_result.extractions)
+                result.library = new_result.library
+
+        # If no new recordings, just re-run deduplication
+        if not recordings and result.extractions:
+            extraction_results = list(result.extractions.values())
+            result.library = self.run_stage3(extraction_results, existing_library)
+
+        result.recordings_processed = len(result.transcripts)
+        result.total_episodes_extracted = sum(
+            len(ext.episodes) for ext in result.extractions.values()
+        )
+        if result.library:
+            result.unique_episodes = result.library.unique_episode_count
+
+        return result