openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (95)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -115
  8. openadapt_ml/benchmarks/agent.py +265 -421
  9. openadapt_ml/benchmarks/azure.py +28 -19
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1722 -4847
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +22 -5
  14. openadapt_ml/benchmarks/vm_monitor.py +530 -29
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
  16. openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
  17. openadapt_ml/cloud/azure_inference.py +3 -5
  18. openadapt_ml/cloud/lambda_labs.py +722 -307
  19. openadapt_ml/cloud/local.py +2038 -487
  20. openadapt_ml/cloud/ssh_tunnel.py +68 -26
  21. openadapt_ml/datasets/next_action.py +40 -30
  22. openadapt_ml/evals/grounding.py +8 -3
  23. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  24. openadapt_ml/evals/trajectory_matching.py +41 -26
  25. openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
  26. openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
  27. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  28. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  29. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  30. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  31. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  32. openadapt_ml/experiments/waa_demo/runner.py +29 -14
  33. openadapt_ml/export/parquet.py +36 -24
  34. openadapt_ml/grounding/detector.py +18 -14
  35. openadapt_ml/ingest/__init__.py +8 -6
  36. openadapt_ml/ingest/capture.py +25 -22
  37. openadapt_ml/ingest/loader.py +7 -4
  38. openadapt_ml/ingest/synthetic.py +189 -100
  39. openadapt_ml/models/api_adapter.py +14 -4
  40. openadapt_ml/models/base_adapter.py +10 -2
  41. openadapt_ml/models/providers/__init__.py +288 -0
  42. openadapt_ml/models/providers/anthropic.py +266 -0
  43. openadapt_ml/models/providers/base.py +299 -0
  44. openadapt_ml/models/providers/google.py +376 -0
  45. openadapt_ml/models/providers/openai.py +342 -0
  46. openadapt_ml/models/qwen_vl.py +46 -19
  47. openadapt_ml/perception/__init__.py +35 -0
  48. openadapt_ml/perception/integration.py +399 -0
  49. openadapt_ml/retrieval/demo_retriever.py +50 -24
  50. openadapt_ml/retrieval/embeddings.py +9 -8
  51. openadapt_ml/retrieval/retriever.py +3 -1
  52. openadapt_ml/runtime/__init__.py +50 -0
  53. openadapt_ml/runtime/policy.py +18 -5
  54. openadapt_ml/runtime/safety_gate.py +471 -0
  55. openadapt_ml/schema/__init__.py +9 -0
  56. openadapt_ml/schema/converters.py +74 -27
  57. openadapt_ml/schema/episode.py +31 -18
  58. openadapt_ml/scripts/capture_screenshots.py +530 -0
  59. openadapt_ml/scripts/compare.py +85 -54
  60. openadapt_ml/scripts/demo_policy.py +4 -1
  61. openadapt_ml/scripts/eval_policy.py +15 -9
  62. openadapt_ml/scripts/make_gif.py +1 -1
  63. openadapt_ml/scripts/prepare_synthetic.py +3 -1
  64. openadapt_ml/scripts/train.py +21 -9
  65. openadapt_ml/segmentation/README.md +920 -0
  66. openadapt_ml/segmentation/__init__.py +97 -0
  67. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  68. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  69. openadapt_ml/segmentation/annotator.py +610 -0
  70. openadapt_ml/segmentation/cache.py +290 -0
  71. openadapt_ml/segmentation/cli.py +674 -0
  72. openadapt_ml/segmentation/deduplicator.py +656 -0
  73. openadapt_ml/segmentation/frame_describer.py +788 -0
  74. openadapt_ml/segmentation/pipeline.py +340 -0
  75. openadapt_ml/segmentation/schemas.py +622 -0
  76. openadapt_ml/segmentation/segment_extractor.py +634 -0
  77. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  78. openadapt_ml/training/benchmark_viewer.py +52 -41
  79. openadapt_ml/training/shared_ui.py +7 -7
  80. openadapt_ml/training/stub_provider.py +57 -35
  81. openadapt_ml/training/trainer.py +143 -86
  82. openadapt_ml/training/trl_trainer.py +70 -21
  83. openadapt_ml/training/viewer.py +323 -108
  84. openadapt_ml/training/viewer_components.py +180 -0
  85. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
  86. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  87. openadapt_ml/benchmarks/base.py +0 -366
  88. openadapt_ml/benchmarks/data_collection.py +0 -432
  89. openadapt_ml/benchmarks/live_tracker.py +0 -180
  90. openadapt_ml/benchmarks/runner.py +0 -418
  91. openadapt_ml/benchmarks/waa.py +0 -761
  92. openadapt_ml/benchmarks/waa_live.py +0 -619
  93. openadapt_ml-0.2.0.dist-info/RECORD +0 -86
  94. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  95. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
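
The headline change in 0.2.1 is the new openadapt_ml.segmentation package (items 65-76), which adds a three-stage episode-extraction pipeline: frame description, episode extraction, and deduplication. A minimal usage sketch, assuming only the public names visible in the pipeline.py diff below (the model choices and paths are illustrative, not prescribed):

    from openadapt_ml.segmentation.pipeline import PipelineConfig, SegmentationPipeline

    # Configure the three stages; these defaults match the dataclass in the diff.
    config = PipelineConfig(vlm_model="gemini-2.0-flash", llm_model="gpt-4o")
    pipeline = SegmentationPipeline(config=config)

    # Process recordings into a deduplicated episode library.
    result = pipeline.run(recordings=["recording1/"], output_dir="segments/")
    print(result.unique_episodes, result.processing_time_seconds)
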
openadapt_ml/segmentation/pipeline.py (new file)
@@ -0,0 +1,340 @@
+ """End-to-end segmentation pipeline.
+ 
+ This module provides a unified interface for running the complete
+ three-stage segmentation pipeline for episode extraction.
+ """
+ 
+ import logging
+ import time
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Optional, Union
+ 
+ from openadapt_ml.segmentation.schemas import (
+     ActionTranscript,
+     EpisodeExtractionResult,
+     EpisodeLibrary,
+ )
+ from openadapt_ml.segmentation.frame_describer import FrameDescriber
+ from openadapt_ml.segmentation.segment_extractor import SegmentExtractor
+ from openadapt_ml.segmentation.deduplicator import WorkflowDeduplicator
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ @dataclass
+ class PipelineConfig:
+     """Configuration for the segmentation pipeline."""
+ 
+     # Stage 1: Frame description
+     vlm_model: str = "gemini-2.0-flash"
+     vlm_batch_size: int = 10
+ 
+     # Stage 2: Episode extraction
+     llm_model: str = "gpt-4o"
+     use_few_shot: bool = True
+     hierarchical: bool = False
+     min_segment_duration: float = 2.0
+     max_segment_duration: float = 300.0
+ 
+     # Stage 3: Deduplication
+     similarity_threshold: float = 0.85
+     embedding_model: str = "text-embedding-3-large"
+     merge_strategy: str = "centroid"
+     use_local_embeddings: bool = False
+ 
+     # General
+     cache_enabled: bool = True
+     cache_dir: Optional[Path] = None
+     verbose: bool = False
+ 
+ 
+ @dataclass
+ class PipelineResult:
+     """Result of running the segmentation pipeline."""
+ 
+     # Per-recording outputs
+     transcripts: dict[str, ActionTranscript] = field(default_factory=dict)
+     extractions: dict[str, EpisodeExtractionResult] = field(default_factory=dict)
+ 
+     # Combined output
+     library: Optional[EpisodeLibrary] = None
+ 
+     # Metadata
+     config: Optional[PipelineConfig] = None
+     recordings_processed: int = 0
+     total_episodes_extracted: int = 0
+     unique_episodes: int = 0
+     processing_time_seconds: float = 0.0
+ 
+ 
+ class SegmentationPipeline:
+     """Complete workflow segmentation pipeline.
+ 
+     Orchestrates all three stages to process recordings into
+     a deduplicated episode library.
+ 
+     Example:
+         >>> pipeline = SegmentationPipeline()
+         >>> result = pipeline.run(
+         ...     recordings=["recording1/", "recording2/"],
+         ...     output_dir="segments/",
+         ... )
+         >>> print(f"Extracted {result.unique_episodes} unique workflows")
+         >>> result.library.to_dict()
+     """
+ 
+     def __init__(
+         self,
+         config: Optional[PipelineConfig] = None,
+     ) -> None:
+         """Initialize the pipeline.
+ 
+         Args:
+             config: Pipeline configuration. Uses defaults if not specified.
+         """
+         self.config = config or PipelineConfig()
+         self._describer: Optional[FrameDescriber] = None
+         self._extractor: Optional[SegmentExtractor] = None
+         self._deduplicator: Optional[WorkflowDeduplicator] = None
+ 
+     @property
+     def describer(self) -> FrameDescriber:
+         """Lazy-load frame describer."""
+         if self._describer is None:
+             self._describer = FrameDescriber(
+                 model=self.config.vlm_model,
+                 batch_size=self.config.vlm_batch_size,
+                 cache_enabled=self.config.cache_enabled,
+                 cache_dir=self.config.cache_dir,
+             )
+         return self._describer
+ 
+     @property
+     def extractor(self) -> SegmentExtractor:
+         """Lazy-load segment extractor."""
+         if self._extractor is None:
+             self._extractor = SegmentExtractor(
+                 model=self.config.llm_model,
+                 use_few_shot=self.config.use_few_shot,
+                 hierarchical=self.config.hierarchical,
+                 min_segment_duration=self.config.min_segment_duration,
+                 max_segment_duration=self.config.max_segment_duration,
+             )
+         return self._extractor
+ 
+     @property
+     def deduplicator(self) -> WorkflowDeduplicator:
+         """Lazy-load deduplicator."""
+         if self._deduplicator is None:
+             self._deduplicator = WorkflowDeduplicator(
+                 threshold=self.config.similarity_threshold,
+                 embedding_model=self.config.embedding_model,
+                 merge_strategy=self.config.merge_strategy,
+                 use_local_embeddings=self.config.use_local_embeddings,
+             )
+         return self._deduplicator
+ 
+     def run(
+         self,
+         recordings: list[Union[str, Path]],
+         output_dir: Optional[Union[str, Path]] = None,
+         existing_library: Optional[EpisodeLibrary] = None,
+         progress_callback: Optional[callable] = None,
+     ) -> PipelineResult:
+         """Run the complete pipeline on a set of recordings.
+ 
+         Args:
+             recordings: List of recording paths to process.
+             output_dir: Directory to save intermediate and final outputs.
+             existing_library: Existing library to merge with.
+             progress_callback: Optional callback(stage, current, total).
+ 
+         Returns:
+             PipelineResult with all outputs.
+         """
+         start_time = time.time()
+         result = PipelineResult(config=self.config)
+ 
+         if output_dir:
+             output_dir = Path(output_dir)
+             output_dir.mkdir(parents=True, exist_ok=True)
+ 
+         # Stage 1: Generate descriptions for each recording
+         logger.info(f"Stage 1: Processing {len(recordings)} recordings")
+         for i, recording_path in enumerate(recordings):
+             recording_path = Path(recording_path)
+             recording_id = recording_path.name
+ 
+             if progress_callback:
+                 progress_callback("describe", i + 1, len(recordings))
+ 
+             logger.info(f" Describing: {recording_id}")
+             transcript = self.run_stage1(recording_path)
+             result.transcripts[recording_id] = transcript
+ 
+             # Save intermediate result
+             if output_dir:
+                 transcript_path = output_dir / f"{recording_id}_transcript.json"
+                 transcript_path.write_text(transcript.model_dump_json(indent=2))
+ 
+         # Stage 2: Extract episodes from each transcript
+         logger.info("Stage 2: Extracting episodes")
+         extraction_results = []
+         for i, (recording_id, transcript) in enumerate(result.transcripts.items()):
+             if progress_callback:
+                 progress_callback("extract", i + 1, len(result.transcripts))
+ 
+             logger.info(f" Extracting: {recording_id}")
+             extraction = self.run_stage2(transcript)
+             result.extractions[recording_id] = extraction
+             extraction_results.append(extraction)
+ 
+             # Save intermediate result
+             if output_dir:
+                 extraction_path = output_dir / f"{recording_id}_episodes.json"
+                 extraction_path.write_text(extraction.model_dump_json(indent=2))
+ 
+         # Stage 3: Deduplicate across all recordings
+         logger.info("Stage 3: Deduplicating episodes")
+         if progress_callback:
+             progress_callback("deduplicate", 1, 1)
+ 
+         result.library = self.run_stage3(extraction_results, existing_library)
+ 
+         # Save final result
+         if output_dir:
+             library_path = output_dir / "episode_library.json"
+             library_path.write_text(result.library.model_dump_json(indent=2))
+ 
+         # Calculate statistics
+         result.recordings_processed = len(recordings)
+         result.total_episodes_extracted = sum(
+             len(ext.episodes) for ext in extraction_results
+         )
+         result.unique_episodes = result.library.unique_episode_count
+         result.processing_time_seconds = time.time() - start_time
+ 
+         logger.info(
+             f"Pipeline complete: {result.unique_episodes} unique episodes "
+             f"from {result.total_episodes_extracted} total "
+             f"({result.library.deduplication_ratio:.1%} duplicates)"
+         )
+ 
+         return result
+ 
+     def run_stage1(
+         self,
+         recording: Union[str, Path],
+     ) -> ActionTranscript:
+         """Run only Stage 1 (frame description).
+ 
+         Useful for inspecting intermediate outputs or debugging.
+ 
+         Args:
+             recording: Recording path.
+ 
+         Returns:
+             ActionTranscript for this recording.
+         """
+         return self.describer.describe_recording(recording)
+ 
+     def run_stage2(
+         self,
+         transcript: ActionTranscript,
+     ) -> EpisodeExtractionResult:
+         """Run only Stage 2 (episode extraction).
+ 
+         Args:
+             transcript: ActionTranscript from Stage 1.
+ 
+         Returns:
+             EpisodeExtractionResult for this transcript.
+         """
+         return self.extractor.extract_segments(transcript)
+ 
+     def run_stage3(
+         self,
+         extractions: list[EpisodeExtractionResult],
+         existing_library: Optional[EpisodeLibrary] = None,
+     ) -> EpisodeLibrary:
+         """Run only Stage 3 (deduplication).
+ 
+         Args:
+             extractions: List of extraction results from Stage 2.
+             existing_library: Existing library to merge with.
+ 
+         Returns:
+             Deduplicated EpisodeLibrary.
+         """
+         return self.deduplicator.deduplicate(extractions, existing_library)
+ 
+     def resume(
+         self,
+         output_dir: Union[str, Path],
+         recordings: Optional[list[Union[str, Path]]] = None,
+     ) -> PipelineResult:
+         """Resume a previously interrupted pipeline run.
+ 
+         Loads cached intermediate results and continues from where it stopped.
+ 
+         Args:
+             output_dir: Directory with previous run's outputs.
+             recordings: Additional recordings to process (optional).
+ 
+         Returns:
+             PipelineResult with combined outputs.
+         """
+         import json
+ 
+         output_dir = Path(output_dir)
+         result = PipelineResult(config=self.config)
+ 
+         # Load existing transcripts
+         for transcript_file in output_dir.glob("*_transcript.json"):
+             data = json.loads(transcript_file.read_text())
+             transcript = ActionTranscript.model_validate(data)
+             result.transcripts[transcript.recording_id] = transcript
+ 
+         # Load existing extractions
+         for extraction_file in output_dir.glob("*_episodes.json"):
+             data = json.loads(extraction_file.read_text())
+             extraction = EpisodeExtractionResult.model_validate(data)
+             result.extractions[extraction.recording_id] = extraction
+ 
+         # Load existing library if present
+         library_path = output_dir / "episode_library.json"
+         existing_library = None
+         if library_path.exists():
+             data = json.loads(library_path.read_text())
+             existing_library = EpisodeLibrary.model_validate(data)
+ 
+         # Process new recordings if provided
+         if recordings:
+             new_recordings = [
+                 r for r in recordings if Path(r).name not in result.transcripts
+             ]
+             if new_recordings:
+                 new_result = self.run(
+                     new_recordings,
+                     output_dir=output_dir,
+                     existing_library=existing_library,
+                 )
+                 # Merge results
+                 result.transcripts.update(new_result.transcripts)
+                 result.extractions.update(new_result.extractions)
+                 result.library = new_result.library
+ 
+         # If no new recordings, just re-run deduplication
+         if not recordings and result.extractions:
+             extraction_results = list(result.extractions.values())
+             result.library = self.run_stage3(extraction_results, existing_library)
+ 
+         result.recordings_processed = len(result.transcripts)
+         result.total_episodes_extracted = sum(
+             len(ext.episodes) for ext in result.extractions.values()
+         )
+         if result.library:
+             result.unique_episodes = result.library.unique_episode_count
+ 
+         return result
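
For reference, the per-stage methods and resume() shown above compose as follows. This is a hedged sketch based only on the code in this diff; the recording paths are illustrative, and it assumes the *_transcript.json / *_episodes.json files that run() writes to its output directory:

    from pathlib import Path
    from openadapt_ml.segmentation.pipeline import SegmentationPipeline

    pipeline = SegmentationPipeline()

    # Run stages individually to inspect intermediate outputs.
    transcript = pipeline.run_stage1("recording1/")   # -> ActionTranscript
    extraction = pipeline.run_stage2(transcript)      # -> EpisodeExtractionResult
    library = pipeline.run_stage3([extraction])       # -> EpisodeLibrary

    # Or pick up an interrupted run: cached transcripts and extractions are
    # reloaded from the output directory, unprocessed recordings are run,
    # and deduplication merges into the existing library.
    result = pipeline.resume(Path("segments/"), recordings=["recording3/"])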