openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
"""CLI commands for workflow segmentation.
|
|
2
|
+
|
|
3
|
+
This module provides command-line interface for the segmentation pipeline.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.group()
def segment():
    """Workflow segmentation commands."""
    # Container group only: each subcommand below registers itself through
    # ``@segment.command(...)``. The docstring above is what click prints as
    # the group's ``--help`` text, so it is user-facing output.
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@segment.command("describe")
@click.option(
    "--recording", "-r", required=True, multiple=True, help="Recording to describe"
)
@click.option("--model", "-m", default="gemini-2.0-flash", help="VLM model")
@click.option("--batch-size", "-b", default=10, help="Frames per API call")
@click.option("--output", "-o", help="Output file for transcript")
@click.option(
    "--format",
    "-f",
    type=click.Choice(["text", "json"]),
    default="text",
    help="Output format",
)
@click.option("--no-cache", is_flag=True, help="Disable caching")
@click.option("--verbose", "-v", is_flag=True, help="Show detailed progress")
def describe(recording, model, batch_size, output, format, no_cache, verbose):
    """Generate frame descriptions for a recording (Stage 1)."""
    # NOTE: ``format`` shadows the builtin, but the name is dictated by the
    # ``--format`` option and is part of the callback's signature.
    #
    # For every ``--recording`` value a transcript is produced by
    # ``FrameDescriber.describe_recording`` and either echoed to stdout or
    # written to ``--output``. When several recordings are given, each one is
    # written next to ``--output`` as ``<recording stem>_<output name>``.
    from openadapt_ml.segmentation.frame_describer import FrameDescriber

    if verbose:
        logging.basicConfig(level=logging.INFO)

    describer = FrameDescriber(
        model=model,
        batch_size=batch_size,
        cache_enabled=not no_cache,
    )

    base_output = Path(output) if output else None
    several_recordings = len(recording) > 1

    for rec_path in recording:
        click.echo(f"Processing: {rec_path}")
        transcript = describer.describe_recording(rec_path)

        # Render once; the destination (file vs. stdout) is decided below.
        if format == "json":
            rendered = transcript.model_dump_json(indent=2)
        else:
            rendered = transcript.to_transcript_text()

        if base_output is None:
            click.echo(rendered)
            continue

        output_path = base_output
        if several_recordings:
            output_path = (
                base_output.parent / f"{Path(rec_path).stem}_{base_output.name}"
            )

        # Create the target directory and write UTF-8 explicitly: the
        # transcript holds model-generated text, and the platform default
        # encoding (e.g. cp1252 on Windows) cannot encode arbitrary characters.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(rendered, encoding="utf-8")
        click.echo(f" Saved to: {output_path}")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@segment.command("extract")
@click.option("--recording", "-r", help="Recording to segment")
@click.option("--transcript", "-t", help="Existing transcript file")
@click.option("--model", "-m", default="gpt-4o", help="LLM model")
@click.option("--hierarchical", "-h", is_flag=True, help="Extract nested segments")
@click.option("--no-few-shot", is_flag=True, help="Disable few-shot examples")
@click.option("--min-duration", default=2.0, help="Minimum segment length (seconds)")
@click.option("--max-duration", default=300.0, help="Maximum segment length (seconds)")
@click.option("--output", "-o", help="Output file for segments")
@click.option("--verbose", "-v", is_flag=True, help="Show detailed progress")
def extract(
    recording,
    transcript,
    model,
    hierarchical,
    no_few_shot,
    min_duration,
    max_duration,
    output,
    verbose,
):
    """Extract workflow segments from a recording (Stage 2)."""
    # Input is either an existing transcript JSON file (``--transcript``) or a
    # recording that is first described with a default ``FrameDescriber``.
    # When both are supplied the transcript file takes precedence.
    # Raises ``click.UsageError`` when neither is supplied.
    from openadapt_ml.segmentation.frame_describer import FrameDescriber
    from openadapt_ml.segmentation.segment_extractor import SegmentExtractor
    from openadapt_ml.segmentation.schemas import ActionTranscript

    if verbose:
        logging.basicConfig(level=logging.INFO)

    if not recording and not transcript:
        raise click.UsageError("Specify either --recording or --transcript")

    # Load or generate transcript
    if transcript:
        # JSON is read as UTF-8 explicitly rather than with the locale default.
        data = json.loads(Path(transcript).read_text(encoding="utf-8"))
        action_transcript = ActionTranscript.model_validate(data)
    else:
        describer = FrameDescriber()
        action_transcript = describer.describe_recording(recording)

    # Extract segments
    extractor = SegmentExtractor(
        model=model,
        use_few_shot=not no_few_shot,
        hierarchical=hierarchical,
        min_segment_duration=min_duration,
        max_segment_duration=max_duration,
    )

    result = extractor.extract_segments(action_transcript)

    # Output
    if output:
        Path(output).write_text(result.model_dump_json(indent=2), encoding="utf-8")
        click.echo(f"Saved to: {output}")
        return

    max_preview = 80
    click.echo(f"\nFound {len(result.episodes)} episodes:")
    for ep in result.episodes:
        click.echo(
            f" - {ep.name} ({ep.start_time_formatted} - {ep.end_time_formatted})"
        )
        # Only mark the description as truncated when it really was cut.
        preview = ep.description[:max_preview]
        if len(ep.description) > max_preview:
            preview += "..."
        click.echo(f" {preview}")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@segment.command("deduplicate")
@click.argument("segments", nargs=-1)
@click.option("--input-dir", "-i", help="Directory with segment files")
@click.option("--threshold", "-t", default=0.85, help="Similarity threshold (0-1)")
@click.option(
    "--embedding-model", default="text-embedding-3-large", help="Embedding model"
)
@click.option(
    "--merge-strategy",
    type=click.Choice(["centroid", "longest", "first"]),
    default="centroid",
    help="Merge strategy",
)
@click.option("--existing", "-e", help="Existing library to merge with")
@click.option("--output", "-o", required=True, help="Output library file")
@click.option(
    "--local-embeddings", is_flag=True, help="Use local HuggingFace embeddings"
)
@click.option("--verbose", "-v", is_flag=True, help="Show clustering details")
def deduplicate(
    segments,
    input_dir,
    threshold,
    embedding_model,
    merge_strategy,
    existing,
    output,
    local_embeddings,
    verbose,
):
    """Deduplicate segments across recordings (Stage 3)."""
    # Segment files come from the positional arguments plus every
    # ``*_episodes.json`` file found directly inside ``--input-dir``.
    # Raises ``click.UsageError`` when no segment file is found at all.
    from openadapt_ml.segmentation.deduplicator import WorkflowDeduplicator
    from openadapt_ml.segmentation.schemas import (
        EpisodeExtractionResult,
        EpisodeLibrary,
    )

    if verbose:
        logging.basicConfig(level=logging.INFO)

    # Collect segment files
    segment_files = list(segments)
    if input_dir:
        # ``glob`` yields entries in filesystem order; sort so the input order
        # handed to the deduplicator is reproducible between runs and machines.
        segment_files.extend(sorted(Path(input_dir).glob("*_episodes.json")))

    if not segment_files:
        raise click.UsageError("No segment files specified")

    # Load extraction results
    extraction_results = []
    for seg_file in segment_files:
        data = json.loads(Path(seg_file).read_text(encoding="utf-8"))
        result = EpisodeExtractionResult.model_validate(data)
        extraction_results.append(result)
        click.echo(f"Loaded: {seg_file} ({len(result.episodes)} episodes)")

    # Load existing library
    existing_library = None
    if existing:
        data = json.loads(Path(existing).read_text(encoding="utf-8"))
        existing_library = EpisodeLibrary.model_validate(data)
        click.echo(
            f"Merging with existing library ({existing_library.unique_episode_count} workflows)"
        )

    # Deduplicate
    dedup = WorkflowDeduplicator(
        threshold=threshold,
        embedding_model=embedding_model,
        merge_strategy=merge_strategy,
        use_local_embeddings=local_embeddings,
    )

    library = dedup.deduplicate(extraction_results, existing_library)

    # Save (UTF-8 explicitly; create the target directory if needed)
    output_path = Path(output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(library.model_dump_json(indent=2), encoding="utf-8")

    click.echo("\nResults:")
    click.echo(f" Total episodes: {library.total_episodes_extracted}")
    click.echo(f" Unique workflows: {library.unique_episode_count}")
    click.echo(f" Deduplication ratio: {library.deduplication_ratio:.1%}")
    click.echo(f"\nSaved to: {output}")
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@segment.command("pipeline")
@click.argument("recordings", nargs=-1)
@click.option("--vlm-model", default="gemini-2.0-flash", help="VLM for Stage 1")
@click.option("--llm-model", default="gpt-4o", help="LLM for Stage 2")
@click.option("--threshold", default=0.85, help="Dedup threshold for Stage 3")
@click.option("--output", "-o", required=True, help="Output directory or library file")
@click.option("--save-intermediate", is_flag=True, help="Save Stage 1/2 outputs")
@click.option("--resume", help="Resume from checkpoint directory")
@click.option("--existing", "-e", help="Existing library to merge with")
@click.option("--local-embeddings", is_flag=True, help="Use local embeddings")
@click.option("--verbose", "-v", is_flag=True, help="Detailed progress")
def pipeline(
    recordings,
    vlm_model,
    llm_model,
    threshold,
    output,
    save_intermediate,
    resume,
    existing,
    local_embeddings,
    verbose,
):
    """Run complete segmentation pipeline."""
    # ``--output`` ending in ``.json`` names the library file itself; any other
    # value is treated as a directory that receives ``episode_library.json``.
    # Raises ``click.UsageError`` when neither recordings nor ``--resume`` are
    # given.
    from openadapt_ml.segmentation.pipeline import SegmentationPipeline, PipelineConfig
    from openadapt_ml.segmentation.schemas import EpisodeLibrary

    if verbose:
        logging.basicConfig(level=logging.INFO)

    config = PipelineConfig(
        vlm_model=vlm_model,
        llm_model=llm_model,
        similarity_threshold=threshold,
        use_local_embeddings=local_embeddings,
        verbose=verbose,
    )

    # Named ``runner`` so the local does not shadow this command function.
    runner = SegmentationPipeline(config)

    # Determine output directory
    output_path = Path(output)
    if output_path.suffix == ".json":
        output_dir = output_path.parent if save_intermediate else None
        library_path = output_path
    else:
        output_dir = output_path
        library_path = output_path / "episode_library.json"

    # Load existing library
    existing_library = None
    if existing:
        data = json.loads(Path(existing).read_text(encoding="utf-8"))
        existing_library = EpisodeLibrary.model_validate(data)

    # Run or resume
    if resume:
        result = runner.resume(resume, list(recordings) if recordings else None)
    else:
        if not recordings:
            raise click.UsageError("Specify recordings to process")

        progress_callback = None
        if verbose:

            def progress_callback(stage, cur, tot):
                click.echo(f" [{stage}] {cur}/{tot}")

        result = runner.run(
            list(recordings),
            output_dir=output_dir,
            existing_library=existing_library,
            progress_callback=progress_callback,
        )

    # Always persist the final library at ``library_path``. Previously the
    # write was skipped whenever --save-intermediate was set, on the assumption
    # that the pipeline had already saved it. That left an explicit ``.json``
    # target (or a resumed run) without the file that the summary below
    # reports as saved.
    library_saved = False
    if result.library:
        library_path.parent.mkdir(parents=True, exist_ok=True)
        library_path.write_text(
            result.library.model_dump_json(indent=2), encoding="utf-8"
        )
        library_saved = True

    click.echo("\nPipeline complete:")
    click.echo(f" Recordings processed: {result.recordings_processed}")
    click.echo(f" Total episodes: {result.total_episodes_extracted}")
    click.echo(f" Unique workflows: {result.unique_episodes}")
    click.echo(f" Processing time: {result.processing_time_seconds:.1f}s")
    if library_saved:
        click.echo(f"\nLibrary saved to: {library_path}")
    else:
        # Do not claim a file was written when the run produced no library.
        click.echo("\nNo library was produced; nothing was saved.")
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
@segment.command("list")
@click.option("--library", "-l", required=True, help="Library file to inspect")
@click.option("--details", "-d", is_flag=True, help="Show segment details")
@click.option("--app", "-a", help="Filter by application")
def list_segments(library, details, app):
    """List existing segments and libraries."""
    from openadapt_ml.segmentation.schemas import EpisodeLibrary

    data = json.loads(Path(library).read_text(encoding="utf-8"))
    lib = EpisodeLibrary.model_validate(data)

    click.echo(f"Episode Library: {library}")
    click.echo(f" Created: {lib.created_at}")
    click.echo(f" Recordings: {lib.total_recordings_processed}")
    click.echo(f" Total episodes: {lib.total_episodes_extracted}")
    click.echo(f" Unique workflows: {lib.unique_episode_count}")
    click.echo(f" Dedup ratio: {lib.deduplication_ratio:.1%}")

    if app:
        # Canonical episodes carry no application field here, so the filter
        # cannot be applied. Say so instead of silently listing everything.
        click.echo(
            "Warning: --app filtering is not supported yet; showing all workflows.",
            err=True,
        )

    max_items = 3
    max_description = 100

    click.echo("\nWorkflows:")
    for ep in lib.episodes:
        recordings_preview = ", ".join(ep.source_recordings[:max_items])
        if len(ep.source_recordings) > max_items:
            recordings_preview += "..."

        click.echo(f"\n {ep.canonical_name}")
        click.echo(f" Occurrences: {ep.occurrence_count}")
        click.echo(f" Recordings: {recordings_preview}")

        if not details:
            continue

        # Append an ellipsis only when the text or list really was shortened.
        description_preview = ep.canonical_description[:max_description]
        if len(ep.canonical_description) > max_description:
            description_preview += "..."

        steps_preview = ", ".join(ep.canonical_steps[:max_items])
        if len(ep.canonical_steps) > max_items:
            steps_preview += "..."

        click.echo(f" Description: {description_preview}")
        click.echo(f" Steps: {steps_preview}")
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
@segment.command("annotate")
@click.option("--episodes", "-e", required=True, help="Episodes JSON file from extract")
@click.option("--recording", "-r", required=True, help="Recording directory path")
@click.option(
    "--model", "-m", default="gemini-2.0-flash", help="VLM model for annotation"
)
@click.option("--lookahead", default=10, help="Frames to analyze after episode end")
@click.option("--output", "-o", required=True, help="Output annotated library file")
@click.option("--verbose", "-v", is_flag=True, help="Show detailed progress")
def annotate(episodes, recording, model, lookahead, output, verbose):
    """Annotate extracted episodes with VLM analysis.

    This command analyzes episodes to determine if they are suitable
    for training (gold) by examining the episode frames and frames
    after the episode ends to detect failures.
    """
    from openadapt_ml.segmentation.annotator import EpisodeAnnotator
    from openadapt_ml.segmentation.schemas import EpisodeExtractionResult

    if verbose:
        logging.basicConfig(level=logging.INFO)

    # Load episodes (JSON is read as UTF-8 rather than the locale default)
    data = json.loads(Path(episodes).read_text(encoding="utf-8"))
    extraction_result = EpisodeExtractionResult.model_validate(data)

    click.echo(f"Loaded {len(extraction_result.episodes)} episodes from {episodes}")
    click.echo(f"Using VLM: {model}")

    # Create annotator
    annotator = EpisodeAnnotator(
        model=model,
        lookahead_frames=lookahead,
    )

    # Annotate; the callback is always passed and only prints when --verbose.
    def progress(current, total):
        if verbose:
            click.echo(f" Progress: {current}/{total}")

    library = annotator.annotate_extraction_result(
        extraction_result=extraction_result,
        recording_path=recording,
        progress_callback=progress,
    )

    # Save: create the target directory and write UTF-8 explicitly, since the
    # annotations hold model-generated text.
    output_path = Path(output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(library.model_dump_json(indent=2), encoding="utf-8")

    pending_review = library.total_episodes - library.verified_count

    click.echo("\nAnnotation complete:")
    click.echo(f" Total episodes: {library.total_episodes}")
    click.echo(f" Recommended as gold: {library.gold_count}")
    click.echo(f" Pending human review: {pending_review}")
    click.echo(f"\nSaved to: {output}")
    click.echo("\nNext step: Run 'segment review' to verify annotations")
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
@segment.command("review")
@click.option("--library", "-l", required=True, help="Annotated library file")
@click.option("--recording", "-r", help="Recording directory (for viewing frames)")
@click.option("--reviewer", default="human", help="Reviewer name/ID")
@click.option(
    "--auto-approve-high-confidence", is_flag=True, help="Auto-approve confidence > 0.9"
)
@click.option("--output", "-o", help="Output file (defaults to overwriting input)")
def review(library, recording, reviewer, auto_approve_high_confidence, output):
    """Interactive review of annotated episodes.

    This command presents each annotation for human verification.
    Reviewers can approve, reject, or edit each annotation.
    """
    # NOTE: ``recording`` is accepted for CLI compatibility but is not used by
    # this function.
    from openadapt_ml.segmentation.schemas import AnnotatedEpisodeLibrary
    from openadapt_ml.segmentation.annotator import verify_annotation

    # Load library
    data = json.loads(Path(library).read_text(encoding="utf-8"))
    lib = AnnotatedEpisodeLibrary.model_validate(data)

    # Per the --output help text, results overwrite the input file by default.
    output_path = Path(output) if output else Path(library)

    click.echo(f"Loaded annotated library: {library}")
    click.echo(f" Total episodes: {lib.total_episodes}")
    click.echo(f" Already verified: {lib.verified_count}")
    click.echo(f" Pending review: {lib.total_episodes - lib.verified_count}")

    # Auto-approve high confidence if requested
    auto_approved = 0
    if auto_approve_high_confidence:
        updated_annotations = []
        for ann in lib.annotations:
            if not ann.human_verified and ann.confidence > 0.9 and ann.is_gold:
                ann = verify_annotation(
                    ann,
                    is_gold=True,
                    notes="Auto-approved (confidence > 0.9)",
                    verified_by=f"{reviewer}_auto",
                )
                auto_approved += 1
            updated_annotations.append(ann)
        lib.annotations = updated_annotations
        click.echo(f"\nAuto-approved {auto_approved} high-confidence gold episodes")

    # Get pending reviews
    pending = lib.get_pending_review()

    if not pending:
        click.echo("\nNo episodes pending review!")
        # Persist when an explicit output was requested OR when auto-approval
        # changed something. Previously auto-approvals were dropped here
        # unless --output was passed.
        if output or auto_approved:
            output_path.write_text(lib.model_dump_json(indent=2), encoding="utf-8")
            click.echo(f"Saved to: {output_path}")
        return

    click.echo(f"\n{len(pending)} episodes to review:")
    click.echo("Commands: [a]pprove, [r]eject, [s]kip, [n]otes, [q]uit\n")

    # Interactive review
    reviewed = 0
    annotation_map = {a.annotation_id: a for a in lib.annotations}

    for episode, annotation in pending:
        click.echo("-" * 60)
        click.echo(f"Episode: {episode.name}")
        click.echo(f"Description: {episode.description}")
        click.echo(
            f"Time: {episode.start_time_formatted} - {episode.end_time_formatted}"
        )
        click.echo(f"Application: {episode.application}")
        click.echo(f"Steps: {', '.join(episode.step_summaries[:5])}")
        click.echo()
        click.echo("VLM Assessment:")
        click.echo(f" Is Gold: {annotation.is_gold}")
        click.echo(f" Confidence: {annotation.confidence:.2f}")
        if annotation.failure_signals:
            click.echo(f" Failure Signals: {', '.join(annotation.failure_signals)}")
        if annotation.exclusion_reason:
            click.echo(f" Exclusion Reason: {annotation.exclusion_reason}")
        click.echo()

        # Keep prompting for this episode until a terminal action is chosen;
        # "n" only attaches notes and then asks again.
        while True:
            choice = click.prompt(
                "Action [a/r/s/n/q]",
                type=click.Choice(["a", "r", "s", "n", "q"]),
                default="s",
            )

            if choice == "a":
                notes = click.prompt("Notes (optional)", default="", show_default=False)
                new_ann = verify_annotation(
                    annotation,
                    is_gold=True,
                    notes=notes if notes else None,
                    verified_by=reviewer,
                )
                annotation_map[annotation.annotation_id] = new_ann
                click.echo(" Approved as gold")
                reviewed += 1
                break

            elif choice == "r":
                reason = click.prompt("Rejection reason", default="Manual rejection")
                new_ann = verify_annotation(
                    annotation,
                    is_gold=False,
                    notes=reason,
                    verified_by=reviewer,
                )
                annotation_map[annotation.annotation_id] = new_ann
                click.echo(" Rejected")
                reviewed += 1
                break

            elif choice == "s":
                click.echo(" Skipped")
                break

            elif choice == "n":
                notes = click.prompt("Add notes")
                annotation.notes = notes
                annotation_map[annotation.annotation_id] = annotation
                click.echo(f" Notes added: {notes}")
                # Continue to ask for a/r/s

            elif choice == "q":
                click.echo("\nQuitting review...")
                break

        if choice == "q":
            # Leave the outer loop too; decisions made so far are saved below.
            break

    # Update library with new annotations
    lib.annotations = list(annotation_map.values())

    # Save
    output_path.write_text(lib.model_dump_json(indent=2), encoding="utf-8")

    click.echo("\nReview session complete:")
    click.echo(f" Reviewed: {reviewed}")
    click.echo(f" Total verified: {lib.verified_count}")
    click.echo(f" Gold episodes: {lib.gold_count}")
    click.echo(f" Export-ready: {lib.export_ready_count}")
    click.echo(f"\nSaved to: {output_path}")
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
@segment.command("export-gold")
@click.argument("library")
@click.option(
    "--format",
    "-f",
    type=click.Choice(["json", "jsonl", "hf"]),
    default="jsonl",
    help="Export format",
)
@click.option("--output", "-o", required=True, help="Output file/directory")
@click.option("--recording", "-r", help="Recording directory (for screenshots)")
@click.option(
    "--include-screenshots", is_flag=True, help="Include screenshots in export"
)
def export_gold(library, format, output, recording, include_screenshots):
    """Export verified gold episodes for fine-tuning.

    Only exports episodes where is_gold=True AND human_verified=True.
    """
    # NOTE: ``format`` shadows the builtin, but the name is dictated by the
    # ``--format`` option and is forwarded as-is to ``export_gold_episodes``.
    from openadapt_ml.segmentation.schemas import AnnotatedEpisodeLibrary
    from openadapt_ml.segmentation.annotator import export_gold_episodes

    # Load library. Read as UTF-8 explicitly so the result does not depend on
    # the platform's default locale encoding.
    data = json.loads(Path(library).read_text(encoding="utf-8"))
    lib = AnnotatedEpisodeLibrary.model_validate(data)

    click.echo(f"Loaded library: {library}")
    click.echo(f" Export-ready episodes: {lib.export_ready_count}")

    # Nothing verified yet: point the user at the review step and stop.
    if lib.export_ready_count == 0:
        click.echo("\nNo episodes ready for export!")
        click.echo("Run 'segment review' first to verify annotations.")
        return

    # Export
    count = export_gold_episodes(
        library=lib,
        output_path=output,
        recording_path=recording,
        format=format,
        include_screenshots=include_screenshots,
    )

    click.echo(f"\nExported {count} gold episodes to: {output}")
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
@segment.command("export")
@click.argument("library")
@click.option(
    "--format",
    "-f",
    type=click.Choice(["csv", "jsonl", "html"]),
    default="jsonl",
    help="Export format",
)
@click.option("--output", "-o", required=True, help="Output file")
@click.option("--workflow", "-w", help="Export specific workflow")
def export(library, format, output, workflow):
    """Export segments to various formats."""
    # ``--workflow`` is a case-insensitive substring match on the canonical
    # workflow name. All formats are written as UTF-8.
    import csv
    from html import escape

    from openadapt_ml.segmentation.schemas import EpisodeLibrary

    data = json.loads(Path(library).read_text(encoding="utf-8"))
    lib = EpisodeLibrary.model_validate(data)

    # Filter if specified
    episodes = lib.episodes
    if workflow:
        needle = workflow.lower()
        episodes = [e for e in episodes if needle in e.canonical_name.lower()]

    output_path = Path(output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if format == "csv":
        # newline="" lets the csv module control line endings itself.
        with open(output_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(
                ["name", "description", "steps", "occurrences", "recordings"]
            )
            for ep in episodes:
                writer.writerow(
                    [
                        ep.canonical_name,
                        ep.canonical_description,
                        "; ".join(ep.canonical_steps),
                        ep.occurrence_count,
                        ", ".join(ep.source_recordings),
                    ]
                )

    elif format == "jsonl":
        with open(output_path, "w", encoding="utf-8") as f:
            for ep in episodes:
                f.write(ep.model_dump_json() + "\n")

    elif format == "html":
        # Names, descriptions and steps are model-generated free text; escape
        # them so characters like "<" or "&" cannot break or inject markup.
        page = ['<html><head><meta charset="utf-8"><style>']
        page.append("body { font-family: sans-serif; margin: 2em; }")
        page.append(
            ".workflow { border: 1px solid #ccc; padding: 1em; margin: 1em 0; }"
        )
        page.append(".steps { margin-left: 2em; }")
        page.append("</style></head><body>")
        page.append("<h1>Episode Library</h1>")
        page.append(f"<p>{len(episodes)} workflows</p>")

        for ep in episodes:
            page.append('<div class="workflow">')
            page.append(f"<h2>{escape(ep.canonical_name)}</h2>")
            page.append(f"<p>{escape(ep.canonical_description)}</p>")
            page.append(f"<p><strong>Occurrences:</strong> {ep.occurrence_count}</p>")
            page.append('<div class="steps"><strong>Steps:</strong><ol>')
            page.extend(f"<li>{escape(step)}</li>" for step in ep.canonical_steps)
            page.append("</ol></div></div>")

        page.append("</body></html>")
        output_path.write_text("\n".join(page), encoding="utf-8")

    click.echo(f"Exported {len(episodes)} workflows to: {output_path}")
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
if __name__ == "__main__":
    # Running the module directly invokes the click group defined above.
    segment()
|