openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -115
  8. openadapt_ml/benchmarks/agent.py +265 -421
  9. openadapt_ml/benchmarks/azure.py +28 -19
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1722 -4847
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +22 -5
  14. openadapt_ml/benchmarks/vm_monitor.py +530 -29
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
  16. openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
  17. openadapt_ml/cloud/azure_inference.py +3 -5
  18. openadapt_ml/cloud/lambda_labs.py +722 -307
  19. openadapt_ml/cloud/local.py +2038 -487
  20. openadapt_ml/cloud/ssh_tunnel.py +68 -26
  21. openadapt_ml/datasets/next_action.py +40 -30
  22. openadapt_ml/evals/grounding.py +8 -3
  23. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  24. openadapt_ml/evals/trajectory_matching.py +41 -26
  25. openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
  26. openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
  27. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  28. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  29. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  30. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  31. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  32. openadapt_ml/experiments/waa_demo/runner.py +29 -14
  33. openadapt_ml/export/parquet.py +36 -24
  34. openadapt_ml/grounding/detector.py +18 -14
  35. openadapt_ml/ingest/__init__.py +8 -6
  36. openadapt_ml/ingest/capture.py +25 -22
  37. openadapt_ml/ingest/loader.py +7 -4
  38. openadapt_ml/ingest/synthetic.py +189 -100
  39. openadapt_ml/models/api_adapter.py +14 -4
  40. openadapt_ml/models/base_adapter.py +10 -2
  41. openadapt_ml/models/providers/__init__.py +288 -0
  42. openadapt_ml/models/providers/anthropic.py +266 -0
  43. openadapt_ml/models/providers/base.py +299 -0
  44. openadapt_ml/models/providers/google.py +376 -0
  45. openadapt_ml/models/providers/openai.py +342 -0
  46. openadapt_ml/models/qwen_vl.py +46 -19
  47. openadapt_ml/perception/__init__.py +35 -0
  48. openadapt_ml/perception/integration.py +399 -0
  49. openadapt_ml/retrieval/demo_retriever.py +50 -24
  50. openadapt_ml/retrieval/embeddings.py +9 -8
  51. openadapt_ml/retrieval/retriever.py +3 -1
  52. openadapt_ml/runtime/__init__.py +50 -0
  53. openadapt_ml/runtime/policy.py +18 -5
  54. openadapt_ml/runtime/safety_gate.py +471 -0
  55. openadapt_ml/schema/__init__.py +9 -0
  56. openadapt_ml/schema/converters.py +74 -27
  57. openadapt_ml/schema/episode.py +31 -18
  58. openadapt_ml/scripts/capture_screenshots.py +530 -0
  59. openadapt_ml/scripts/compare.py +85 -54
  60. openadapt_ml/scripts/demo_policy.py +4 -1
  61. openadapt_ml/scripts/eval_policy.py +15 -9
  62. openadapt_ml/scripts/make_gif.py +1 -1
  63. openadapt_ml/scripts/prepare_synthetic.py +3 -1
  64. openadapt_ml/scripts/train.py +21 -9
  65. openadapt_ml/segmentation/README.md +920 -0
  66. openadapt_ml/segmentation/__init__.py +97 -0
  67. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  68. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  69. openadapt_ml/segmentation/annotator.py +610 -0
  70. openadapt_ml/segmentation/cache.py +290 -0
  71. openadapt_ml/segmentation/cli.py +674 -0
  72. openadapt_ml/segmentation/deduplicator.py +656 -0
  73. openadapt_ml/segmentation/frame_describer.py +788 -0
  74. openadapt_ml/segmentation/pipeline.py +340 -0
  75. openadapt_ml/segmentation/schemas.py +622 -0
  76. openadapt_ml/segmentation/segment_extractor.py +634 -0
  77. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  78. openadapt_ml/training/benchmark_viewer.py +52 -41
  79. openadapt_ml/training/shared_ui.py +7 -7
  80. openadapt_ml/training/stub_provider.py +57 -35
  81. openadapt_ml/training/trainer.py +143 -86
  82. openadapt_ml/training/trl_trainer.py +70 -21
  83. openadapt_ml/training/viewer.py +323 -108
  84. openadapt_ml/training/viewer_components.py +180 -0
  85. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
  86. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  87. openadapt_ml/benchmarks/base.py +0 -366
  88. openadapt_ml/benchmarks/data_collection.py +0 -432
  89. openadapt_ml/benchmarks/live_tracker.py +0 -180
  90. openadapt_ml/benchmarks/runner.py +0 -418
  91. openadapt_ml/benchmarks/waa.py +0 -761
  92. openadapt_ml/benchmarks/waa_live.py +0 -619
  93. openadapt_ml-0.2.0.dist-info/RECORD +0 -86
  94. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  95. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,674 @@
+"""CLI commands for workflow segmentation.
+
+This module provides command-line interface for the segmentation pipeline.
+"""
+
+import json
+import logging
+from pathlib import Path
+
+import click
+
+logger = logging.getLogger(__name__)
+
+
+@click.group()
+def segment():
+    """Workflow segmentation commands."""
+    pass
+
+
+@segment.command("describe")
+@click.option(
+    "--recording", "-r", required=True, multiple=True, help="Recording to describe"
+)
+@click.option("--model", "-m", default="gemini-2.0-flash", help="VLM model")
+@click.option("--batch-size", "-b", default=10, help="Frames per API call")
+@click.option("--output", "-o", help="Output file for transcript")
+@click.option(
+    "--format",
+    "-f",
+    type=click.Choice(["text", "json"]),
+    default="text",
+    help="Output format",
+)
+@click.option("--no-cache", is_flag=True, help="Disable caching")
+@click.option("--verbose", "-v", is_flag=True, help="Show detailed progress")
+def describe(recording, model, batch_size, output, format, no_cache, verbose):
+    """Generate frame descriptions for a recording (Stage 1)."""
+    from openadapt_ml.segmentation.frame_describer import FrameDescriber
+
+    if verbose:
+        logging.basicConfig(level=logging.INFO)
+
+    describer = FrameDescriber(
+        model=model,
+        batch_size=batch_size,
+        cache_enabled=not no_cache,
+    )
+
+    for rec_path in recording:
+        click.echo(f"Processing: {rec_path}")
+        transcript = describer.describe_recording(rec_path)
+
+        if output:
+            output_path = Path(output)
+            if len(recording) > 1:
+                output_path = (
+                    output_path.parent / f"{Path(rec_path).stem}_{output_path.name}"
+                )
+
+            if format == "json":
+                output_path.write_text(transcript.model_dump_json(indent=2))
+            else:
+                output_path.write_text(transcript.to_transcript_text())
+            click.echo(f" Saved to: {output_path}")
+        else:
+            if format == "json":
+                click.echo(transcript.model_dump_json(indent=2))
+            else:
+                click.echo(transcript.to_transcript_text())
+
+
+@segment.command("extract")
+@click.option("--recording", "-r", help="Recording to segment")
+@click.option("--transcript", "-t", help="Existing transcript file")
+@click.option("--model", "-m", default="gpt-4o", help="LLM model")
+@click.option("--hierarchical", "-h", is_flag=True, help="Extract nested segments")
+@click.option("--no-few-shot", is_flag=True, help="Disable few-shot examples")
+@click.option("--min-duration", default=2.0, help="Minimum segment length (seconds)")
+@click.option("--max-duration", default=300.0, help="Maximum segment length (seconds)")
+@click.option("--output", "-o", help="Output file for segments")
+@click.option("--verbose", "-v", is_flag=True, help="Show detailed progress")
+def extract(
+    recording,
+    transcript,
+    model,
+    hierarchical,
+    no_few_shot,
+    min_duration,
+    max_duration,
+    output,
+    verbose,
+):
+    """Extract workflow segments from a recording (Stage 2)."""
+    from openadapt_ml.segmentation.frame_describer import FrameDescriber
+    from openadapt_ml.segmentation.segment_extractor import SegmentExtractor
+    from openadapt_ml.segmentation.schemas import ActionTranscript
+
+    if verbose:
+        logging.basicConfig(level=logging.INFO)
+
+    if not recording and not transcript:
+        raise click.UsageError("Specify either --recording or --transcript")
+
+    # Load or generate transcript
+    if transcript:
+        data = json.loads(Path(transcript).read_text())
+        action_transcript = ActionTranscript.model_validate(data)
+    else:
+        describer = FrameDescriber()
+        action_transcript = describer.describe_recording(recording)
+
+    # Extract segments
+    extractor = SegmentExtractor(
+        model=model,
+        use_few_shot=not no_few_shot,
+        hierarchical=hierarchical,
+        min_segment_duration=min_duration,
+        max_segment_duration=max_duration,
+    )
+
+    result = extractor.extract_segments(action_transcript)
+
+    # Output
+    if output:
+        Path(output).write_text(result.model_dump_json(indent=2))
+        click.echo(f"Saved to: {output}")
+    else:
+        click.echo(f"\nFound {len(result.episodes)} episodes:")
+        for ep in result.episodes:
+            click.echo(
+                f" - {ep.name} ({ep.start_time_formatted} - {ep.end_time_formatted})"
+            )
+            click.echo(f" {ep.description[:80]}...")
+
+
+@segment.command("deduplicate")
+@click.argument("segments", nargs=-1)
+@click.option("--input-dir", "-i", help="Directory with segment files")
+@click.option("--threshold", "-t", default=0.85, help="Similarity threshold (0-1)")
+@click.option(
+    "--embedding-model", default="text-embedding-3-large", help="Embedding model"
+)
+@click.option(
+    "--merge-strategy",
+    type=click.Choice(["centroid", "longest", "first"]),
+    default="centroid",
+    help="Merge strategy",
+)
+@click.option("--existing", "-e", help="Existing library to merge with")
+@click.option("--output", "-o", required=True, help="Output library file")
+@click.option(
+    "--local-embeddings", is_flag=True, help="Use local HuggingFace embeddings"
+)
+@click.option("--verbose", "-v", is_flag=True, help="Show clustering details")
+def deduplicate(
+    segments,
+    input_dir,
+    threshold,
+    embedding_model,
+    merge_strategy,
+    existing,
+    output,
+    local_embeddings,
+    verbose,
+):
+    """Deduplicate segments across recordings (Stage 3)."""
+    from openadapt_ml.segmentation.deduplicator import WorkflowDeduplicator
+    from openadapt_ml.segmentation.schemas import (
+        EpisodeExtractionResult,
+        EpisodeLibrary,
+    )
+
+    if verbose:
+        logging.basicConfig(level=logging.INFO)
+
+    # Collect segment files
+    segment_files = list(segments)
+    if input_dir:
+        segment_files.extend(Path(input_dir).glob("*_episodes.json"))
+
+    if not segment_files:
+        raise click.UsageError("No segment files specified")
+
+    # Load extraction results
+    extraction_results = []
+    for seg_file in segment_files:
+        data = json.loads(Path(seg_file).read_text())
+        result = EpisodeExtractionResult.model_validate(data)
+        extraction_results.append(result)
+        click.echo(f"Loaded: {seg_file} ({len(result.episodes)} episodes)")
+
+    # Load existing library
+    existing_library = None
+    if existing:
+        data = json.loads(Path(existing).read_text())
+        existing_library = EpisodeLibrary.model_validate(data)
+        click.echo(
+            f"Merging with existing library ({existing_library.unique_episode_count} workflows)"
+        )
+
+    # Deduplicate
+    dedup = WorkflowDeduplicator(
+        threshold=threshold,
+        embedding_model=embedding_model,
+        merge_strategy=merge_strategy,
+        use_local_embeddings=local_embeddings,
+    )
+
+    library = dedup.deduplicate(extraction_results, existing_library)
+
+    # Save
+    Path(output).write_text(library.model_dump_json(indent=2))
+
+    click.echo("\nResults:")
+    click.echo(f" Total episodes: {library.total_episodes_extracted}")
+    click.echo(f" Unique workflows: {library.unique_episode_count}")
+    click.echo(f" Deduplication ratio: {library.deduplication_ratio:.1%}")
+    click.echo(f"\nSaved to: {output}")
+
+
+@segment.command("pipeline")
+@click.argument("recordings", nargs=-1)
+@click.option("--vlm-model", default="gemini-2.0-flash", help="VLM for Stage 1")
+@click.option("--llm-model", default="gpt-4o", help="LLM for Stage 2")
+@click.option("--threshold", default=0.85, help="Dedup threshold for Stage 3")
+@click.option("--output", "-o", required=True, help="Output directory or library file")
+@click.option("--save-intermediate", is_flag=True, help="Save Stage 1/2 outputs")
+@click.option("--resume", help="Resume from checkpoint directory")
+@click.option("--existing", "-e", help="Existing library to merge with")
+@click.option("--local-embeddings", is_flag=True, help="Use local embeddings")
+@click.option("--verbose", "-v", is_flag=True, help="Detailed progress")
+def pipeline(
+    recordings,
+    vlm_model,
+    llm_model,
+    threshold,
+    output,
+    save_intermediate,
+    resume,
+    existing,
+    local_embeddings,
+    verbose,
+):
+    """Run complete segmentation pipeline."""
+    from openadapt_ml.segmentation.pipeline import SegmentationPipeline, PipelineConfig
+    from openadapt_ml.segmentation.schemas import EpisodeLibrary
+
+    if verbose:
+        logging.basicConfig(level=logging.INFO)
+
+    config = PipelineConfig(
+        vlm_model=vlm_model,
+        llm_model=llm_model,
+        similarity_threshold=threshold,
+        use_local_embeddings=local_embeddings,
+        verbose=verbose,
+    )
+
+    pipeline = SegmentationPipeline(config)
+
+    # Determine output directory
+    output_path = Path(output)
+    if output_path.suffix == ".json":
+        output_dir = output_path.parent if save_intermediate else None
+        library_path = output_path
+    else:
+        output_dir = output_path
+        library_path = output_path / "episode_library.json"
+
+    # Load existing library
+    existing_library = None
+    if existing:
+        data = json.loads(Path(existing).read_text())
+        existing_library = EpisodeLibrary.model_validate(data)
+
+    # Run or resume
+    if resume:
+        result = pipeline.resume(resume, list(recordings) if recordings else None)
+    else:
+        if not recordings:
+            raise click.UsageError("Specify recordings to process")
+        result = pipeline.run(
+            list(recordings),
+            output_dir=output_dir,
+            existing_library=existing_library,
+            progress_callback=lambda stage, cur, tot: click.echo(
+                f" [{stage}] {cur}/{tot}"
+            )
+            if verbose
+            else None,
+        )
+
+    # Save final library if not already saved
+    if not save_intermediate and result.library:
+        library_path.parent.mkdir(parents=True, exist_ok=True)
+        library_path.write_text(result.library.model_dump_json(indent=2))
+
+    click.echo("\nPipeline complete:")
+    click.echo(f" Recordings processed: {result.recordings_processed}")
+    click.echo(f" Total episodes: {result.total_episodes_extracted}")
+    click.echo(f" Unique workflows: {result.unique_episodes}")
+    click.echo(f" Processing time: {result.processing_time_seconds:.1f}s")
+    click.echo(f"\nLibrary saved to: {library_path}")
+
+
+@segment.command("list")
+@click.option("--library", "-l", required=True, help="Library file to inspect")
+@click.option("--details", "-d", is_flag=True, help="Show segment details")
+@click.option("--app", "-a", help="Filter by application")
+def list_segments(library, details, app):
+    """List existing segments and libraries."""
+    from openadapt_ml.segmentation.schemas import EpisodeLibrary
+
+    data = json.loads(Path(library).read_text())
+    lib = EpisodeLibrary.model_validate(data)
+
+    click.echo(f"Episode Library: {library}")
+    click.echo(f" Created: {lib.created_at}")
+    click.echo(f" Recordings: {lib.total_recordings_processed}")
+    click.echo(f" Total episodes: {lib.total_episodes_extracted}")
+    click.echo(f" Unique workflows: {lib.unique_episode_count}")
+    click.echo(f" Dedup ratio: {lib.deduplication_ratio:.1%}")
+
+    click.echo("\nWorkflows:")
+    for ep in lib.episodes:
+        # Filter by app if specified
+        # Note: CanonicalEpisode doesn't have application field directly
+        # Would need to track this from source episodes
+
+        click.echo(f"\n {ep.canonical_name}")
+        click.echo(f" Occurrences: {ep.occurrence_count}")
+        click.echo(
+            f" Recordings: {', '.join(ep.source_recordings[:3])}{'...' if len(ep.source_recordings) > 3 else ''}"
+        )
+
+        if details:
+            click.echo(f" Description: {ep.canonical_description[:100]}...")
+            click.echo(
+                f" Steps: {', '.join(ep.canonical_steps[:3])}{'...' if len(ep.canonical_steps) > 3 else ''}"
+            )
+
+
+@segment.command("annotate")
+@click.option("--episodes", "-e", required=True, help="Episodes JSON file from extract")
+@click.option("--recording", "-r", required=True, help="Recording directory path")
+@click.option(
+    "--model", "-m", default="gemini-2.0-flash", help="VLM model for annotation"
+)
+@click.option("--lookahead", default=10, help="Frames to analyze after episode end")
+@click.option("--output", "-o", required=True, help="Output annotated library file")
+@click.option("--verbose", "-v", is_flag=True, help="Show detailed progress")
+def annotate(episodes, recording, model, lookahead, output, verbose):
+    """Annotate extracted episodes with VLM analysis.
+
+    This command analyzes episodes to determine if they are suitable
+    for training (gold) by examining the episode frames and frames
+    after the episode ends to detect failures.
+    """
+    from openadapt_ml.segmentation.annotator import EpisodeAnnotator
+    from openadapt_ml.segmentation.schemas import EpisodeExtractionResult
+
+    if verbose:
+        logging.basicConfig(level=logging.INFO)
+
+    # Load episodes
+    data = json.loads(Path(episodes).read_text())
+    extraction_result = EpisodeExtractionResult.model_validate(data)
+
+    click.echo(f"Loaded {len(extraction_result.episodes)} episodes from {episodes}")
+    click.echo(f"Using VLM: {model}")
+
+    # Create annotator
+    annotator = EpisodeAnnotator(
+        model=model,
+        lookahead_frames=lookahead,
+    )
+
+    # Annotate
+    def progress(current, total):
+        if verbose:
+            click.echo(f" Progress: {current}/{total}")
+
+    library = annotator.annotate_extraction_result(
+        extraction_result=extraction_result,
+        recording_path=recording,
+        progress_callback=progress,
+    )
+
+    # Save
+    Path(output).write_text(library.model_dump_json(indent=2))
+
+    click.echo("\nAnnotation complete:")
+    click.echo(f" Total episodes: {library.total_episodes}")
+    click.echo(f" Recommended as gold: {library.gold_count}")
+    click.echo(
+        f" Pending human review: {library.total_episodes - library.verified_count}"
+    )
+    click.echo(f"\nSaved to: {output}")
+    click.echo("\nNext step: Run 'segment review' to verify annotations")
+
+
+@segment.command("review")
+@click.option("--library", "-l", required=True, help="Annotated library file")
+@click.option("--recording", "-r", help="Recording directory (for viewing frames)")
+@click.option("--reviewer", default="human", help="Reviewer name/ID")
+@click.option(
+    "--auto-approve-high-confidence", is_flag=True, help="Auto-approve confidence > 0.9"
+)
+@click.option("--output", "-o", help="Output file (defaults to overwriting input)")
+def review(library, recording, reviewer, auto_approve_high_confidence, output):
+    """Interactive review of annotated episodes.
+
+    This command presents each annotation for human verification.
+    Reviewers can approve, reject, or edit each annotation.
+    """
+    from openadapt_ml.segmentation.schemas import AnnotatedEpisodeLibrary
+    from openadapt_ml.segmentation.annotator import verify_annotation
+
+    # Load library
+    data = json.loads(Path(library).read_text())
+    lib = AnnotatedEpisodeLibrary.model_validate(data)
+
+    click.echo(f"Loaded annotated library: {library}")
+    click.echo(f" Total episodes: {lib.total_episodes}")
+    click.echo(f" Already verified: {lib.verified_count}")
+    click.echo(f" Pending review: {lib.total_episodes - lib.verified_count}")
+
+    # Auto-approve high confidence if requested
+    if auto_approve_high_confidence:
+        auto_approved = 0
+        new_annotations = []
+        for ann in lib.annotations:
+            if not ann.human_verified and ann.confidence > 0.9 and ann.is_gold:
+                new_ann = verify_annotation(
+                    ann,
+                    is_gold=True,
+                    notes="Auto-approved (confidence > 0.9)",
+                    verified_by=f"{reviewer}_auto",
+                )
+                new_annotations.append(new_ann)
+                auto_approved += 1
+            else:
+                new_annotations.append(ann)
+        lib.annotations = new_annotations
+        click.echo(f"\nAuto-approved {auto_approved} high-confidence gold episodes")
+
+    # Get pending reviews
+    pending = lib.get_pending_review()
+
+    if not pending:
+        click.echo("\nNo episodes pending review!")
+        if output:
+            Path(output).write_text(lib.model_dump_json(indent=2))
+            click.echo(f"Saved to: {output}")
+        return
+
+    click.echo(f"\n{len(pending)} episodes to review:")
+    click.echo("Commands: [a]pprove, [r]eject, [s]kip, [n]otes, [q]uit\n")
+
+    # Interactive review
+    reviewed = 0
+    new_annotations = []
+    annotation_map = {a.annotation_id: a for a in lib.annotations}
+
+    for episode, annotation in pending:
+        click.echo("-" * 60)
+        click.echo(f"Episode: {episode.name}")
+        click.echo(f"Description: {episode.description}")
+        click.echo(
+            f"Time: {episode.start_time_formatted} - {episode.end_time_formatted}"
+        )
+        click.echo(f"Application: {episode.application}")
+        click.echo(f"Steps: {', '.join(episode.step_summaries[:5])}")
+        click.echo()
+        click.echo("VLM Assessment:")
+        click.echo(f" Is Gold: {annotation.is_gold}")
+        click.echo(f" Confidence: {annotation.confidence:.2f}")
+        if annotation.failure_signals:
+            click.echo(f" Failure Signals: {', '.join(annotation.failure_signals)}")
+        if annotation.exclusion_reason:
+            click.echo(f" Exclusion Reason: {annotation.exclusion_reason}")
+        click.echo()
+
+        while True:
+            choice = click.prompt(
+                "Action [a/r/s/n/q]",
+                type=click.Choice(["a", "r", "s", "n", "q"]),
+                default="s",
+            )
+
+            if choice == "a":
+                notes = click.prompt("Notes (optional)", default="", show_default=False)
+                new_ann = verify_annotation(
+                    annotation,
+                    is_gold=True,
+                    notes=notes if notes else None,
+                    verified_by=reviewer,
+                )
+                annotation_map[annotation.annotation_id] = new_ann
+                click.echo(" Approved as gold")
+                reviewed += 1
+                break
+
+            elif choice == "r":
+                reason = click.prompt("Rejection reason", default="Manual rejection")
+                new_ann = verify_annotation(
+                    annotation,
+                    is_gold=False,
+                    notes=reason,
+                    verified_by=reviewer,
+                )
+                annotation_map[annotation.annotation_id] = new_ann
+                click.echo(" Rejected")
+                reviewed += 1
+                break
+
+            elif choice == "s":
+                click.echo(" Skipped")
+                break
+
+            elif choice == "n":
+                notes = click.prompt("Add notes")
+                annotation.notes = notes
+                annotation_map[annotation.annotation_id] = annotation
+                click.echo(f" Notes added: {notes}")
+                # Continue to ask for a/r/s
+
+            elif choice == "q":
+                click.echo("\nQuitting review...")
+                break
+
+        if choice == "q":
+            break
+
+    # Update library with new annotations
+    lib.annotations = list(annotation_map.values())
+
+    # Save
+    output_path = Path(output) if output else Path(library)
+    output_path.write_text(lib.model_dump_json(indent=2))
+
+    click.echo("\nReview session complete:")
+    click.echo(f" Reviewed: {reviewed}")
+    click.echo(f" Total verified: {lib.verified_count}")
+    click.echo(f" Gold episodes: {lib.gold_count}")
+    click.echo(f" Export-ready: {lib.export_ready_count}")
+    click.echo(f"\nSaved to: {output_path}")
+
+
+@segment.command("export-gold")
+@click.argument("library")
+@click.option(
+    "--format",
+    "-f",
+    type=click.Choice(["json", "jsonl", "hf"]),
+    default="jsonl",
+    help="Export format",
+)
+@click.option("--output", "-o", required=True, help="Output file/directory")
+@click.option("--recording", "-r", help="Recording directory (for screenshots)")
+@click.option(
+    "--include-screenshots", is_flag=True, help="Include screenshots in export"
+)
+def export_gold(library, format, output, recording, include_screenshots):
+    """Export verified gold episodes for fine-tuning.
+
+    Only exports episodes where is_gold=True AND human_verified=True.
+    """
+    from openadapt_ml.segmentation.schemas import AnnotatedEpisodeLibrary
+    from openadapt_ml.segmentation.annotator import export_gold_episodes
+
+    # Load library
+    data = json.loads(Path(library).read_text())
+    lib = AnnotatedEpisodeLibrary.model_validate(data)
+
+    click.echo(f"Loaded library: {library}")
+    click.echo(f" Export-ready episodes: {lib.export_ready_count}")
+
+    if lib.export_ready_count == 0:
+        click.echo("\nNo episodes ready for export!")
+        click.echo("Run 'segment review' first to verify annotations.")
+        return
+
+    # Export
+    count = export_gold_episodes(
+        library=lib,
+        output_path=output,
+        recording_path=recording,
+        format=format,
+        include_screenshots=include_screenshots,
+    )
+
+    click.echo(f"\nExported {count} gold episodes to: {output}")
+
+
+@segment.command("export")
+@click.argument("library")
+@click.option(
+    "--format",
+    "-f",
+    type=click.Choice(["csv", "jsonl", "html"]),
+    default="jsonl",
+    help="Export format",
+)
+@click.option("--output", "-o", required=True, help="Output file")
+@click.option("--workflow", "-w", help="Export specific workflow")
+def export(library, format, output, workflow):
+    """Export segments to various formats."""
+    import csv
+    from openadapt_ml.segmentation.schemas import EpisodeLibrary
+
+    data = json.loads(Path(library).read_text())
+    lib = EpisodeLibrary.model_validate(data)
+
+    # Filter if specified
+    episodes = lib.episodes
+    if workflow:
+        episodes = [e for e in episodes if workflow.lower() in e.canonical_name.lower()]
+
+    output_path = Path(output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if format == "csv":
+        with open(output_path, "w", newline="") as f:
+            writer = csv.writer(f)
+            writer.writerow(
+                ["name", "description", "steps", "occurrences", "recordings"]
+            )
+            for ep in episodes:
+                writer.writerow(
+                    [
+                        ep.canonical_name,
+                        ep.canonical_description,
+                        "; ".join(ep.canonical_steps),
+                        ep.occurrence_count,
+                        ", ".join(ep.source_recordings),
+                    ]
+                )
+
+    elif format == "jsonl":
+        with open(output_path, "w") as f:
+            for ep in episodes:
+                f.write(ep.model_dump_json() + "\n")
+
+    elif format == "html":
+        html = ["<html><head><style>"]
+        html.append("body { font-family: sans-serif; margin: 2em; }")
+        html.append(
+            ".workflow { border: 1px solid #ccc; padding: 1em; margin: 1em 0; }"
+        )
+        html.append(".steps { margin-left: 2em; }")
+        html.append("</style></head><body>")
+        html.append("<h1>Episode Library</h1>")
+        html.append(f"<p>{len(episodes)} workflows</p>")
+
+        for ep in episodes:
+            html.append('<div class="workflow">')
+            html.append(f"<h2>{ep.canonical_name}</h2>")
+            html.append(f"<p>{ep.canonical_description}</p>")
+            html.append(f"<p><strong>Occurrences:</strong> {ep.occurrence_count}</p>")
+            html.append('<div class="steps"><strong>Steps:</strong><ol>')
+            for step in ep.canonical_steps:
+                html.append(f"<li>{step}</li>")
+            html.append("</ol></div></div>")
+
+        html.append("</body></html>")
+        output_path.write_text("\n".join(html))
+
+    click.echo(f"Exported {len(episodes)} workflows to: {output_path}")
+
+
+if __name__ == "__main__":
+    segment()
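
The new `segment` group chains three stages: `describe` (VLM frame transcripts), `extract` (LLM episode segmentation), and `deduplicate` (embedding-based merging into an episode library), with `pipeline` running all three in one invocation. A minimal sketch of driving those stages programmatically with click's test runner is shown below; the recording path and output filenames are illustrative and assume the package is installed with API credentials configured for the chosen VLM/LLM providers, none of which is part of this diff.

# Hypothetical end-to-end walkthrough of the segment CLI added in 0.2.1.
# Command and option names come from openadapt_ml/segmentation/cli.py above;
# file paths are made up for illustration.
from click.testing import CliRunner

from openadapt_ml.segmentation.cli import segment

runner = CliRunner()

# Stage 1: describe frames in a recording with a VLM, saving the transcript as JSON.
result = runner.invoke(
    segment,
    ["describe", "-r", "recordings/rec_001", "-f", "json", "-o", "transcript.json"],
)
print(result.output)

# Stage 2: extract workflow segments from the saved transcript with an LLM.
result = runner.invoke(
    segment,
    ["extract", "-t", "transcript.json", "-o", "rec_001_episodes.json"],
)
print(result.output)

# Stage 3: deduplicate episodes across recordings into a library.
result = runner.invoke(
    segment,
    ["deduplicate", "rec_001_episodes.json", "-o", "episode_library.json"],
)
print(result.output)

The same flow can be run in a single step with `segment pipeline <recordings...> -o <output>`, and the resulting library can then be passed through `annotate`, `review`, and `export-gold` to produce verified fine-tuning data.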