openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -115
  8. openadapt_ml/benchmarks/agent.py +265 -421
  9. openadapt_ml/benchmarks/azure.py +28 -19
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1722 -4847
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +22 -5
  14. openadapt_ml/benchmarks/vm_monitor.py +530 -29
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
  16. openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
  17. openadapt_ml/cloud/azure_inference.py +3 -5
  18. openadapt_ml/cloud/lambda_labs.py +722 -307
  19. openadapt_ml/cloud/local.py +2038 -487
  20. openadapt_ml/cloud/ssh_tunnel.py +68 -26
  21. openadapt_ml/datasets/next_action.py +40 -30
  22. openadapt_ml/evals/grounding.py +8 -3
  23. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  24. openadapt_ml/evals/trajectory_matching.py +41 -26
  25. openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
  26. openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
  27. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  28. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  29. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  30. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  31. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  32. openadapt_ml/experiments/waa_demo/runner.py +29 -14
  33. openadapt_ml/export/parquet.py +36 -24
  34. openadapt_ml/grounding/detector.py +18 -14
  35. openadapt_ml/ingest/__init__.py +8 -6
  36. openadapt_ml/ingest/capture.py +25 -22
  37. openadapt_ml/ingest/loader.py +7 -4
  38. openadapt_ml/ingest/synthetic.py +189 -100
  39. openadapt_ml/models/api_adapter.py +14 -4
  40. openadapt_ml/models/base_adapter.py +10 -2
  41. openadapt_ml/models/providers/__init__.py +288 -0
  42. openadapt_ml/models/providers/anthropic.py +266 -0
  43. openadapt_ml/models/providers/base.py +299 -0
  44. openadapt_ml/models/providers/google.py +376 -0
  45. openadapt_ml/models/providers/openai.py +342 -0
  46. openadapt_ml/models/qwen_vl.py +46 -19
  47. openadapt_ml/perception/__init__.py +35 -0
  48. openadapt_ml/perception/integration.py +399 -0
  49. openadapt_ml/retrieval/demo_retriever.py +50 -24
  50. openadapt_ml/retrieval/embeddings.py +9 -8
  51. openadapt_ml/retrieval/retriever.py +3 -1
  52. openadapt_ml/runtime/__init__.py +50 -0
  53. openadapt_ml/runtime/policy.py +18 -5
  54. openadapt_ml/runtime/safety_gate.py +471 -0
  55. openadapt_ml/schema/__init__.py +9 -0
  56. openadapt_ml/schema/converters.py +74 -27
  57. openadapt_ml/schema/episode.py +31 -18
  58. openadapt_ml/scripts/capture_screenshots.py +530 -0
  59. openadapt_ml/scripts/compare.py +85 -54
  60. openadapt_ml/scripts/demo_policy.py +4 -1
  61. openadapt_ml/scripts/eval_policy.py +15 -9
  62. openadapt_ml/scripts/make_gif.py +1 -1
  63. openadapt_ml/scripts/prepare_synthetic.py +3 -1
  64. openadapt_ml/scripts/train.py +21 -9
  65. openadapt_ml/segmentation/README.md +920 -0
  66. openadapt_ml/segmentation/__init__.py +97 -0
  67. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  68. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  69. openadapt_ml/segmentation/annotator.py +610 -0
  70. openadapt_ml/segmentation/cache.py +290 -0
  71. openadapt_ml/segmentation/cli.py +674 -0
  72. openadapt_ml/segmentation/deduplicator.py +656 -0
  73. openadapt_ml/segmentation/frame_describer.py +788 -0
  74. openadapt_ml/segmentation/pipeline.py +340 -0
  75. openadapt_ml/segmentation/schemas.py +622 -0
  76. openadapt_ml/segmentation/segment_extractor.py +634 -0
  77. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  78. openadapt_ml/training/benchmark_viewer.py +52 -41
  79. openadapt_ml/training/shared_ui.py +7 -7
  80. openadapt_ml/training/stub_provider.py +57 -35
  81. openadapt_ml/training/trainer.py +143 -86
  82. openadapt_ml/training/trl_trainer.py +70 -21
  83. openadapt_ml/training/viewer.py +323 -108
  84. openadapt_ml/training/viewer_components.py +180 -0
  85. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
  86. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  87. openadapt_ml/benchmarks/base.py +0 -366
  88. openadapt_ml/benchmarks/data_collection.py +0 -432
  89. openadapt_ml/benchmarks/live_tracker.py +0 -180
  90. openadapt_ml/benchmarks/runner.py +0 -418
  91. openadapt_ml/benchmarks/waa.py +0 -761
  92. openadapt_ml/benchmarks/waa_live.py +0 -619
  93. openadapt_ml-0.2.0.dist-info/RECORD +0 -86
  94. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  95. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/segmentation/__init__.py
@@ -0,0 +1,97 @@
+ """Workflow segmentation module for OpenAdapt.
+
+ This module provides a three-stage pipeline for extracting and deduplicating
+ workflow episodes from GUI recordings:
+
+ 1. **Stage 1 - Frame Description (VLM)**: Generate semantic descriptions
+    of each frame + action pair using Vision-Language Models
+
+ 2. **Stage 2 - Episode Extraction (LLM)**: Identify coherent workflow
+    boundaries and extract episodes using Large Language Models
+
+ 3. **Stage 3 - Deduplication (Embeddings)**: Find and merge similar
+    episodes across recordings using embedding similarity
+
+ Example usage:
+     >>> from openadapt_ml.segmentation import SegmentationPipeline
+     >>> pipeline = SegmentationPipeline()
+     >>> result = pipeline.run(
+     ...     recordings=["recording1/", "recording2/"],
+     ...     output_dir="segments/",
+     ... )
+     >>> print(f"Found {result.unique_episodes} unique workflows")
+ """
+
+ from openadapt_ml.segmentation.schemas import (
+     ActionTranscript,
+     ActionType,
+     AnnotatedEpisodeLibrary,
+     CanonicalEpisode,
+     Episode,
+     EpisodeAnnotation,
+     EpisodeBoundary,
+     EpisodeExtractionResult,
+     EpisodeLibrary,
+     EpisodeStep,
+     FrameDescription,
+ )
+ from openadapt_ml.segmentation.frame_describer import (
+     FrameDescriber,
+     VLMBackend,
+     GeminiBackend,
+     ClaudeBackend,
+     OpenAIBackend,
+ )
+ from openadapt_ml.segmentation.segment_extractor import SegmentExtractor
+ from openadapt_ml.segmentation.deduplicator import (
+     WorkflowDeduplicator,
+     OpenAIEmbedder,
+     LocalEmbedder,
+     episode_to_text,
+ )
+ from openadapt_ml.segmentation.pipeline import (
+     SegmentationPipeline,
+     PipelineConfig,
+     PipelineResult,
+ )
+ from openadapt_ml.segmentation.annotator import (
+     EpisodeAnnotator,
+     verify_annotation,
+     export_gold_episodes,
+ )
+
+ __all__ = [
+     # Schemas
+     "ActionTranscript",
+     "ActionType",
+     "AnnotatedEpisodeLibrary",
+     "CanonicalEpisode",
+     "Episode",
+     "EpisodeAnnotation",
+     "EpisodeBoundary",
+     "EpisodeExtractionResult",
+     "EpisodeLibrary",
+     "EpisodeStep",
+     "FrameDescription",
+     # Frame Describer (Stage 1)
+     "FrameDescriber",
+     "VLMBackend",
+     "GeminiBackend",
+     "ClaudeBackend",
+     "OpenAIBackend",
+     # Segment Extractor (Stage 2)
+     "SegmentExtractor",
+     # Deduplicator (Stage 3)
+     "WorkflowDeduplicator",
+     "OpenAIEmbedder",
+     "LocalEmbedder",
+     "episode_to_text",
+     # Pipeline
+     "SegmentationPipeline",
+     "PipelineConfig",
+     "PipelineResult",
+     # Annotation (Stage 4)
+     "EpisodeAnnotator",
+     "verify_annotation",
+     "export_gold_episodes",
+ ]
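
For orientation, here is a minimal sketch mapping the exported names onto the stages described in the module docstring. It uses only the API visible in this hunk; the per-stage classes appear as imports only, because their constructor and method signatures are not part of this diff.

    # A minimal sketch, assuming only what this hunk exports.
    from openadapt_ml.segmentation import (
        SegmentationPipeline,   # end-to-end driver
        FrameDescriber,         # Stage 1: VLM frame descriptions
        SegmentExtractor,       # Stage 2: LLM episode extraction
        WorkflowDeduplicator,   # Stage 3: embedding-based deduplication
        EpisodeAnnotator,       # Stage 4: annotation / gold-episode export
    )

    # End-to-end run, exactly as in the module docstring:
    pipeline = SegmentationPipeline()
    result = pipeline.run(
        recordings=["recording1/", "recording2/"],
        output_dir="segments/",
    )
    print(f"Found {result.unique_episodes} unique workflows")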
openadapt_ml/segmentation/adapters/__init__.py
@@ -0,0 +1,5 @@
+ """Adapters for loading recordings from different formats."""
+
+ from openadapt_ml.segmentation.adapters.capture_adapter import CaptureAdapter
+
+ __all__ = ["CaptureAdapter"]
openadapt_ml/segmentation/adapters/capture_adapter.py
@@ -0,0 +1,420 @@
+ """Adapter for openadapt-capture SQLite database format.
+
+ This adapter loads recordings from the openadapt-capture format
+ (capture.db SQLite database) and converts them to the format
+ expected by the segmentation pipeline.
+ """
+
+ import json
+ import logging
+ import sqlite3
+ from pathlib import Path
+ from typing import Optional
+
+ from PIL import Image
+
+ logger = logging.getLogger(__name__)
+
+
+ class CaptureAdapter:
+     """Adapter for openadapt-capture SQLite format.
+
+     The openadapt-capture tool stores recordings in a SQLite database
+     (capture.db) with the following structure:
+     - capture table: Recording metadata
+     - events table: Action events (click, type, scroll, etc.)
+     - screenshots/: Directory with PNG files
+
+     This adapter converts that format to the tuple of (images, events)
+     expected by FrameDescriber.
+     """
+
+     # Event types to include in segmentation (actual openadapt-capture types)
+     RELEVANT_EVENT_TYPES = {
+         "mouse.down",
+         "mouse.up",
+         "key.down",
+         "key.up",
+         "mouse.move",
+         "screen.frame",  # Frame captures (maps to screenshots)
+     }
+
+     def __init__(
+         self,
+         include_moves: bool = False,
+         min_move_distance: float = 50.0,
+     ):
+         """Initialize the adapter.
+
+         Args:
+             include_moves: Whether to include mouse move events (can be noisy)
+             min_move_distance: Minimum pixel distance for move events
+         """
+         self.include_moves = include_moves
+         self.min_move_distance = min_move_distance
+
+     def load_recording(
+         self,
+         capture_path: Path,
+     ) -> tuple[list[Image.Image], list[dict]]:
+         """Load recording from capture.db format.
+
+         Args:
+             capture_path: Path to recording directory with capture.db
+
+         Returns:
+             Tuple of (images, action_events) where:
+             - images: List of PIL Images in chronological order
+             - action_events: List of dicts with event data
+
+         Raises:
+             FileNotFoundError: If capture.db doesn't exist
+             ValueError: If database format is invalid
+         """
+         db_path = capture_path / "capture.db"
+         if not db_path.exists():
+             raise FileNotFoundError(f"capture.db not found in {capture_path}")
+
+         screenshots_dir = capture_path / "screenshots"
+         if not screenshots_dir.exists():
+             raise FileNotFoundError(
+                 f"screenshots directory not found in {capture_path}"
+             )
+
+         # Connect to SQLite
+         conn = sqlite3.connect(db_path)
+         conn.row_factory = sqlite3.Row  # Access columns by name
+         cursor = conn.cursor()
+
+         # Get capture metadata
+         cursor.execute("SELECT * FROM capture LIMIT 1")
+         capture_row = cursor.fetchone()
+         if not capture_row:
+             raise ValueError("No capture record found in database")
+
+         capture_metadata = dict(capture_row)
+         started_at = capture_metadata["started_at"]
+
+         # Get all screen.frame events (these define our frames)
+         cursor.execute(
+             """
+             SELECT id, timestamp, type, data
+             FROM events
+             WHERE type = 'screen.frame'
+             ORDER BY timestamp
+             """
+         )
+
+         frame_events = cursor.fetchall()
+         logger.info(f"Found {len(frame_events)} screen.frame events")
+
+         # Get all action events (mouse, key)
+         cursor.execute(
+             """
+             SELECT id, timestamp, type, data
+             FROM events
+             WHERE type IN ('mouse.down', 'mouse.up', 'key.down', 'key.up', 'mouse.move')
+             ORDER BY timestamp
+             """
+         )
+
+         action_events = cursor.fetchall()
+         logger.info(f"Found {len(action_events)} action events")
+
+         # Pair action events (down+up → single action)
+         paired_actions = self._pair_action_events(action_events, started_at)
+         logger.info(f"Paired into {len(paired_actions)} actions")
+
+         # Load screenshot files
+         screenshot_files = self._get_screenshot_files(screenshots_dir)
+         logger.info(f"Found {len(screenshot_files)} screenshot files")
+
+         # Build frame list with corresponding actions
+         images = []
+         events = []
+
+         for frame_idx, frame_row in enumerate(frame_events):
+             frame_timestamp = frame_row["timestamp"]
+
+             # Find screenshot
+             screenshot_path = screenshot_files.get(frame_idx)
+             if not screenshot_path:
+                 logger.warning(f"No screenshot found for frame {frame_idx}")
+                 continue
+
+             try:
+                 # Load image
+                 images.append(Image.open(screenshot_path))
+
+                 # Find action closest to this frame (within reasonable window)
+                 frame_relative_time = frame_timestamp - started_at
+                 closest_action = self._find_closest_action(
+                     paired_actions, frame_relative_time, window=2.0
+                 )
+
+                 if closest_action:
+                     # Use action details
+                     event = {
+                         "timestamp": frame_relative_time,
+                         "frame_index": frame_idx,
+                         "name": closest_action["type"],
+                         **closest_action.get("extra", {}),
+                     }
+                 else:
+                     # No action, create a frame-only event
+                     event = {
+                         "timestamp": frame_relative_time,
+                         "frame_index": frame_idx,
+                         "name": "frame",
+                     }
+
+                 events.append(event)
+
+             except Exception as e:
+                 logger.warning(f"Failed to load screenshot {screenshot_path}: {e}")
+
+         conn.close()
+
+         if not images:
+             raise ValueError(f"No screenshots loaded from {capture_path}")
+
+         logger.info(
+             f"Loaded {len(images)} frames with {len(events)} events from {capture_path}"
+         )
+         return images, events
+
+     def _get_screenshot_files(self, screenshots_dir: Path) -> dict[int, Path]:
+         """Get mapping of frame indices to screenshot files.
+
+         openadapt-capture uses format: capture_{id}_step_{n}.png
+
+         Args:
+             screenshots_dir: Path to screenshots directory
+
+         Returns:
+             Dict mapping frame index to file path
+         """
+         files = {}
+         for png_file in screenshots_dir.glob("*.png"):
+             # Parse format: capture_31807990_step_0.png
+             parts = png_file.stem.split("_")
+             if len(parts) >= 4 and parts[-2] == "step":
+                 try:
+                     step_num = int(parts[-1])
+                     files[step_num] = png_file
+                 except ValueError:
+                     logger.warning(f"Could not parse step number from {png_file.name}")
+
+         return files
+
+     def _find_screenshot(
+         self,
+         screenshot_files: dict[int, Path],
+         frame_index: int,
+         event_id: Optional[int] = None,
+     ) -> Optional[Path]:
+         """Find screenshot file for frame index.
+
+         Args:
+             screenshot_files: Mapping of frame indices to paths
+             frame_index: Current frame index
+             event_id: Event ID (unused but kept for future)
+
+         Returns:
+             Path to screenshot or None if not found
+         """
+         return screenshot_files.get(frame_index)
+
+     def _convert_event(
+         self,
+         event_type: str,
+         timestamp: float,
+         frame_index: int,
+         data: dict,
+     ) -> dict:
+         """Convert openadapt-capture event to segmentation format.
+
+         Args:
+             event_type: Event type (click, type, scroll, etc.)
+             timestamp: Timestamp in seconds (relative to recording start)
+             frame_index: Frame index in sequence
+             data: Event data dictionary
+
+         Returns:
+             Event dict in expected format
+         """
+         event = {
+             "timestamp": timestamp,
+             "frame_index": frame_index,
+             "name": event_type,
+         }
+
+         # Add coordinates if present
+         if "x" in data and "y" in data:
+             event["mouse_x"] = data["x"]
+             event["mouse_y"] = data["y"]
+
+         # Add text for typing events
+         if event_type in ("type", "key"):
+             event["text"] = data.get("text") or data.get("key")
+
+         # Add scroll direction
+         if event_type == "scroll":
+             event["scroll_dx"] = data.get("dx", 0)
+             event["scroll_dy"] = data.get("dy", 0)
+
+         # Add drag endpoints
+         if event_type == "drag":
+             event["start_x"] = data.get("start_x")
+             event["start_y"] = data.get("start_y")
+             event["end_x"] = data.get("end_x")
+             event["end_y"] = data.get("end_y")
+
+         return event
+
+     def _pair_action_events(self, action_events: list, started_at: float) -> list[dict]:
+         """Pair mouse.down+up and key.down+up events into single actions.
+
+         Args:
+             action_events: List of SQLite Row objects with action events
+             started_at: Recording start timestamp
+
+         Returns:
+             List of paired action dicts with type, timestamp, duration, and data
+         """
+         paired = []
+         pending_down = {}  # type -> (event, timestamp, data)
+         last_move_pos = None  # Last kept mouse.move position (for min_move_distance)
+
+         for row in action_events:
+             event_type = row["type"]
+             timestamp = row["timestamp"] - started_at  # Relative
+             data_json = row["data"]
+
+             try:
+                 data = json.loads(data_json) if data_json else {}
+             except json.JSONDecodeError:
+                 logger.warning(f"Failed to parse JSON for event {row['id']}")
+                 continue
+
+             # Handle down events
+             if event_type.endswith(".down"):
+                 base_type = event_type[:-5]  # Remove '.down' → 'mouse' or 'key'
+                 pending_down[base_type] = (event_type, timestamp, data)
+
+             # Handle up events
+             elif event_type.endswith(".up"):
+                 base_type = event_type[:-3]  # Remove '.up'
+
+                 if base_type in pending_down:
+                     # Found matching down event
+                     down_type, down_timestamp, down_data = pending_down.pop(base_type)
+                     duration = timestamp - down_timestamp
+
+                     # Create paired action
+                     if base_type == "mouse":
+                         action = {
+                             "type": "click",
+                             "timestamp": down_timestamp,
+                             "duration": duration,
+                             "extra": {
+                                 "mouse_x": down_data.get("x"),
+                                 "mouse_y": down_data.get("y"),
+                                 "button": down_data.get("button", "left"),
+                             },
+                         }
+                     elif base_type == "key":
+                         action = {
+                             "type": "key",
+                             "timestamp": down_timestamp,
+                             "duration": duration,
+                             "extra": {
+                                 "text": down_data.get("key") or down_data.get("text"),
+                                 "key": down_data.get("key"),
+                             },
+                         }
+                     else:
+                         continue
+
+                     paired.append(action)
+                 else:
+                     # Unpaired up event (shouldn't happen, but log it)
+                     logger.debug(f"Unpaired {event_type} event at {timestamp}")
+
+             # Handle mouse.move (if configured to include)
+             elif event_type == "mouse.move" and self.include_moves:
+                 x, y = data.get("x"), data.get("y")
+                 # Enforce min_move_distance: skip moves that travel less than
+                 # the configured pixel distance since the last kept move
+                 if x is not None and y is not None and last_move_pos is not None:
+                     dx, dy = x - last_move_pos[0], y - last_move_pos[1]
+                     if (dx * dx + dy * dy) ** 0.5 < self.min_move_distance:
+                         continue
+                 if x is not None and y is not None:
+                     last_move_pos = (x, y)
+                 action = {
+                     "type": "move",
+                     "timestamp": timestamp,
+                     "duration": 0.0,
+                     "extra": {
+                         "mouse_x": x,
+                         "mouse_y": y,
+                     },
+                 }
+                 paired.append(action)
+
+         # Log any unpaired down events
+         for base_type, (down_type, down_timestamp, down_data) in pending_down.items():
+             logger.debug(f"Unpaired {down_type} event at {down_timestamp}")
+
+         return paired
+
+     def _find_closest_action(
+         self, paired_actions: list[dict], frame_time: float, window: float = 2.0
+     ) -> Optional[dict]:
+         """Find action closest to a given frame time.
+
+         Args:
+             paired_actions: List of paired action dicts
+             frame_time: Frame timestamp (relative to recording start)
+             window: Maximum time distance in seconds to consider
+
+         Returns:
+             Closest action dict or None if no action within window
+         """
+         closest_action = None
+         closest_distance = float("inf")
+
+         for action in paired_actions:
+             distance = abs(action["timestamp"] - frame_time)
+             if distance < closest_distance and distance <= window:
+                 closest_distance = distance
+                 closest_action = action
+
+         return closest_action
+
+     def get_capture_metadata(self, capture_path: Path) -> dict:
+         """Get recording metadata from capture.db.
+
+         Args:
+             capture_path: Path to recording directory
+
+         Returns:
+             Dict with capture metadata (task_description, platform, etc.)
+         """
+         db_path = capture_path / "capture.db"
+         if not db_path.exists():
+             raise FileNotFoundError(f"capture.db not found in {capture_path}")
+
+         conn = sqlite3.connect(db_path)
+         conn.row_factory = sqlite3.Row
+         cursor = conn.cursor()
+
+         cursor.execute("SELECT * FROM capture LIMIT 1")
+         row = cursor.fetchone()
+         conn.close()
+
+         if not row:
+             raise ValueError("No capture record found")
+
+         metadata = dict(row)
+
+         # Parse JSON metadata field if present
+         if "metadata" in metadata and metadata["metadata"]:
+             try:
+                 extra_metadata = json.loads(metadata["metadata"])
+                 metadata.update(extra_metadata)
+             except json.JSONDecodeError:
+                 pass
+
+         return metadata
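
To make the adapter's contract concrete, here is a minimal usage sketch built only from the signatures in this hunk; the recording path is hypothetical, and the hand-off to FrameDescriber is left as a comment because that call does not appear in this diff.

    from pathlib import Path

    from openadapt_ml.segmentation.adapters import CaptureAdapter

    # A directory produced by openadapt-capture: it must contain capture.db
    # and a screenshots/ folder of capture_{id}_step_{n}.png files.
    recording = Path("recordings/demo")  # hypothetical path

    adapter = CaptureAdapter(include_moves=False, min_move_distance=50.0)
    metadata = adapter.get_capture_metadata(recording)
    images, events = adapter.load_recording(recording)

    # Every event carries timestamp, frame_index, and name; paired clicks
    # add mouse_x/mouse_y/button, and key presses add text/key.
    for event in events[:5]:
        print(event["frame_index"], event["name"], round(event["timestamp"], 2))

    # (images, events) is the tuple FrameDescriber expects as Stage 1 input.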