davinci-resolve-mcp 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/AGENTS.md +85 -0
  2. package/CHANGELOG.md +802 -0
  3. package/CLAUDE.md +15 -0
  4. package/LICENSE +21 -0
  5. package/README.md +159 -0
  6. package/SECURITY.md +53 -0
  7. package/bin/davinci-resolve-mcp.mjs +376 -0
  8. package/docs/README.md +56 -0
  9. package/docs/SKILL.md +1145 -0
  10. package/docs/authoring/fuse-dctl-authoring.md +242 -0
  11. package/docs/authoring/script-plugin-authoring.md +195 -0
  12. package/docs/contributing.md +82 -0
  13. package/docs/guides/color-decision-guide.md +387 -0
  14. package/docs/guides/editorial-decision-guide.md +136 -0
  15. package/docs/guides/media-analysis-guide.md +615 -0
  16. package/docs/guides/multicam-setup-guide.md +138 -0
  17. package/docs/install.md +198 -0
  18. package/docs/integrations/workflow-integrations.md +120 -0
  19. package/docs/kernels/README.md +28 -0
  20. package/docs/kernels/audio-fairlight-kernel.md +86 -0
  21. package/docs/kernels/color-grade-kernel.md +103 -0
  22. package/docs/kernels/extension-authoring-kernel.md +101 -0
  23. package/docs/kernels/fusion-composition-kernel.md +91 -0
  24. package/docs/kernels/media-pool-ingest-kernel.md +147 -0
  25. package/docs/kernels/project-lifecycle-kernel.md +120 -0
  26. package/docs/kernels/render-deliver-kernel.md +92 -0
  27. package/docs/kernels/review-annotation-kernel.md +110 -0
  28. package/docs/kernels/timeline-conform-interchange-kernel.md +99 -0
  29. package/docs/kernels/timeline-edit-kernel.md +189 -0
  30. package/docs/notes/codec-plugin-notes.md +136 -0
  31. package/docs/notes/dctl-notes.md +234 -0
  32. package/docs/notes/fusion-template-notes.md +136 -0
  33. package/docs/notes/lut-notes.md +136 -0
  34. package/docs/notes/openfx-notes.md +120 -0
  35. package/docs/process/release-process.md +152 -0
  36. package/docs/reference/api-coverage.md +488 -0
  37. package/docs/reference/resolve_scripting_api.txt +1012 -0
  38. package/examples/README.md +53 -0
  39. package/examples/markers/README.md +81 -0
  40. package/examples/media/README.md +94 -0
  41. package/examples/timeline/README.md +98 -0
  42. package/install.py +1196 -0
  43. package/package.json +52 -0
  44. package/scripts/audit_api_parity.py +275 -0
  45. package/scripts/live_media_analysis_polish_probe.py +65 -0
  46. package/src/__init__.py +3 -0
  47. package/src/analysis_dashboard.py +4936 -0
  48. package/src/control_panel.py +13 -0
  49. package/src/granular/__init__.py +17 -0
  50. package/src/granular/common.py +727 -0
  51. package/src/granular/folder.py +287 -0
  52. package/src/granular/gallery.py +306 -0
  53. package/src/granular/graph.py +309 -0
  54. package/src/granular/media_pool.py +679 -0
  55. package/src/granular/media_pool_item.py +852 -0
  56. package/src/granular/media_storage.py +179 -0
  57. package/src/granular/project.py +1594 -0
  58. package/src/granular/resolve_control.py +521 -0
  59. package/src/granular/timeline.py +1074 -0
  60. package/src/granular/timeline_item.py +2251 -0
  61. package/src/resolve_mcp_server.py +43 -0
  62. package/src/server.py +15691 -0
  63. package/src/utils/__init__.py +3 -0
  64. package/src/utils/app_control.py +319 -0
  65. package/src/utils/audio_fairlight_live_probe.py +263 -0
  66. package/src/utils/cdl.py +20 -0
  67. package/src/utils/cloud_operations.py +192 -0
  68. package/src/utils/color_grade_live_probe.py +444 -0
  69. package/src/utils/dctl_templates.py +368 -0
  70. package/src/utils/extension_authoring_live_probe.py +292 -0
  71. package/src/utils/fuse_templates.py +1968 -0
  72. package/src/utils/fusion_composition_live_probe.py +284 -0
  73. package/src/utils/layout_presets.py +333 -0
  74. package/src/utils/mcp_stdio.py +32 -0
  75. package/src/utils/media_analysis.py +3618 -0
  76. package/src/utils/media_analysis_jobs.py +796 -0
  77. package/src/utils/media_pool_ingest_live_probe.py +592 -0
  78. package/src/utils/multicam.py +393 -0
  79. package/src/utils/object_inspection.py +287 -0
  80. package/src/utils/platform.py +157 -0
  81. package/src/utils/project_lifecycle_live_probe.py +376 -0
  82. package/src/utils/project_properties.py +601 -0
  83. package/src/utils/render_deliver_live_probe.py +384 -0
  84. package/src/utils/resolve_connection.py +77 -0
  85. package/src/utils/review_annotation_live_probe.py +352 -0
  86. package/src/utils/script_templates.py +1193 -0
  87. package/src/utils/sync_detection.py +887 -0
  88. package/src/utils/timeline_conform_live_probe.py +280 -0
  89. package/src/utils/timeline_kernel_live_probe.py +1091 -0
  90. package/src/utils/timeline_kernel_probe.py +185 -0
  91. package/src/utils/timeline_title_text.py +87 -0
  92. package/src/utils/update_check.py +610 -0
@@ -0,0 +1,3618 @@
1
+ """Planning helpers for project-scoped media analysis.
2
+
3
+ This module deliberately performs no package installation and does not modify
4
+ source media. It is the safety/planning layer that the MCP tool uses before any
5
+ future ffprobe, ffmpeg, transcription, or vision work happens.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import hashlib
12
+ import importlib.util
13
+ import inspect
14
+ import json
15
+ import math
16
+ import os
17
+ import re
18
+ import shutil
19
+ import sqlite3
20
+ import subprocess
21
+ import time
22
+ from datetime import datetime, timezone
23
+ from pathlib import Path
24
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
25
+
26
+ from src.utils.sync_detection import detect_sync_event_capabilities
27
+
28
+
29
# Directory name for analysis output; resolve_output_root() places it under
# ~/Documents when no explicit analysis_root is supplied.
ANALYSIS_DIR_NAME = "davinci-resolve-mcp-analysis"
# Dot-prefixed variant of the directory name.
# NOTE(review): not referenced in the part of the module reviewed here —
# confirm where it is used.
HIDDEN_ANALYSIS_DIR_NAME = ".davinci-resolve-mcp-analysis"
# Version tag embedded in plans, capability reports and cache signatures.
ANALYSIS_VERSION = "0.1"
# NOTE(review): presumably the SQLite index file name and its schema version;
# the code that opens the index is outside the reviewed section.
ANALYSIS_INDEX_FILENAME = "index.sqlite"
ANALYSIS_INDEX_SCHEMA_VERSION = 1
# Default timeout (seconds) applied by _run_command() to external commands.
COMMAND_TIMEOUT_SECONDS = 300
# Vision provider names that vision_uses_chat_context() treats as chat-context
# providers; _required_capability_gaps() never reports a gap for them.
CHAT_CONTEXT_VISION_PROVIDERS = {"chat_context", "mcp_sampling", "host_chat", "current_chat"}
# Marker kind -> marker color name.
# NOTE(review): presumably the defaults used when a marker plan supplies no
# "colors" override — confirm against the marker-plan builder.
MARKER_PLAN_DEFAULT_COLORS = {
    "shot": "Blue",
    "best_moment": "Green",
    "qc_warning": "Red",
    "black_or_title": "Red",
}
42
+
43
+ DEFAULT_VISION_ANALYSIS_PROMPT = """Return only strict JSON for editorial media analysis.
44
+
45
+ Use the full sequence of sampled keyframes plus the computed motion/variance
46
+ and cut-boundary evidence. Describe what changes across the clip; do not treat
47
+ one frame as the whole clip unless only one frame was provided. When frames are
48
+ tagged shot_start, shot_end, cut_before, or cut_after, explicitly compare the
49
+ adjacent boundary frames and say whether they look like a real cut, a flash
50
+ frame, a title/black insertion, or a high-motion moment inside one continuous
51
+ shot. If a slate or clapper is visible in any sampled frame, confirm it in the
52
+ slate block and extract only clearly readable details. Do not infer slate fields
53
+ from audio cues alone.
54
+
55
+ Use this schema:
56
+ {
57
+ "success": true,
58
+ "provider": "chat_context",
59
+ "clip_summary": "One concise natural-language summary of the full clip evidence.",
60
+ "editorial_classification": {
61
+ "primary_use": "b-roll|interview|action|establishing|detail|screen|unknown",
62
+ "select_potential": "low|medium|high",
63
+ "reason": "Why this clip may or may not be useful editorially."
64
+ },
65
+ "content": {
66
+ "locations": [],
67
+ "people_visible": "none|one|multiple|unknown",
68
+ "actions": [],
69
+ "objects": [],
70
+ "visible_text": [],
71
+ "notable_audio_context": []
72
+ },
73
+ "shot_and_style": {
74
+ "shot_sizes": [],
75
+ "camera_motion": [],
76
+ "composition_notes": "",
77
+ "lighting_mood": "",
78
+ "color_mood": ""
79
+ },
80
+ "slate": {
81
+ "slate_visible": false,
82
+ "scene": "",
83
+ "shot": "",
84
+ "take": "",
85
+ "camera": "",
86
+ "roll": "",
87
+ "date": "",
88
+ "production": "",
89
+ "visible_text": [],
90
+ "confidence": {
91
+ "overall": "low|medium|high",
92
+ "scene": "low|medium|high",
93
+ "shot": "low|medium|high",
94
+ "take": "low|medium|high",
95
+ "camera": "low|medium|high"
96
+ }
97
+ },
98
+ "motion": {
99
+ "overall_level": "low|medium|high|unknown",
100
+ "motion_events": [],
101
+ "quiet_regions": []
102
+ },
103
+ "cut_understanding": {
104
+ "cut_count": 0,
105
+ "likely_edited_sequence": false,
106
+ "flash_frame_candidates": [],
107
+ "notes": []
108
+ },
109
+ "analysis_keyframes": [
110
+ {
111
+ "time_seconds": 0.0,
112
+ "selection_reason": "first_usable|midpoint|last_usable|scene_change|cut_before|cut_after|shot_start|shot_end|flash_candidate|motion_peak|interval",
113
+ "description": "What is visible in this frame.",
114
+ "editing_value": "How an editor might use this moment.",
115
+ "qc_flags": []
116
+ }
117
+ ],
118
+ "editing_notes": {
119
+ "best_moments": [],
120
+ "continuity_flags": [],
121
+ "qc_flags": [],
122
+ "search_tags": []
123
+ },
124
+ "confidence": {
125
+ "visual": "low|medium|high",
126
+ "motion": "computed",
127
+ "transcript": "unavailable|provided"
128
+ }
129
+ }
130
+ Do not include markdown fences, prose outside JSON, or keys outside this schema."""
131
+
132
# Accepted analysis depth names; normalize_depth() rejects anything else.
DEPTHS = {"quick", "standard", "deep", "custom"}
# Depth used when the caller does not specify one.
DEFAULT_DEPTH = "standard"
# Default per-clip keyframe budget for each depth; used by
# _bounded_frame_count() when no explicit frame count is requested.
FRAME_CAPS = {
    "quick": 0,
    "standard": 8,
    "deep": 24,
    "custom": 8,
}
# Upper bound applied to any explicitly requested frame count.
HARD_FRAME_CAP = 48
141
+
142
+
143
+ def slugify(value: Any, fallback: str = "untitled") -> str:
144
+ raw = str(value or "").strip().lower()
145
+ slug = re.sub(r"[^a-z0-9._-]+", "-", raw)
146
+ slug = re.sub(r"-+", "-", slug).strip("-._")
147
+ return slug or fallback
148
+
149
+
150
def short_hash(value: Any, length: int = 10) -> str:
    """Return the first *length* hex digits of the SHA-1 of ``str(value)``.

    Falsy values are hashed as the empty string. Characters that cannot be
    encoded as UTF-8 are replaced rather than raising.
    """
    text = str(value) if value else ""
    digest = hashlib.sha1(text.encode("utf-8", errors="replace"))
    return digest.hexdigest()[:length]
153
+
154
+
155
def project_directory_name(project_name: Any, project_id: Any = None) -> str:
    """Build the per-project directory name ``<slug>-<hash>``.

    The slug always comes from *project_name* (falling back to ``"project"``);
    the hash is taken from *project_id* when truthy, else from *project_name*,
    else from the literal ``"project"``.
    """
    label = slugify(project_name, "project")
    if project_id:
        basis = project_id
    elif project_name:
        basis = project_name
    else:
        basis = "project"
    return f"{label}-{short_hash(basis)}"
158
+
159
+
160
def stable_clip_directory(record: Dict[str, Any]) -> str:
    """Return the directory name ``<label>-<hash>`` for a clip record.

    The hash basis is the first truthy value among ``clip_id``, ``media_id``,
    ``file_path`` and ``clip_name`` (else ``"clip"``). The label is the slug
    of ``clip_name``, or of the file stem when no clip name is present.
    """
    basis: Any = "clip"
    for key in ("clip_id", "media_id", "file_path", "clip_name"):
        candidate = record.get(key)
        if candidate:
            basis = candidate
            break

    display_name = record.get("clip_name")
    if not display_name:
        display_name = Path(str(record.get("file_path") or "clip")).stem

    return f"{slugify(display_name, 'clip')}-{short_hash(basis, 12)}"
170
+
171
+
172
def normalize_path(path: Any) -> str:
    """Return ``str(path)`` with ``~`` expanded, made absolute, and with symlinks resolved."""
    expanded = os.path.expanduser(str(path))
    absolute = os.path.abspath(expanded)
    return os.path.realpath(absolute)
174
+
175
+
176
def _is_relative_to(path: str, parent: str) -> bool:
    """Report whether *path* equals *parent* or lies beneath it.

    Both arguments are compared as given, so callers should pass normalized
    paths. Path pairs that ``os.path.commonpath`` rejects (it raises
    ``ValueError``) are treated as unrelated.
    """
    try:
        return os.path.commonpath([path, parent]) == parent
    except ValueError:
        return False
182
+
183
+
184
def _non_empty_source_paths(source_paths: Optional[Iterable[Any]]) -> List[str]:
    """Normalize every truthy entry of *source_paths*; ``None`` yields an empty list."""
    return [normalize_path(source) for source in (source_paths or []) if source]
190
+
191
+
192
def validate_output_root(output_root: Any, source_paths: Optional[Iterable[Any]] = None) -> Tuple[bool, List[str]]:
    """Check that the analysis root is kept apart from source media.

    Returns ``(ok, errors)``. An error is recorded for each source path whose
    normalized form equals the root, or whose containing directory is the root
    or an ancestor of it.
    """
    root = normalize_path(output_root)
    problems: List[str] = []

    for source in _non_empty_source_paths(source_paths):
        if source == root:
            problems.append(f"analysis root cannot equal a source file path: {source}")
            continue
        source_dir = os.path.dirname(source)
        if not source_dir or not _is_relative_to(root, source_dir):
            continue
        problems.append(
            "analysis root cannot be inside a source media directory: "
            f"{root} is under {source_dir}"
        )

    return not problems, problems
209
+
210
+
211
def resolve_output_root(
    *,
    project_name: Any,
    project_id: Any = None,
    analysis_root: Any = None,
    source_paths: Optional[Iterable[Any]] = None,
    create: bool = False,
) -> Dict[str, Any]:
    """Compute the per-project analysis directory and validate it against sources.

    *analysis_root* (or ``~/Documents/<ANALYSIS_DIR_NAME>`` when it is falsy)
    is used as a base directory; the project directory is always appended to
    it. The directory is created only when validation passes and *create* is
    true. The result carries ``success`` plus any validation ``errors``.
    """
    project_dir = project_directory_name(project_name, project_id)

    # A custom root is still only a base: each project gets its own
    # subdirectory so projects sharing one location stay isolated.
    base_candidate = analysis_root if analysis_root else Path.home() / "Documents" / ANALYSIS_DIR_NAME
    base_root = normalize_path(base_candidate)
    output_root = normalize_path(os.path.join(base_root, project_dir))

    ok, errors = validate_output_root(output_root, source_paths)
    if create and ok:
        os.makedirs(output_root, exist_ok=True)

    return {
        "success": ok,
        "analysis_version": ANALYSIS_VERSION,
        "base_root": base_root,
        "project_root": output_root,
        "project_directory": project_dir,
        "project_name": project_name,
        "project_id": project_id,
        "errors": errors,
    }
244
+
245
+
246
def detect_capabilities(env: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
    """Detect available analysis helpers without installing or downloading.

    Looks up ``ffprobe``, ``ffmpeg``, ``whisper`` and ``whisper-cpp`` on PATH,
    checks whether the ``mlx_whisper`` and ``cv2`` modules are importable, and
    reads the vision provider from ``DAVINCI_RESOLVE_MCP_VISION_PROVIDER`` in
    *env* (``os.environ`` when *env* is ``None``).
    """
    env = os.environ if env is None else env

    # Each executable is looked up exactly once so that "available" and
    # "path" in the report can never disagree.
    ffprobe = shutil.which("ffprobe")
    ffmpeg = shutil.which("ffmpeg")
    whisper_cli = shutil.which("whisper")
    whisper_cpp = shutil.which("whisper-cpp")
    mlx_whisper = importlib.util.find_spec("mlx_whisper") is not None
    cv2 = importlib.util.find_spec("cv2") is not None
    provider = env.get("DAVINCI_RESOLVE_MCP_VISION_PROVIDER")

    sync_events = detect_sync_event_capabilities()

    transcription_backends = [
        name
        for name, available in (
            ("whisper_cli", bool(whisper_cli)),
            ("whisper_cpp", bool(whisper_cpp)),
            ("mlx_whisper", bool(mlx_whisper)),
        )
        if available
    ]

    return {
        "success": True,
        "analysis_version": ANALYSIS_VERSION,
        "no_auto_install": True,
        "tools": {
            "ffprobe": {"available": bool(ffprobe), "path": ffprobe},
            "ffmpeg": {"available": bool(ffmpeg), "path": ffmpeg},
            "whisper_cli": {"available": bool(whisper_cli), "path": whisper_cli},
            "whisper_cpp": {"available": bool(whisper_cpp), "path": whisper_cpp},
            "mlx_whisper": {"available": bool(mlx_whisper), "python_module": "mlx_whisper"},
            "opencv": {"available": bool(cv2), "python_module": "cv2"},
        },
        "transcription": {
            "available": bool(whisper_cli or whisper_cpp or mlx_whisper),
            "backends": transcription_backends,
        },
        "vision": {
            "available": bool(provider),
            "provider": provider,
            "enabled_by_default": False,
            "note": (
                "Vision analysis is opt-in and requires a configured provider. "
                "The 'mock' provider is local-only for tests and never sends frames."
            ),
        },
        "sync_events": {
            "available": bool(sync_events.get("available")),
            "event_types": sync_events.get("event_types", []),
            "source_safe": True,
            "requires": ["ffmpeg", "ffprobe"],
            "note": "Detects likely audio 2-pops and slate claps for advisory sync offset planning.",
        },
    }
297
+
298
+
299
def install_guidance(capabilities: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Describe which optional helpers are missing and how a user could add them.

    *capabilities* is a ``detect_capabilities()``-style report; when it is
    falsy, capabilities are detected on the spot. The result only contains
    guidance text — nothing is installed here.
    """
    caps = capabilities or detect_capabilities()
    tools = caps.get("tools", {})

    def tool_available(name: str) -> Any:
        return tools.get(name, {}).get("available")

    missing: Dict[str, Any] = {}

    if not (tool_available("ffprobe") and tool_available("ffmpeg")):
        missing["ffmpeg_suite"] = {
            "required_for": [
                "technical metadata",
                "scene detection",
                "motion and variance analysis",
                "2-pop and slate-clap sync detection",
            ],
            "macos": "Ask the user before running: brew install ffmpeg",
            "linux": "Ask the user to install ffmpeg with their distribution package manager.",
            "windows": "Ask the user to install ffmpeg and add ffmpeg/ffprobe to PATH.",
        }

    transcription_ready = caps.get("transcription", {}).get("available")
    if not transcription_ready:
        missing["transcription"] = {
            "required_for": ["deep transcription analysis"],
            "options": [
                "Install/configure whisper CLI",
                "Install/configure whisper-cpp",
                "Install mlx-whisper on supported Apple Silicon systems",
            ],
            "note": "The MCP server must not install these automatically.",
        }

    if not tool_available("opencv"):
        missing["opencv"] = {
            "required_for": ["optional optical-flow motion scoring"],
            "note": "OpenCV is optional; standard frame-difference motion scoring can work without it.",
        }

    vision_ready = caps.get("vision", {}).get("available")
    if not vision_ready:
        missing["vision"] = {
            "required_for": ["LLM visual analysis"],
            "note": (
                "Prefer chat-context vision when the MCP client supports sampling/createMessage. "
                "If unavailable, ask the user whether to continue without visuals or provide setup "
                "steps for a supported vision path. Never send frames off-machine without explicit approval."
            ),
        }

    return {
        "success": True,
        "no_auto_install": True,
        "missing": missing,
    }
346
+
347
+
348
def normalize_depth(value: Any) -> Tuple[Optional[str], Optional[str]]:
    """Validate an analysis depth name.

    Returns ``(depth, None)`` for a known depth (after trimming and
    lower-casing; falsy input means ``DEFAULT_DEPTH``) or ``(None, message)``
    for an unknown one.
    """
    candidate = str(value or DEFAULT_DEPTH).strip().lower()
    if candidate in DEPTHS:
        return candidate, None
    return None, f"Unknown analysis depth '{value}'. Valid: {sorted(DEPTHS)}"
353
+
354
+
355
def _coerce_bool(value: Any, default: bool = False) -> bool:
    """Interpret a loosely typed flag as a boolean.

    ``None`` yields *default*, real booleans pass through, and strings are
    true only when they read (case-insensitively, ignoring surrounding
    whitespace) as ``1``, ``true``, ``yes``, ``y`` or ``on``. Anything else
    falls back to ordinary truthiness.
    """
    if isinstance(value, bool):
        return value
    if value is None:
        return default
    if isinstance(value, str):
        truthy_words = {"1", "true", "yes", "y", "on"}
        return value.strip().lower() in truthy_words
    return bool(value)
363
+
364
+
365
def _coerce_optional_float(value: Any) -> Optional[float]:
    """Convert *value* to ``float``, returning ``None`` when that is not possible.

    ``None`` and the empty string mean "not provided". Values that ``float()``
    rejects — including integers too large to represent, which raise
    ``OverflowError`` — also yield ``None`` instead of propagating.
    """
    if value in (None, ""):
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return None
372
+
373
+
374
def _stable_json_hash(value: Any, length: int = 12) -> str:
    """Hash *value* through a canonical JSON encoding.

    The value is serialized with sorted keys and ASCII-only output (objects
    JSON cannot encode are stringified), then the first *length* hex digits of
    the SHA-1 of that text are returned.
    """
    canonical = json.dumps(value, sort_keys=True, ensure_ascii=True, default=str)
    digest = hashlib.sha1(canonical.encode("utf-8")).hexdigest()
    return digest[:length]
377
+
378
+
379
def _source_file_signature(path: Any) -> Dict[str, Any]:
    """Describe a source file by normalized path, existence, size and mtime.

    A falsy *path* or a file that cannot be stat'ed yields ``exists=False``
    with ``size_bytes`` and ``mtime_ns`` left as ``None``.
    """
    resolved = normalize_path(path) if path else None
    signature: Dict[str, Any] = {
        "path": resolved,
        "exists": False,
        "size_bytes": None,
        "mtime_ns": None,
    }
    if not resolved:
        return signature

    try:
        info = os.stat(resolved)
    except OSError:
        return signature

    signature["exists"] = True
    signature["size_bytes"] = info.st_size
    signature["mtime_ns"] = info.st_mtime_ns
    return signature
398
+
399
+
400
def analysis_request_signature(
    record: Dict[str, Any],
    depth: str,
    options: Dict[str, Any],
    frame_count: int,
) -> Dict[str, Any]:
    """Return the cache signature for a requested analysis profile.

    The result lists the requested layers in readable form and carries a
    ``signature_hash`` computed over the analysis version, depth, frame
    budget, source-file signature and the transcription / vision /
    marker-plan / cut-boundary settings.

    Each sub-signature is built once and shared between the readable part and
    the hashed payload, so the source file is stat'ed a single time and the
    two views cannot disagree.
    """
    transcription = options.get("transcription") or {}
    vision = options.get("vision") or {}
    marker_plan = options.get("marker_plan") or {}
    vision_prompt = vision.get("prompt") or DEFAULT_VISION_ANALYSIS_PROMPT

    keyframe_budget = int(frame_count or 0)
    # Every depth except "quick" enables the frame-based layers.
    beyond_quick = depth in {"standard", "deep", "custom"}
    source_file = _source_file_signature(record.get("file_path"))

    transcription_layer = {
        # Transcription defaults to on only for the "deep" depth.
        "enabled": _coerce_bool(transcription.get("enabled"), default=(depth == "deep")),
        "backend": transcription.get("backend"),
        "model": transcription.get("model"),
        "language": transcription.get("language"),
    }
    vision_layer = {
        "enabled": _coerce_bool(vision.get("enabled"), default=False),
        "provider": vision.get("provider"),
        "prompt_hash": _stable_json_hash(vision_prompt),
    }
    marker_plan_layer = {
        "enabled": _coerce_bool(marker_plan.get("enabled"), default=True),
        "min_shot_duration_seconds": marker_plan.get("min_shot_duration_seconds"),
        "colors_hash": _stable_json_hash(marker_plan.get("colors") or {}),
    }
    cut_boundary_layer = {
        "enabled": beyond_quick,
        "version": 1,
        "hard_frame_cap": HARD_FRAME_CAP,
    }

    # The hashed payload keeps its historical key names ("frame_count", flat
    # layer keys) so hashes stay compatible with previously written reports.
    signature_hash = _stable_json_hash({
        "analysis_version": ANALYSIS_VERSION,
        "depth": depth,
        "frame_count": keyframe_budget,
        "source_file": source_file,
        "transcription": transcription_layer,
        "vision": vision_layer,
        "marker_plan": marker_plan_layer,
        "cut_boundary_analysis": cut_boundary_layer,
    })

    return {
        "analysis_version": ANALYSIS_VERSION,
        "depth": depth,
        "analysis_keyframe_budget": keyframe_budget,
        "source_file": source_file,
        "layers": {
            "technical": True,
            "readthrough": beyond_quick,
            "motion": beyond_quick,
            "transcription": transcription_layer,
            "vision": vision_layer,
            "marker_plan": marker_plan_layer,
            "cut_boundary_analysis": cut_boundary_layer,
        },
        "signature_hash": signature_hash,
    }
470
+
471
+
472
def _timestamp_from_analyzed_at(value: Any) -> Optional[float]:
    """Convert an ``analyzed_at`` string to a POSIX timestamp, or ``None``.

    ISO-8601 text is tried first, with a trailing ``Z`` read as UTC. If that
    fails, the looser ``%Y-%m-%dT%H:%M:%SZ`` pattern is tried; its ``Z``
    suffix also denotes UTC, so the parsed time is anchored to UTC rather
    than to the machine's local zone. Falsy or unparseable input yields
    ``None``.
    """
    if not value:
        return None
    raw = str(value).strip()
    try:
        parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        return parsed.astimezone(timezone.utc).timestamp()
    except (ValueError, OverflowError, OSError):
        # OverflowError/OSError: naive values outside the platform's
        # local-time range; fall through to the explicit UTC pattern.
        pass
    try:
        parsed = datetime.strptime(raw, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        return None
    return parsed.replace(tzinfo=timezone.utc).timestamp()
484
+
485
+
486
def vision_uses_chat_context(options: Dict[str, Any], capabilities: Optional[Dict[str, Any]] = None) -> bool:
    """Report whether enabled vision analysis targets a chat-context provider.

    The provider comes from ``options["vision"]["provider"]`` or, when that is
    falsy, from the capabilities report. Returns ``False`` whenever vision is
    not enabled.
    """
    vision = options.get("vision") or {}
    if _coerce_bool(vision.get("enabled"), default=False):
        provider = vision.get("provider")
        if not provider:
            provider = (capabilities or {}).get("vision", {}).get("provider")
        return provider in CHAT_CONTEXT_VISION_PROVIDERS
    return False
492
+
493
+
494
def _bounded_frame_count(depth: str, requested: Any = None) -> int:
    """Return the per-clip keyframe budget for *depth*.

    Without a request the depth's default from ``FRAME_CAPS`` is used (the
    default depth's value for unknown depths). An explicit request is
    converted to ``int`` and clamped to ``[0, HARD_FRAME_CAP]``; requests that
    cannot be converted fall back to the default.
    """
    default = FRAME_CAPS.get(depth, FRAME_CAPS[DEFAULT_DEPTH])
    if requested is None:
        return default
    try:
        count = int(requested)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinite floats; int() raises it rather than
        # ValueError for those.
        return default
    return max(0, min(count, HARD_FRAME_CAP))
503
+
504
+
505
def _artifact_paths(project_root: str, record: Dict[str, Any], depth: str, options: Dict[str, Any]) -> Dict[str, Any]:
    """List the artifact paths planned for one clip.

    Everything lives under ``<project_root>/clips/<stable clip directory>``.
    Motion and frame artifacts are added for every depth except ``quick``,
    transcript files when transcription is enabled (the default for ``deep``),
    and ``visual.json`` when vision is enabled.
    """
    clip_dir = normalize_path(os.path.join(project_root, "clips", stable_clip_directory(record)))

    def inside(name: str) -> str:
        return os.path.join(clip_dir, name)

    artifacts: Dict[str, Any] = {
        "clip_dir": clip_dir,
        "analysis_json": inside("analysis.json"),
        "technical_json": inside("technical.json"),
        "marker_plan_json": inside("clip_analysis_markers.json"),
    }

    if depth in {"standard", "deep", "custom"}:
        artifacts.update(
            motion_json=inside("motion.json"),
            frames_dir=inside("frames"),
        )

    transcription = options.get("transcription") or {}
    wants_transcript = _coerce_bool(transcription.get("enabled"), default=(depth == "deep"))
    if wants_transcript:
        artifacts.update(
            transcript_json=inside("transcript.json"),
            transcript_srt=inside("transcript.srt"),
            transcript_vtt=inside("transcript.vtt"),
        )

    vision = options.get("vision") or {}
    wants_vision = _coerce_bool(vision.get("enabled"), default=False)
    if wants_vision:
        artifacts["visual_json"] = inside("visual.json")

    return artifacts
529
+
530
+
531
def _required_capability_gaps(depth: str, options: Dict[str, Any], capabilities: Dict[str, Any]) -> List[Dict[str, Any]]:
    """List capabilities the requested analysis needs but the host lacks.

    ``ffprobe`` is always required and ``ffmpeg`` for every depth except
    ``quick``. Transcription and vision gaps are reported only when the layer
    is enabled, the chosen backend/provider is not a mock (or, for vision, a
    chat-context provider), and the capability report says it is unavailable.
    """
    tools = capabilities.get("tools", {})

    def tool_missing(name: str) -> bool:
        return not tools.get(name, {}).get("available")

    mock_names = {"mock", "local_mock"}
    gaps: List[Dict[str, Any]] = []

    # NOTE(review): the "required_for" lists below omit "custom" even though a
    # custom depth triggers the ffmpeg check — confirm whether that is intended.
    if tool_missing("ffprobe"):
        gaps.append({"capability": "ffprobe", "required_for": ["quick", "standard", "deep"]})
    if depth in {"standard", "deep", "custom"} and tool_missing("ffmpeg"):
        gaps.append({"capability": "ffmpeg", "required_for": ["standard", "deep"]})

    transcription = options.get("transcription") or {}
    if _coerce_bool(transcription.get("enabled"), default=(depth == "deep")):
        backend = transcription.get("backend")
        if backend not in mock_names and not capabilities.get("transcription", {}).get("available"):
            gaps.append({"capability": "transcription_backend", "required_for": ["transcription"]})

    vision = options.get("vision") or {}
    if _coerce_bool(vision.get("enabled"), default=False):
        provider = vision.get("provider") or capabilities.get("vision", {}).get("provider")
        needs_external_provider = provider not in mock_names and provider not in CHAT_CONTEXT_VISION_PROVIDERS
        if needs_external_provider and not capabilities.get("vision", {}).get("available"):
            gaps.append({"capability": "vision_provider", "required_for": ["vision"]})

    return gaps
556
+
557
+
558
def build_plan(
    *,
    project_name: Any,
    project_id: Any = None,
    records: List[Dict[str, Any]],
    target: Dict[str, Any],
    params: Optional[Dict[str, Any]] = None,
    capabilities: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build a dry-run description of the analysis to perform on *records*.

    The plan validates the requested depth and output root, reports missing
    capabilities with install guidance, and — unless reuse is disabled or a
    refresh is forced — looks for an existing report per clip so execution
    can be skipped. An unknown depth or an invalid output root returns
    ``{"success": False, ...}``.

    Several *params* keys are accepted in both snake_case and camelCase
    spelling (for example ``reuse_existing`` / ``reuseExisting``).
    """
    params = params or {}

    def either(snake: str, camel: str) -> Any:
        # The snake_case key wins whenever it is present, even if it is None.
        return params.get(snake, params.get(camel))

    depth, depth_error = normalize_depth(params.get("depth"))
    if depth_error:
        return {"success": False, "error": depth_error}
    assert depth is not None

    source_paths = [path for path in (record.get("file_path") for record in records) if path]
    root = resolve_output_root(
        project_name=project_name,
        project_id=project_id,
        analysis_root=params.get("analysis_root"),
        source_paths=source_paths,
        create=False,
    )
    if not root.get("success"):
        return {"success": False, "error": "Invalid analysis output root", "output_root": root}
    project_root = root["project_root"]

    caps = capabilities or detect_capabilities()
    options = {
        "transcription": params.get("transcription") or {},
        "vision": params.get("vision") or {},
        "marker_plan": params.get("marker_plan") or params.get("markerPlan") or {},
    }
    gaps = _required_capability_gaps(depth, options, caps)
    frame_count = _bounded_frame_count(depth, params.get("max_analysis_frames"))

    notes = [
        "Plans describe analysis before execution.",
        "All planned artifacts are under the project analysis root, never beside source media.",
        "Missing optional tools are reported as guidance only; nothing is installed automatically.",
        "Session-only execution returns reports to the MCP response and removes scratch artifacts unless keep_artifacts=true.",
    ]
    transcription_enabled = _coerce_bool(options["transcription"].get("enabled"), default=(depth == "deep"))
    if caps.get("transcription", {}).get("available") and not transcription_enabled:
        notes.append(
            "Transcription is available but disabled; for story, sound, or audio-spine decisions, "
            "rerun with transcription.enabled=true and allow_model_download=true only if local model use is approved."
        )

    # --- Reuse settings -----------------------------------------------------
    reuse_existing = _coerce_bool(either("reuse_existing", "reuseExisting"), default=True)
    force_refresh = _coerce_bool(either("force_refresh", "forceRefresh"), default=False)
    max_report_age_days = _coerce_optional_float(either("max_report_age_days", "maxReportAgeDays"))
    reuse_policy = str(params.get("reuse_policy", params.get("reusePolicy") or "compatible")).strip().lower()
    if reuse_policy not in {"compatible", "fresh", "strict"}:
        reuse_policy = "compatible"

    if params.get("_reuse_default_analysis_root"):
        # Look for earlier reports under the default location even though
        # this run may write to a custom analysis_root.
        primary_root = resolve_output_root(
            project_name=project_name,
            project_id=project_id,
            analysis_root=None,
            source_paths=source_paths,
            create=False,
        ).get("project_root")
    else:
        primary_root = params.get("reuse_project_root") or params.get("reuseProjectRoot") or project_root
    reuse_project_root = normalize_path(primary_root) if primary_root else project_root

    extra_roots = params.get("reuse_project_roots") or params.get("reuseProjectRoots") or []
    if isinstance(extra_roots, str):
        extra_roots = [extra_roots]
    elif not isinstance(extra_roots, list):
        extra_roots = []

    # Primary root first, then the extras; normalized, without duplicates.
    reuse_project_roots: List[str] = []
    for candidate in (reuse_project_root, *extra_roots):
        if not candidate:
            continue
        normalized = normalize_path(candidate)
        if normalized not in reuse_project_roots:
            reuse_project_roots.append(normalized)

    # --- Per-clip plans -----------------------------------------------------
    clip_plans = []
    for record in records:
        artifacts = _artifact_paths(project_root, record, depth, options)
        signature = analysis_request_signature(record, depth, options, frame_count)
        clip_plan: Dict[str, Any] = {
            "record": record,
            "analysis_keyframe_budget": frame_count,
            "analysis_signature": signature,
            "cache_status": "not_checked",
            "artifacts": artifacts,
        }

        if not reuse_existing:
            clip_plan["cache_status"] = "reuse_disabled"
        elif force_refresh:
            clip_plan["cache_status"] = "refresh_forced"
        else:
            existing = find_reusable_report_across_roots(
                reuse_project_roots,
                record,
                depth,
                options,
                request_signature=signature,
                max_report_age_days=max_report_age_days,
                reuse_policy=reuse_policy,
            )
            if not existing:
                clip_plan["cache_status"] = "miss"
            else:
                clip_plan["existing_report"] = {
                    "path": existing.get("path"),
                    "reusable": existing.get("reusable", False),
                    "missing_layers": existing.get("missing_layers", []),
                    "cache_issues": existing.get("cache_issues", []),
                    "cache_warnings": existing.get("cache_warnings", []),
                    "analyzed_at": existing.get("analyzed_at"),
                    "project_root": existing.get("project_root"),
                }
                if not existing.get("reusable"):
                    clip_plan["cache_status"] = "stale_or_incomplete"
                else:
                    clip_plan["skip_execution"] = True
                    clip_plan["cache_status"] = "reusable"
                    found_root = existing.get("project_root")
                    if found_root and found_root != project_root:
                        clip_plan["reuse_reason"] = "Existing analysis report from a related project version satisfies the requested depth and modalities."
                    else:
                        clip_plan["reuse_reason"] = "Existing analysis report satisfies the requested depth and modalities."

        clip_plans.append(clip_plan)

    # --- Summary ------------------------------------------------------------
    per_clip_seconds = {"quick": 2, "standard": 45, "deep": 180, "custom": 45}.get(depth, 45)
    reusable_count = len([plan for plan in clip_plans if plan.get("skip_execution")])
    stale_count = len([plan for plan in clip_plans if plan.get("cache_status") == "stale_or_incomplete"])
    clip_total = len(records)

    return {
        "success": True,
        "analysis_version": ANALYSIS_VERSION,
        "dry_run": _coerce_bool(params.get("dry_run"), default=True),
        "session_only": _coerce_bool(params.get("session_only"), default=False),
        "target": target,
        "depth": depth,
        "clip_count": clip_total,
        "output_root": root,
        "capability_gaps": gaps,
        "install_guidance": install_guidance(caps) if gaps else {"success": True, "missing": {}},
        "estimated_seconds": per_clip_seconds * clip_total,
        "estimated_seconds_after_reuse": per_clip_seconds * max(0, clip_total - reusable_count),
        "analysis_keyframe_budget_per_clip": frame_count,
        "reuse_existing": reuse_existing,
        "force_refresh": force_refresh,
        "reuse_policy": reuse_policy,
        "max_report_age_days": max_report_age_days,
        "reuse_project_root": reuse_project_root,
        "reuse_project_roots": reuse_project_roots,
        "reusable_clip_count": reusable_count,
        "stale_or_incomplete_clip_count": stale_count,
        "clips": clip_plans,
        "notes": notes,
    }
711
+
712
+
713
def _run_command(args: List[str], timeout: int = COMMAND_TIMEOUT_SECONDS) -> Tuple[int, str, str]:
    """Run an external command and return ``(returncode, stdout, stderr)``.

    Both output streams are captured and decoded as text.  A non-zero exit
    status is reported through the return code, never raised
    (``check=False``).

    Args:
        args: Argument vector; executed without a shell.
        timeout: Maximum run time in seconds.

    Raises:
        subprocess.TimeoutExpired: the command ran longer than ``timeout``.
        OSError: the executable could not be started.
    """
    proc = subprocess.run(
        args,
        # Never let the child share our stdin: ffmpeg polls stdin for
        # interactive key presses and would otherwise be able to consume
        # bytes that belong to the host process.
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        timeout=timeout,
        check=False,
    )
    return proc.returncode, proc.stdout, proc.stderr
722
+
723
+
724
+ def _write_json(path: str, payload: Dict[str, Any]) -> None:
725
+ os.makedirs(os.path.dirname(path), exist_ok=True)
726
+ tmp_path = f"{path}.tmp"
727
+ with open(tmp_path, "w", encoding="utf-8") as f:
728
+ json.dump(payload, f, indent=2, ensure_ascii=False)
729
+ f.write("\n")
730
+ os.replace(tmp_path, path)
731
+
732
+
733
+ def _read_json(path: str) -> Dict[str, Any]:
734
+ with open(path, "r", encoding="utf-8") as f:
735
+ return json.load(f)
736
+
737
+
738
+ def _fraction_to_float(value: Any) -> Optional[float]:
739
+ if value in (None, "", "0/0"):
740
+ return None
741
+ raw = str(value)
742
+ if "/" in raw:
743
+ num, den = raw.split("/", 1)
744
+ try:
745
+ den_f = float(den)
746
+ if den_f == 0:
747
+ return None
748
+ return float(num) / den_f
749
+ except ValueError:
750
+ return None
751
+ try:
752
+ return float(raw)
753
+ except ValueError:
754
+ return None
755
+
756
+
757
+ def _parse_float(value: Any) -> Optional[float]:
758
+ if value in (None, ""):
759
+ return None
760
+ try:
761
+ return float(value)
762
+ except (TypeError, ValueError):
763
+ return None
764
+
765
+
766
def _ffprobe(path: str) -> Dict[str, Any]:
    """Probe ``path`` with ffprobe and return the raw JSON plus a condensed summary.

    Returns:
        ``{"success": True, "raw": <ffprobe JSON>, "summary": <_ffprobe_summary(raw)>}``
        on success, otherwise ``{"success": False, "error": <message>}``.

    Exceptions raised by ``_run_command`` (for example a timeout or a missing
    executable) are not caught here.
    """
    code, stdout, stderr = _run_command([
        "ffprobe",
        # "error" rather than "quiet": with "quiet" ffprobe prints nothing to
        # stderr, so the failure branch below could never report the cause.
        "-v",
        "error",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        "-show_chapters",
        path,
    ])
    if code != 0:
        return {"success": False, "error": stderr.strip() or "ffprobe failed"}
    try:
        raw = json.loads(stdout or "{}")
    except json.JSONDecodeError as exc:
        return {"success": False, "error": f"ffprobe returned invalid JSON: {exc}"}
    return {"success": True, "raw": raw, "summary": _ffprobe_summary(raw)}
785
+
786
+
787
def _ffprobe_summary(raw: Dict[str, Any]) -> Dict[str, Any]:
    """Condense ffprobe JSON into container, video-stream and audio-stream summaries.

    Streams whose ``codec_type`` is neither ``"video"`` nor ``"audio"`` are
    ignored.  A video stream is flagged ``is_vfr`` (and a warning is recorded)
    when both frame rates parse and differ by more than 0.01 fps.

    Returns:
        A dict with the keys ``format``, ``video``, ``audio``, ``chapters``
        and ``warnings``.
    """

    def digits_as_int(mapping: Dict[str, Any], key: str) -> Optional[int]:
        # Only values made of digits are converted; anything else becomes None.
        return int(mapping[key]) if str(mapping.get(key, "")).isdigit() else None

    container = raw.get("format") or {}
    video_streams = []
    audio_streams = []
    notices = []

    for entry in raw.get("streams") or []:
        kind = entry.get("codec_type")
        if kind == "audio":
            audio_streams.append({
                "index": entry.get("index"),
                "codec": entry.get("codec_name"),
                "codec_long": entry.get("codec_long_name"),
                "sample_rate": digits_as_int(entry, "sample_rate"),
                "channels": entry.get("channels"),
                "channel_layout": entry.get("channel_layout"),
                "duration_seconds": _parse_float(entry.get("duration")),
            })
            continue
        if kind != "video":
            continue

        nominal_fps = _fraction_to_float(entry.get("r_frame_rate"))
        average_fps = _fraction_to_float(entry.get("avg_frame_rate"))
        variable_rate = bool(nominal_fps and average_fps and abs(nominal_fps - average_fps) > 0.01)
        if variable_rate:
            notices.append("Container frame rate and average frame rate differ; possible VFR media")
        video_streams.append({
            "index": entry.get("index"),
            "codec": entry.get("codec_name"),
            "codec_long": entry.get("codec_long_name"),
            "profile": entry.get("profile"),
            "pixel_format": entry.get("pix_fmt"),
            "width": entry.get("width"),
            "height": entry.get("height"),
            "r_frame_rate": entry.get("r_frame_rate"),
            "avg_frame_rate": entry.get("avg_frame_rate"),
            # Prefer the average rate; fall back to the nominal one.
            "frame_rate": average_fps or nominal_fps,
            "is_vfr": variable_rate,
            "color_primaries": entry.get("color_primaries"),
            "transfer_characteristics": entry.get("color_transfer"),
            "matrix_coefficients": entry.get("color_space"),
            "field_order": entry.get("field_order"),
            "duration_seconds": _parse_float(entry.get("duration")),
            "frame_count": digits_as_int(entry, "nb_frames"),
        })

    format_summary = {
        "filename": container.get("filename"),
        "format_name": container.get("format_name"),
        "duration_seconds": _parse_float(container.get("duration")),
        "size_bytes": digits_as_int(container, "size"),
        "bit_rate": digits_as_int(container, "bit_rate"),
        "tags": container.get("tags") or {},
    }
    return {
        "format": format_summary,
        "video": video_streams,
        "audio": audio_streams,
        "chapters": raw.get("chapters") or [],
        "warnings": notices,
    }
844
+
845
+
846
+ def _media_duration_seconds(record: Dict[str, Any], technical: Dict[str, Any]) -> Optional[float]:
847
+ summary = technical.get("summary") or {}
848
+ duration = ((summary.get("format") or {}).get("duration_seconds"))
849
+ if duration:
850
+ return duration
851
+ videos = summary.get("video") or []
852
+ for video in videos:
853
+ if video.get("duration_seconds"):
854
+ return video["duration_seconds"]
855
+ return None
856
+
857
+
858
def _ffmpeg_stderr_filter(path: str, video_filter: Optional[str] = None, audio_filter: Optional[str] = None, frames: Optional[int] = None) -> Tuple[int, str]:
    """Run ffmpeg over ``path`` with the given filters, discarding the output, and return ``(exit_code, stderr)``.

    The result is sent to the ``null`` muxer, so only the log text written to
    stderr by the filters is of interest.  ``frames`` limits the number of
    video frames processed when given.
    """
    command = ["ffmpeg", "-hide_banner", "-nostats", "-i", path]
    command += ["-vf", video_filter] if video_filter else []
    command += ["-af", audio_filter] if audio_filter else []
    command += ["-frames:v", str(frames)] if frames is not None else []
    command += ["-f", "null", "-"]
    exit_code, _stdout, log_text = _run_command(command)
    return exit_code, log_text
869
+
870
+
871
def _parse_loudness(stderr: str) -> Dict[str, Any]:
    """Extract loudness figures from ffmpeg log text.

    For each metric the last occurrence in the log is used; a metric that
    never appears is reported as ``None``.
    """
    patterns = {
        "integrated_lufs": r"I:\s*(-?\d+(?:\.\d+)?)\s*LUFS",
        "loudness_range_lu": r"LRA:\s*(-?\d+(?:\.\d+)?)\s*LU",
        "true_peak_dbtp": r"Peak:\s*(-?\d+(?:\.\d+)?)\s*dBFS",
    }
    metrics: Dict[str, Any] = {}
    for name, pattern in patterns.items():
        hits = re.findall(pattern, stderr)
        metrics[name] = _parse_float(hits[-1]) if hits else None
    return metrics
883
+
884
+
885
def _parse_scene_changes(stderr: str) -> List[Dict[str, Any]]:
    """Return one ``{"time_seconds": t}`` entry per ``pts_time:`` value found in the log, in order."""
    timestamps = (_parse_float(text) for text in re.findall(r"pts_time:([0-9.]+)", stderr))
    return [{"time_seconds": stamp} for stamp in timestamps if stamp is not None]
892
+
893
+
894
def _parse_blackdetect(stderr: str) -> List[Dict[str, Any]]:
    """Collect ``black_start`` / ``black_end`` / ``black_duration`` triples from ffmpeg log text."""
    triple = re.compile(r"black_start:([0-9.]+)\s+black_end:([0-9.]+)\s+black_duration:([0-9.]+)")
    return [
        {
            "start": _parse_float(hit.group(1)),
            "end": _parse_float(hit.group(2)),
            "duration": _parse_float(hit.group(3)),
        }
        for hit in triple.finditer(stderr)
    ]
904
+
905
+
906
+ def _parse_silencedetect(stderr: str) -> List[Dict[str, Any]]:
907
+ starts = [_parse_float(v) for v in re.findall(r"silence_start:\s*([0-9.]+)", stderr)]
908
+ ends = [(_parse_float(end), _parse_float(duration)) for end, duration in re.findall(r"silence_end:\s*([0-9.]+)\s*\|\s*silence_duration:\s*([0-9.]+)", stderr)]
909
+ intervals = []
910
+ for index, start in enumerate(starts):
911
+ end = ends[index][0] if index < len(ends) else None
912
+ duration = ends[index][1] if index < len(ends) else None
913
+ intervals.append({"start": start, "end": end, "duration": duration})
914
+ return intervals
915
+
916
+
917
+ def _parse_idet(stderr: str) -> Dict[str, Any]:
918
+ match = re.search(
919
+ r"Multi frame detection:\s*TFF:\s*(\d+)\s*BFF:\s*(\d+)\s*Progressive:\s*(\d+)\s*Undetermined:\s*(\d+)",
920
+ stderr,
921
+ )
922
+ if not match:
923
+ return {}
924
+ tff, bff, progressive, undetermined = [int(v) for v in match.groups()]
925
+ dominant = max(
926
+ [("tff", tff), ("bff", bff), ("progressive", progressive), ("undetermined", undetermined)],
927
+ key=lambda row: row[1],
928
+ )[0]
929
+ return {
930
+ "tff": tff,
931
+ "bff": bff,
932
+ "progressive": progressive,
933
+ "undetermined": undetermined,
934
+ "dominant": dominant,
935
+ }
936
+
937
+
938
def _readthrough_analysis(path: str) -> Dict[str, Any]:
    """Run the five ffmpeg read-through passes over ``path`` and collect their parsed logs.

    Passes, in order: loudness, scene changes, black frames, silence and
    interlace detection (limited to 500 frames).  Each section records
    whether ffmpeg exited with status 0 together with whatever the matching
    parser extracted from stderr.  The top-level ``success`` is always True.
    """
    passes = (
        ("loudness", {"audio_filter": "ebur128=peak=true"}, "metrics", _parse_loudness),
        ("scenes", {"video_filter": "select='gt(scene,0.3)',showinfo"}, "items", _parse_scene_changes),
        ("black_frames", {"video_filter": "blackdetect=d=0.5:pix_th=0.10"}, "items", _parse_blackdetect),
        ("silence", {"audio_filter": "silencedetect=noise=-50dB:d=1"}, "items", _parse_silencedetect),
        ("interlace", {"video_filter": "idet", "frames": 500}, "metrics", _parse_idet),
    )
    report: Dict[str, Any] = {"success": True}
    for section, filter_kwargs, payload_key, parser in passes:
        exit_code, log_text = _ffmpeg_stderr_filter(path, **filter_kwargs)
        report[section] = {
            "success": exit_code == 0,
            payload_key: parser(log_text),
        }
    return report
972
+
973
+
974
+ def _frame_number_for_time(seconds: Optional[float], fps: Optional[float]) -> Optional[int]:
975
+ if seconds is None:
976
+ return None
977
+ try:
978
+ return int(round(max(0.0, float(seconds)) * max(float(fps or 24.0), 1.0)))
979
+ except (TypeError, ValueError):
980
+ return None
981
+
982
+
983
+ def _frame_step_seconds(fps: Optional[float]) -> float:
984
+ try:
985
+ parsed = float(fps or 24.0)
986
+ except (TypeError, ValueError):
987
+ parsed = 24.0
988
+ return 1.0 / max(parsed, 1.0)
989
+
990
+
991
+ def _clamp_sample_time(value: float, duration: Optional[float]) -> float:
992
+ if duration is None or duration <= 0:
993
+ return max(0.0, value)
994
+ return min(max(0.0, value), max(0.0, duration - 0.001))
995
+
996
+
997
def _cut_boundary_analysis(
    duration: Optional[float],
    scene_items: List[Dict[str, Any]],
    fps: Optional[float],
    *,
    min_shot_duration_seconds: float = 0.75,
    flash_frame_max_duration_seconds: float = 0.25,
) -> Dict[str, Any]:
    """Turn detected scene-change times into cut points, shot ranges and short/flash candidates.

    Args:
        duration: Media duration in seconds, or ``None`` when unknown.
        scene_items: Dicts carrying a ``time_seconds`` value; other entries are skipped.
        fps: Frame rate used for time-to-frame conversion and the one-frame offset.
        min_shot_duration_seconds: Ranges at or below this length are short-shot candidates.
        flash_frame_max_duration_seconds: Interior ranges at or below this length are
            flash-frame candidates.

    Returns:
        A dict with ``cut_points``, ``raw_shot_ranges``, ``shot_ranges``,
        ``short_shot_candidates``, ``flash_frame_candidates`` and summary
        figures (cut count, cut density per minute, ``likely_edited_sequence``).

    NOTE(review): the flash-frame checks in both loops are written here as
    loop-level statements, independent of the short-shot checks - confirm
    against the original indentation.
    """
    frame_step = _frame_step_seconds(fps)
    scene_times = []
    for item in scene_items or []:
        if not isinstance(item, dict):
            continue
        t = _parse_float(item.get("time_seconds"))
        # Keep only times strictly inside the media.
        if t is None or t <= 0:
            continue
        if duration is not None and t >= duration:
            continue
        scene_times.append(round(t, 3))
    # Millisecond rounding above lets set() collapse near-duplicate detections.
    scene_times = sorted(set(scene_times))

    cut_points = []
    for index, t in enumerate(scene_times, 1):
        # Sample one frame step on each side of the detected cut.
        before_time = _clamp_sample_time(t - frame_step, duration)
        after_time = _clamp_sample_time(t + frame_step, duration)
        cut_points.append({
            "index": index,
            "time_seconds": t,
            "frame": _frame_number_for_time(t, fps),
            "before_time_seconds": before_time,
            "before_frame": _frame_number_for_time(before_time, fps),
            "after_time_seconds": after_time,
            "after_frame": _frame_number_for_time(after_time, fps),
            "needs_visual_confirmation": True,
            "source": "ffmpeg_scene_detection",
        })

    # Raw ranges: every gap between consecutive boundaries (0, the cuts and,
    # when known, the duration), with no minimum length applied.
    raw_shot_ranges = []
    boundaries: List[float] = [0.0]
    boundaries.extend(scene_times)
    if duration is not None and duration > 0:
        boundaries.append(float(duration))
    for index in range(max(0, len(boundaries) - 1)):
        start = boundaries[index]
        end = boundaries[index + 1]
        if end <= start:
            continue
        raw_shot_ranges.append({
            "index": index + 1,
            "start": start,
            "end": end,
            "duration": end - start,
            "start_frame": _frame_number_for_time(start, fps),
            "end_frame": _frame_number_for_time(end, fps),
        })

    shot_ranges = []
    flash_candidates = []
    short_shot_candidates = []
    # (start, end) keys already reported, so the second pass below does not
    # add the same range again.
    flash_keys = set()
    short_keys = set()
    for raw_shot in raw_shot_ranges:
        shot_duration = _parse_float(raw_shot.get("duration"))
        start = _parse_float(raw_shot.get("start"))
        end = _parse_float(raw_shot.get("end"))
        if shot_duration is not None and shot_duration <= float(min_shot_duration_seconds):
            short_keys.add((round(start or 0.0, 3), round(end or 0.0, 3)))
            short_shot_candidates.append(dict(raw_shot))
        # A flash candidate must be bounded by cuts on both sides: it may not
        # start at 0 and must end before the media does.
        if (
            shot_duration is not None
            and shot_duration <= float(flash_frame_max_duration_seconds)
            and start not in (None, 0.0)
            and end is not None
            and duration is not None
            and end < duration
        ):
            flash_keys.add((round(start, 3), round(end, 3)))
            flash_candidates.append({
                **raw_shot,
                "mid_sample_time_seconds": _clamp_sample_time(start + shot_duration / 2.0, duration),
                "reason": "adjacent scene detections bound a very short segment",
                "needs_visual_confirmation": True,
            })

    # Second pass over the ranges produced by _shot_ranges_from_scenes, which
    # is defined elsewhere in this file; each entry is read via its "index",
    # "start" and "end" keys.
    for shot in _shot_ranges_from_scenes(
        duration,
        [{"time_seconds": t} for t in scene_times],
        min_duration_seconds=float(min_shot_duration_seconds),
    ):
        start = _parse_float(shot.get("start"))
        end = _parse_float(shot.get("end"))
        shot_duration = (end - start) if start is not None and end is not None else None
        # First/last samples sit one frame step inside the shot.
        first_sample = _clamp_sample_time((start or 0.0) + frame_step, duration)
        if end is not None:
            last_sample = _clamp_sample_time(max(start or 0.0, end - frame_step), duration)
        else:
            last_sample = first_sample
        row = {
            "index": shot.get("index"),
            "start": start,
            "end": end,
            "duration": shot_duration,
            "start_frame": _frame_number_for_time(start, fps),
            "end_frame": _frame_number_for_time(end, fps),
            "first_sample_time_seconds": first_sample,
            "last_sample_time_seconds": last_sample,
            "first_sample_frame": _frame_number_for_time(first_sample, fps),
            "last_sample_frame": _frame_number_for_time(last_sample, fps),
        }
        shot_ranges.append(row)
        short_key = (round(start or 0.0, 3), round(end or 0.0, 3))
        if shot_duration is not None and shot_duration <= float(min_shot_duration_seconds) and short_key not in short_keys:
            short_keys.add(short_key)
            short_shot_candidates.append(row)
        if (
            shot_duration is not None
            and shot_duration <= float(flash_frame_max_duration_seconds)
            and start not in (None, 0.0)
            and end is not None
            and duration is not None
            and end < duration
            and (round(start, 3), round(end, 3)) not in flash_keys
        ):
            flash_candidates.append({
                **row,
                "mid_sample_time_seconds": _clamp_sample_time(start + shot_duration / 2.0, duration),
                "reason": "scene-bounded shot shorter than flash frame threshold",
                "needs_visual_confirmation": True,
            })

    # Durations under one second are treated as one second for the density.
    cut_density_per_minute = (len(cut_points) / max(float(duration or 0.0), 1.0)) * 60.0 if duration else 0.0
    return {
        "success": True,
        "source": "ffmpeg_scene_detection",
        "threshold": 0.3,
        "fps": fps,
        "frame_step_seconds": frame_step,
        "duration_seconds": duration,
        "cut_count": len(cut_points),
        "cut_density_per_minute": cut_density_per_minute,
        "likely_edited_sequence": bool(len(cut_points) >= 2 or cut_density_per_minute >= 3.0),
        "cut_points": cut_points,
        "raw_shot_ranges": raw_shot_ranges,
        "shot_ranges": shot_ranges,
        "short_shot_candidates": short_shot_candidates,
        "flash_frame_candidates": flash_candidates,
        "notes": [
            "FFmpeg scene detection reads the full video stream; boundary frames are sampled for visual confirmation when available.",
            "Short scene-bounded ranges are candidates only until LLM/frame review distinguishes flash frames from deliberate cuts or high motion.",
        ],
    }
1148
+
1149
+
1150
def _sample_times(
    duration: Optional[float],
    scene_items: List[Dict[str, Any]],
    budget: int,
    *,
    fps: Optional[float] = None,
    cut_analysis: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Choose up to ``budget`` timestamps at which frames should be sampled.

    Candidates come from fixed positions (near the start, the midpoint, near
    the end), the cut points, shot ranges and flash candidates in
    ``cut_analysis``, the raw scene changes, and a few evenly spaced
    intervals.  Each candidate carries a priority; lower numbers are kept
    first when the budget runs out.  Candidates that land on the same frame
    are collapsed to the highest-priority one.

    Args:
        duration: Media duration in seconds; ``None`` or 0 disables the
            duration-based candidates.
        scene_items: Scene-change dicts with ``time_seconds``.  ``None`` and
            non-dict entries are tolerated and skipped.
        budget: Maximum number of samples; ``<= 0`` yields an empty list.
        fps: Frame rate used to decide whether two times share a frame.
        cut_analysis: Optional result dict providing ``cut_points``,
            ``shot_ranges`` and ``flash_frame_candidates``.

    Returns:
        Sample dicts sorted by ``time_seconds`` (rounded to milliseconds),
        each with a ``selection_reason`` and any extra context fields.
    """
    if budget <= 0:
        return []
    duration = duration or 0
    candidates: List[Dict[str, Any]] = []

    def add(time_seconds: Optional[float], reason: str, priority: int, **extra: Any) -> None:
        if time_seconds is None:
            return
        candidates.append({
            "time_seconds": _clamp_sample_time(float(time_seconds), duration),
            "selection_reason": reason,
            "priority": priority,
            **extra,
        })

    if duration > 0:
        add(min(duration * 0.05, max(duration - 0.05, 0)), "first_usable", 6)
        add(duration * 0.5, "midpoint", 70)
        add(max(duration - min(duration * 0.05, 0.5), 0), "last_usable", 6)

    cut_analysis = cut_analysis if isinstance(cut_analysis, dict) else {}
    for cut in cut_analysis.get("cut_points") or []:
        if not isinstance(cut, dict):
            continue
        cut_index = cut.get("index")
        add(
            cut.get("before_time_seconds"),
            "cut_before",
            5,
            cut_index=cut_index,
            cut_time_seconds=cut.get("time_seconds"),
            boundary_role="last_frame_before_cut",
        )
        add(
            cut.get("after_time_seconds"),
            "cut_after",
            5,
            cut_index=cut_index,
            cut_time_seconds=cut.get("time_seconds"),
            boundary_role="first_frame_after_cut",
        )

    for shot in cut_analysis.get("shot_ranges") or []:
        if not isinstance(shot, dict):
            continue
        shot_index = shot.get("index")
        add(
            shot.get("first_sample_time_seconds"),
            "shot_start",
            12,
            shot_index=shot_index,
            shot_start=shot.get("start"),
            shot_end=shot.get("end"),
        )
        add(
            shot.get("last_sample_time_seconds"),
            "shot_end",
            12,
            shot_index=shot_index,
            shot_start=shot.get("start"),
            shot_end=shot.get("end"),
        )

    for flash in cut_analysis.get("flash_frame_candidates") or []:
        if not isinstance(flash, dict):
            continue
        add(
            flash.get("mid_sample_time_seconds"),
            "flash_candidate",
            4,
            shot_index=flash.get("index"),
            shot_start=flash.get("start"),
            shot_end=flash.get("end"),
        )

    # Same tolerance as the loops above: a missing list or a malformed entry
    # must not abort sampling.
    for scene in (scene_items or [])[: max(budget, 1)]:
        if not isinstance(scene, dict):
            continue
        t = scene.get("time_seconds")
        if isinstance(t, (int, float)) and t >= 0:
            add(float(t), "scene_change", 15)

    if duration > 0:
        # Up to three evenly spaced filler samples, lowest priority of all.
        interval_count = max(0, min(budget, 6) - 3)
        for index in range(interval_count):
            add(duration * ((index + 1) / (interval_count + 1)), "interval", 80)

    unique: List[Dict[str, Any]] = []
    seen = set()
    frame_step = _frame_step_seconds(fps)
    # Walk candidates best-priority first so that, per frame, the most
    # important reason is the one that survives de-duplication.
    for candidate in sorted(candidates, key=lambda row: (int(row.get("priority", 99)), float(row.get("time_seconds") or 0.0))):
        rounded = round(max(float(candidate.get("time_seconds") or 0.0), 0), 3)
        key = round(rounded / max(frame_step, 0.001))
        if key in seen:
            continue
        seen.add(key)
        row = dict(candidate)
        row["time_seconds"] = rounded
        row.pop("priority", None)
        unique.append(row)
        if len(unique) >= budget:
            break
    return sorted(unique, key=lambda row: float(row.get("time_seconds") or 0.0))
1259
+
1260
+
1261
def _raw_frame(path: str, time_seconds: float, width: int = 96, height: int = 54) -> Optional[bytes]:
    """Decode one frame at ``time_seconds`` as a ``width`` x ``height`` RGB24 thumbnail.

    The frame is scaled to fit and padded to the exact size, so a successful
    result is always ``width * height * 3`` bytes.

    Returns:
        The raw pixel bytes, or ``None`` when ffmpeg fails or yields too
        little data.

    Raises:
        subprocess.TimeoutExpired: ffmpeg ran longer than 60 seconds.
        OSError: ffmpeg could not be started.
    """
    args = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-ss",
        f"{time_seconds:.3f}",
        "-i",
        path,
        "-frames:v",
        "1",
        "-vf",
        f"scale={width}:{height}:force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2,format=rgb24",
        "-f",
        "rawvideo",
        "-",
    ]
    # stdin is detached so ffmpeg's interactive key polling cannot consume
    # input that belongs to the host process.
    proc = subprocess.run(
        args,
        stdin=subprocess.DEVNULL,
        capture_output=True,
        timeout=60,
        check=False,
    )
    expected = width * height * 3
    if proc.returncode != 0 or len(proc.stdout) < expected:
        return None
    return proc.stdout[:expected]
1284
+
1285
+
1286
+ def _frame_metrics(raw: bytes) -> Dict[str, Any]:
1287
+ count = max(1, len(raw) // 3)
1288
+ lum_sum = 0.0
1289
+ bins = [0] * 16
1290
+ r_sum = g_sum = b_sum = 0
1291
+ for idx in range(0, len(raw), 3):
1292
+ r, g, b = raw[idx], raw[idx + 1], raw[idx + 2]
1293
+ r_sum += r
1294
+ g_sum += g
1295
+ b_sum += b
1296
+ lum = 0.2126 * r + 0.7152 * g + 0.0722 * b
1297
+ lum_sum += lum
1298
+ bins[min(15, int(lum // 16))] += 1
1299
+ return {
1300
+ "mean_luma": lum_sum / count,
1301
+ "mean_rgb": [r_sum / count, g_sum / count, b_sum / count],
1302
+ "luma_histogram_16": bins,
1303
+ }
1304
+
1305
+
1306
+ def _frame_delta(raw_a: Optional[bytes], raw_b: Optional[bytes]) -> Optional[float]:
1307
+ if not raw_a or not raw_b:
1308
+ return None
1309
+ total = 0
1310
+ n = min(len(raw_a), len(raw_b))
1311
+ for idx in range(n):
1312
+ total += abs(raw_a[idx] - raw_b[idx])
1313
+ return total / max(1, n) / 255.0
1314
+
1315
+
1316
def _export_analysis_frame(path: str, time_seconds: float, output_path: str) -> bool:
    """Write the frame at ``time_seconds`` of ``path`` to ``output_path`` as an image.

    An existing file at ``output_path`` is overwritten, so repeated runs
    refresh the frame instead of failing.

    Returns:
        True when ffmpeg exits with status 0 and the output file exists.
    """
    parent = os.path.dirname(output_path)
    if parent:
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        os.makedirs(parent, exist_ok=True)
    code, _, _ = _run_command([
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        # Without -y ffmpeg refuses to replace an existing output file (it
        # asks on stdin), which made every re-export report failure.
        "-y",
        "-ss",
        f"{time_seconds:.3f}",
        "-i",
        path,
        "-frames:v",
        "1",
        "-q:v",
        "3",
        output_path,
    ], timeout=60)
    return code == 0 and os.path.isfile(output_path)
1334
+
1335
+
1336
def _motion_and_keyframes(
    path: str,
    duration: Optional[float],
    scene_items: List[Dict[str, Any]],
    artifacts: Dict[str, Any],
    budget: int,
    *,
    fps: Optional[float] = None,
    cut_analysis: Optional[Dict[str, Any]] = None,
    write_frames: bool = True,
) -> Dict[str, Any]:
    """Sample frames across the media, measure frame-to-frame change and rate overall motion.

    Args:
        path: Media file to sample.
        duration: Media duration in seconds, if known.
        scene_items: Scene-change dicts passed on to ``_sample_times``.
        artifacts: Mapping that may provide ``frames_dir`` for exported JPEGs.
        budget: Requested number of samples.
        fps: Frame rate passed on to ``_sample_times``.
        cut_analysis: Optional cut analysis whose cut points and flash
            candidates raise the sampling budget.
        write_frames: When False no image files are exported.

    Returns:
        A dict with the sampled keyframes (``analysis_keyframes``), the
        average/max frame delta, ``overall_motion_level`` ("low", "medium" or
        "high"), budget bookkeeping and cut-boundary pair coverage.
    """
    sampled = []
    previous_raw = None
    # Each cut wants a before and an after frame; each flash candidate wants one.
    required_boundary_frames = 0
    if isinstance(cut_analysis, dict):
        required_boundary_frames += len(cut_analysis.get("cut_points") or []) * 2
        required_boundary_frames += len(cut_analysis.get("flash_frame_candidates") or [])
    # Raise the budget to fit the boundary frames plus three, but never raise
    # it beyond HARD_FRAME_CAP.  The requested budget itself is not clamped here.
    effective_budget = max(int(budget or 0), min(HARD_FRAME_CAP, required_boundary_frames + 3))
    times = _sample_times(duration, scene_items, effective_budget, fps=fps, cut_analysis=cut_analysis)
    frames_dir = artifacts.get("frames_dir")
    for index, sample in enumerate(times, 1):
        time_seconds = float(sample.get("time_seconds") or 0.0)
        raw = _raw_frame(path, time_seconds)
        if not raw:
            # Frame could not be decoded; skip it and keep the previous
            # reference frame for the next delta.
            continue
        metrics = _frame_metrics(raw)
        delta = _frame_delta(previous_raw, raw)
        previous_raw = raw
        frame_path = None
        if write_frames and frames_dir:
            candidate = os.path.join(frames_dir, f"sampled_{index:04d}.jpg")
            if _export_analysis_frame(path, time_seconds, candidate):
                frame_path = candidate
        sampled_row = {
            "index": index,
            "time_seconds": time_seconds,
            "selection_reason": sample.get("selection_reason") or "interval",
            "frame_path": frame_path,
            "metrics": metrics,
            "delta_from_previous": delta,
        }
        # Carry over whatever context the sample selection attached.
        for key in ("cut_index", "cut_time_seconds", "boundary_role", "shot_index", "shot_start", "shot_end", "motion_peak"):
            if sample.get(key) not in (None, ""):
                sampled_row[key] = sample.get(key)
        sampled.append(sampled_row)
    deltas = [row["delta_from_previous"] for row in sampled if row.get("delta_from_previous") is not None]
    avg_delta = sum(deltas) / len(deltas) if deltas else 0.0
    max_delta = max(deltas) if deltas else 0.0
    if max_delta >= 0.08:
        # Mark every sample that reaches the maximum delta as a motion peak.
        for row in sampled:
            if row.get("delta_from_previous") == max_delta:
                row["motion_peak"] = True
                row["motion_peak_source_reason"] = row.get("selection_reason")
    if max_delta >= 0.2 or avg_delta >= 0.1:
        level = "high"
    elif max_delta >= 0.08 or avg_delta >= 0.035:
        level = "medium"
    else:
        level = "low"
    total_cut_points = len(cut_analysis.get("cut_points") or []) if isinstance(cut_analysis, dict) else 0
    # Collect, per cut index, which boundary roles were actually sampled.
    cut_roles: Dict[Any, set] = {}
    for row in sampled:
        cut_index = row.get("cut_index")
        boundary_role = row.get("boundary_role")
        if cut_index in (None, "") or boundary_role in (None, ""):
            continue
        cut_roles.setdefault(cut_index, set()).add(boundary_role)
    # A cut counts as covered only when both its before and after frames exist.
    paired_cut_boundaries = sum(
        1
        for roles in cut_roles.values()
        if {"last_frame_before_cut", "first_frame_after_cut"}.issubset(roles)
    )
    return {
        "success": True,
        "requested_sample_budget": int(budget or 0),
        "effective_sample_budget": effective_budget,
        "hard_frame_cap": HARD_FRAME_CAP,
        "cut_boundary_frames_requested": required_boundary_frames,
        "cut_boundary_sampling_capped": required_boundary_frames + 3 > HARD_FRAME_CAP,
        "cut_boundary_pairs_total": total_cut_points,
        "cut_boundary_pairs_sampled": paired_cut_boundaries,
        "cut_boundary_pair_coverage": paired_cut_boundaries / total_cut_points if total_cut_points else 1.0,
        "sample_count": len(sampled),
        "overall_motion_level": level,
        "average_frame_delta": avg_delta,
        "max_frame_delta": max_delta,
        "analysis_keyframes": sampled,
        "cut_analysis": cut_analysis or {},
    }
1425
+
1426
+
1427
def seconds_to_srt_time(seconds: float) -> str:
    """Format a time in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Negative input is treated as 0; the value is rounded to the nearest
    millisecond.
    """
    total_ms = int(round(max(0.0, seconds) * 1000))
    whole_seconds, ms = divmod(total_ms, 1000)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{ms:03d}"
1433
+
1434
+
1435
def seconds_to_vtt_time(seconds: float) -> str:
    """Format a time in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``."""
    srt_stamp = seconds_to_srt_time(seconds)
    # Same layout as SRT, with a dot instead of the comma.
    return srt_stamp.replace(",", ".")
1437
+
1438
+
1439
def segments_to_srt(segments: List[Dict[str, Any]]) -> str:
    """Render transcript segments as SRT text.

    Cues are numbered from 1.  A segment without ``end`` reuses its ``start``;
    a missing ``start`` counts as 0 and missing ``text`` as empty.
    """
    cues = []
    for number, cue in enumerate(segments, start=1):
        begins = seconds_to_srt_time(float(cue.get("start", 0)))
        finishes = seconds_to_srt_time(float(cue.get("end", cue.get("start", 0))))
        caption = str(cue.get("text", "")).strip()
        cues.append(f"{number}\n{begins} --> {finishes}\n{caption}\n")
    # Each cue already ends with a newline, so joining adds the blank separator line.
    return "\n".join(cues)
1447
+
1448
+
1449
def segments_to_vtt(segments: List[Dict[str, Any]]) -> str:
    """Render transcript segments as WebVTT text, starting with the ``WEBVTT`` header.

    A segment without ``end`` reuses its ``start``; a missing ``start`` counts
    as 0 and missing ``text`` as empty.
    """
    blocks = ["WEBVTT\n"]
    for cue in segments:
        begins = seconds_to_vtt_time(float(cue.get("start", 0)))
        finishes = seconds_to_vtt_time(float(cue.get("end", cue.get("start", 0))))
        caption = str(cue.get("text", "")).strip()
        blocks.append(f"{begins} --> {finishes}\n{caption}\n")
    return "\n".join(blocks)
1457
+
1458
+
1459
+ def _write_text(path: str, content: str) -> None:
1460
+ os.makedirs(os.path.dirname(path), exist_ok=True)
1461
+ with open(path, "w", encoding="utf-8") as f:
1462
+ f.write(content)
1463
+
1464
+
1465
def _iter_analysis_reports(project_root: str) -> List[Tuple[str, Dict[str, Any]]]:
    """Collect every readable ``analysis.json`` under ``<project_root>/clips``.

    Files that cannot be read, are not valid UTF-8 JSON, or whose top-level
    value is not an object are skipped, so one damaged report never aborts
    the scan.

    Returns:
        ``(path, report)`` pairs; an empty list when the clips directory does
        not exist.
    """
    clips_root = os.path.join(normalize_path(project_root), "clips")
    reports: List[Tuple[str, Dict[str, Any]]] = []
    if not os.path.isdir(clips_root):
        return reports
    for dirpath, _, filenames in os.walk(clips_root):
        if "analysis.json" not in filenames:
            continue
        path = os.path.join(dirpath, "analysis.json")
        try:
            payload = _read_json(path)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and the
            # UnicodeDecodeError raised for non-UTF-8 content.
            continue
        if isinstance(payload, dict):
            reports.append((path, payload))
    return reports
1479
+
1480
+
1481
+ def _normalized_report_match_value(value: Any, *, path_like: bool = False) -> Optional[str]:
1482
+ if value in (None, ""):
1483
+ return None
1484
+ if path_like:
1485
+ try:
1486
+ return normalize_path(value)
1487
+ except Exception:
1488
+ return str(value)
1489
+ return str(value)
1490
+
1491
+
1492
def _report_matches_record(report: Dict[str, Any], record: Dict[str, Any]) -> bool:
    """Tell whether a stored analysis report belongs to the given clip record.

    A match is either an identical normalized source path or an identical
    ``clip_id`` or ``media_id``; blank values never match.
    """
    clip_info = report.get("clip") or {}

    stored_path = _normalized_report_match_value(
        report.get("source_file") or clip_info.get("file_path"),
        path_like=True,
    )
    wanted_path = _normalized_report_match_value(record.get("file_path"), path_like=True)
    if stored_path and wanted_path and stored_path == wanted_path:
        return True

    def same_identifier(key: str) -> bool:
        stored = _normalized_report_match_value(clip_info.get(key))
        wanted = _normalized_report_match_value(record.get(key))
        return bool(stored and wanted and stored == wanted)

    return any(same_identifier(key) for key in ("clip_id", "media_id"))
1504
+
1505
+
1506
def _report_missing_layers(report: Dict[str, Any], depth: str, options: Dict[str, Any]) -> List[str]:
    """List the analysis layers that ``report`` lacks for the requested depth and options.

    ``technical`` and ``marker_plan`` are always required.  The depths
    ``standard``, ``deep`` and ``custom`` also require ``motion``,
    ``readthrough`` and ``cut_analysis``.  ``transcription`` is required when
    enabled in ``options`` (on by default only for ``deep``); ``vision`` only
    when explicitly enabled.
    """
    gaps: List[str] = []

    def flag_if(is_missing: Any, layer: str) -> None:
        if is_missing:
            gaps.append(layer)

    flag_if(not report.get("technical"), "technical")
    flag_if(not report.get("clip_analysis_markers"), "marker_plan")

    if depth in {"standard", "deep", "custom"}:
        motion_layer = report.get("motion") or {}
        readthrough_layer = report.get("readthrough") or {}
        flag_if(not motion_layer or motion_layer.get("status") == "skipped", "motion")
        flag_if(
            not readthrough_layer or readthrough_layer.get("reason") == "quick analysis depth",
            "readthrough",
        )
        flag_if(not isinstance(readthrough_layer.get("cut_analysis"), dict), "cut_analysis")

    transcription_options = options.get("transcription") or {}
    if _coerce_bool(transcription_options.get("enabled"), default=(depth == "deep")):
        stored_transcript = report.get("transcription") or {}
        flag_if(
            not stored_transcript.get("success") or stored_transcript.get("status") == "skipped",
            "transcription",
        )

    vision_options = options.get("vision") or {}
    if _coerce_bool(vision_options.get("enabled"), default=False):
        stored_visual = report.get("visual") or {}
        flag_if(
            not stored_visual.get("success") or stored_visual.get("status") == "skipped",
            "vision",
        )

    return gaps
1532
+
1533
+
1534
def _report_cache_state(
    report: Dict[str, Any],
    request_signature: Dict[str, Any],
    *,
    max_report_age_days: Optional[float] = None,
    reuse_policy: str = "compatible",
) -> Tuple[List[str], List[str]]:
    """Compare a stored report against the current request signature.

    Args:
        report: Previously written analysis report.
        request_signature: Signature describing the analysis being requested.
        max_report_age_days: When set, reports older than this many days (or
            with an unreadable ``analyzed_at``) are flagged.
        reuse_policy: With ``"fresh"`` or ``"strict"`` a missing stored
            signature is an issue; otherwise it is only a warning.

    Returns:
        ``(issues, warnings)`` as two lists of string codes.
    """

    def _set_and_different(stored: Any, requested: Any) -> bool:
        # Only a mismatch when both sides carry a truthy value.
        return bool(stored and requested and stored != requested)

    issues: List[str] = []
    warnings: List[str] = []

    analyzed_ts = _timestamp_from_analyzed_at(report.get("analyzed_at"))
    if max_report_age_days is not None:
        if analyzed_ts is None:
            issues.append("analysis_age_unknown")
        elif (time.time() - analyzed_ts) / 86400.0 > max_report_age_days:
            issues.append(f"analysis_older_than_{max_report_age_days:g}_days")

    stored_signature = report.get("analysis_signature") or {}
    if not stored_signature:
        # Without a stored signature nothing else can be compared.
        bucket = issues if reuse_policy in {"fresh", "strict"} else warnings
        bucket.append("analysis_signature_missing")
        return issues, warnings

    if stored_signature.get("analysis_version") != request_signature.get("analysis_version"):
        issues.append("analysis_version_changed")

    stored_source = stored_signature.get("source_file") or {}
    wanted_source = request_signature.get("source_file") or {}
    if _set_and_different(stored_source.get("path"), wanted_source.get("path")):
        issues.append("source_path_changed")
    for key in ("size_bytes", "mtime_ns"):
        stored_value = stored_source.get(key)
        wanted_value = wanted_source.get(key)
        if stored_value is None or wanted_value is None:
            continue
        if stored_value != wanted_value:
            issues.append(f"source_{key}_changed")

    stored_budget = int(stored_signature.get("analysis_keyframe_budget") or 0)
    wanted_budget = int(request_signature.get("analysis_keyframe_budget") or 0)
    if stored_budget < wanted_budget:
        issues.append("analysis_keyframe_budget_lower_than_requested")

    stored_layers = stored_signature.get("layers") or {}
    wanted_layers = request_signature.get("layers") or {}

    stored_vision = stored_layers.get("vision") or {}
    wanted_vision = wanted_layers.get("vision") or {}
    if wanted_vision.get("enabled"):
        for key, code in (("provider", "vision_provider_changed"), ("prompt_hash", "vision_prompt_changed")):
            if _set_and_different(stored_vision.get(key), wanted_vision.get(key)):
                issues.append(code)

    stored_transcription = stored_layers.get("transcription") or {}
    wanted_transcription = wanted_layers.get("transcription") or {}
    if wanted_transcription.get("enabled"):
        for key in ("backend", "model", "language"):
            if _set_and_different(stored_transcription.get(key), wanted_transcription.get(key)):
                issues.append(f"transcription_{key}_changed")

    return issues, warnings
1600
+
1601
+
1602
def find_reusable_report(
    project_root: str,
    record: Dict[str, Any],
    depth: str,
    options: Dict[str, Any],
    *,
    request_signature: Optional[Dict[str, Any]] = None,
    max_report_age_days: Optional[float] = None,
    reuse_policy: str = "compatible",
) -> Optional[Dict[str, Any]]:
    """Find an existing analysis report that satisfies the requested layers.

    Every report under ``project_root`` that matches ``record`` is scored by
    its number of missing layers plus cache issues; ties go to the most
    recently analyzed report. Returns ``None`` when no report matches the
    record. Otherwise returns a summary dict whose ``reusable`` flag is True
    only when the best match has no missing layers and no cache issues, in
    which case the dict also carries the ``report`` itself.
    """
    supplied_signature = request_signature or {}
    frame_count = int(
        supplied_signature.get("analysis_keyframe_budget")
        or FRAME_CAPS.get(depth, FRAME_CAPS[DEFAULT_DEPTH])
    )
    if not request_signature:
        request_signature = analysis_request_signature(record, depth, options, frame_count)

    matches = []
    for path, report in _iter_analysis_reports(project_root):
        if not _report_matches_record(report, record):
            continue
        missing = _report_missing_layers(report, depth, options)
        cache_issues, cache_warnings = _report_cache_state(
            report,
            request_signature,
            max_report_age_days=max_report_age_days,
            reuse_policy=reuse_policy,
        )
        analyzed_at = report.get("analyzed_at")
        matches.append({
            "path": path,
            "report": report,
            "missing_layers": missing,
            "cache_issues": cache_issues,
            "cache_warnings": cache_warnings,
            "analyzed_at": analyzed_at,
            "analyzed_timestamp": _timestamp_from_analyzed_at(report.get("analyzed_at")) or 0,
        })
    if not matches:
        return None

    def _rank(row: Dict[str, Any]) -> Tuple[int, float]:
        # Fewest problems first, then newest analysis.
        problems = len(row["missing_layers"]) + len(row["cache_issues"])
        return (problems, -float(row.get("analyzed_timestamp") or 0))

    # min() keeps the first of equally ranked rows, like a stable sort would.
    best = min(matches, key=_rank)
    reusable = not (best["missing_layers"] or best["cache_issues"])
    summary: Dict[str, Any] = {
        "path": best["path"],
        "missing_layers": [] if reusable else best["missing_layers"],
        "cache_issues": [] if reusable else best["cache_issues"],
        "cache_warnings": best["cache_warnings"],
        "analyzed_at": best.get("analyzed_at"),
        "reusable": reusable,
    }
    if reusable:
        summary["report"] = best["report"]
    return summary
1660
+
1661
+
1662
def _report_reuse_score(candidate: Optional[Dict[str, Any]]) -> Tuple[int, float]:
    """Return a sort key for a reuse candidate; lower sorts first.

    The key is ``(missing layers + cache issues, -analyzed timestamp)``, so a
    candidate with fewer problems wins and ties go to the newest analysis.
    A missing or empty candidate gets the sentinel ``(9999, 0.0)``.
    """
    if not candidate:
        return (9999, 0.0)
    missing_layers = candidate.get("missing_layers") or []
    cache_issues = candidate.get("cache_issues") or []
    analyzed_ts = _timestamp_from_analyzed_at(candidate.get("analyzed_at")) or 0
    problem_count = len(missing_layers) + len(cache_issues)
    return (problem_count, -float(analyzed_ts))
1669
+
1670
+
1671
def find_reusable_report_across_roots(
    project_roots: Iterable[Any],
    record: Dict[str, Any],
    depth: str,
    options: Dict[str, Any],
    *,
    request_signature: Optional[Dict[str, Any]] = None,
    max_report_age_days: Optional[float] = None,
    reuse_policy: str = "compatible",
) -> Optional[Dict[str, Any]]:
    """Find the best compatible report across active and prior project roots.

    Falsy roots are skipped and each normalized root is searched once. The
    returned candidate is a copy of that root's best match with a
    ``project_root`` key added. Fully reusable candidates are preferred; if
    there are none, the best non-reusable one is returned. Returns ``None``
    when no root yields a candidate.
    """
    found: List[Dict[str, Any]] = []
    visited = set()
    for raw_root in project_roots or []:
        if not raw_root:
            continue
        root = normalize_path(raw_root)
        if root in visited:
            continue
        visited.add(root)
        hit = find_reusable_report(
            root,
            record,
            depth,
            options,
            request_signature=request_signature,
            max_report_age_days=max_report_age_days,
            reuse_policy=reuse_policy,
        )
        if hit:
            found.append({**hit, "project_root": root})

    if not found:
        return None
    fully_reusable = [row for row in found if row.get("reusable")]
    # min() returns the first of equally scored rows, matching a stable sort.
    return min(fully_reusable or found, key=_report_reuse_score)
1711
+
1712
+
1713
def _normalize_word_timestamps(raw_words: Any) -> List[Dict[str, Any]]:
    """Normalize a backend's word-timestamp list into plain dicts.

    Args:
        raw_words: Expected to be a list of dicts; anything else yields ``[]``
            and non-dict entries are skipped.

    Returns:
        One dict per input word with ``word``, ``start`` and ``end`` (``end``
        falls back to ``start``), plus any of ``probability``, ``confidence``
        and ``score`` that parse as floats. Keys whose value is ``None`` or
        ``""`` are dropped.
    """
    words: List[Dict[str, Any]] = []
    if not isinstance(raw_words, list):
        return words
    for raw_word in raw_words:
        if not isinstance(raw_word, dict):
            continue
        # Fall back to "text" when "word" is missing, None or empty, so an
        # explicit null is not stringified into the literal text "None".
        raw_text = raw_word.get("word")
        if raw_text in (None, ""):
            raw_text = raw_word.get("text")
        text = "" if raw_text is None else str(raw_text).strip()
        start = _parse_float(raw_word.get("start"))
        end = _parse_float(raw_word.get("end"))
        word: Dict[str, Any] = {
            "word": text,
            "start": start,
            "end": end if end is not None else start,
        }
        for key in ("probability", "confidence", "score"):
            value = _parse_float(raw_word.get(key))
            if value is not None:
                word[key] = value
        words.append({key: value for key, value in word.items() if value not in (None, "")})
    return words
1734
+
1735
+
1736
def _normalize_transcript_payload(raw: Dict[str, Any], backend: str, language: Optional[str] = None) -> Dict[str, Any]:
    """Convert a backend's raw transcription result into the shared payload shape.

    Each segment is reduced to ``start``/``end``/``text`` (``start`` defaults
    to 0.0 and ``end`` to ``start``), with normalized ``words`` when present.
    The payload's ``words`` list uses the top-level words from ``raw`` if there
    are any, otherwise the words collected from the segments. ``text`` is
    taken from ``raw`` or, when that is ``None``, joined from the segment texts.
    """
    segments = []
    segment_words: List[Dict[str, Any]] = []
    for raw_segment in raw.get("segments") or []:
        seg_start = _parse_float(raw_segment.get("start")) or 0.0
        seg_end = _parse_float(raw_segment.get("end"))
        entry = {
            "start": seg_start,
            "end": seg_start if seg_end is None else seg_end,
            "text": str(raw_segment.get("text", "")).strip(),
        }
        words = _normalize_word_timestamps(raw_segment.get("words"))
        if words:
            entry["words"] = words
            segment_words.extend(words)
        segments.append(entry)

    # Top-level words, when present, replace the per-segment collection.
    all_words = _normalize_word_timestamps(raw.get("words")) or segment_words

    text = raw.get("text")
    if text is None:
        text = " ".join(entry.get("text", "") for entry in segments).strip()

    payload = {
        "success": True,
        "backend": backend,
        "language": raw.get("language") or language or "unknown",
        "text": text,
        "segments": segments,
    }
    if all_words:
        payload["words"] = all_words
    return payload
1770
+
1771
+
1772
def _write_transcript_artifacts(payload: Dict[str, Any], artifacts: Dict[str, Any]) -> None:
    """Write the transcript to each artifact path that is configured.

    ``transcript_json`` receives the whole payload; ``transcript_srt`` and
    ``transcript_vtt`` receive the payload's segments rendered as subtitles.
    Missing or falsy paths are skipped.
    """
    json_path = artifacts.get("transcript_json")
    if json_path:
        _write_json(json_path, payload)

    srt_path = artifacts.get("transcript_srt")
    if srt_path:
        _write_text(srt_path, segments_to_srt(payload.get("segments", [])))

    vtt_path = artifacts.get("transcript_vtt")
    if vtt_path:
        _write_text(vtt_path, segments_to_vtt(payload.get("segments", [])))
1779
+
1780
+
1781
def _transcribe_with_whisper_cli(path: str, artifacts: Dict[str, Any], transcription: Dict[str, Any]) -> Dict[str, Any]:
    """Transcribe ``path`` by running the ``whisper`` command-line tool.

    Args:
        path: Media file handed to the CLI.
        artifacts: Artifact paths; ``transcript_json`` (or ``analysis_json``)
            decides where the ``transcript-work`` scratch directory is created.
        transcription: Options; reads ``model`` (default ``"base"``),
            ``language`` and ``timeout`` (seconds, default 1800).

    Returns:
        The normalized transcript payload on success, a ``skipped`` result
        when the CLI is not on ``PATH``, or a failure dict with ``error``.
    """
    whisper = shutil.which("whisper")
    if not whisper:
        return {"success": False, "status": "skipped", "backend": "whisper_cli", "reason": "whisper CLI not found"}
    work_dir = os.path.join(os.path.dirname(artifacts.get("transcript_json") or artifacts["analysis_json"]), "transcript-work")
    os.makedirs(work_dir, exist_ok=True)
    cmd = [
        whisper,
        path,
        "--model",
        str(transcription.get("model") or "base"),
        "--output_format",
        "json",
        "--output_dir",
        work_dir,
    ]
    if transcription.get("language"):
        cmd.extend(["--language", str(transcription["language"])])
    # An explicit null timeout falls back to the default instead of crashing in int().
    raw_timeout = transcription.get("timeout")
    timeout = 1800 if raw_timeout is None else int(raw_timeout)
    code, _, stderr = _run_command(cmd, timeout=timeout)
    if code != 0:
        return {"success": False, "backend": "whisper_cli", "error": (stderr or "").strip() or "whisper CLI failed"}
    # The whisper CLI names its output after the input file's basename, so
    # prefer that file. Taking the newest JSON unconditionally could pick up a
    # leftover result from another clip that shares this work directory.
    expected = Path(work_dir) / (os.path.splitext(os.path.basename(path))[0] + ".json")
    if expected.is_file():
        result_file = expected
    else:
        json_files = sorted(Path(work_dir).glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True)
        if not json_files:
            return {"success": False, "backend": "whisper_cli", "error": "whisper CLI produced no JSON output"}
        result_file = json_files[0]
    raw = _read_json(str(result_file))
    payload = _normalize_transcript_payload(raw, "whisper_cli", transcription.get("language"))
    _write_transcript_artifacts(payload, artifacts)
    return payload
1809
+
1810
+
1811
def _transcribe_with_mlx_whisper(path: str, artifacts: Dict[str, Any], transcription: Dict[str, Any]) -> Dict[str, Any]:
    """Transcribe ``path`` in-process with the optional ``mlx_whisper`` module.

    Returns a ``skipped`` result when the module cannot be imported. Otherwise
    the raw result is normalized, written to the configured transcript
    artifacts and returned. ``transcription`` supplies ``model``, ``language``
    and ``word_timestamps``.
    """
    try:
        import mlx_whisper  # type: ignore[import-not-found]
    except ImportError:
        return {"success": False, "status": "skipped", "backend": "mlx_whisper", "reason": "mlx_whisper module not found"}

    model = transcription.get("model") or "mlx-community/whisper-large-v3-turbo"
    # Pass a language only when one was requested.
    language_kwargs = {"language": transcription["language"]} if transcription.get("language") else {}
    want_word_timestamps = bool(transcription.get("word_timestamps", False))
    raw = mlx_whisper.transcribe(
        path,
        path_or_hf_repo=model,
        word_timestamps=want_word_timestamps,
        verbose=False,
        **language_kwargs,
    )
    payload = _normalize_transcript_payload(raw, "mlx_whisper", transcription.get("language"))
    _write_transcript_artifacts(payload, artifacts)
    return payload
1830
+
1831
+
1832
def _transcribe(path: str, artifacts: Dict[str, Any], options: Dict[str, Any], capabilities: Dict[str, Any]) -> Dict[str, Any]:
    """Run the transcription layer with the configured or first available backend.

    Returns a ``skipped`` success when transcription is disabled. The mock
    backends write a canned or caller-supplied transcript. ``whisper_cli`` and
    ``mlx_whisper`` run only when ``allow_model_download`` is enabled.
    ``whisper_cpp`` and ``resolve`` never execute here and return explanatory
    results, as does the case where no backend can be determined.
    """
    transcription = options.get("transcription") or {}
    if not _coerce_bool(transcription.get("enabled"), default=False):
        return {"success": True, "status": "skipped", "reason": "transcription disabled"}

    backend = transcription.get("backend")
    if not backend:
        # Fall back to the first backend advertised by the capability probe.
        available = capabilities.get("transcription", {}).get("backends") or []
        backend = available[0] if available else None

    if backend in {"mock", "local_mock"}:
        segments = transcription.get("segments") or [{"start": 0.0, "end": 1.0, "text": "Mock local transcript segment."}]
        payload = {
            "success": True,
            "backend": backend,
            "language": transcription.get("language", "unknown"),
            "segments": segments,
            "text": " ".join(s.get("text", "") for s in segments),
        }
        _write_transcript_artifacts(payload, artifacts)
        return payload

    if backend in {"whisper_cli", "mlx_whisper"}:
        if not _coerce_bool(transcription.get("allow_model_download"), default=False):
            return {
                "success": False,
                "status": "skipped",
                "backend": backend,
                "reason": "Local transcription may download model files; set allow_model_download=true explicitly to run it.",
            }
        runner = _transcribe_with_whisper_cli if backend == "whisper_cli" else _transcribe_with_mlx_whisper
        return runner(path, artifacts, transcription)

    if backend == "whisper_cpp":
        if transcription.get("model_path"):
            status = "not_implemented"
            reason = "whisper_cpp execution needs per-install CLI validation before enabling."
        else:
            status = "skipped"
            reason = "whisper_cpp requires an explicit model_path; no model files are downloaded automatically."
        return {"success": False, "status": status, "backend": backend, "reason": reason}

    if backend == "resolve":
        return {
            "success": False,
            "status": "skipped",
            "backend": backend,
            "reason": "Resolve-native transcription mutates Resolve project state; use explicit media_pool_item/folder transcription actions.",
        }

    return {"success": False, "status": "skipped", "reason": "No local transcription backend available"}
1879
+
1880
+
1881
def _vision_analysis(record: Dict[str, Any], motion: Dict[str, Any], options: Dict[str, Any], artifacts: Dict[str, Any], capabilities: Dict[str, Any]) -> Dict[str, Any]:
    """Produce the visual-analysis layer for a clip.

    Only the local mock providers generate a payload, built from the motion
    evidence in ``motion``. Chat-context providers and every other provider
    return a ``skipped`` result. The mock payload is written to
    ``artifacts["visual_json"]`` when that path is set.
    """
    vision = options.get("vision") or {}
    if not _coerce_bool(vision.get("enabled"), default=False):
        return {"success": True, "status": "skipped", "reason": "vision disabled"}

    provider = vision.get("provider") or capabilities.get("vision", {}).get("provider")
    skip_reason = None
    if provider in CHAT_CONTEXT_VISION_PROVIDERS:
        skip_reason = "Chat-context vision requires MCP client sampling support for this tool call."
    elif provider not in {"mock", "local_mock"}:
        skip_reason = "Only local mock vision is implemented in this offline pass; no frames were sent externally."
    if skip_reason is not None:
        return {"success": False, "status": "skipped", "provider": provider, "reason": skip_reason}

    # Per-frame fields copied through only when they carry a value.
    passthrough_keys = (
        "cut_index",
        "cut_time_seconds",
        "boundary_role",
        "shot_index",
        "shot_start",
        "shot_end",
        "motion_peak",
        "motion_peak_source_reason",
    )
    keyframes = []
    for frame in motion.get("analysis_keyframes", []):
        row = {
            "time_seconds": frame.get("time_seconds"),
            "selection_reason": frame.get("selection_reason"),
            "description": "Local mock vision description for representative frame.",
            "editing_value": "Use as a searchable representative moment.",
            "qc_flags": [],
        }
        row.update({key: frame.get(key) for key in passthrough_keys if frame.get(key) not in (None, "")})
        keyframes.append(row)

    raw_cut_analysis = motion.get("cut_analysis")
    cut_analysis = raw_cut_analysis if isinstance(raw_cut_analysis, dict) else {}
    motion_level = motion.get("overall_motion_level", "unknown")

    editorial_classification = {
        "primary_use": "unknown",
        "select_potential": "low" if motion.get("overall_motion_level") == "low" else "medium",
        "reason": "Derived from local motion/variance evidence only.",
    }
    content = {
        "locations": [],
        "people_visible": "unknown",
        "actions": [],
        "objects": [],
        "visible_text": [],
        "notable_audio_context": [],
    }
    shot_and_style = {
        "shot_sizes": [],
        "camera_motion": [motion_level],
        "composition_notes": "",
        "lighting_mood": "",
        "color_mood": "",
    }
    motion_summary = {
        "overall_level": motion_level,
        "motion_events": [],
        "quiet_regions": [],
    }
    cut_understanding = {
        "cut_count": cut_analysis.get("cut_count", 0),
        "likely_edited_sequence": bool(cut_analysis.get("likely_edited_sequence")),
        "flash_frame_candidates": cut_analysis.get("flash_frame_candidates", []),
        "notes": cut_analysis.get("notes", []),
    }
    editing_notes = {
        "best_moments": [],
        "continuity_flags": [],
        "qc_flags": [],
        "search_tags": [slugify(record.get("clip_name"), "clip")],
    }
    confidence = {
        "visual": "low",
        "motion": "computed",
        "transcript": "unavailable",
    }

    payload = {
        "success": True,
        "provider": provider,
        "clip_summary": f"Local mock visual analysis for {record.get('clip_name') or record.get('file_path')}.",
        "editorial_classification": editorial_classification,
        "content": content,
        "shot_and_style": shot_and_style,
        "motion": motion_summary,
        "cut_understanding": cut_understanding,
        "analysis_keyframes": keyframes,
        "editing_notes": editing_notes,
        "confidence": confidence,
    }
    visual_path = artifacts.get("visual_json")
    if visual_path:
        _write_json(visual_path, payload)
    return payload
1965
+
1966
+
1967
def _analysis_fps(record: Dict[str, Any], technical: Dict[str, Any]) -> float:
    """Resolve the frame rate to use for this clip's analysis.

    The record's ``fps``/``frame_rate``/``frameRate`` value is tried first:
    strings are tried as a fraction, then by their first number, then whole.
    Next comes the first usable ``frame_rate`` among the video streams in
    ``technical["summary"]``. If nothing parses to a truthy value, 24.0 is
    returned.
    """

    def _from_declared(raw: Any) -> Any:
        if isinstance(raw, str):
            as_fraction = _fraction_to_float(raw)
            if as_fraction:
                return as_fraction
            number = re.search(r"\d+(?:\.\d+)?", raw)
            if number:
                leading = _parse_float(number.group(0))
                if leading:
                    return leading
        return _parse_float(raw)

    declared = record.get("fps") or record.get("frame_rate") or record.get("frameRate")
    if declared not in (None, ""):
        fps = _from_declared(declared)
        if fps:
            return fps

    raw_summary = technical.get("summary")
    summary = raw_summary if isinstance(raw_summary, dict) else {}
    for stream in summary.get("video") or []:
        stream_rate = _parse_float(stream.get("frame_rate"))
        if stream_rate:
            return stream_rate
    return 24.0
1988
+
1989
+
1990
def _seconds_to_frame(seconds: Optional[float], fps: float) -> Optional[int]:
    """Convert a time in seconds to a rounded frame index.

    Negative times clamp to 0 and ``fps`` is clamped to at least 1.0.

    Returns:
        The frame index, or ``None`` when ``seconds`` is ``None`` or the
        inputs cannot be converted (non-numeric or non-finite values).
    """
    if seconds is None:
        return None
    try:
        return int(round(max(0.0, float(seconds)) * max(float(fps), 1.0)))
    except (TypeError, ValueError, OverflowError):
        # round() raises OverflowError for infinity and ValueError for NaN.
        return None
1997
+
1998
+
1999
def _duration_frames(start_seconds: Optional[float], end_seconds: Optional[float], fps: float, *, fallback: int = 1) -> int:
    """Return the length of a time range in frames, never less than 1.

    ``fallback`` is returned when either bound is ``None`` or cannot be
    converted to a frame index.
    """
    if start_seconds is not None and end_seconds is not None:
        first_frame = _seconds_to_frame(start_seconds, fps)
        last_frame = _seconds_to_frame(end_seconds, fps)
        if first_frame is not None and last_frame is not None:
            return max(1, last_frame - first_frame)
    return fallback
2007
+
2008
+
2009
def _time_seconds_from_text(value: Any) -> Optional[float]:
    """Extract a time in seconds from a note, or ``None`` when none is found.

    For a dict the explicit time keys are tried first; otherwise its
    ``text``/``note``/``description`` is searched like plain text. Text is
    searched for a clock value (``[HH:]MM:SS`` with an optional ``.`` or ``,``
    fraction) and then for a number followed by a seconds unit.
    """
    if isinstance(value, dict):
        for key in ("time_seconds", "timeSeconds", "start", "start_seconds", "startSeconds"):
            explicit = _parse_float(value.get(key))
            if explicit is not None:
                return explicit
        value = value.get("text") or value.get("note") or value.get("description")

    raw = str(value or "")
    clock = re.search(r"\b(?:(\d{1,2}):)?(\d{1,2}):(\d{2})([.,]\d+)?\b", raw)
    if clock is not None:
        hours_text, minutes_text, seconds_text, fraction_text = clock.groups()
        # Accept a decimal comma as well as a decimal point.
        fraction = float((fraction_text or "0").replace(",", "."))
        return int(hours_text or 0) * 3600 + int(minutes_text) * 60 + int(seconds_text) + fraction

    plain = re.search(r"\b(\d+(?:\.\d+)?)\s*(?:s|sec|secs|seconds)\b", raw, flags=re.IGNORECASE)
    return _parse_float(plain.group(1)) if plain else None
2028
+
2029
+
2030
def _trim_text(value: Any, limit: int = 280) -> str:
    """Collapse whitespace in ``value`` and truncate it to at most ``limit`` characters.

    Falsy values become ``""``. Truncated text ends in ``"..."`` and the
    ellipsis counts toward ``limit``. When ``limit`` is too small to hold the
    ellipsis, the text is cut to ``limit`` characters with no ellipsis.
    """
    text = re.sub(r"\s+", " ", str(value or "")).strip()
    if len(text) <= limit:
        return text
    ellipsis = "..."
    if limit <= len(ellipsis):
        return text[: max(0, limit)]
    # Reserve room for the whole ellipsis so the result never exceeds `limit`.
    return text[: limit - len(ellipsis)].rstrip() + ellipsis
2035
+
2036
+
2037
def _ranges_overlap(
    start_a: Optional[float],
    end_a: Optional[float],
    start_b: Optional[float],
    end_b: Optional[float],
) -> bool:
    """Return True when two closed time ranges share at least one point.

    A missing start is treated as 0.0 and a missing end as that range's start,
    so a range with no end is a single point.
    """
    low_a = 0.0 if start_a is None else start_a
    low_b = 0.0 if start_b is None else start_b
    high_a = low_a if end_a is None else end_a
    high_b = low_b if end_b is None else end_b
    latest_start = max(low_a, low_b)
    earliest_end = min(high_a, high_b)
    return latest_start <= earliest_end
2052
+
2053
+
2054
def _transcript_words_from_payload(transcript: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the word dicts of a transcript payload.

    A non-empty top-level ``words`` list takes precedence, even if filtering
    out its non-dict entries leaves nothing. Otherwise the words are gathered
    from each segment's ``words`` list. Non-dict entries are dropped.
    """
    top_level = transcript.get("words")
    if isinstance(top_level, list) and top_level:
        return [word for word in top_level if isinstance(word, dict)]

    segments = transcript.get("segments")
    if not isinstance(segments, list):
        return []
    return [
        word
        for segment in segments
        if isinstance(segment, dict) and isinstance(segment.get("words"), list)
        for word in segment["words"]
        if isinstance(word, dict)
    ]
2064
+
2065
+
2066
def _transcript_excerpt_for_range(transcript: Dict[str, Any], start: Optional[float], end: Optional[float]) -> str:
    """Return the transcript text that overlaps a time range, trimmed to 280 characters.

    Word timestamps are used when any word overlaps the range; otherwise the
    text of overlapping segments is used. The result is ``""`` when nothing
    overlaps.
    """
    overlapping_words = [
        str(word.get("word") or "").strip()
        for word in _transcript_words_from_payload(transcript)
        if isinstance(word, dict)
        and _ranges_overlap(start, end, _parse_float(word.get("start")), _parse_float(word.get("end")))
    ]
    if overlapping_words:
        return _trim_text(" ".join(token for token in overlapping_words if token), 280)

    raw_segments = transcript.get("segments")
    segments = raw_segments if isinstance(raw_segments, list) else []
    overlapping_text = [
        str(segment.get("text") or "").strip()
        for segment in segments
        if isinstance(segment, dict)
        and _ranges_overlap(start, end, _parse_float(segment.get("start")), _parse_float(segment.get("end")))
    ]
    return _trim_text(" ".join(text for text in overlapping_text if text), 280)
2090
+
2091
+
2092
def _visual_description_for_time(vision: Dict[str, Any], start: Optional[float], end: Optional[float]) -> str:
    """Describe what is on screen around a time range.

    Picks the description of the keyframe closest to the range midpoint (or to
    ``start`` when ``end`` is missing). When no time is given, the first
    described keyframe wins. Falls back to the clip summary and then to a
    fixed "unavailable" message. Keyframe text is trimmed to 360 characters.
    """
    keyframes = vision.get("analysis_keyframes") if isinstance(vision.get("analysis_keyframes"), list) else []
    midpoint = None
    if start is not None and end is not None:
        midpoint = (float(start) + float(end)) / 2.0
    elif start is not None:
        midpoint = float(start)
    best = None
    best_distance = None
    for keyframe in keyframes:
        if not isinstance(keyframe, dict):
            continue
        description = keyframe.get("description") or keyframe.get("visual_description")
        if not description:
            continue
        frame_time = _parse_float(keyframe.get("time_seconds")) or 0.0
        # Compare against None explicitly: a midpoint of 0.0 is a real time
        # and must not be treated as "no midpoint".
        reference = frame_time if midpoint is None else midpoint
        distance = abs(frame_time - reference)
        if best_distance is None or distance < best_distance:
            best = description
            best_distance = distance
    if best:
        return _trim_text(best, 360)
    if vision.get("clip_summary"):
        return _trim_text(vision.get("clip_summary"), 360)
    return "Visual description unavailable from this analysis pass."
2117
+
2118
+
2119
def _shot_ranges_from_scenes(
    duration: Optional[float],
    scene_items: List[Dict[str, Any]],
    *,
    min_duration_seconds: float = 0.75,
) -> List[Dict[str, Any]]:
    """Turn detected scene-change times into numbered shot ranges.

    Scene times are rounded to milliseconds and de-duplicated; times at or
    before 0, or at or past ``duration``, are ignored. With a positive
    duration, a scene time starts a new shot only if it is at least
    ``min_duration_seconds`` after the previous boundary, and the clip end
    closes the last shot. Without a usable duration every scene time starts a
    shot and the last shot has ``end`` of ``None``. Each range is a dict with
    a 1-based ``index``, ``start`` and ``end``.
    """
    unique_times = set()
    for item in scene_items:
        if not isinstance(item, dict):
            continue
        scene_time = _parse_float(item.get("time_seconds"))
        if scene_time is None or scene_time <= 0:
            continue
        if duration is not None and scene_time >= duration:
            continue
        unique_times.add(round(scene_time, 3))
    scene_times = sorted(unique_times)

    if duration is not None and duration > 0:
        boundaries = [0.0]
        for scene_time in scene_times:
            if scene_time - boundaries[-1] >= min_duration_seconds:
                boundaries.append(scene_time)
        # Close the final shot at the clip end unless the last boundary is already within 50 ms of it.
        if duration - boundaries[-1] >= 0.05:
            boundaries.append(float(duration))
        if len(boundaries) < 2:
            boundaries = [0.0, float(duration)]
        return [
            {"index": number, "start": shot_start, "end": shot_end}
            for number, (shot_start, shot_end) in enumerate(zip(boundaries, boundaries[1:]), 1)
        ]

    if scene_times:
        starts = [0.0] + scene_times
        ends = starts[1:] + [None]
        return [
            {"index": number, "start": shot_start, "end": shot_end}
            for number, (shot_start, shot_end) in enumerate(zip(starts, ends), 1)
        ]
    return [{"index": 1, "start": 0.0, "end": duration}]
2158
+
2159
+
2160
def _marker_sound_note(transcript: Dict[str, Any], readthrough: Dict[str, Any], start: Optional[float], end: Optional[float]) -> Tuple[str, str]:
    """Build the sound note for a marker covering ``start``..``end``.

    Returns ``(note, transcript_text)``. When transcript text overlaps the
    range it is quoted in the note and returned as the second element.
    Otherwise the second element is ``""`` and the note reports either
    detected silence or that no excerpt is available.
    """
    excerpt = _transcript_excerpt_for_range(transcript, start, end)
    if excerpt:
        return f"Transcript: {excerpt}", excerpt

    silence = readthrough.get("silence")
    silence_items = (silence or {}).get("items") or [] if isinstance(silence, dict) else []
    in_silence = any(
        isinstance(item, dict)
        and _ranges_overlap(start, end, _parse_float(item.get("start")), _parse_float(item.get("end")))
        for item in silence_items
    )
    if in_silence:
        return "Sound: detected silence or very low-level audio in this range.", ""
    return "Sound: no transcript excerpt available for this range.", ""
2169
+
2170
+
2171
def _build_marker_entry(
    *,
    marker_id: str,
    marker_type: str,
    color: str,
    name: str,
    start: Optional[float],
    end: Optional[float],
    fps: float,
    visual_description: str,
    sound_note: str,
    transcript_text: str = "",
    source: str,
    confidence: str = "computed",
    subtype: Optional[str] = None,
) -> Dict[str, Any]:
    """Assemble one marker-plan entry from its parts.

    Frame fields are derived from ``start``/``end`` at ``fps``, and
    ``write_to_resolve`` is always False. Keys whose value is ``None`` or
    ``""`` are left out of the returned dict.
    """
    start_frame = _seconds_to_frame(start, fps)
    duration_frames = _duration_frames(start, end, fps)
    fields = (
        ("id", marker_id),
        ("type", marker_type),
        ("subtype", subtype),
        ("color", color),
        ("name", name),
        ("start_seconds", start),
        ("end_seconds", end),
        ("start_frame", start_frame),
        ("duration_frames", duration_frames),
        ("visual_description", visual_description),
        ("sound_note", sound_note),
        ("transcript_text", transcript_text),
        ("source", source),
        ("confidence", confidence),
        ("write_to_resolve", False),
    )
    return {key: value for key, value in fields if value not in (None, "")}
2205
+
2206
+
2207
+ def _build_clip_marker_plan(
2208
+ record: Dict[str, Any],
2209
+ technical: Dict[str, Any],
2210
+ readthrough: Dict[str, Any],
2211
+ motion: Dict[str, Any],
2212
+ transcript: Dict[str, Any],
2213
+ vision: Dict[str, Any],
2214
+ *,
2215
+ options: Dict[str, Any],
2216
+ analysis_signature: Optional[Dict[str, Any]] = None,
2217
+ ) -> Dict[str, Any]:
2218
+ fps = _analysis_fps(record, technical)
2219
+ duration = _media_duration_seconds(record, technical)
2220
+ marker_options = options.get("marker_plan") if isinstance(options.get("marker_plan"), dict) else {}
2221
+ min_shot_duration = _parse_float(marker_options.get("min_shot_duration_seconds"))
2222
+ if min_shot_duration is None:
2223
+ min_shot_duration = 0.75
2224
+ color_scheme = {
2225
+ **MARKER_PLAN_DEFAULT_COLORS,
2226
+ **({
2227
+ str(key): str(value)
2228
+ for key, value in marker_options.get("colors", {}).items()
2229
+ if value not in (None, "")
2230
+ } if isinstance(marker_options.get("colors"), dict) else {}),
2231
+ }
2232
+ markers: List[Dict[str, Any]] = []
2233
+ untimed_notes: List[Dict[str, Any]] = []
2234
+ scene_items = ((readthrough.get("scenes") or {}).get("items") or []) if isinstance(readthrough.get("scenes"), dict) else []
2235
+ cut_analysis = readthrough.get("cut_analysis") if isinstance(readthrough.get("cut_analysis"), dict) else {}
2236
+ shot_ranges = cut_analysis.get("shot_ranges") if isinstance(cut_analysis.get("shot_ranges"), list) else None
2237
+ if not shot_ranges:
2238
+ shot_ranges = _shot_ranges_from_scenes(duration, scene_items, min_duration_seconds=float(min_shot_duration))
2239
+ for shot in shot_ranges:
2240
+ start = _parse_float(shot.get("start"))
2241
+ end = _parse_float(shot.get("end"))
2242
+ sound_note, transcript_text = _marker_sound_note(transcript, readthrough, start, end)
2243
+ markers.append(_build_marker_entry(
2244
+ marker_id=f"shot-{int(shot['index']):03d}",
2245
+ marker_type="shot",
2246
+ color=color_scheme["shot"],
2247
+ name=f"Shot {int(shot['index']):03d}",
2248
+ start=start,
2249
+ end=end,
2250
+ fps=fps,
2251
+ visual_description=_visual_description_for_time(vision, start, end),
2252
+ sound_note=sound_note,
2253
+ transcript_text=transcript_text,
2254
+ source="scene_detection",
2255
+ ))
2256
+
2257
+ flash_candidates = cut_analysis.get("flash_frame_candidates") if isinstance(cut_analysis.get("flash_frame_candidates"), list) else []
2258
+ for index, item in enumerate(flash_candidates, 1):
2259
+ if not isinstance(item, dict):
2260
+ continue
2261
+ start = _parse_float(item.get("start"))
2262
+ end = _parse_float(item.get("end"))
2263
+ sound_note, transcript_text = _marker_sound_note(transcript, readthrough, start, end)
2264
+ markers.append(_build_marker_entry(
2265
+ marker_id=f"flash-frame-candidate-{index:03d}",
2266
+ marker_type="qc_warning",
2267
+ subtype="flash_frame_candidate",
2268
+ color=color_scheme["qc_warning"],
2269
+ name="QC: Flash Frame Candidate",
2270
+ start=start,
2271
+ end=end,
2272
+ fps=fps,
2273
+ visual_description=(
2274
+ "FFmpeg detected a very short scene-bounded range. Review boundary frames to distinguish "
2275
+ "a flash frame, title/black insertion, or deliberate rapid cut from a high-motion moment."
2276
+ ),
2277
+ sound_note=sound_note,
2278
+ transcript_text=transcript_text,
2279
+ source="cut_boundary_analysis",
2280
+ confidence="computed_needs_visual_confirmation",
2281
+ ))
2282
+
2283
+ black_items = ((readthrough.get("black_frames") or {}).get("items") or []) if isinstance(readthrough.get("black_frames"), dict) else []
2284
+ for index, item in enumerate(black_items, 1):
2285
+ if not isinstance(item, dict):
2286
+ continue
2287
+ start = _parse_float(item.get("start"))
2288
+ end = _parse_float(item.get("end"))
2289
+ sound_note, transcript_text = _marker_sound_note(transcript, readthrough, start, end)
2290
+ markers.append(_build_marker_entry(
2291
+ marker_id=f"black-or-title-{index:03d}",
2292
+ marker_type="qc_warning",
2293
+ subtype="black_or_title",
2294
+ color=color_scheme["black_or_title"],
2295
+ name="QC: Black/Very Dark Range",
2296
+ start=start,
2297
+ end=end,
2298
+ fps=fps,
2299
+ visual_description=(
2300
+ "Detected black or very dark picture. Review as true black, scanned tape black, "
2301
+ "dropout, or title fade before using as an edit point."
2302
+ ),
2303
+ sound_note=sound_note,
2304
+ transcript_text=transcript_text,
2305
+ source="blackdetect",
2306
+ confidence="computed",
2307
+ ))
2308
+
2309
+ editing_notes = vision.get("editing_notes") if isinstance(vision.get("editing_notes"), dict) else {}
2310
+ for index, item in enumerate(editing_notes.get("best_moments") or [], 1):
2311
+ start = _time_seconds_from_text(item)
2312
+ if start is None:
2313
+ untimed_notes.append({"type": "best_moment", "note": _trim_text(item), "reason": "missing_time"})
2314
+ continue
2315
+ end = min(start + 1.0, duration) if duration else start + 1.0
2316
+ sound_note, transcript_text = _marker_sound_note(transcript, readthrough, start, end)
2317
+ markers.append(_build_marker_entry(
2318
+ marker_id=f"best-moment-{index:03d}",
2319
+ marker_type="best_moment",
2320
+ color=color_scheme["best_moment"],
2321
+ name="Best Moment",
2322
+ start=start,
2323
+ end=end,
2324
+ fps=fps,
2325
+ visual_description=_visual_description_for_time(vision, start, end),
2326
+ sound_note=sound_note or _trim_text(item),
2327
+ transcript_text=transcript_text,
2328
+ source="visual_editing_notes",
2329
+ confidence="model_suggested",
2330
+ ))
2331
+
2332
+ qc_sources = list(technical.get("summary", {}).get("warnings") or []) + list(editing_notes.get("qc_flags") or [])
2333
+ for index, item in enumerate(qc_sources, 1):
2334
+ start = _time_seconds_from_text(item)
2335
+ if start is None:
2336
+ untimed_notes.append({"type": "qc_warning", "note": _trim_text(item), "reason": "missing_time"})
2337
+ continue
2338
+ end = min(start + 1.0, duration) if duration else start + 1.0
2339
+ sound_note, transcript_text = _marker_sound_note(transcript, readthrough, start, end)
2340
+ markers.append(_build_marker_entry(
2341
+ marker_id=f"qc-warning-{index:03d}",
2342
+ marker_type="qc_warning",
2343
+ color=color_scheme["qc_warning"],
2344
+ name="QC Warning",
2345
+ start=start,
2346
+ end=end,
2347
+ fps=fps,
2348
+ visual_description=_visual_description_for_time(vision, start, end),
2349
+ sound_note=sound_note,
2350
+ transcript_text=transcript_text,
2351
+ source="analysis_warning",
2352
+ confidence="model_suggested",
2353
+ ))
2354
+
2355
+ markers.sort(key=lambda row: (float(row.get("start_seconds") or 0.0), row.get("type") or "", row.get("id") or ""))
2356
+ words = _transcript_words_from_payload(transcript)
2357
+ return {
2358
+ "success": True,
2359
+ "schema": "davinci_resolve_mcp.clip_analysis_markers.v1",
2360
+ "analysis_version": ANALYSIS_VERSION,
2361
+ "analysis_signature": analysis_signature or {},
2362
+ "clip": record,
2363
+ "fps": fps,
2364
+ "duration_seconds": duration,
2365
+ "color_scheme": color_scheme,
2366
+ "write_to_resolve_default": False,
2367
+ "resolve_marker_writeback": {
2368
+ "optional": True,
2369
+ "enabled": False,
2370
+ "write_action": "publish_clip_metadata",
2371
+ "required_flags": {"write_markers": True, "confirm": True, "dry_run": False},
2372
+ },
2373
+ "transcript_index": {
2374
+ "available": bool(transcript.get("text") or transcript.get("segments")),
2375
+ "segments": len(transcript.get("segments") or []),
2376
+ "word_timestamps": bool(words),
2377
+ "words": len(words),
2378
+ },
2379
+ "timeline_occurrences": record.get("timeline_occurrences") or [],
2380
+ "cut_analysis": {
2381
+ "cut_count": cut_analysis.get("cut_count", 0),
2382
+ "likely_edited_sequence": bool(cut_analysis.get("likely_edited_sequence")),
2383
+ "flash_frame_candidates": len(flash_candidates),
2384
+ },
2385
+ "marker_count": len(markers),
2386
+ "markers": markers,
2387
+ "untimed_notes": untimed_notes,
2388
+ "motion_summary": {
2389
+ "overall_motion_level": motion.get("overall_motion_level"),
2390
+ "average_frame_delta": motion.get("average_frame_delta"),
2391
+ "max_frame_delta": motion.get("max_frame_delta"),
2392
+ },
2393
+ }
2394
+
2395
+
2396
def _synthesize_analysis(
    record: Dict[str, Any],
    technical: Dict[str, Any],
    readthrough: Dict[str, Any],
    motion: Dict[str, Any],
    transcript: Dict[str, Any],
    vision: Dict[str, Any],
    *,
    depth: str = DEFAULT_DEPTH,
    options: Optional[Dict[str, Any]] = None,
    frame_count: int = 0,
    analysis_signature: Optional[Dict[str, Any]] = None,
    marker_plan: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Combine the per-stage results for one clip into a single report dict.

    Args:
        record: Clip record; ``clip_name`` and ``file_path`` are read here and
            the whole record is embedded under ``"clip"``.
        technical: Probe payload; its ``"summary"`` entry is embedded and its
            ``summary.warnings`` seed the technical warning list.
        readthrough: Read-through results keyed by stage name.
        motion: Motion results (``overall_motion_level``, ``analysis_keyframes``).
        transcript: Transcription payload, embedded unchanged.
        vision: Visual analysis payload, embedded unchanged.
        depth: Analysis depth label recorded in the profile.
        options: Request options; only ``transcription.enabled`` and
            ``vision.enabled`` are read here.
        frame_count: Keyframe budget recorded in the profile.
        analysis_signature: Precomputed signature; when falsy a new one is
            requested from ``analysis_request_signature``.
        marker_plan: Marker plan embedded under ``"clip_analysis_markers"``.

    Returns:
        The report dictionary (always ``"success": True``).
    """
    opts = options or {}

    # Start from the probe warnings, then flag read-through stages that
    # explicitly reported success=False.
    collected_warnings = []
    probe_warnings = technical.get("summary", {}).get("warnings")
    if probe_warnings:
        collected_warnings.extend(technical["summary"]["warnings"])
    for stage in ("loudness", "scenes", "black_frames", "silence", "interlace"):
        stage_result = readthrough.get(stage)
        if isinstance(stage_result, dict) and stage_result.get("success") is False:
            collected_warnings.append(f"{stage} analysis did not complete")

    # One-line summary: "<clip name>, <duration>s, <level> motion".
    headline = []
    if record.get("clip_name"):
        headline.append(str(record["clip_name"]))
    duration = _media_duration_seconds(record, technical)
    if duration is not None:
        headline.append(f"{duration:.1f}s")
    motion_level = motion.get("overall_motion_level")
    if motion_level:
        headline.append(f"{motion['overall_motion_level']} motion")

    signature = analysis_signature or analysis_request_signature(record, depth, options or {}, frame_count)
    transcription_opts = opts.get("transcription") or {}
    vision_opts = opts.get("vision") or {}
    profile = {
        "depth": depth,
        "analysis_keyframe_budget": int(frame_count or 0),
        # Transcription defaults to on only for the "deep" depth.
        "transcription_enabled": _coerce_bool(transcription_opts.get("enabled"), default=(depth == "deep")),
        "vision_enabled": _coerce_bool(vision_opts.get("enabled"), default=False),
    }
    analyzed_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    cut_analysis = readthrough.get("cut_analysis")
    if not isinstance(cut_analysis, dict):
        cut_analysis = {}

    return {
        "success": True,
        "analysis_version": ANALYSIS_VERSION,
        "analysis_signature": signature,
        "analysis_profile": profile,
        "analyzed_at": analyzed_at,
        "source_file": record.get("file_path"),
        "clip": record,
        "summary": ", ".join(headline) if headline else "Analyzed media clip",
        "technical_warnings": collected_warnings,
        "technical": technical.get("summary", {}),
        "readthrough": readthrough,
        "cut_analysis": cut_analysis,
        "motion": motion,
        "transcription": transcript,
        "visual": vision,
        "analysis_keyframes": motion.get("analysis_keyframes", []),
        "clip_analysis_markers": marker_plan or {},
    }
2449
+
2450
+
2451
async def _maybe_run_vision_analysis(
    record: Dict[str, Any],
    motion: Dict[str, Any],
    options: Dict[str, Any],
    artifacts: Dict[str, Any],
    capabilities: Dict[str, Any],
    vision_runner: Any = None,
) -> Dict[str, Any]:
    """Produce the visual analysis for a clip, preferring a caller-supplied runner.

    When ``vision_runner`` is given and ``vision_uses_chat_context`` approves,
    the runner is called (and awaited if it returns an awaitable). A dict
    result is written to ``artifacts["visual_json"]`` when that path is set
    and then returned. In every other case the built-in ``_vision_analysis``
    result is returned.
    """
    runner_selected = vision_runner is not None and vision_uses_chat_context(options, capabilities)
    if runner_selected:
        result = vision_runner(record, motion, options, artifacts, capabilities)
        # The runner may be a plain callable or a coroutine function.
        if inspect.isawaitable(result):
            result = await result
        if isinstance(result, dict):
            if artifacts.get("visual_json"):
                _write_json(artifacts["visual_json"], result)
            return result
    # No runner, runner not applicable, or runner returned a non-dict value.
    return _vision_analysis(record, motion, options, artifacts, capabilities)
2468
+
2469
+
2470
async def execute_plan_async(
    plan: Dict[str, Any],
    params: Optional[Dict[str, Any]] = None,
    capabilities: Optional[Dict[str, Any]] = None,
    vision_runner: Any = None,
) -> Dict[str, Any]:
    """Execute an analysis plan clip by clip and return the run manifest.

    Args:
        plan: Plan dictionary. Read here: ``success``, ``capability_gaps``,
            ``install_guidance``, ``output_root.project_root``, ``depth``,
            ``target`` and ``clips`` (each with ``record``, ``artifacts`` and
            optional reuse/budget fields).
        params: Request parameters. Read here: ``session_only``,
            ``keep_artifacts``, ``transcription``, ``vision``,
            ``marker_plan``/``markerPlan``, ``cleanup_frames``,
            ``auto_build_index`` and ``_session_temp_base_root``.
        capabilities: Capability map; detected via ``detect_capabilities``
            when not supplied.
        vision_runner: Optional callable forwarded to
            ``_maybe_run_vision_analysis``.

    Returns:
        The unsuccessful ``plan`` itself, an error dict when required
        capabilities are missing, or otherwise the manifest, which is also
        written to ``manifest.json`` under the project root.
    """
    params = params or {}
    caps = capabilities or detect_capabilities()
    session_only = _coerce_bool(params.get("session_only"), default=False)
    keep_artifacts = _coerce_bool(params.get("keep_artifacts"), default=False)

    if not plan.get("success"):
        return plan
    if plan.get("capability_gaps"):
        return {
            "success": False,
            "error": "Cannot execute analysis with missing required capabilities",
            "capability_gaps": plan.get("capability_gaps"),
            "install_guidance": plan.get("install_guidance"),
        }

    output_root = plan["output_root"]["project_root"]
    os.makedirs(output_root, exist_ok=True)
    options = {
        "transcription": params.get("transcription") or {},
        "vision": params.get("vision") or {},
        "marker_plan": params.get("marker_plan") or params.get("markerPlan") or {},
    }
    keep_frame_artifacts_for_vision = vision_uses_chat_context(options, caps)
    depth = plan.get("depth", DEFAULT_DEPTH)
    manifest = {
        "success": True,
        "analysis_version": ANALYSIS_VERSION,
        "target": plan.get("target"),
        "depth": depth,
        "session_only": session_only,
        "persistent": not session_only,
        "keep_artifacts": keep_artifacts,
        "started_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "project_root": output_root,
        "clips": [],
    }
    _write_json(os.path.join(output_root, "capabilities.json"), caps)

    # Same for every clip, so resolve the flag once.
    cleanup_frames = _coerce_bool(params.get("cleanup_frames"), default=False)
    clip_outcomes = manifest["clips"]

    for clip_plan in plan.get("clips", []):
        record = clip_plan["record"]
        artifacts = clip_plan["artifacts"]
        media_path = record.get("file_path")
        cached_report = clip_plan.get("existing_report") or {}
        outcome = {
            "record": record,
            "artifacts": artifacts,
            "success": False,
        }

        # Reuse a previously written report when the plan says so.
        if clip_plan.get("skip_execution") and cached_report.get("path"):
            outcome.update({
                "success": True,
                "reused": True,
                "analysis_json": cached_report["path"],
                "reuse_reason": clip_plan.get("reuse_reason"),
                "cache_status": clip_plan.get("cache_status"),
                "cache_warnings": cached_report.get("cache_warnings", []),
            })
            clip_outcomes.append(outcome)
            continue

        if not media_path or not os.path.isfile(media_path):
            outcome["error"] = f"Source media not found: {media_path}"
            clip_outcomes.append(outcome)
            continue

        technical = _ffprobe(media_path)
        if not technical.get("success"):
            outcome["error"] = technical.get("error")
            clip_outcomes.append(outcome)
            continue
        _write_json(artifacts["technical_json"], technical)

        # Placeholders used as-is for depths outside standard/deep/custom.
        readthrough: Dict[str, Any] = {"success": True, "status": "skipped", "reason": "quick analysis depth"}
        motion: Dict[str, Any] = {"success": True, "status": "skipped", "analysis_keyframes": []}
        if depth in {"standard", "deep", "custom"}:
            readthrough = _readthrough_analysis(media_path)
            duration = _media_duration_seconds(record, technical)
            fps = _analysis_fps(record, technical)
            readthrough["cut_analysis"] = _cut_boundary_analysis(
                duration,
                (readthrough.get("scenes") or {}).get("items", []),
                fps,
            )
            motion = _motion_and_keyframes(
                media_path,
                duration,
                (readthrough.get("scenes") or {}).get("items", []),
                artifacts,
                int(clip_plan.get("analysis_keyframe_budget") or 0),
                fps=fps,
                cut_analysis=readthrough.get("cut_analysis"),
                # Frames are written when vision needs them or when they
                # are not going to be cleaned up afterwards.
                write_frames=keep_frame_artifacts_for_vision or not cleanup_frames,
            )
            if artifacts.get("motion_json"):
                _write_json(artifacts["motion_json"], motion)

        transcript = _transcribe(media_path, artifacts, options, caps)
        vision = await _maybe_run_vision_analysis(record, motion, options, artifacts, caps, vision_runner)
        frame_count = int(clip_plan.get("analysis_keyframe_budget") or 0)
        clip_signature = clip_plan.get("analysis_signature")

        marker_plan = _build_clip_marker_plan(
            record,
            technical,
            readthrough,
            motion,
            transcript,
            vision,
            options=options,
            analysis_signature=clip_signature,
        )
        marker_plan_target = artifacts.get("marker_plan_json")
        if marker_plan_target:
            marker_plan["path"] = artifacts["marker_plan_json"]
            _write_json(artifacts["marker_plan_json"], marker_plan)

        analysis = _synthesize_analysis(
            record,
            technical,
            readthrough,
            motion,
            transcript,
            vision,
            depth=depth,
            options=options,
            frame_count=frame_count,
            analysis_signature=clip_signature,
            marker_plan=marker_plan,
        )
        _write_json(artifacts["analysis_json"], analysis)

        if cleanup_frames and artifacts.get("frames_dir"):
            shutil.rmtree(artifacts["frames_dir"], ignore_errors=True)

        outcome.update({
            "success": True,
            "analysis_json": artifacts["analysis_json"],
            "marker_plan_json": artifacts.get("marker_plan_json"),
            "marker_count": marker_plan.get("marker_count"),
        })
        clip_outcomes.append(outcome)

    manifest["completed_at"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    manifest["clip_count"] = len(manifest["clips"])
    manifest["successful_clip_count"] = len([row for row in manifest["clips"] if row.get("success")])

    # Persistent runs with at least one success rebuild the index unless
    # the caller opted out.
    wants_index = (
        not session_only
        and manifest["successful_clip_count"]
        and _coerce_bool(params.get("auto_build_index"), default=True)
    )
    if wants_index:
        manifest["index"] = build_analysis_index(output_root)

    _write_json(os.path.join(output_root, "manifest.json"), manifest)

    if not session_only:
        return manifest

    # Session-only runs return the reports inline, because the artifacts on
    # disk may be removed below.
    inline_reports = []
    for row in manifest["clips"]:
        report_file = row.get("analysis_json")
        if not (report_file and os.path.isfile(report_file)):
            continue
        try:
            inline_reports.append(_read_json(report_file))
        except (OSError, json.JSONDecodeError):
            continue
    manifest["reports"] = inline_reports
    manifest["project_summary"] = summarize_reports(output_root)
    manifest["artifacts_cleaned_up"] = False

    if not keep_artifacts:
        cleanup_root = output_root
        session_temp_base = params.get("_session_temp_base_root")
        if session_temp_base:
            candidate = normalize_path(session_temp_base)
            # Widen the deletion to the temp base only when it carries the
            # session prefix and passes the containment check.
            widen = (
                os.path.basename(candidate).startswith("davinci-resolve-mcp-analysis-session-")
                and _is_relative_to(output_root, candidate)
            )
            if widen:
                cleanup_root = candidate
        shutil.rmtree(cleanup_root, ignore_errors=True)
        manifest["artifacts_cleaned_up"] = True
        manifest["artifact_cleanup_root"] = cleanup_root

    return manifest
2649
+
2650
+
2651
def execute_plan(
    plan: Dict[str, Any],
    params: Optional[Dict[str, Any]] = None,
    capabilities: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Blocking wrapper around ``execute_plan_async``.

    The coroutine is driven with ``asyncio.run``, so this function must be
    called from a thread that has no event loop already running. No
    ``vision_runner`` is forwarded.
    """
    pending = execute_plan_async(plan, params=params, capabilities=capabilities)
    return asyncio.run(pending)
2653
+
2654
+
2655
def _safe_report_path(project_root: str, report_path: str) -> Tuple[Optional[str], Optional[str]]:
    """Validate that ``report_path`` is an existing file inside ``project_root``.

    Returns:
        ``(normalized_path, None)`` on success, or ``(None, message)`` when
        the path fails the containment check or is not a file.
    """
    analysis_root = normalize_path(project_root)
    resolved = normalize_path(report_path)

    problem: Optional[str] = None
    if not _is_relative_to(resolved, analysis_root):
        problem = "report_path must be under the project analysis root"
    elif not os.path.isfile(resolved):
        problem = f"Report not found: {resolved}"

    if problem is not None:
        return None, problem
    return resolved, None
2663
+
2664
+
2665
def load_report(project_root: str, report_path: Optional[str] = None, clip_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load one JSON report from the project analysis root.

    The file to read is chosen in this order:

    1. ``report_path`` when given;
    2. ``<project_root>/clips/<clip_dir>/analysis.json`` when ``clip_dir`` is given;
    3. ``<project_root>/manifest.json`` otherwise.

    The chosen path is validated with ``_safe_report_path`` before reading.

    Returns:
        ``{"success": True, "path": ..., "report": ...}`` on success, or
        ``{"success": False, "error": ...}`` when the path is rejected, the
        file is missing, or the file cannot be read or parsed as JSON.
    """
    if report_path:
        requested = report_path
    elif clip_dir:
        requested = os.path.join(project_root, "clips", clip_dir, "analysis.json")
    else:
        requested = os.path.join(project_root, "manifest.json")

    path, err = _safe_report_path(project_root, requested)
    if err:
        return {"success": False, "error": err}

    # Report failures the same way as path problems instead of letting a
    # truncated or corrupt file raise out of an otherwise dict-returning API.
    try:
        payload = _read_json(path)
    except (OSError, json.JSONDecodeError) as exc:
        return {"success": False, "error": f"Could not read report {path}: {exc}"}
    return {"success": True, "path": path, "report": payload}
2680
+
2681
+
2682
def summarize_reports(project_root: str) -> Dict[str, Any]:
    """Aggregate every ``analysis.json`` under ``<project_root>/clips``.

    Unreadable or unparsable report files are skipped, as are reports whose
    top-level JSON value is not an object. The summary is written to
    ``project_summary.json`` in the project root and also returned.

    Returns:
        A dict with the report count, motion-level distribution, up to 50
        technical warnings, up to 50 search tags ordered by frequency, and
        cache statistics (signed/unsigned counts, newest analysis time).
    """
    root = normalize_path(project_root)

    reports = []
    # Use the shared walker so report discovery matches the index builder.
    for report_file in _iter_analysis_report_files(project_root):
        try:
            report = _read_json(report_file)
        except (OSError, json.JSONDecodeError):
            continue
        # A hand-edited or truncated file may hold a list or scalar; every
        # access below needs a mapping.
        if isinstance(report, dict):
            reports.append(report)

    warnings = []
    motion_counts: Dict[str, int] = {}
    tags: Dict[str, int] = {}
    signed_report_count = 0
    newest_ts = 0.0
    for report in reports:
        if report.get("analysis_signature"):
            signed_report_count += 1
        analyzed_ts = _timestamp_from_analyzed_at(report.get("analyzed_at")) or 0
        newest_ts = max(newest_ts, analyzed_ts)
        warnings.extend(report.get("technical_warnings") or [])

        motion = report.get("motion") if isinstance(report.get("motion"), dict) else {}
        level = motion.get("overall_motion_level") or "unknown"
        motion_counts[level] = motion_counts.get(level, 0) + 1

        # The visual payload comes from a model or external runner, so do
        # not assume its sections are objects.
        visual = report.get("visual") if isinstance(report.get("visual"), dict) else {}
        editing_notes = visual.get("editing_notes") if isinstance(visual.get("editing_notes"), dict) else {}
        for tag in editing_notes.get("search_tags") or []:
            if isinstance(tag, (dict, list)):
                # Nested JSON values are unhashable and cannot be counted.
                continue
            tags[tag] = tags.get(tag, 0) + 1

    summary = {
        "success": True,
        "project_root": root,
        "clip_reports": len(reports),
        "motion_distribution": motion_counts,
        "technical_warning_count": len(warnings),
        "technical_warnings": warnings[:50],
        "search_tags": sorted(tags, key=tags.get, reverse=True)[:50],
        "cache": {
            "signed_report_count": signed_report_count,
            "unsigned_report_count": max(0, len(reports) - signed_report_count),
            "newest_analysis_at": (
                time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(newest_ts))
                if newest_ts else None
            ),
        },
    }
    _write_json(os.path.join(root, "project_summary.json"), summary)
    return summary
2729
+
2730
+
2731
def cleanup_artifacts(project_root: str, *, frames_only: bool = True) -> Dict[str, Any]:
    """Delete analysis artifacts below ``project_root``.

    Args:
        project_root: Project analysis root directory.
        frames_only: When true (default) only directories named ``frames``
            are removed; when false the whole root is removed.

    Returns:
        ``{"success": False, "error": ...}`` if the root is not a directory,
        otherwise ``{"success": True, "removed": [...], "frames_only": ...}``.
        Deletion errors are ignored, so ``removed`` lists the paths that were
        targeted.
    """
    root = normalize_path(project_root)
    if not os.path.isdir(root):
        return {"success": False, "error": f"Project analysis root not found: {root}"}

    targeted = []
    if not frames_only:
        shutil.rmtree(root, ignore_errors=True)
        targeted.append(root)
    else:
        for parent, subdirs, _files in os.walk(root):
            # Names within one directory are unique, so a membership test
            # finds the single possible "frames" child.
            if "frames" in subdirs:
                frames_dir = os.path.join(parent, "frames")
                shutil.rmtree(frames_dir, ignore_errors=True)
                targeted.append(frames_dir)
    return {"success": True, "removed": targeted, "frames_only": frames_only}
2747
+
2748
+
2749
def _analysis_index_path(project_root: str, index_path: Optional[Any] = None) -> Tuple[Optional[str], Optional[str]]:
    """Resolve the analysis index location and check it is inside the root.

    With no ``index_path`` the default ``ANALYSIS_INDEX_FILENAME`` inside the
    project root is used.

    Returns:
        ``(path, None)`` when accepted, ``(None, message)`` when the path
        fails the containment check.
    """
    root = normalize_path(project_root)
    if index_path:
        resolved = normalize_path(index_path)
    else:
        resolved = os.path.join(root, ANALYSIS_INDEX_FILENAME)

    if _is_relative_to(resolved, root):
        return resolved, None
    return None, "index_path must be under the project analysis root"
2755
+
2756
+
2757
def _iter_analysis_report_files(project_root: str) -> Iterable[str]:
    """Yield the path of every ``analysis.json`` under ``<project_root>/clips``.

    Yields nothing when the ``clips`` directory does not exist.
    """
    clips_root = os.path.join(normalize_path(project_root), "clips")
    if os.path.isdir(clips_root):
        for folder, _subdirs, files in os.walk(clips_root):
            if "analysis.json" in files:
                yield os.path.join(folder, "analysis.json")
2764
+
2765
+
2766
+ def _index_text(value: Any) -> str:
2767
+ return re.sub(r"\s+", " ", str(value or "")).strip()
2768
+
2769
+
2770
+ def _index_json(value: Any) -> str:
2771
+ return json.dumps(value, ensure_ascii=False, sort_keys=True, default=str)
2772
+
2773
+
2774
+ def _index_as_list(value: Any) -> List[Any]:
2775
+ if value in (None, ""):
2776
+ return []
2777
+ if isinstance(value, list):
2778
+ return value
2779
+ if isinstance(value, tuple):
2780
+ return list(value)
2781
+ return [value]
2782
+
2783
+
2784
+ def _first_video_summary(technical: Dict[str, Any]) -> Dict[str, Any]:
2785
+ videos = technical.get("video") if isinstance(technical.get("video"), list) else []
2786
+ return videos[0] if videos and isinstance(videos[0], dict) else {}
2787
+
2788
+
2789
def _index_report_duration(report: Dict[str, Any]) -> Optional[float]:
    """Pick the clip duration (seconds) to store in the index.

    Sources are tried in order and the first one that ``_parse_float`` turns
    into a non-``None`` value wins:

    1. ``clip_analysis_markers.duration_seconds``
    2. ``technical.format.duration_seconds``
    3. ``duration_seconds`` of the first video stream summary
    """
    def _mapping(candidate: Any) -> Dict[str, Any]:
        # Treat anything that is not a dict as an empty section.
        return candidate if isinstance(candidate, dict) else {}

    marker_plan = _mapping(report.get("clip_analysis_markers"))
    technical = _mapping(report.get("technical"))
    container = _mapping(technical.get("format"))

    for raw in (marker_plan.get("duration_seconds"), container.get("duration_seconds")):
        parsed = _parse_float(raw)
        if parsed is not None:
            return parsed
    return _parse_float(_first_video_summary(technical).get("duration_seconds"))
2800
+
2801
+
2802
def _index_report_fps(report: Dict[str, Any]) -> Optional[float]:
    """Pick the frame rate to store in the index.

    The marker plan's ``fps`` is preferred. Otherwise ``_analysis_fps`` is
    asked, with the report's ``technical`` section wrapped as
    ``{"summary": technical}``.
    """
    plan = report.get("clip_analysis_markers")
    if not isinstance(plan, dict):
        plan = {}
    plan_fps = _parse_float(plan.get("fps"))
    if plan_fps is None:
        clip = report.get("clip")
        if not isinstance(clip, dict):
            clip = {}
        technical = report.get("technical")
        if not isinstance(technical, dict):
            technical = {}
        # NOTE(review): the report stores only the probe summary, so it is
        # re-wrapped here; presumably _analysis_fps expects the full probe
        # payload shape — confirm against its definition.
        return _parse_float(_analysis_fps(clip, {"summary": technical}))
    return plan_fps
2810
+
2811
+
2812
def _index_visual_tags(report: Dict[str, Any]) -> List[Tuple[str, str]]:
    """Collect ``(tag, source)`` pairs from the report's ``visual`` section.

    Tags come from ``editing_notes.search_tags``, five list-like fields of
    ``content`` and six scalar fields of ``slate``. Each value is cleaned
    with ``_index_text``; empty results are dropped. Duplicates are removed
    case-insensitively per source, keeping the first spelling and order.
    """
    def _section(container: Dict[str, Any], key: str) -> Dict[str, Any]:
        # Missing or non-dict sections behave like empty ones.
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    ordered: List[Tuple[str, str]] = []
    seen = set()

    def _collect(raw: Any, source: str) -> None:
        text = _index_text(raw)
        if not text:
            return
        fingerprint = (text.lower(), source)
        if fingerprint in seen:
            return
        seen.add(fingerprint)
        ordered.append((text, source))

    visual = _section(report, "visual")

    editing_notes = _section(visual, "editing_notes")
    for tag in _index_as_list(editing_notes.get("search_tags")):
        _collect(tag, "visual.search_tags")

    content = _section(visual, "content")
    for field in ("locations", "actions", "objects", "visible_text", "notable_audio_context"):
        for entry in _index_as_list(content.get(field)):
            _collect(entry, f"visual.content.{field}")

    slate = _section(visual, "slate")
    for field in ("scene", "shot", "take", "camera", "roll", "production"):
        _collect(slate.get(field), f"visual.slate.{field}")

    return ordered
2840
+
2841
+
2842
+ def _index_report_key(report_path: str, report: Dict[str, Any]) -> str:
2843
+ clip = report.get("clip") if isinstance(report.get("clip"), dict) else {}
2844
+ parent = os.path.basename(os.path.dirname(report_path))
2845
+ if parent and parent != "clips":
2846
+ return parent
2847
+ return stable_clip_directory(clip)
2848
+
2849
+
2850
+ def _create_analysis_index_schema(conn: sqlite3.Connection) -> bool:
2851
+ conn.executescript(
2852
+ """
2853
+ CREATE TABLE index_metadata (
2854
+ key TEXT PRIMARY KEY,
2855
+ value TEXT NOT NULL
2856
+ );
2857
+
2858
+ CREATE TABLE clips (
2859
+ clip_key TEXT PRIMARY KEY,
2860
+ clip_id TEXT,
2861
+ media_id TEXT,
2862
+ clip_name TEXT,
2863
+ file_path TEXT,
2864
+ bin_path TEXT,
2865
+ media_type TEXT,
2866
+ duration_seconds REAL,
2867
+ fps REAL,
2868
+ summary TEXT,
2869
+ analyzed_at TEXT,
2870
+ report_path TEXT NOT NULL,
2871
+ marker_plan_path TEXT,
2872
+ technical_warning_count INTEGER NOT NULL DEFAULT 0,
2873
+ motion_level TEXT,
2874
+ transcript_available INTEGER NOT NULL DEFAULT 0,
2875
+ visual_available INTEGER NOT NULL DEFAULT 0,
2876
+ source_size_bytes INTEGER,
2877
+ source_mtime_ns INTEGER,
2878
+ signature_hash TEXT
2879
+ );
2880
+
2881
+ CREATE INDEX idx_clips_file_path ON clips(file_path);
2882
+ CREATE INDEX idx_clips_clip_id ON clips(clip_id);
2883
+ CREATE INDEX idx_clips_motion_level ON clips(motion_level);
2884
+
2885
+ CREATE TABLE technical_warnings (
2886
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
2887
+ clip_key TEXT NOT NULL,
2888
+ warning TEXT NOT NULL,
2889
+ FOREIGN KEY (clip_key) REFERENCES clips(clip_key) ON DELETE CASCADE
2890
+ );
2891
+
2892
+ CREATE TABLE markers (
2893
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
2894
+ clip_key TEXT NOT NULL,
2895
+ marker_id TEXT,
2896
+ marker_type TEXT,
2897
+ subtype TEXT,
2898
+ color TEXT,
2899
+ name TEXT,
2900
+ start_seconds REAL,
2901
+ end_seconds REAL,
2902
+ start_frame INTEGER,
2903
+ duration_frames INTEGER,
2904
+ visual_description TEXT,
2905
+ sound_note TEXT,
2906
+ transcript_text TEXT,
2907
+ source TEXT,
2908
+ confidence TEXT,
2909
+ FOREIGN KEY (clip_key) REFERENCES clips(clip_key) ON DELETE CASCADE
2910
+ );
2911
+
2912
+ CREATE INDEX idx_markers_clip_key ON markers(clip_key);
2913
+ CREATE INDEX idx_markers_type ON markers(marker_type);
2914
+ CREATE INDEX idx_markers_start_seconds ON markers(start_seconds);
2915
+
2916
+ CREATE TABLE transcript_segments (
2917
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
2918
+ clip_key TEXT NOT NULL,
2919
+ segment_index INTEGER NOT NULL,
2920
+ start_seconds REAL,
2921
+ end_seconds REAL,
2922
+ text TEXT NOT NULL,
2923
+ FOREIGN KEY (clip_key) REFERENCES clips(clip_key) ON DELETE CASCADE
2924
+ );
2925
+
2926
+ CREATE INDEX idx_transcript_segments_clip_key ON transcript_segments(clip_key);
2927
+ CREATE INDEX idx_transcript_segments_start_seconds ON transcript_segments(start_seconds);
2928
+
2929
+ CREATE TABLE visual_tags (
2930
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
2931
+ clip_key TEXT NOT NULL,
2932
+ tag TEXT NOT NULL,
2933
+ source TEXT,
2934
+ FOREIGN KEY (clip_key) REFERENCES clips(clip_key) ON DELETE CASCADE
2935
+ );
2936
+
2937
+ CREATE INDEX idx_visual_tags_tag ON visual_tags(tag);
2938
+ CREATE INDEX idx_visual_tags_clip_key ON visual_tags(clip_key);
2939
+
2940
+ CREATE TABLE timeline_occurrences (
2941
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
2942
+ clip_key TEXT NOT NULL,
2943
+ timeline_id TEXT,
2944
+ timeline_name TEXT,
2945
+ track_type TEXT,
2946
+ track_index INTEGER,
2947
+ item_index INTEGER,
2948
+ start_frame INTEGER,
2949
+ end_frame INTEGER,
2950
+ record_frame INTEGER,
2951
+ occurrence_json TEXT NOT NULL,
2952
+ FOREIGN KEY (clip_key) REFERENCES clips(clip_key) ON DELETE CASCADE
2953
+ );
2954
+
2955
+ CREATE INDEX idx_timeline_occurrences_clip_key ON timeline_occurrences(clip_key);
2956
+ CREATE INDEX idx_timeline_occurrences_timeline ON timeline_occurrences(timeline_id, timeline_name);
2957
+
2958
+ CREATE TABLE analysis_keyframes (
2959
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
2960
+ clip_key TEXT NOT NULL,
2961
+ keyframe_index INTEGER,
2962
+ time_seconds REAL,
2963
+ selection_reason TEXT,
2964
+ mean_luma REAL,
2965
+ delta_from_previous REAL,
2966
+ FOREIGN KEY (clip_key) REFERENCES clips(clip_key) ON DELETE CASCADE
2967
+ );
2968
+
2969
+ CREATE INDEX idx_analysis_keyframes_clip_key ON analysis_keyframes(clip_key);
2970
+ CREATE INDEX idx_analysis_keyframes_time_seconds ON analysis_keyframes(time_seconds);
2971
+ """
2972
+ )
2973
+ try:
2974
+ conn.executescript(
2975
+ """
2976
+ CREATE VIRTUAL TABLE clips_fts USING fts5(
2977
+ clip_key UNINDEXED,
2978
+ clip_name,
2979
+ summary,
2980
+ file_path,
2981
+ tags,
2982
+ warnings
2983
+ );
2984
+ CREATE VIRTUAL TABLE markers_fts USING fts5(
2985
+ marker_rowid UNINDEXED,
2986
+ clip_key UNINDEXED,
2987
+ name,
2988
+ visual_description,
2989
+ sound_note,
2990
+ transcript_text
2991
+ );
2992
+ CREATE VIRTUAL TABLE transcripts_fts USING fts5(
2993
+ segment_rowid UNINDEXED,
2994
+ clip_key UNINDEXED,
2995
+ text
2996
+ );
2997
+ """
2998
+ )
2999
+ return True
3000
+ except sqlite3.OperationalError:
3001
+ return False
3002
+
3003
+
3004
+ def _insert_analysis_report_into_index(conn: sqlite3.Connection, report_path: str, report: Dict[str, Any], *, fts_enabled: bool) -> Dict[str, int]:
3005
+ clip = report.get("clip") if isinstance(report.get("clip"), dict) else {}
3006
+ technical = report.get("technical") if isinstance(report.get("technical"), dict) else {}
3007
+ motion = report.get("motion") if isinstance(report.get("motion"), dict) else {}
3008
+ transcription = report.get("transcription") if isinstance(report.get("transcription"), dict) else {}
3009
+ visual = report.get("visual") if isinstance(report.get("visual"), dict) else {}
3010
+ marker_plan = report.get("clip_analysis_markers") if isinstance(report.get("clip_analysis_markers"), dict) else {}
3011
+ signature = report.get("analysis_signature") if isinstance(report.get("analysis_signature"), dict) else {}
3012
+ source_signature = signature.get("source_file") if isinstance(signature.get("source_file"), dict) else {}
3013
+
3014
+ clip_key = _index_report_key(report_path, report)
3015
+ source_file = report.get("source_file") or clip.get("file_path")
3016
+ marker_plan_path = os.path.join(os.path.dirname(report_path), "clip_analysis_markers.json")
3017
+ if not os.path.isfile(marker_plan_path):
3018
+ marker_plan_path = None
3019
+
3020
+ warnings = [_index_text(item) for item in _index_as_list(report.get("technical_warnings")) if _index_text(item)]
3021
+ warnings.extend(
3022
+ _index_text(item)
3023
+ for item in _index_as_list(technical.get("warnings") if isinstance(technical, dict) else None)
3024
+ if _index_text(item)
3025
+ )
3026
+ warnings = list(dict.fromkeys(warnings))
3027
+ visual_tags = _index_visual_tags(report)
3028
+ transcript_segments = transcription.get("segments") if isinstance(transcription.get("segments"), list) else []
3029
+ transcript_text = _index_text(transcription.get("text"))
3030
+ transcript_available = bool(transcript_text or transcript_segments)
3031
+ visual_available = bool(visual.get("success") and (visual.get("clip_summary") or visual_tags or visual.get("analysis_keyframes")))
3032
+
3033
+ conn.execute(
3034
+ """
3035
+ INSERT INTO clips (
3036
+ clip_key, clip_id, media_id, clip_name, file_path, bin_path, media_type,
3037
+ duration_seconds, fps, summary, analyzed_at, report_path, marker_plan_path,
3038
+ technical_warning_count, motion_level, transcript_available, visual_available,
3039
+ source_size_bytes, source_mtime_ns, signature_hash
3040
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
3041
+ """,
3042
+ (
3043
+ clip_key,
3044
+ clip.get("clip_id"),
3045
+ clip.get("media_id"),
3046
+ clip.get("clip_name") or (os.path.basename(str(source_file)) if source_file else None),
3047
+ source_file,
3048
+ clip.get("bin_path"),
3049
+ clip.get("media_type"),
3050
+ _index_report_duration(report),
3051
+ _index_report_fps(report),
3052
+ report.get("summary"),
3053
+ report.get("analyzed_at"),
3054
+ report_path,
3055
+ marker_plan_path,
3056
+ len(warnings),
3057
+ motion.get("overall_motion_level"),
3058
+ int(transcript_available),
3059
+ int(visual_available),
3060
+ source_signature.get("size_bytes"),
3061
+ source_signature.get("mtime_ns"),
3062
+ signature.get("signature_hash"),
3063
+ ),
3064
+ )
3065
+
3066
+ for warning in warnings:
3067
+ conn.execute("INSERT INTO technical_warnings (clip_key, warning) VALUES (?, ?)", (clip_key, warning))
3068
+
3069
+ for tag, source in visual_tags:
3070
+ conn.execute("INSERT INTO visual_tags (clip_key, tag, source) VALUES (?, ?, ?)", (clip_key, tag, source))
3071
+
3072
+ marker_count = 0
3073
+ for marker in marker_plan.get("markers") or []:
3074
+ if not isinstance(marker, dict):
3075
+ continue
3076
+ cur = conn.execute(
3077
+ """
3078
+ INSERT INTO markers (
3079
+ clip_key, marker_id, marker_type, subtype, color, name, start_seconds,
3080
+ end_seconds, start_frame, duration_frames, visual_description, sound_note,
3081
+ transcript_text, source, confidence
3082
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
3083
+ """,
3084
+ (
3085
+ clip_key,
3086
+ marker.get("id"),
3087
+ marker.get("type"),
3088
+ marker.get("subtype"),
3089
+ marker.get("color"),
3090
+ marker.get("name"),
3091
+ _parse_float(marker.get("start_seconds")),
3092
+ _parse_float(marker.get("end_seconds")),
3093
+ marker.get("start_frame"),
3094
+ marker.get("duration_frames"),
3095
+ marker.get("visual_description"),
3096
+ marker.get("sound_note"),
3097
+ marker.get("transcript_text"),
3098
+ marker.get("source"),
3099
+ marker.get("confidence"),
3100
+ ),
3101
+ )
3102
+ marker_count += 1
3103
+ if fts_enabled:
3104
+ conn.execute(
3105
+ """
3106
+ INSERT INTO markers_fts (
3107
+ marker_rowid, clip_key, name, visual_description, sound_note, transcript_text
3108
+ ) VALUES (?, ?, ?, ?, ?, ?)
3109
+ """,
3110
+ (
3111
+ cur.lastrowid,
3112
+ clip_key,
3113
+ marker.get("name"),
3114
+ marker.get("visual_description"),
3115
+ marker.get("sound_note"),
3116
+ marker.get("transcript_text"),
3117
+ ),
3118
+ )
3119
+
3120
+ segment_count = 0
3121
+ if transcript_segments:
3122
+ for index, segment in enumerate(transcript_segments):
3123
+ if not isinstance(segment, dict):
3124
+ continue
3125
+ text = _index_text(segment.get("text"))
3126
+ if not text:
3127
+ continue
3128
+ cur = conn.execute(
3129
+ """
3130
+ INSERT INTO transcript_segments (
3131
+ clip_key, segment_index, start_seconds, end_seconds, text
3132
+ ) VALUES (?, ?, ?, ?, ?)
3133
+ """,
3134
+ (
3135
+ clip_key,
3136
+ index,
3137
+ _parse_float(segment.get("start")),
3138
+ _parse_float(segment.get("end")),
3139
+ text,
3140
+ ),
3141
+ )
3142
+ segment_count += 1
3143
+ if fts_enabled:
3144
+ conn.execute(
3145
+ "INSERT INTO transcripts_fts (segment_rowid, clip_key, text) VALUES (?, ?, ?)",
3146
+ (cur.lastrowid, clip_key, text),
3147
+ )
3148
+ elif transcript_text:
3149
+ cur = conn.execute(
3150
+ """
3151
+ INSERT INTO transcript_segments (
3152
+ clip_key, segment_index, start_seconds, end_seconds, text
3153
+ ) VALUES (?, ?, ?, ?, ?)
3154
+ """,
3155
+ (clip_key, 0, None, None, transcript_text),
3156
+ )
3157
+ segment_count += 1
3158
+ if fts_enabled:
3159
+ conn.execute(
3160
+ "INSERT INTO transcripts_fts (segment_rowid, clip_key, text) VALUES (?, ?, ?)",
3161
+ (cur.lastrowid, clip_key, transcript_text),
3162
+ )
3163
+
3164
+ occurrence_count = 0
3165
+ occurrences = marker_plan.get("timeline_occurrences") or clip.get("timeline_occurrences") or []
3166
+ for occurrence in occurrences:
3167
+ if not isinstance(occurrence, dict):
3168
+ continue
3169
+ conn.execute(
3170
+ """
3171
+ INSERT INTO timeline_occurrences (
3172
+ clip_key, timeline_id, timeline_name, track_type, track_index,
3173
+ item_index, start_frame, end_frame, record_frame, occurrence_json
3174
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
3175
+ """,
3176
+ (
3177
+ clip_key,
3178
+ occurrence.get("timeline_id") or occurrence.get("timelineId"),
3179
+ occurrence.get("timeline_name") or occurrence.get("timelineName"),
3180
+ occurrence.get("track_type") or occurrence.get("trackType"),
3181
+ occurrence.get("track_index") or occurrence.get("trackIndex"),
3182
+ occurrence.get("item_index") or occurrence.get("itemIndex"),
3183
+ occurrence.get("start_frame") or occurrence.get("startFrame"),
3184
+ occurrence.get("end_frame") or occurrence.get("endFrame"),
3185
+ occurrence.get("record_frame") or occurrence.get("recordFrame"),
3186
+ _index_json(occurrence),
3187
+ ),
3188
+ )
3189
+ occurrence_count += 1
3190
+
3191
+ keyframe_count = 0
3192
+ for index, keyframe in enumerate(report.get("analysis_keyframes") or []):
3193
+ if not isinstance(keyframe, dict):
3194
+ continue
3195
+ metrics = keyframe.get("metrics") if isinstance(keyframe.get("metrics"), dict) else {}
3196
+ conn.execute(
3197
+ """
3198
+ INSERT INTO analysis_keyframes (
3199
+ clip_key, keyframe_index, time_seconds, selection_reason, mean_luma, delta_from_previous
3200
+ ) VALUES (?, ?, ?, ?, ?, ?)
3201
+ """,
3202
+ (
3203
+ clip_key,
3204
+ keyframe.get("index", index + 1),
3205
+ _parse_float(keyframe.get("time_seconds")),
3206
+ keyframe.get("selection_reason"),
3207
+ _parse_float(metrics.get("mean_luma")),
3208
+ _parse_float(keyframe.get("delta_from_previous")),
3209
+ ),
3210
+ )
3211
+ keyframe_count += 1
3212
+
3213
+ if fts_enabled:
3214
+ conn.execute(
3215
+ """
3216
+ INSERT INTO clips_fts (clip_key, clip_name, summary, file_path, tags, warnings)
3217
+ VALUES (?, ?, ?, ?, ?, ?)
3218
+ """,
3219
+ (
3220
+ clip_key,
3221
+ clip.get("clip_name") or (os.path.basename(str(source_file)) if source_file else None),
3222
+ report.get("summary"),
3223
+ source_file,
3224
+ " ".join(tag for tag, _ in visual_tags),
3225
+ " ".join(warnings),
3226
+ ),
3227
+ )
3228
+
3229
+ return {
3230
+ "warnings": len(warnings),
3231
+ "markers": marker_count,
3232
+ "transcript_segments": segment_count,
3233
+ "visual_tags": len(visual_tags),
3234
+ "timeline_occurrences": occurrence_count,
3235
+ "analysis_keyframes": keyframe_count,
3236
+ }
3237
+
3238
+
3239
def build_analysis_index(project_root: str, *, index_path: Optional[Any] = None) -> Dict[str, Any]:
    """Build a single-user SQLite index derived from media analysis JSON reports.

    The database is assembled in ``<index>.tmp`` and moved over the final path
    with ``os.replace`` once every report has been processed, so an existing
    index file is only replaced after the new one has been committed.

    Args:
        project_root: Directory that holds the analysis reports; it is passed
            through ``normalize_path`` and must exist.
        index_path: Optional override forwarded to ``_analysis_index_path``.

    Returns:
        ``{"success": False, "error": ...}`` when the root or the index path
        cannot be resolved. Otherwise a summary dict with the index location,
        schema version, build timestamp, per-table row counts, the number of
        reports that failed to load (details for at most the first 50) and the
        size of the resulting file.
    """
    root = normalize_path(project_root)
    if not os.path.isdir(root):
        return {"success": False, "error": f"Project analysis root not found: {root}"}
    db_path, err = _analysis_index_path(root, index_path)
    if err or not db_path:
        return {"success": False, "error": err}

    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    tmp_path = f"{db_path}.tmp"
    # Drop leftovers from an earlier build so this one starts from an empty file.
    for suffix in ("", "-wal", "-shm"):
        try:
            os.remove(f"{tmp_path}{suffix}")
        except OSError:
            pass

    counts = {
        "clips": 0,
        "warnings": 0,
        "markers": 0,
        "transcript_segments": 0,
        "visual_tags": 0,
        "timeline_occurrences": 0,
        "analysis_keyframes": 0,
    }
    failed_reports: List[Dict[str, Any]] = []
    built_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    conn = sqlite3.connect(tmp_path)
    try:
        conn.execute("PRAGMA foreign_keys=ON")
        # The temp database is disposable, so journaling and fsync are disabled
        # while it is being filled.
        conn.execute("PRAGMA journal_mode=OFF")
        conn.execute("PRAGMA synchronous=OFF")
        fts_enabled = _create_analysis_index_schema(conn)
        conn.execute("INSERT INTO index_metadata (key, value) VALUES (?, ?)", ("schema_version", str(ANALYSIS_INDEX_SCHEMA_VERSION)))
        conn.execute("INSERT INTO index_metadata (key, value) VALUES (?, ?)", ("analysis_version", ANALYSIS_VERSION))
        conn.execute("INSERT INTO index_metadata (key, value) VALUES (?, ?)", ("built_at", built_at))
        conn.execute("INSERT INTO index_metadata (key, value) VALUES (?, ?)", ("fts_enabled", "1" if fts_enabled else "0"))
        conn.execute("INSERT INTO index_metadata (key, value) VALUES (?, ?)", ("image_blob_policy", "excluded"))

        for report_path in sorted(_iter_analysis_report_files(root)):
            try:
                report = _read_json(report_path)
                row_counts = _insert_analysis_report_into_index(conn, report_path, report, fts_enabled=fts_enabled)
                counts["clips"] += 1
                for key, value in row_counts.items():
                    counts[key] += value
            except Exception as exc:  # pragma: no cover - defensive for arbitrary user reports
                # One unreadable report must not abort the whole build; record it and move on.
                failed_reports.append({"path": report_path, "error": str(exc)})
        for key, value in counts.items():
            conn.execute("INSERT INTO index_metadata (key, value) VALUES (?, ?)", (f"count.{key}", str(value)))
        conn.commit()
    finally:
        conn.close()

    # Remove sidecar files of the index being replaced before swapping in the new database.
    for suffix in ("-wal", "-shm"):
        try:
            os.remove(f"{db_path}{suffix}")
        except OSError:
            pass
    os.replace(tmp_path, db_path)

    # Best effort: switch the finished index to WAL. A failure here is ignored,
    # but the connection is always closed so no handle on the file is leaked.
    try:
        final_conn = sqlite3.connect(db_path)
        try:
            final_conn.execute("PRAGMA journal_mode=WAL")
        finally:
            final_conn.close()
    except sqlite3.Error:
        pass

    return {
        "success": True,
        "project_root": root,
        "index_path": db_path,
        "schema_version": ANALYSIS_INDEX_SCHEMA_VERSION,
        "built_at": built_at,
        "single_user": True,
        "image_blob_policy": "excluded",
        # Reported as False when no clip was indexed, even if the FTS tables exist.
        "fts_enabled": bool(counts["clips"]) and _sqlite_table_exists(db_path, "clips_fts"),
        "counts": counts,
        "failed_report_count": len(failed_reports),
        "failed_reports": failed_reports[:50],
        "size_bytes": os.path.getsize(db_path) if os.path.isfile(db_path) else 0,
    }
3321
+
3322
+
3323
+ def _sqlite_table_exists(db_path: str, table_name: str) -> bool:
3324
+ if not os.path.isfile(db_path):
3325
+ return False
3326
+ try:
3327
+ conn = sqlite3.connect(db_path)
3328
+ try:
3329
+ row = conn.execute(
3330
+ "SELECT 1 FROM sqlite_master WHERE name = ? LIMIT 1",
3331
+ (table_name,),
3332
+ ).fetchone()
3333
+ return bool(row)
3334
+ finally:
3335
+ conn.close()
3336
+ except sqlite3.Error:
3337
+ return False
3338
+
3339
+
3340
+ def _analysis_index_counts(conn: sqlite3.Connection) -> Dict[str, int]:
3341
+ counts = {}
3342
+ for table in (
3343
+ "clips",
3344
+ "technical_warnings",
3345
+ "markers",
3346
+ "transcript_segments",
3347
+ "visual_tags",
3348
+ "timeline_occurrences",
3349
+ "analysis_keyframes",
3350
+ ):
3351
+ try:
3352
+ counts[table] = int(conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0])
3353
+ except sqlite3.Error:
3354
+ counts[table] = 0
3355
+ return counts
3356
+
3357
+
3358
def analysis_index_status(project_root: str, *, index_path: Optional[Any] = None) -> Dict[str, Any]:
    """Describe the analysis index of a project without modifying it.

    Args:
        project_root: Project analysis directory; passed through ``normalize_path``.
        index_path: Optional override forwarded to ``_analysis_index_path``.

    Returns:
        ``{"success": False, "error": ...}`` when the index path cannot be
        resolved or the existing file cannot be read as an index. When no
        index file exists, a successful result with ``"exists": False`` and a
        hint on how to build one. Otherwise the stored metadata, live row
        counts per table and the file size.
    """
    root = normalize_path(project_root)
    db_path, err = _analysis_index_path(root, index_path)
    if err or not db_path:
        return {"success": False, "error": err}
    if not os.path.isfile(db_path):
        return {
            "success": True,
            "exists": False,
            "project_root": root,
            "index_path": db_path,
            "hint": "Persisted analysis builds this automatically; run media_analysis(action='build_index') to rebuild from existing reports.",
        }
    try:
        conn = sqlite3.connect(db_path)
        try:
            metadata = dict(conn.execute("SELECT key, value FROM index_metadata"))
            counts = _analysis_index_counts(conn)
        finally:
            conn.close()
    except sqlite3.Error as exc:
        # The file exists but is not a usable index (corrupt, foreign file,
        # missing metadata table); report that instead of raising.
        return {
            "success": False,
            "exists": True,
            "project_root": root,
            "index_path": db_path,
            "error": f"Analysis index could not be read: {exc}",
        }
    try:
        schema_version = int(metadata.get("schema_version") or 0)
    except (TypeError, ValueError):
        # A non-numeric stored value is treated like a missing one.
        schema_version = 0
    return {
        "success": True,
        "exists": True,
        "project_root": root,
        "index_path": db_path,
        "schema_version": schema_version,
        "analysis_version": metadata.get("analysis_version"),
        "built_at": metadata.get("built_at"),
        "single_user": True,
        "image_blob_policy": metadata.get("image_blob_policy") or "excluded",
        "fts_enabled": metadata.get("fts_enabled") == "1",
        "counts": counts,
        "size_bytes": os.path.getsize(db_path),
    }
3394
+
3395
+
3396
+ def _fts_query(value: Any) -> str:
3397
+ tokens = re.findall(r"[A-Za-z0-9_]+", str(value or ""))
3398
+ return " OR ".join(f'"{token}"' for token in tokens[:12])
3399
+
3400
+
3401
+ def _row_dict(row: sqlite3.Row) -> Dict[str, Any]:
3402
+ return {key: row[key] for key in row.keys()}
3403
+
3404
+
3405
+ def _normalize_index_result_types(result_types: Optional[Iterable[str]]) -> set:
3406
+ if result_types in (None, ""):
3407
+ return set()
3408
+ if isinstance(result_types, str):
3409
+ raw_items = [result_types]
3410
+ else:
3411
+ raw_items = list(result_types)
3412
+ allowed_values = {"clip", "marker", "transcript"}
3413
+ return {
3414
+ str(value).strip().lower()
3415
+ for value in raw_items
3416
+ if str(value).strip().lower() in allowed_values
3417
+ }
3418
+
3419
+
3420
def _query_analysis_index_fts(conn: sqlite3.Connection, query: str, limit: int, result_types: Optional[Iterable[str]]) -> List[Dict[str, Any]]:
    """Search the FTS tables for ``query`` and return the best-ranked hits.

    Each enabled result type (clip, marker, transcript) is queried for up to
    ``limit`` rows ordered by ``bm25``. The combined rows are then sorted by
    rank (ties broken by result type) and cut to ``limit``. Returns an empty
    list when the query yields no tokens. ``sqlite3.Error`` is not caught here.
    """
    match_expr = _fts_query(query)
    if not match_expr:
        return []
    wanted = _normalize_index_result_types(result_types)

    # One statement per result type; all of them project the same column list
    # so the rows can be merged into a single result set.
    statements = (
        (
            "clip",
            """
            SELECT
                'clip' AS result_type,
                c.clip_key,
                c.clip_id,
                c.media_id,
                c.clip_name,
                c.file_path,
                c.summary,
                c.report_path,
                NULL AS marker_type,
                NULL AS start_seconds,
                NULL AS end_seconds,
                bm25(clips_fts) AS rank
            FROM clips_fts
            JOIN clips c ON c.clip_key = clips_fts.clip_key
            WHERE clips_fts MATCH ?
            ORDER BY rank
            LIMIT ?
            """,
        ),
        (
            "marker",
            """
            SELECT
                'marker' AS result_type,
                c.clip_key,
                c.clip_id,
                c.media_id,
                c.clip_name,
                c.file_path,
                m.visual_description AS summary,
                c.report_path,
                m.marker_type,
                m.start_seconds,
                m.end_seconds,
                bm25(markers_fts) AS rank
            FROM markers_fts
            JOIN markers m ON m.id = markers_fts.marker_rowid
            JOIN clips c ON c.clip_key = m.clip_key
            WHERE markers_fts MATCH ?
            ORDER BY rank
            LIMIT ?
            """,
        ),
        (
            "transcript",
            """
            SELECT
                'transcript' AS result_type,
                c.clip_key,
                c.clip_id,
                c.media_id,
                c.clip_name,
                c.file_path,
                s.text AS summary,
                c.report_path,
                NULL AS marker_type,
                s.start_seconds,
                s.end_seconds,
                bm25(transcripts_fts) AS rank
            FROM transcripts_fts
            JOIN transcript_segments s ON s.id = transcripts_fts.segment_rowid
            JOIN clips c ON c.clip_key = s.clip_key
            WHERE transcripts_fts MATCH ?
            ORDER BY rank
            LIMIT ?
            """,
        ),
    )

    hits: List[Dict[str, Any]] = []
    for kind, sql in statements:
        # An empty filter set means every result type is searched.
        if wanted and kind not in wanted:
            continue
        for row in conn.execute(sql, (match_expr, limit)):
            hits.append(_row_dict(row))

    def _rank_then_type(hit: Dict[str, Any]):
        return (float(hit.get("rank") or 0.0), hit.get("result_type") or "")

    hits.sort(key=_rank_then_type)
    return hits[:limit]
3505
+
3506
+
3507
def _query_analysis_index_like(conn: sqlite3.Connection, query: str, limit: int, result_types: Optional[Iterable[str]]) -> List[Dict[str, Any]]:
    """Substring search over clips, markers and transcript segments using LIKE.

    Used as the fallback when FTS is unavailable or returns nothing. The query
    text is lower-cased and matched as a literal substring: ``%``, ``_`` and
    the escape character itself are escaped so they are not treated as LIKE
    wildcards. Each enabled result type contributes up to ``limit`` rows with a
    constant rank of 0.0; the combined list is cut to ``limit``.
    """
    literal = str(query or "").lower()
    # Escape the escape character first so the added backslashes are not doubled again.
    for special in ("\\", "%", "_"):
        literal = literal.replace(special, "\\" + special)
    needle = f"%{literal}%"
    allowed = _normalize_index_result_types(result_types)
    results: List[Dict[str, Any]] = []
    if not allowed or "clip" in allowed:
        for row in conn.execute(
            """
            SELECT
                'clip' AS result_type,
                clip_key, clip_id, media_id, clip_name, file_path, summary, report_path,
                NULL AS marker_type, NULL AS start_seconds, NULL AS end_seconds, 0.0 AS rank
            FROM clips
            WHERE lower(coalesce(clip_name, '') || ' ' || coalesce(summary, '') || ' ' || coalesce(file_path, '')) LIKE ? ESCAPE '\\'
            LIMIT ?
            """,
            (needle, limit),
        ):
            results.append(_row_dict(row))
    if not allowed or "marker" in allowed:
        for row in conn.execute(
            """
            SELECT
                'marker' AS result_type,
                c.clip_key, c.clip_id, c.media_id, c.clip_name, c.file_path,
                m.visual_description AS summary, c.report_path, m.marker_type,
                m.start_seconds, m.end_seconds, 0.0 AS rank
            FROM markers m
            JOIN clips c ON c.clip_key = m.clip_key
            WHERE lower(
                coalesce(m.name, '') || ' ' || coalesce(m.visual_description, '') || ' ' ||
                coalesce(m.sound_note, '') || ' ' || coalesce(m.transcript_text, '')
            ) LIKE ? ESCAPE '\\'
            LIMIT ?
            """,
            (needle, limit),
        ):
            results.append(_row_dict(row))
    if not allowed or "transcript" in allowed:
        for row in conn.execute(
            """
            SELECT
                'transcript' AS result_type,
                c.clip_key, c.clip_id, c.media_id, c.clip_name, c.file_path,
                s.text AS summary, c.report_path, NULL AS marker_type,
                s.start_seconds, s.end_seconds, 0.0 AS rank
            FROM transcript_segments s
            JOIN clips c ON c.clip_key = s.clip_key
            WHERE lower(s.text) LIKE ? ESCAPE '\\'
            LIMIT ?
            """,
            (needle, limit),
        ):
            results.append(_row_dict(row))
    return results[:limit]
3561
+
3562
+
3563
def query_analysis_index(
    project_root: str,
    query: Any,
    *,
    limit: Any = 20,
    result_types: Optional[Iterable[str]] = None,
    index_path: Optional[Any] = None,
) -> Dict[str, Any]:
    """Search the project's analysis index, or list clips when no query is given.

    Args:
        project_root: Project analysis directory; passed through ``normalize_path``.
        query: Search text. When it has no indexable text, clips are listed
            ordered by ``analyzed_at`` (newest first) and then by name.
        limit: Maximum number of results; clamped to 1..100, falling back to
            20 when it cannot be converted to an int.
        result_types: Optional filter of ``clip``/``marker``/``transcript``,
            applied to text searches.
        index_path: Optional override forwarded to ``_analysis_index_path``.

    Returns:
        ``{"success": False, "error": ...}`` when the index path cannot be
        resolved, the index file is missing, or the database cannot be
        queried. Otherwise the normalised request together with ``results``
        and ``result_count``.
    """
    root = normalize_path(project_root)
    db_path, err = _analysis_index_path(root, index_path)
    if err or not db_path:
        return {"success": False, "error": err}
    if not os.path.isfile(db_path):
        return {"success": False, "error": f"Analysis index not found: {db_path}", "index_path": db_path}
    try:
        max_results = max(1, min(int(limit), 100))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinite floats, which int() rejects.
        max_results = 20
    try:
        conn = sqlite3.connect(db_path)
        try:
            conn.row_factory = sqlite3.Row
            has_fts = _sqlite_table_exists(db_path, "clips_fts")
            if _index_text(query):
                # Prefer ranked FTS results; any FTS failure or an empty result
                # falls through to the plain substring search.
                try:
                    results = _query_analysis_index_fts(conn, str(query), max_results, result_types) if has_fts else []
                except sqlite3.Error:
                    results = []
                if not results:
                    results = _query_analysis_index_like(conn, str(query), max_results, result_types)
            else:
                results = [
                    _row_dict(row)
                    for row in conn.execute(
                        """
                        SELECT
                            'clip' AS result_type,
                            clip_key, clip_id, media_id, clip_name, file_path, summary, report_path,
                            NULL AS marker_type, NULL AS start_seconds, NULL AS end_seconds, 0.0 AS rank
                        FROM clips
                        ORDER BY analyzed_at DESC, clip_name
                        LIMIT ?
                        """,
                        (max_results,),
                    )
                ]
        finally:
            conn.close()
    except sqlite3.Error as exc:
        # The file exists but cannot be queried as an index (corrupt, foreign
        # file, missing tables); report it instead of raising.
        return {
            "success": False,
            "error": f"Analysis index query failed: {exc}",
            "index_path": db_path,
        }
    return {
        "success": True,
        "project_root": root,
        "index_path": db_path,
        "query": query,
        "limit": max_results,
        "result_count": len(results),
        "results": results,
    }