media_engine-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
media_engine/batch/processor.py
@@ -0,0 +1,1131 @@
+ """Batch job processor - main extraction logic."""
+
+ from __future__ import annotations
+
+ import gc
+ import logging
+ import time
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ from media_engine.batch.models import (
+     BatchRequest,
+     ExtractorTiming,
+     JobProgress,
+ )
+ from media_engine.batch.queue import cleanup_expired_batch_jobs, start_next_batch
+ from media_engine.batch.state import batch_jobs, batch_jobs_lock
+ from media_engine.batch.timing import (
+     EXTRACTOR_ORDER,
+     calculate_queue_eta,
+     get_enabled_extractors_from_request,
+     get_predicted_rate,
+     get_resolution_bucket,
+     predict_extractor_time,
+     record_timing,
+ )
+ from media_engine.utils.memory import clear_memory, get_memory_mb
+
+ if TYPE_CHECKING:
+     pass
+
+ logger = logging.getLogger(__name__)
+
+
+ def run_batch_job(batch_id: str, request: BatchRequest) -> None:
+     """Run batch extraction - processes all files per extractor stage.
+
+     This is more memory efficient as each model is loaded once,
+     processes all files, then is unloaded before the next model.
+     """
+     from media_engine.config import get_settings
+     from media_engine.extractors import (
+         FFPROBE_WORKERS,
+         SharedFrameBuffer,
+         analyze_motion,
+         check_faces_are_known,
+         decode_frames,
+         detect_voice_activity,
+         extract_clip,
+         extract_faces,
+         extract_metadata,
+         extract_objects,
+         extract_objects_qwen,
+         extract_ocr,
+         extract_scenes,
+         extract_telemetry,
+         extract_transcript,
+         get_adaptive_timestamps,
+         get_extractor_timestamps,
+         get_sample_timestamps,
+         run_ffprobe_batch,
+         unload_clip_model,
+         unload_face_model,
+         unload_ocr_model,
+         unload_qwen_model,
+         unload_vad_model,
+         unload_whisper_model,
+         unload_yolo_model,
+     )
+     from media_engine.extractors.vad import AudioContent
+     from media_engine.schemas import (
+         BoundingBox,
+         FaceDetection,
+         FacesResult,
+         MediaType,
+         get_media_type,
+     )
+
+     settings = get_settings()
+
+     # Resolve models from settings (handles "auto" -> actual model name)
+     whisper_model = settings.get_whisper_model()
+     qwen_model = settings.get_qwen_model()
+     yolo_model = settings.get_yolo_model()
+     clip_model = settings.get_clip_model()
+
+     logger.info(f"Batch {batch_id} models: whisper={whisper_model}, qwen={qwen_model}, " f"yolo={yolo_model}, clip={clip_model}")
+
+     batch_start_time = time.time()
+     peak_memory = get_memory_mb()
+     stage_start_times: dict[str, float] = {}  # extractor -> start time
+     file_resolutions: dict[int, str] = {}  # file_idx -> resolution bucket (for timing predictions)
+     file_durations: dict[int, float] = {}  # file_idx -> duration in seconds
+
+     # Get enabled extractors for this batch
+     enabled_extractors, enabled_sub_extractors = get_enabled_extractors_from_request(request)
+
+     def calculate_total_eta(current_extractor: str, stage_eta: float) -> float:
+         """Calculate total remaining time for the entire batch.
+
+         Args:
+             current_extractor: Currently running extractor
+             stage_eta: Remaining time for current stage
+
+         Returns:
+             Total estimated remaining seconds for the batch
+         """
+         total_eta = stage_eta if stage_eta else 0.0
+
+         # Get the current extractor's position in the order
+         if current_extractor not in EXTRACTOR_ORDER:
+             return total_eta
+
+         current_ext_idx = EXTRACTOR_ORDER.index(current_extractor)
+         num_files = len(request.files)
+
+         # Add time for remaining extractors (after current one)
+         remaining_extractors = EXTRACTOR_ORDER[current_ext_idx + 1 :]
+         logger.info(f"ETA calc: current={current_extractor}, remaining={remaining_extractors}, " f"enabled={enabled_extractors}")
+
+         for ext in remaining_extractors:
+             if ext not in enabled_extractors:
+                 logger.info(f"ETA calc: skipping {ext} (not enabled)")
+                 continue
+
+             # Sum predicted time across all files
+             for file_idx in range(num_files):
+                 resolution = file_resolutions.get(file_idx, "1080p")
+                 duration = file_durations.get(file_idx, 60.0)  # Default 1 min
+                 predicted = predict_extractor_time(
+                     ext,
+                     resolution,
+                     duration,
+                     enabled_sub_extractors=enabled_sub_extractors if ext == "visual_processing" else None,
+                 )
+                 total_eta += predicted
+                 logger.info(f"ETA calc: {ext} file={file_idx} res={resolution} dur={duration}s -> +{predicted:.1f}s")
+
+         return round(total_eta, 1)
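
The total-batch figure computed above is simply the current stage's remaining time plus a per-file prediction for every later enabled stage. A minimal sketch of that summation, with a made-up stub standing in for predict_extractor_time and a hard-coded stage order (the real predictor, learned rates, and EXTRACTOR_ORDER live in media_engine.batch.timing):

    EXTRACTOR_ORDER = ["metadata", "telemetry", "vad", "visual_processing", "visual", "transcript"]
    STUB_RATES = {"visual_processing": 0.8, "visual": 2.0, "transcript": 0.5}  # sec per media-second, invented

    def predict_stub(ext: str, duration: float) -> float:
        # Stand-in for predict_extractor_time(ext, resolution, duration, ...)
        return STUB_RATES.get(ext, 0.0) * duration

    def total_eta(current: str, stage_eta: float, enabled: set[str], durations: list[float]) -> float:
        eta = stage_eta
        # Only stages after the current one, and only if enabled for this batch
        for ext in EXTRACTOR_ORDER[EXTRACTOR_ORDER.index(current) + 1:]:
            if ext in enabled:
                eta += sum(predict_stub(ext, d) for d in durations)
        return round(eta, 1)

    # Two 60s files, currently in visual_processing with 12s left on the stage:
    print(total_eta("visual_processing", 12.0, {"visual", "transcript"}, [60.0, 60.0]))
    # -> 12 + 2.0*120 + 0.5*120 = 312.0

Because disabled stages drop out and per-file durations feed the predictor, the estimate scales with both batch size and clip length.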
+
+     def update_batch_progress(
+         extractor: str,
+         message: str,
+         current: int | None = None,
+         total: int | None = None,
+     ) -> None:
+         nonlocal peak_memory
+
+         # Track stage start time
+         if extractor not in stage_start_times:
+             stage_start_times[extractor] = time.time()
+
+         # Calculate ETA
+         stage_elapsed: float | None = None
+         eta: float | None = None
+         if extractor in stage_start_times:
+             stage_elapsed = round(time.time() - stage_start_times[extractor], 1)
+             # Calculate ETA if we have progress info
+             if current is not None and total is not None and current > 0:
+                 # Always use prediction-based ETA for remaining files + current file
+                 # This is more accurate than elapsed-based calculation
+                 eta = 0.0
+
+                 # Add predicted time for current file (estimate 50% remaining)
+                 current_file_idx = current - 1
+                 current_res = file_resolutions.get(current_file_idx, "1080p")
+                 current_dur = file_durations.get(current_file_idx, 60.0)
+                 current_predicted = predict_extractor_time(
+                     extractor,
+                     current_res,
+                     current_dur,
+                     enabled_sub_extractors=enabled_sub_extractors if extractor == "visual_processing" else None,
+                 )
+                 # Estimate we're ~halfway through current file if we have some elapsed time
+                 if stage_elapsed > 0 and current > 1:
+                     # For files after the first, estimate based on avg time per completed file
+                     avg_per_file = stage_elapsed / (current - 1)
+                     eta += max(0, avg_per_file * 0.5)  # ~50% of avg remaining for current
+                 else:
+                     eta += current_predicted * 0.5  # ~50% of predicted remaining for current
+
+                 # Add predicted time for remaining files
+                 for file_idx in range(current, total):
+                     resolution = file_resolutions.get(file_idx, "1080p")
+                     duration = file_durations.get(file_idx, 60.0)
+                     eta += predict_extractor_time(
+                         extractor,
+                         resolution,
+                         duration,
+                         enabled_sub_extractors=enabled_sub_extractors if extractor == "visual_processing" else None,
+                     )
+
+                 eta = round(eta, 1)
+             elif current == 0 and total is not None and total > 0:
+                 # No progress yet - try to use historical timing for prediction
+                 # Use the most common resolution in the batch, or "unknown" if none set
+                 common_res = "unknown"
+                 if file_resolutions:
+                     res_counts: dict[str, int] = {}
+                     for res in file_resolutions.values():
+                         res_counts[res] = res_counts.get(res, 0) + 1
+                     common_res = max(res_counts, key=lambda r: res_counts[r])
+                 predicted = get_predicted_rate(extractor, common_res)
+                 if predicted is not None:
+                     eta = round(predicted * total, 1)
+
+         # Calculate total ETA for entire batch (current stage + remaining stages)
+         total_eta = calculate_total_eta(extractor, eta or 0.0)
+
+         # Debug logging for ETA calculation (use INFO level to see it)
+         if total_eta and total_eta > 0:
+             logger.info(f"ETA: {extractor} stage={eta}s, total={total_eta}s, " f"subs={enabled_sub_extractors}, files={len(file_durations)}")
+
+         # Calculate queue ETA (for all queued batches)
+         queue_eta, queued_count = calculate_queue_eta()
+
+         with batch_jobs_lock:
+             if batch_id in batch_jobs:
+                 batch_jobs[batch_id].current_extractor = extractor
+                 batch_jobs[batch_id].progress = JobProgress(
+                     message=message,
+                     current=current,
+                     total=total,
+                     stage_elapsed_seconds=stage_elapsed,
+                     eta_seconds=eta,
+                     # Always send total_eta - even if 0, it's valid info
+                     total_eta_seconds=total_eta,
+                     queue_eta_seconds=queue_eta if queue_eta > 0 else None,
+                     queued_batches=queued_count if queued_count > 0 else None,
+                 )
+                 # Update memory and elapsed time
+                 current_mem = get_memory_mb()
+                 peak_memory = max(peak_memory, current_mem)
+                 batch_jobs[batch_id].memory_mb = current_mem
+                 batch_jobs[batch_id].peak_memory_mb = peak_memory
+                 batch_jobs[batch_id].elapsed_seconds = round(time.time() - batch_start_time, 1)
+
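
The per-stage estimate above blends two signals: once at least one file has finished, the observed average per completed file (with the file in flight assumed to be roughly half done); before that, the historical prediction. A simplified restatement with a single flat predicted_per_file in place of the per-file resolution and duration lookups:

    def stage_eta(elapsed: float, current: int, total: int, predicted_per_file: float) -> float:
        # current is 1-based: file `current` is in flight, current - 1 have finished
        if current > 1:
            avg = elapsed / (current - 1)
            remaining_current = 0.5 * avg              # assume ~halfway through the file in flight
        else:
            remaining_current = 0.5 * predicted_per_file
        return round(remaining_current + predicted_per_file * (total - current), 1)

    print(stage_eta(elapsed=40.0, current=3, total=5, predicted_per_file=18.0))
    # -> 0.5 * 20 + 18 * 2 = 46.0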
+     def update_file_status(
+         file_idx: int,
+         status: str,
+         result_key: str | None = None,
+         result: Any = None,
+         error: str | None = None,
+     ) -> None:
+         with batch_jobs_lock:
+             if batch_id in batch_jobs and file_idx < len(batch_jobs[batch_id].files):
+                 batch_jobs[batch_id].files[file_idx].status = status
+                 if result_key and result is not None:
+                     batch_jobs[batch_id].files[file_idx].results[result_key] = result
+                 if error:
+                     batch_jobs[batch_id].files[file_idx].error = error
+
+     def update_extractor_status(file_idx: int, extractor: str, status: str) -> None:
+         """Update extractor status for a file.
+
+         Args:
+             file_idx: Index of the file in the batch
+             extractor: Name of the extractor
+             status: One of 'pending', 'active', 'completed', 'failed', 'skipped'
+         """
+         with batch_jobs_lock:
+             if batch_id in batch_jobs and file_idx < len(batch_jobs[batch_id].files):
+                 batch_jobs[batch_id].files[file_idx].extractor_status[extractor] = status
+
+     def start_extractor_timing(extractor: str) -> datetime:
+         """Start timing for an extractor stage."""
+         started = datetime.now(timezone.utc)
+         # Reset stage start time for ETA calculation
+         stage_start_times[extractor] = time.time()
+         with batch_jobs_lock:
+             if batch_id in batch_jobs:
+                 batch_jobs[batch_id].extractor_timings.append(ExtractorTiming(extractor=extractor, started_at=started))
+         return started
+
+     def end_extractor_timing(extractor: str, files_processed: int) -> None:
+         """End timing for an extractor stage."""
+         completed = datetime.now(timezone.utc)
+         with batch_jobs_lock:
+             if batch_id in batch_jobs:
+                 for timing in batch_jobs[batch_id].extractor_timings:
+                     if timing.extractor == extractor and timing.completed_at is None:
+                         timing.completed_at = completed
+                         timing.duration_seconds = round((completed - timing.started_at).total_seconds(), 2)
+                         timing.files_processed = files_processed
+                         break
+
+     def update_file_timing(file_idx: int, extractor: str, duration: float, units: float | None = None) -> None:
+         """Record per-file timing for an extractor.
+
+         Args:
+             file_idx: Index of the file in the batch
+             extractor: Name of the extractor
+             duration: Wall clock seconds to process
+             units: Normalization units for rate calculation:
+                 - transcript: duration in minutes
+                 - visual: number of timestamps
+                 - objects/faces/ocr/clip: number of frames
+                 - None: store raw seconds (metadata, telemetry, etc.)
+         """
+         with batch_jobs_lock:
+             if batch_id in batch_jobs and file_idx < len(batch_jobs[batch_id].files):
+                 batch_jobs[batch_id].files[file_idx].timings[extractor] = round(duration, 2)
+                 # Record to historical timing for future ETA predictions
+                 resolution = file_resolutions.get(file_idx, "unknown")
+                 record_timing(extractor, resolution, duration, units)
+
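
The units argument is what turns raw wall time into a reusable rate: seconds per frame for the frame-based extractors, seconds per media minute for transcription. A toy illustration of that normalization, assuming record_timing stores duration divided by units:

    def to_rate(duration_s: float, units: float | None) -> float | None:
        # e.g. 12.4s over 31 frames -> 0.4 s/frame; 90s over 3.0 min -> 30 s/min
        return round(duration_s / units, 3) if units else None

    print(to_rate(12.4, 31))   # 0.4
    print(to_rate(90.0, 3.0))  # 30.0
    print(to_rate(1.7, None))  # None -> raw seconds are stored instead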
+     try:
+         with batch_jobs_lock:
+             batch_jobs[batch_id].status = "running"
+
+         files = request.files
+         total_files = len(files)
+
+         # Track files that failed metadata extraction - skip them in all subsequent stages
+         # If we can't read the file with ffprobe, there's no point trying other extractors
+         failed_files: set[int] = set()
+
+         # Always get file durations and resolutions for ETA predictions (lightweight ffprobe)
+         # This runs even if metadata isn't enabled
+         if not request.enable_metadata:
+             from media_engine.extractors.metadata.base import get_video_info
+
+             for i, file_path in enumerate(files):
+                 try:
+                     _fps, duration, width, height = get_video_info(file_path)
+                     if duration:
+                         file_durations[i] = duration
+                         # Determine resolution bucket from dimensions
+                         file_resolutions[i] = get_resolution_bucket(width, height)
+                         logger.info(f"ETA: file {i} duration={duration}s, res={file_resolutions[i]}")
+                 except Exception as e:
+                     logger.warning(f"Could not get video info for {file_path}: {e}")
+
+         # Stage 1: Metadata (parallel ffprobe for speed)
+         if request.enable_metadata:
+             start_extractor_timing("metadata")
+             update_batch_progress(
+                 "metadata",
+                 f"Running ffprobe ({FFPROBE_WORKERS} parallel workers)...",
+                 0,
+                 total_files,
+             )
+
+             # Run all ffprobe calls in parallel
+             probe_results = run_ffprobe_batch(files)
+
+             # Extract metadata from each probe result
+             for i, file_path in enumerate(files):
+                 file_start = time.time()
+                 update_batch_progress("metadata", f"Processing {Path(file_path).name}", i + 1, total_files)
+                 update_extractor_status(i, "metadata", "active")
+                 probe_data = probe_results.get(file_path)
+
+                 if isinstance(probe_data, Exception):
+                     logger.warning(f"Metadata failed for {file_path}: {probe_data}")
+                     logger.warning(f"Skipping all extractors for {file_path} - file unreadable")
+                     update_file_status(i, "failed", "metadata", None, str(probe_data))
+                     update_extractor_status(i, "metadata", "failed")
+                     update_file_timing(i, "metadata", time.time() - file_start)
+                     failed_files.add(i)
+                     continue
+
+                 try:
+                     metadata = extract_metadata(file_path, probe_data)
+                     update_file_status(i, "running", "metadata", metadata.model_dump())
+                     update_extractor_status(i, "metadata", "completed")
+                     # Store resolution bucket for timing predictions
+                     file_resolutions[i] = get_resolution_bucket(
+                         metadata.resolution.width,
+                         metadata.resolution.height,
+                     )
+                     # Store duration for total ETA predictions
+                     if metadata.duration is not None:
+                         file_durations[i] = metadata.duration
+                         logger.info(f"ETA: stored duration {metadata.duration}s for file {i}")
+                 except Exception as e:
+                     logger.warning(f"Metadata failed for {file_path}: {e}")
+                     logger.warning(f"Skipping all extractors for {file_path} - file unreadable")
+                     update_file_status(i, "failed", "metadata", None, str(e))
+                     update_extractor_status(i, "metadata", "failed")
+                     failed_files.add(i)
+                 update_file_timing(i, "metadata", time.time() - file_start)
+             end_extractor_timing("metadata", total_files)
+
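
run_ffprobe_batch itself is defined elsewhere in media_engine.extractors; a thread pool over the ffprobe CLI is one plausible shape for it. A sketch under that assumption (note it returns exceptions as values, which is what the isinstance(probe_data, Exception) check above relies on):

    import json
    import subprocess
    from concurrent.futures import ThreadPoolExecutor

    def probe_one(path: str) -> "dict | Exception":
        cmd = ["ffprobe", "-v", "quiet", "-print_format", "json",
               "-show_format", "-show_streams", path]
        try:
            return json.loads(subprocess.run(cmd, capture_output=True, check=True).stdout)
        except Exception as e:  # surfaced to the caller as a value, not raised
            return e

    def run_ffprobe_batch_sketch(paths: list[str], workers: int = 4) -> dict:
        # map() preserves input order, so zip() pairs each path with its result
        with ThreadPoolExecutor(max_workers=workers) as pool:
            return dict(zip(paths, pool.map(probe_one, paths)))

The real FFPROBE_WORKERS value and any retry behavior are the package's, not this sketch's.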
+         # Stage 2: Telemetry (always runs - lightweight, no models)
+         start_extractor_timing("telemetry")
+         update_batch_progress("telemetry", "Extracting telemetry...", 0, total_files)
+         for i, file_path in enumerate(files):
+             file_start = time.time()
+             update_batch_progress("telemetry", f"Processing {Path(file_path).name}", i + 1, total_files)
+             update_extractor_status(i, "telemetry", "active")
+             try:
+                 telemetry = extract_telemetry(file_path)
+                 update_file_status(
+                     i,
+                     "running",
+                     "telemetry",
+                     telemetry.model_dump() if telemetry else None,
+                 )
+                 update_extractor_status(i, "telemetry", "completed")
+             except Exception as e:
+                 logger.warning(f"Telemetry failed for {file_path}: {e}")
+                 update_extractor_status(i, "telemetry", "failed")
+             update_file_timing(i, "telemetry", time.time() - file_start)
+         end_extractor_timing("telemetry", total_files)
+
+         # Stage 3: Voice Activity Detection (WebRTC VAD - lightweight)
+         # Skip for images and files without audio tracks
+         if request.enable_vad:
+             start_extractor_timing("vad")
+             update_batch_progress("vad", "Analyzing audio...", 0, total_files)
+             vad_ran = False  # Track if we actually ran VAD on any file
+             for i, file_path in enumerate(files):
+                 if i in failed_files:
+                     update_extractor_status(i, "vad", "skipped")
+                     continue
+                 file_start = time.time()
+                 update_extractor_status(i, "vad", "active")
+
+                 # Check media type - skip VAD for images
+                 media_type = get_media_type(file_path)
+                 if media_type == MediaType.IMAGE:
+                     logger.info(f"Skipping VAD for {file_path} - image file")
+                     no_audio_result = {
+                         "audio_content": str(AudioContent.NO_AUDIO),
+                         "speech_ratio": 0.0,
+                         "speech_segments": [],
+                         "total_duration": 0.0,
+                     }
+                     update_file_status(i, "running", "vad", no_audio_result)
+                     update_extractor_status(i, "vad", "completed")
+                     update_file_timing(i, "vad", time.time() - file_start)
+                     continue
+
+                 # Check if metadata shows no audio track
+                 has_audio_track = True
+                 with batch_jobs_lock:
+                     file_results = batch_jobs[batch_id].files[i].results
+                     if file_results and file_results.get("metadata"):
+                         metadata = file_results["metadata"]
+                         if metadata.get("audio") is None:
+                             has_audio_track = False
+
+                 if not has_audio_track:
+                     logger.info(f"Skipping VAD for {file_path} - no audio track")
+                     no_audio_result = {
+                         "audio_content": str(AudioContent.NO_AUDIO),
+                         "speech_ratio": 0.0,
+                         "speech_segments": [],
+                         "total_duration": 0.0,
+                     }
+                     update_file_status(i, "running", "vad", no_audio_result)
+                     update_extractor_status(i, "vad", "completed")
+                     update_file_timing(i, "vad", time.time() - file_start)
+                     continue
+
+                 # Run VAD for files with audio
+                 update_batch_progress("vad", f"Analyzing {Path(file_path).name}", i + 1, total_files)
+                 try:
+                     vad_result = detect_voice_activity(file_path)
+                     update_file_status(i, "running", "vad", vad_result)
+                     update_extractor_status(i, "vad", "completed")
+                     vad_ran = True
+                 except Exception as e:
+                     logger.warning(f"VAD failed for {file_path}: {e}")
+                     update_extractor_status(i, "vad", "failed")
+                 update_file_timing(i, "vad", time.time() - file_start)
+
+             # Only unload if we actually loaded the model
+             if vad_ran:
+                 update_batch_progress("vad", "Unloading VAD model...", None, None)
+                 unload_vad_model()
+             end_extractor_timing("vad", total_files)
+
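
Both this stage and the transcript stage use the same cheap gate: if the metadata stage recorded no audio stream, the file is skipped before any model is touched. The check, stated as a small predicate (the metadata dict shape is inferred from the lookups above):

    def has_audio_track(results: dict) -> bool:
        meta = results.get("metadata")
        # "audio" is None when ffprobe found no audio stream
        return bool(meta) and meta.get("audio") is not None

    print(has_audio_track({"metadata": {"audio": {"codec": "aac"}}}))  # True
    print(has_audio_track({"metadata": {"audio": None}}))              # False
    print(has_audio_track({}))                                         # False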
+         # Stage 4: Per-file visual processing
+         # Process each file completely before moving to next (memory efficient)
+         # Order: Motion → Scenes → Decode frames → Objects → Faces → OCR → CLIP → Release buffer
+         #
+         # This approach:
+         # - Decodes frames once per file
+         # - Runs all visual extractors on those frames
+         # - Releases buffer before processing next file
+         # - Keeps only one file's frames in memory at a time
+
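
Compressed to its skeleton, the loop below does this per file (a sketch reusing the names above; gating flags, model arguments, and status bookkeeping elided):

    for path in files:
        motion = analyze_motion(path)                 # cheap pass, drives frame sampling
        ts = get_adaptive_timestamps(motion)          # denser where motion is high
        buffer = decode_frames(path, timestamps=ts)   # decode ONCE per file
        for run in (extract_objects, extract_faces, extract_ocr, extract_clip):
            run(path, frame_buffer=buffer)            # all extractors reuse the same frames
        del buffer                                    # release before the next file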
+         needs_visual_processing = any(
+             [
+                 request.enable_motion,
+                 request.enable_scenes,
+                 request.enable_objects,
+                 request.enable_faces,
+                 request.enable_ocr,
+                 request.enable_clip,
+             ]
+         )
+
+         # Track motion data for adaptive timestamps
+         motion_data: dict[int, Any] = {}
+         adaptive_timestamps: dict[int, list[float]] = {}
+
+         # Track person timestamps for smart face detection
+         person_timestamps: dict[int, list[float]] = {}
+
+         # Skip motion analysis if timestamps are already provided
+         has_precomputed_timestamps = bool(request.visual_timestamps)
+
+         if needs_visual_processing:
+             start_extractor_timing("visual_processing")
+             update_batch_progress(
+                 "visual_processing",
+                 "Processing video frames...",
+                 0,
+                 total_files,
+             )
+
+             for i, file_path in enumerate(files):
+                 if i in failed_files:
+                     continue
+
+                 fname = Path(file_path).name
+                 media_type = get_media_type(file_path)
+                 file_start = time.time()
+
+                 update_batch_progress(
+                     "visual_processing",
+                     f"Processing {fname}",
+                     i + 1,
+                     total_files,
+                 )
+
+                 # --- Motion Analysis ---
+                 if request.enable_motion or (
+                     (request.enable_objects or request.enable_faces or request.enable_clip or request.enable_ocr)
+                     and not has_precomputed_timestamps
+                     and media_type != MediaType.IMAGE
+                 ):
+                     motion_start = time.time()
+                     update_extractor_status(i, "motion", "active")
+                     try:
+                         if media_type == MediaType.IMAGE:
+                             motion_data[i] = None
+                             adaptive_timestamps[i] = [0.0]
+                             update_extractor_status(i, "motion", "completed")
+                         else:
+                             motion = analyze_motion(file_path)
+                             motion_data[i] = motion
+                             adaptive_timestamps[i] = get_adaptive_timestamps(motion)
+
+                             # Always store motion data when computed (needed for Pass 2 timestamps)
+                             motion_result = {
+                                 "duration": motion.duration,
+                                 "fps": motion.fps,
+                                 "primary_motion": motion.primary_motion.value,
+                                 "avg_intensity": float(motion.avg_intensity),
+                                 "is_stable": bool(motion.is_stable),
+                                 "segments": [
+                                     {
+                                         "start": seg.start,
+                                         "end": seg.end,
+                                         "motion_type": seg.motion_type.value,
+                                         "intensity": float(seg.intensity),
+                                     }
+                                     for seg in motion.segments
+                                 ],
+                             }
+                             update_file_status(i, "running", "motion", motion_result)
+                             update_extractor_status(i, "motion", "completed")
+                             logger.info(f"Motion for {fname}: stable={motion.is_stable}, " f"timestamps={len(adaptive_timestamps[i])}")
+                     except Exception as e:
+                         logger.warning(f"Motion analysis failed for {file_path}: {e}")
+                         update_extractor_status(i, "motion", "failed")
+                         motion_data[i] = None
+                         # Fallback: generate uniform timestamps from duration
+                         # This ensures visual extractors still run even if motion fails
+                         file_result = batch_jobs[batch_id].files[i]
+                         meta = file_result.results.get("metadata")
+                         if meta and meta.get("duration"):
+                             duration = meta["duration"]
+                             # Generate ~10 uniform timestamps
+                             num_samples = min(10, max(3, int(duration / 10)))
+                             step = duration / (num_samples + 1)
+                             fallback_ts = [step * (j + 1) for j in range(num_samples)]
+                             adaptive_timestamps[i] = fallback_ts
+                             logger.info(f"Using fallback timestamps for {fname}: {num_samples} uniform samples")
+                         else:
+                             adaptive_timestamps[i] = []
+                     update_file_timing(i, "motion", time.time() - motion_start)
+
+                 # --- Scene Detection ---
+                 if request.enable_scenes and media_type != MediaType.IMAGE:
+                     scenes_start = time.time()
+                     update_extractor_status(i, "scenes", "active")
+                     try:
+                         scenes = extract_scenes(file_path)
+                         update_file_status(
+                             i,
+                             "running",
+                             "scenes",
+                             scenes.model_dump() if scenes else None,
+                         )
+                         update_extractor_status(i, "scenes", "completed")
+                     except Exception as e:
+                         logger.warning(f"Scenes failed for {file_path}: {e}")
+                         update_extractor_status(i, "scenes", "failed")
+                     update_file_timing(i, "scenes", time.time() - scenes_start)
+
+                 # --- Decode Frames (for Objects, Faces, OCR, CLIP) ---
+                 buffer: SharedFrameBuffer | None = None
+                 visual_extractors_needed = any(
+                     [
+                         request.enable_objects,
+                         request.enable_faces,
+                         request.enable_ocr,
+                         request.enable_clip,
+                     ]
+                 )
+
+                 if visual_extractors_needed:
+                     decode_start = time.time()
+                     update_extractor_status(i, "frame_decode", "active")
+                     motion = motion_data.get(i)
+                     timestamps = adaptive_timestamps.get(i, [])
+
+                     # Use precomputed timestamps if provided for this file
+                     if has_precomputed_timestamps and request.visual_timestamps:
+                         file_timestamps = request.visual_timestamps.get(file_path)
+                         if file_timestamps:
+                             timestamps = file_timestamps
+
+                     # Apply motion-based filtering for stable footage
+                     if motion and motion.is_stable and timestamps:
+                         timestamps = get_extractor_timestamps(motion.is_stable, motion.avg_intensity, timestamps)
+
+                     # For images, use timestamp 0
+                     if media_type == MediaType.IMAGE:
+                         timestamps = [0.0]
+
+                     if timestamps:
+                         try:
+                             buffer = decode_frames(
+                                 file_path,
+                                 timestamps=timestamps,
+                                 max_dimension=1920,
+                             )
+                             logger.info(f"Decoded {len(buffer.frames)}/{len(timestamps)} frames for {fname}")
+                             update_extractor_status(i, "frame_decode", "completed")
+                         except Exception as e:
+                             logger.warning(f"Frame decode failed for {file_path}: {e}")
+                             update_extractor_status(i, "frame_decode", "failed")
+                     else:
+                         update_extractor_status(i, "frame_decode", "skipped")
+                     # Pass frame count as units for per-frame rate calculation
+                     num_frames = len(buffer.frames) if buffer else None
+                     update_file_timing(i, "frame_decode", time.time() - decode_start, num_frames)
+
+                 # --- Objects (YOLO) ---
+                 if request.enable_objects and buffer is not None:
+                     objects_start = time.time()
+                     update_extractor_status(i, "objects", "active")
+                     try:
+                         objects = extract_objects(
+                             file_path,
+                             frame_buffer=buffer,
+                             model_name=yolo_model,
+                         )
+                         if objects:
+                             update_file_status(i, "running", "objects", {"summary": objects.summary})
+                             # Collect person timestamps for smart face sampling
+                             person_ts = list(set(d.timestamp for d in objects.detections if d.label == "person"))
+                             person_timestamps[i] = sorted(person_ts)
+                             if person_ts:
+                                 logger.info(f"Found {len(person_ts)} person frames in {fname}")
+                         else:
+                             person_timestamps[i] = []
+                         update_extractor_status(i, "objects", "completed")
+                     except Exception as e:
+                         logger.warning(f"Objects failed for {file_path}: {e}")
+                         person_timestamps[i] = []
+                         update_extractor_status(i, "objects", "failed")
+                     # Use number of frames as units for rate calculation
+                     num_frames = len(buffer.frames) if buffer else None
+                     update_file_timing(i, "objects", time.time() - objects_start, num_frames)
+
+                 # --- Faces ---
+                 if request.enable_faces:
+                     faces_start = time.time()
+                     face_frame_count: int | None = None
+                     update_extractor_status(i, "faces", "active")
+                     try:
+                         person_ts = person_timestamps.get(i, [])
+                         motion = motion_data.get(i)
+
+                         # Get video duration from motion data or metadata results
+                         duration = 0.0
+                         if motion is not None:
+                             duration = motion.duration
+                         else:
+                             file_result = batch_jobs[batch_id].files[i]
+                             if file_result.results.get("metadata"):
+                                 duration = file_result.results["metadata"].get("duration", 0.0)
+
+                         # Calculate FPS based on motion intensity
+                         intensity = motion.avg_intensity if motion else 0.0
+                         if intensity >= 10.0:
+                             face_fps = 3.0
+                         elif intensity >= 6.0:
+                             face_fps = 2.0
+                         elif intensity >= 2.0:
+                             face_fps = 1.5
+                         else:
+                             face_fps = 1.0
+
+                         # Adaptive face detection for long videos
+                         # Short videos (<60s): process all at once
+                         # Long videos: use batched approach with early exit when faces stabilize
+                         batch_duration = 30.0  # Process 30s at a time
+                         verification_interval = 10.0  # Check every 10s once stable
+                         min_consistent_batches = 2  # Need 2 batches of same faces to go sparse
+
+                         faces = None
+                         face_frame_count = 0
+                         all_detections: list[dict[str, Any]] = []
+                         known_embeddings: list[list[float]] = []
+                         consistent_batches = 0
+                         in_verification_mode = False
+
+                         if duration <= 60.0:
+                             # Short video - process all at once
+                             num_samples = max(1, int(duration * face_fps))
+                             step = duration / (num_samples + 1)
+                             face_timestamps = [step * (j + 1) for j in range(num_samples)]
+
+                             # Merge with YOLO person timestamps
+                             if person_ts:
+                                 all_ts = sorted(set(person_ts + face_timestamps))
+                                 merged_ts: list[float] = []
+                                 for ts in all_ts:
+                                     if not merged_ts or ts - merged_ts[-1] >= 0.3:
+                                         merged_ts.append(ts)
+                                 face_timestamps = merged_ts
+
+                             if face_timestamps:
+                                 face_buffer = decode_frames(file_path, timestamps=face_timestamps)
+                                 faces = extract_faces(file_path, frame_buffer=face_buffer)
+                                 face_frame_count = len(face_buffer.frames)
+                                 logger.info(f"Face detection on {face_frame_count} frames for {fname} " f"(short video, {face_fps} FPS)")
+                         else:
+                             # Long video - use adaptive batching
+                             current_time = 0.0
+                             total_frames = 0
+
+                             while current_time < duration:
+                                 # Determine batch parameters
+                                 if in_verification_mode:
+                                     # Sparse verification: just check every 10s
+                                     batch_end = min(current_time + verification_interval, duration)
+                                     batch_timestamps = [current_time + verification_interval / 2]
+                                 else:
+                                     # Normal dense sampling
+                                     batch_end = min(current_time + batch_duration, duration)
+                                     batch_dur = batch_end - current_time
+                                     num_batch_samples = max(1, int(batch_dur * face_fps))
+                                     step = batch_dur / (num_batch_samples + 1)
+                                     batch_timestamps = [current_time + step * (j + 1) for j in range(num_batch_samples)]
+
+                                 # Add YOLO person timestamps in this range
+                                 batch_person_ts = [ts for ts in person_ts if current_time <= ts < batch_end]
+                                 if batch_person_ts:
+                                     all_ts = sorted(set(batch_person_ts + batch_timestamps))
+                                     merged_ts = []
+                                     for ts in all_ts:
+                                         if not merged_ts or ts - merged_ts[-1] >= 0.3:
+                                             merged_ts.append(ts)
+                                     batch_timestamps = merged_ts
+
+                                 # Process this batch
+                                 if batch_timestamps:
+                                     batch_buffer = decode_frames(file_path, timestamps=batch_timestamps)
+                                     batch_faces = extract_faces(file_path, frame_buffer=batch_buffer)
+                                     total_frames += len(batch_buffer.frames)
+
+                                     if batch_faces and batch_faces.detections:
+                                         # Add detections to our collection
+                                         for d in batch_faces.detections:
+                                             all_detections.append(
+                                                 {
+                                                     "timestamp": d.timestamp,
+                                                     "bbox": d.bbox.model_dump(),
+                                                     "confidence": d.confidence,
+                                                     "embedding": d.embedding,
+                                                     "image_base64": d.image_base64,
+                                                     "needs_review": d.needs_review,
+                                                     "review_reason": d.review_reason,
+                                                 }
+                                             )
+
+                                         # Check if faces are all known
+                                         all_known, new_embs = check_faces_are_known(batch_faces, known_embeddings)
+
+                                         if new_embs:
+                                             # New faces found - add to known and reset consistency
+                                             known_embeddings.extend(new_embs)
+                                             consistent_batches = 0
+                                             if in_verification_mode:
+                                                 logger.info(f"New face detected at {current_time:.1f}s, " "exiting verification mode")
+                                                 in_verification_mode = False
+                                         elif all_known and known_embeddings:
+                                             # All faces are known
+                                             consistent_batches += 1
+                                             if consistent_batches >= min_consistent_batches and not in_verification_mode:
+                                                 in_verification_mode = True
+                                                 logger.info(f"Faces stable after {current_time:.1f}s, " "switching to verification mode (every 10s)")
+                                     elif not known_embeddings:
+                                         # No faces in this batch and no known faces yet
+                                         consistent_batches += 1
+                                         if consistent_batches >= min_consistent_batches:
+                                             in_verification_mode = True
+
+                                 current_time = batch_end
+
+                             face_frame_count = total_frames
+
+                             # Create result from collected detections
+                             if all_detections:
+                                 # Reconstruct FacesResult from batched detections
+                                 faces = FacesResult(
+                                     count=len(all_detections),
+                                     unique_estimate=len(known_embeddings),
+                                     detections=[
+                                         FaceDetection(
+                                             timestamp=d["timestamp"],
+                                             bbox=BoundingBox(**d["bbox"]),
+                                             confidence=d["confidence"],
+                                             embedding=d["embedding"],
+                                             image_base64=d["image_base64"],
+                                             needs_review=d.get("needs_review", False),
+                                             review_reason=d.get("review_reason"),
+                                         )
+                                         for d in all_detections
+                                     ],
+                                 )
+
+                             mode_info = "verification" if in_verification_mode else "normal"
+                             logger.info(
+                                 f"Face detection on {total_frames} frames for {fname} " f"(adaptive batching, {len(known_embeddings)} unique, " f"ended in {mode_info} mode)"
+                             )
+
+                         # Fallback if no duration info
+                         if faces is None and buffer is not None:
+                             faces = extract_faces(file_path, frame_buffer=buffer)
+                             face_frame_count = len(buffer.frames)
+                             logger.info(f"Face detection on {len(buffer.frames)} frames for {fname} " "(using shared buffer)")
+
+                         if faces:
+                             faces_data = {
+                                 "count": faces.count,
+                                 "unique_estimate": faces.unique_estimate,
+                                 "detections": [
+                                     {
+                                         "timestamp": d.timestamp,
+                                         "bbox": d.bbox.model_dump(),
+                                         "confidence": d.confidence,
+                                         "embedding": d.embedding,
+                                         "image_base64": d.image_base64,
+                                         "needs_review": d.needs_review,
+                                         "review_reason": d.review_reason,
+                                     }
+                                     for d in faces.detections
+                                 ],
+                             }
+                             update_file_status(i, "running", "faces", faces_data)
+                         else:
+                             update_file_status(
+                                 i,
+                                 "running",
+                                 "faces",
+                                 {"count": 0, "unique_estimate": 0, "detections": []},
+                             )
+                         update_extractor_status(i, "faces", "completed")
+                     except Exception as e:
+                         logger.warning(f"Faces failed for {file_path}: {e}")
+                         update_extractor_status(i, "faces", "failed")
+                     update_file_timing(i, "faces", time.time() - faces_start, face_frame_count)
+
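
The early-exit logic above reduces to a small state machine: dense 30-second batches until two consecutive batches introduce no new identities, then sparse 10-second verification probes, dropping back to dense sampling the moment an unknown face appears. Distilled into a standalone toy, with face extraction replaced by a set of batch indices where a new face shows up:

    def plan_batches(duration: float, new_face_at: set[int]) -> list[str]:
        """new_face_at: batch indices (in processing order) that introduce a new face."""
        modes: list[str] = []
        t, consistent, sparse, idx = 0.0, 0, False, 0
        while t < duration:
            step = 10.0 if sparse else 30.0
            modes.append("sparse" if sparse else "dense")
            if idx in new_face_at:
                consistent, sparse = 0, False   # unknown face: back to dense sampling
            else:
                consistent += 1
                sparse = sparse or consistent >= 2
            t += step
            idx += 1
        return modes

    print(plan_batches(150.0, new_face_at=set()))
    # ['dense', 'dense'] + ['sparse'] * 9 -- goes sparse after two stable 30s batches

This is only the mode-switching skeleton; the timestamp merging with YOLO person frames and the embedding comparison (check_faces_are_known) are elided.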
+                 # --- OCR ---
+                 if request.enable_ocr and buffer is not None:
+                     ocr_start = time.time()
+                     update_extractor_status(i, "ocr", "active")
+                     try:
+                         ocr = extract_ocr(file_path, frame_buffer=buffer)
+                         update_file_status(i, "running", "ocr", ocr.model_dump() if ocr else None)
+                         update_extractor_status(i, "ocr", "completed")
+                     except Exception as e:
+                         logger.warning(f"OCR failed for {file_path}: {e}")
+                         update_extractor_status(i, "ocr", "failed")
+                     num_frames = len(buffer.frames) if buffer else None
+                     update_file_timing(i, "ocr", time.time() - ocr_start, num_frames)
+
+                 # --- CLIP ---
+                 if request.enable_clip and buffer is not None:
+                     clip_start = time.time()
+                     update_extractor_status(i, "clip", "active")
+                     try:
+                         clip = extract_clip(
+                             file_path,
+                             frame_buffer=buffer,
+                             model_name=clip_model,
+                         )
+                         if clip:
+                             update_file_status(i, "running", "clip", clip.model_dump())
+                         else:
+                             update_file_status(i, "running", "clip", None)
+                         update_extractor_status(i, "clip", "completed")
+                     except Exception as e:
+                         logger.warning(f"CLIP failed for {file_path}: {e}")
+                         update_extractor_status(i, "clip", "failed")
+                     num_frames = len(buffer.frames) if buffer else None
+                     update_file_timing(i, "clip", time.time() - clip_start, num_frames)
+
+                 # --- Release buffer for this file ---
+                 if buffer is not None:
+                     logger.info(f"Releasing frame buffer for {fname}")
+                     del buffer
+                     gc.collect()
+
+                 # Update peak memory after each file
+                 peak_memory = max(peak_memory, get_memory_mb())
+
+             # Unload all visual models after processing all files
+             update_batch_progress("visual_processing", "Unloading models...", None, None)
+             if request.enable_objects:
+                 unload_yolo_model()
+             if request.enable_faces:
+                 unload_face_model()
+             if request.enable_ocr:
+                 unload_ocr_model()
+             if request.enable_clip:
+                 unload_clip_model()
+
+             end_extractor_timing("visual_processing", total_files)
+
+         # Stage 5: Visual (Qwen VLM - scene descriptions)
+         # Separate stage because Qwen is very heavy and has its own frame handling
+         if request.enable_visual:
+             start_extractor_timing("visual")
+             logger.info("Visual enabled (Qwen VLM)")
+             clear_memory()
+             update_batch_progress("visual", "Loading Qwen model...", 0, total_files)
+             logger.info(f"Qwen batch contexts: {request.contexts}")
+
+             for i, file_path in enumerate(files):
+                 if i in failed_files:
+                     update_extractor_status(i, "visual", "skipped")
+                     continue
+                 file_start = time.time()
+                 fname = Path(file_path).name
+                 update_batch_progress("visual", f"Analyzing: {fname}", i + 1, total_files)
+                 update_extractor_status(i, "visual", "active")
+                 # Get per-file timestamps if provided (declared before try so it's visible after)
+                 timestamps: list[float] | None = None
+                 try:
+                     motion = motion_data.get(i)
+                     if request.visual_timestamps:
+                         timestamps = request.visual_timestamps.get(file_path)
+                     if timestamps is None and motion:
+                         timestamps = get_sample_timestamps(motion, max_samples=5)
+
+                     file_context = request.contexts.get(file_path) if request.contexts else None
+                     logger.info(f"Calling Qwen with context for {fname}: {file_context}, lut_path={request.lut_path}")
+                     visual_result = extract_objects_qwen(
+                         file_path,
+                         timestamps=timestamps,
+                         model_name=qwen_model,
+                         context=file_context,
+                         lut_path=request.lut_path,
+                     )
+                     visual_data: dict[str, Any] = {"summary": visual_result.summary}
+                     if visual_result.descriptions:
+                         visual_data["descriptions"] = visual_result.descriptions
+                     update_file_status(i, "running", "visual", visual_data)
+                     update_extractor_status(i, "visual", "completed")
+                 except Exception as e:
+                     logger.warning(f"Visual failed for {file_path}: {e}", exc_info=True)
+                     update_extractor_status(i, "visual", "failed")
+                     update_file_status(i, "failed", error=str(e))
+                     failed_files.add(i)
+                 # Use number of timestamps as units for rate calculation
+                 num_timestamps = len(timestamps) if timestamps else None
+                 update_file_timing(i, "visual", time.time() - file_start, num_timestamps)
+
+             update_batch_progress("visual", "Unloading Qwen model...", None, None)
+             unload_qwen_model()
+             end_extractor_timing("visual", total_files)
+
+         # Stage 6: Transcript (Whisper - heavy model)
+         # Skip for images and files without audio tracks
+         if request.enable_transcript:
+             start_extractor_timing("transcript")
+             whisper_ran = False  # Track if we actually ran Whisper
+
+             # Check if any files need transcription before loading model
+             files_to_transcribe: list[int] = []
+             for i, file_path in enumerate(files):
+                 if i in failed_files:
+                     update_extractor_status(i, "transcript", "skipped")
+                     continue
+                 # Skip images
+                 media_type = get_media_type(file_path)
+                 if media_type == MediaType.IMAGE:
+                     update_extractor_status(i, "transcript", "skipped")
+                     continue
+                 # Check for audio track
+                 has_audio = True
+                 with batch_jobs_lock:
+                     file_results = batch_jobs[batch_id].files[i].results
+                     if file_results and file_results.get("metadata"):
+                         if file_results["metadata"].get("audio") is None:
+                             has_audio = False
+                 if has_audio:
+                     files_to_transcribe.append(i)
+                 else:
+                     update_extractor_status(i, "transcript", "skipped")
+
+             if files_to_transcribe:
+                 # Clear memory before loading heavy model
+                 logger.info("Clearing memory before Whisper...")
+                 clear_memory()
+                 update_batch_progress(
+                     "transcript",
+                     "Loading Whisper model...",
+                     0,
+                     len(files_to_transcribe),
+                 )
+
+                 for idx, i in enumerate(files_to_transcribe):
+                     file_path = files[i]
+                     file_start = time.time()
+                     update_batch_progress(
+                         "transcript",
+                         f"Transcribing {Path(file_path).name}",
+                         idx + 1,
+                         len(files_to_transcribe),
+                     )
+                     update_extractor_status(i, "transcript", "active")
+                     try:
+                         transcript = extract_transcript(
+                             file_path,
+                             model=whisper_model,
+                             language=request.language,
+                             fallback_language=settings.fallback_language,
+                             language_hints=request.language_hints,
+                             context_hint=request.context_hint,
+                         )
+                         update_file_status(
+                             i,
+                             "running",
+                             "transcript",
+                             transcript.model_dump() if transcript else None,
+                         )
+                         update_extractor_status(i, "transcript", "completed")
+                         whisper_ran = True
+                     except Exception as e:
+                         logger.warning(f"Transcript failed for {file_path}: {e}")
+                         update_extractor_status(i, "transcript", "failed")
+                         update_file_status(i, "failed", error=str(e))
+                         failed_files.add(i)
+                     # Get duration in minutes for rate calculation
+                     duration_minutes: float | None = None
+                     with batch_jobs_lock:
+                         file_results = batch_jobs[batch_id].files[i].results
+                         if file_results and file_results.get("metadata"):
+                             duration_sec = file_results["metadata"].get("duration")
+                             if duration_sec:
+                                 duration_minutes = duration_sec / 60.0
+                     update_file_timing(i, "transcript", time.time() - file_start, duration_minutes)
+
+                 # Unload Whisper to free memory
+                 if whisper_ran:
+                     update_batch_progress("transcript", "Unloading Whisper model...", None, None)
+                     unload_whisper_model()
+             else:
+                 logger.info("Skipping Whisper - no files with audio tracks")
+
+             end_extractor_timing("transcript", total_files)
+
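
Because transcript timings are recorded with media minutes as the units, a stored rate transfers across clips of different lengths. For example, if a 4-minute clip took 120 seconds of wall time:

    rate = 120.0 / 4.0   # seconds of wall time per media minute -> 30.0
    print(rate * 10.0)   # 300.0 -> predicted seconds for a 10-minute clip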
+         # Mark files as completed (skip failed files - they stay "failed")
+         with batch_jobs_lock:
+             for i in range(len(files)):
+                 if i in failed_files:
+                     # File already marked as failed - don't overwrite
+                     error_msg = batch_jobs[batch_id].files[i].error or "unknown error"
+                     logger.info(f"Batch {batch_id} file {i} marked failed: {error_msg}")
+                     continue
+                 # Log results before marking complete
+                 result_keys = list(batch_jobs[batch_id].files[i].results.keys())
+                 logger.info(f"Batch {batch_id} file {i} results before completion: keys={result_keys}")
+                 batch_jobs[batch_id].files[i].status = "completed"
+             batch_jobs[batch_id].status = "completed"
+             batch_jobs[batch_id].current_extractor = None
+             batch_jobs[batch_id].progress = None
+             batch_jobs[batch_id].completed_at = datetime.now(timezone.utc)
+             # Final metrics
+             batch_jobs[batch_id].elapsed_seconds = round(time.time() - batch_start_time, 2)
+             batch_jobs[batch_id].memory_mb = get_memory_mb()
+             batch_jobs[batch_id].peak_memory_mb = max(peak_memory, get_memory_mb())
+
+             # Log timing summary
+             logger.info(f"Batch {batch_id} completed in {batch_jobs[batch_id].elapsed_seconds}s, peak memory: {batch_jobs[batch_id].peak_memory_mb}MB")
+             for timing in batch_jobs[batch_id].extractor_timings:
+                 logger.info(f"  {timing.extractor}: {timing.duration_seconds}s ({timing.files_processed} files)")
+
+     except Exception as e:
+         logger.error(f"Batch {batch_id} failed: {e}")
+         with batch_jobs_lock:
+             if batch_id in batch_jobs:
+                 batch_jobs[batch_id].status = "failed"
+                 batch_jobs[batch_id].completed_at = datetime.now(timezone.utc)
+                 batch_jobs[batch_id].elapsed_seconds = round(time.time() - batch_start_time, 2)
+                 batch_jobs[batch_id].memory_mb = get_memory_mb()
+                 batch_jobs[batch_id].peak_memory_mb = peak_memory
+
+     finally:
+         # Cleanup old batch jobs to free memory
+         cleanup_expired_batch_jobs()
+
+         # Clear memory before starting next batch
+         logger.info("Clearing memory after batch completion...")
+         clear_memory()
+
+         # Always start the next batch from queue (or set batch_running = False)
+         start_next_batch()
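
run_batch_job blocks until the whole batch finishes, so callers are expected to run it off-thread; the router layer is not part of this file. A hypothetical driver, consistent with this module's queue hooks but not the package's actual entry point (registration of the job in batch_jobs is elided; see media_engine.batch.state and media_engine.routers.batch):

    import threading
    import uuid

    from media_engine.batch.models import BatchRequest
    from media_engine.batch.processor import run_batch_job

    def submit(request: BatchRequest) -> str:
        batch_id = str(uuid.uuid4())
        # batch_jobs[batch_id] = ... (job state setup omitted in this sketch)
        threading.Thread(target=run_batch_job, args=(batch_id, request), daemon=True).start()
        return batch_id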