media-engine 0.1.1-py3-none-any.whl → 0.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- media_engine/_version.py +2 -2
- media_engine/batch/models.py +9 -0
- media_engine/batch/processor.py +14 -12
- media_engine/batch/timing.py +1 -1
- media_engine/config.py +91 -19
- media_engine/extractors/faces.py +1 -1
- media_engine/extractors/frame_buffer.py +1 -1
- media_engine/extractors/frames.py +2 -2
- media_engine/extractors/metadata/sony.py +1 -1
- media_engine/extractors/motion.py +4 -4
- media_engine/extractors/objects.py +1 -1
- media_engine/extractors/objects_qwen.py +845 -147
- media_engine/extractors/ocr.py +1 -1
- media_engine/extractors/transcribe.py +1 -1
- media_engine/extractors/vad.py +1 -1
- media_engine/routers/settings.py +2 -0
- media_engine/schemas.py +2 -0
- {media_engine-0.1.1.dist-info → media_engine-0.2.1.dist-info}/METADATA +1 -1
- {media_engine-0.1.1.dist-info → media_engine-0.2.1.dist-info}/RECORD +22 -22
- {media_engine-0.1.1.dist-info → media_engine-0.2.1.dist-info}/WHEEL +0 -0
- {media_engine-0.1.1.dist-info → media_engine-0.2.1.dist-info}/entry_points.txt +0 -0
- {media_engine-0.1.1.dist-info → media_engine-0.2.1.dist-info}/licenses/LICENSE +0 -0
media_engine/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.1.1'
-__version_tuple__ = version_tuple = (0, 1, 1)
+__version__ = version = '0.2.1'
+__version_tuple__ = version_tuple = (0, 2, 1)

 __commit_id__ = commit_id = None
media_engine/batch/models.py
CHANGED
@@ -57,6 +57,15 @@ class BatchRequest(BaseModel):
 # Optional LUT path for visual analysis (e.g., for log footage color correction)
 # Applied to extracted frames before sending to Qwen
 lut_path: str | None = None
+# Per-file batch overlap setting for visual analysis (file path -> bool)
+# When True, batches overlap by 1 frame for visual continuity (useful for unstable camera)
+# Example: {"/path/shaky_video.mp4": True}
+visual_batch_overlap: dict[str, bool] | None = None
+# Per-file Qwen strategy override (file path -> strategy)
+# Overrides global qwen_strategy setting for specific files
+# Values: "single", "context", "batch", "batch_context"
+# Example: {"/path/action_video.mp4": "batch_context"}
+visual_strategy: dict[str, str] | None = None


 class BatchFileStatus(BaseModel):
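Both new per-file maps are keyed by file path and are optional, so existing requests keep working unchanged. A minimal sketch of how a client might populate them; the stand-in model below mirrors only the fields visible in this hunk (not the full BatchRequest), and the paths and LUT file are hypothetical:

```python
from pydantic import BaseModel


class BatchRequestSketch(BaseModel):
    # Stand-in for illustration only; mirrors just the fields shown in the hunk above.
    lut_path: str | None = None
    visual_batch_overlap: dict[str, bool] | None = None
    visual_strategy: dict[str, str] | None = None


req = BatchRequestSketch(
    lut_path="/luts/slog3_to_rec709.cube",                        # hypothetical LUT path
    visual_batch_overlap={"/media/shaky_video.mp4": True},        # overlap batches by 1 frame
    visual_strategy={"/media/action_video.mp4": "batch_context"}, # per-file strategy override
)
print(req.visual_batch_overlap, req.visual_strategy)
```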
media_engine/batch/processor.py
CHANGED
@@ -85,7 +85,7 @@ def run_batch_job(batch_id: str, request: BatchRequest) -> None:
 yolo_model = settings.get_yolo_model()
 clip_model = settings.get_clip_model()

-logger.info(f"Batch {batch_id} models: whisper={whisper_model}, qwen={qwen_model},
+logger.info(f"Batch {batch_id} models: whisper={whisper_model}, qwen={qwen_model}, yolo={yolo_model}, clip={clip_model}")

 batch_start_time = time.time()
 peak_memory = get_memory_mb()
@@ -117,7 +117,7 @@ def run_batch_job(batch_id: str, request: BatchRequest) -> None:

 # Add time for remaining extractors (after current one)
 remaining_extractors = EXTRACTOR_ORDER[current_ext_idx + 1 :]
-logger.info(f"ETA calc: current={current_extractor}, remaining={remaining_extractors},
+logger.info(f"ETA calc: current={current_extractor}, remaining={remaining_extractors}, enabled={enabled_extractors}")

 for ext in remaining_extractors:
 if ext not in enabled_extractors:
@@ -210,7 +210,7 @@ def run_batch_job(batch_id: str, request: BatchRequest) -> None:

 # Debug logging for ETA calculation (use INFO level to see it)
 if total_eta and total_eta > 0:
-logger.info(f"ETA: {extractor} stage={eta}s, total={total_eta}s,
+logger.info(f"ETA: {extractor} stage={eta}s, total={total_eta}s, subs={enabled_sub_extractors}, files={len(file_durations)}")

 # Calculate queue ETA (for all queued batches)
 queue_eta, queued_count = calculate_queue_eta()
@@ -565,7 +565,7 @@ def run_batch_job(batch_id: str, request: BatchRequest) -> None:
 }
 update_file_status(i, "running", "motion", motion_result)
 update_extractor_status(i, "motion", "completed")
-logger.info(f"Motion for {fname}: stable={motion.is_stable},
+logger.info(f"Motion for {fname}: stable={motion.is_stable}, timestamps={len(adaptive_timestamps[i])}")
 except Exception as e:
 logger.warning(f"Motion analysis failed for {file_path}: {e}")
 update_extractor_status(i, "motion", "failed")
@@ -743,7 +743,7 @@ def run_batch_job(batch_id: str, request: BatchRequest) -> None:
 face_buffer = decode_frames(file_path, timestamps=face_timestamps)
 faces = extract_faces(file_path, frame_buffer=face_buffer)
 face_frame_count = len(face_buffer.frames)
-logger.info(f"Face detection on {face_frame_count} frames for {fname}
+logger.info(f"Face detection on {face_frame_count} frames for {fname} (short video, {face_fps} FPS)")
 else:
 # Long video - use adaptive batching
 current_time = 0.0
@@ -802,14 +802,14 @@ def run_batch_job(batch_id: str, request: BatchRequest) -> None:
 known_embeddings.extend(new_embs)
 consistent_batches = 0
 if in_verification_mode:
-logger.info(f"New face detected at {current_time:.1f}s,
+logger.info(f"New face detected at {current_time:.1f}s, exiting verification mode")
 in_verification_mode = False
 elif all_known and known_embeddings:
 # All faces are known
 consistent_batches += 1
 if consistent_batches >= min_consistent_batches and not in_verification_mode:
 in_verification_mode = True
-logger.info(f"Faces stable after {current_time:.1f}s,
+logger.info(f"Faces stable after {current_time:.1f}s, switching to verification mode (every 10s)")
 elif not known_embeddings:
 # No faces in this batch and no known faces yet
 consistent_batches += 1
@@ -841,15 +841,13 @@ def run_batch_job(batch_id: str, request: BatchRequest) -> None:
 )

 mode_info = "verification" if in_verification_mode else "normal"
-logger.info(
-f"Face detection on {total_frames} frames for {fname} " f"(adaptive batching, {len(known_embeddings)} unique, " f"ended in {mode_info} mode)"
-)
+logger.info(f"Face detection on {total_frames} frames for {fname} (adaptive batching, {len(known_embeddings)} unique, ended in {mode_info} mode)")

 # Fallback if no duration info
 if faces is None and buffer is not None:
 faces = extract_faces(file_path, frame_buffer=buffer)
 face_frame_count = len(buffer.frames)
-logger.info(f"Face detection on {len(buffer.frames)} frames for {fname}
+logger.info(f"Face detection on {len(buffer.frames)} frames for {fname} (using shared buffer)")

 if faces:
 faces_data = {
@@ -966,13 +964,17 @@ def run_batch_job(batch_id: str, request: BatchRequest) -> None:
 timestamps = get_sample_timestamps(motion, max_samples=5)

 file_context = request.contexts.get(file_path) if request.contexts else None
-
+file_batch_overlap = request.visual_batch_overlap.get(file_path, False) if request.visual_batch_overlap else False
+file_strategy = request.visual_strategy.get(file_path) if request.visual_strategy else None
+logger.info(f"Calling Qwen for {fname}: context={file_context}, lut_path={request.lut_path}, batch_overlap={file_batch_overlap}, strategy={file_strategy}")
 visual_result = extract_objects_qwen(
 file_path,
 timestamps=timestamps,
 model_name=qwen_model,
 context=file_context,
 lut_path=request.lut_path,
+batch_overlap=file_batch_overlap,
+strategy=file_strategy,
 )
 visual_data: dict[str, Any] = {"summary": visual_result.summary}
 if visual_result.descriptions:
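In the last hunk above, files without an entry in either map fall back to the defaults (no overlap, global strategy). A minimal sketch of that lookup pattern; the helper name, dicts, and paths are hypothetical, and the resolved values are what run_batch_job forwards to extract_objects_qwen as batch_overlap= and strategy=:

```python
def resolve_visual_overrides(
    file_path: str,
    visual_batch_overlap: dict[str, bool] | None,
    visual_strategy: dict[str, str] | None,
) -> tuple[bool, str | None]:
    # Mirrors the per-file lookups in run_batch_job: a missing entry means
    # "no batch overlap" and "use the global qwen_strategy setting".
    batch_overlap = visual_batch_overlap.get(file_path, False) if visual_batch_overlap else False
    strategy = visual_strategy.get(file_path) if visual_strategy else None
    return batch_overlap, strategy


print(resolve_visual_overrides(
    "/media/action_video.mp4",
    {"/media/shaky_video.mp4": True},
    {"/media/action_video.mp4": "batch_context"},
))  # (False, 'batch_context')
```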
media_engine/batch/timing.py
CHANGED
@@ -126,7 +126,7 @@ def record_timing(
 _timing_history_dirty = True

 unit_label = "/unit" if units else "s"
-logger.debug(f"Recorded timing: {extractor}@{resolution_bucket} = {rate:.2f}{unit_label}
+logger.debug(f"Recorded timing: {extractor}@{resolution_bucket} = {rate:.2f}{unit_label} (avg: {avg:.2f}{unit_label} from {sample_count} samples)")
 # Save periodically (not on every update to reduce disk I/O)
 if _timing_history_dirty and time.time() - _timing_history_last_save > _TIMING_SAVE_INTERVAL:
 save_timing_history()
media_engine/config.py
CHANGED
@@ -81,6 +81,15 @@ class ObjectDetector(StrEnum):
 QWEN = "qwen"


+class QwenStrategy(StrEnum):
+"""Qwen temporal context strategy for multi-frame analysis."""
+
+SINGLE = "single"  # No context (current behavior)
+CONTEXT = "context"  # Pass previous description as text
+BATCH = "batch"  # Multi-frame batch (2-3 frames together)
+BATCH_CONTEXT = "batch_context"  # Batch + pass context between groups
+
+
 # =============================================================================
 # Settings (loaded from JSON config file)
 # =============================================================================
@@ -120,6 +129,7 @@ class Settings(BaseModel):
 object_detector: str = DEFAULT_OBJECT_DETECTOR  # "auto", "yolo", or "qwen"
 qwen_model: str = DEFAULT_QWEN_MODEL
 qwen_frames_per_scene: int = DEFAULT_QWEN_FRAMES_PER_SCENE
+qwen_strategy: str = "auto"  # "auto", "single", "context", "batch", "batch_context"

 # YOLO model ("auto" = select based on VRAM)
 yolo_model: str = "auto"
@@ -163,6 +173,12 @@
 return get_auto_object_detector()
 return ObjectDetector(self.object_detector)

+def get_qwen_strategy(self) -> "QwenStrategy":
+"""Get resolved Qwen strategy (handles 'auto')."""
+if self.qwen_strategy == "auto":
+return get_auto_qwen_strategy()
+return QwenStrategy(self.qwen_strategy)
+

 def get_config_path() -> Path:
 """Get the config file path."""
@@ -369,7 +385,7 @@ def get_free_memory_gb() -> float:
 # Leave a 1GB buffer for system processes
 available_for_models = max(0.0, available_gb - 1.0)

-logger.info(f"Memory: {mem.total / (1024**3):.0f}GB total,
+logger.info(f"Memory: {mem.total / (1024**3):.0f}GB total, {mem.available / (1024**3):.1f}GB available, {available_for_models:.1f}GB for models")
 return available_for_models

 except ImportError:
@@ -432,46 +448,101 @@ def get_auto_whisper_model() -> str:


 def get_auto_qwen_model() -> str:
-"""Select Qwen2-VL model based on available
+"""Select Qwen2-VL model based on available free memory.

-
-
-| <8GB
-| 8-16GB
-| 16GB+
+| Free Memory | Model | Size | Quality |
+|-------------|----------------|-------|---------|
+| <8GB | (use YOLO) | - | Basic |
+| 8-16GB | Qwen2-VL-2B | ~5GB | Good |
+| 16GB+ | Qwen2-VL-7B | ~15GB | Best |
 """
-
+free_mem = get_free_memory_gb()

-if
+if free_mem >= 16:
 model = "Qwen/Qwen2-VL-7B-Instruct"
-elif
+elif free_mem >= 8:
 model = "Qwen/Qwen2-VL-2B-Instruct"
 else:
-# Not enough
+# Not enough free memory for Qwen, should use YOLO instead
 model = "Qwen/Qwen2-VL-2B-Instruct"
-logger.warning(f"Low
+logger.warning(f"Low free memory ({free_mem:.1f}GB) - consider using YOLO instead of Qwen")

-logger.info(f"Auto-selected Qwen model: {model} (
+logger.info(f"Auto-selected Qwen model: {model} (free memory: {free_mem:.1f}GB)")
 return model


 def get_auto_object_detector() -> ObjectDetector:
-"""Select object detector based on available
+"""Select object detector based on available free memory.

 YOLO is faster and uses less memory.
-Qwen provides better scene understanding but needs more
+Qwen provides better scene understanding but needs more memory.
 """
-
+free_mem = get_free_memory_gb()

-if
+if free_mem >= 8:
 detector = ObjectDetector.QWEN
 else:
 detector = ObjectDetector.YOLO

-logger.info(f"Auto-selected object detector: {detector} (
+logger.info(f"Auto-selected object detector: {detector} (free memory: {free_mem:.1f}GB)")
 return detector


+def get_auto_qwen_strategy() -> QwenStrategy:
+"""Select Qwen temporal context strategy based on available free memory.
+
+Thresholds based on Qwen 2B with 1080p images (max 1280px width).
+
+| Free Memory | Strategy | Frames per Call | Description |
+|-------------|---------------|-----------------|--------------------------|
+| <8GB | CONTEXT | 1 | Text context only |
+| 8-12GB | BATCH | 2-3 | Multi-frame batches |
+| 12GB+ | BATCH_CONTEXT | 2-3 | Batches + text context |
+"""
+free_mem = get_free_memory_gb()
+
+if free_mem >= 12:
+strategy = QwenStrategy.BATCH_CONTEXT
+elif free_mem >= 8:
+strategy = QwenStrategy.BATCH
+else:
+strategy = QwenStrategy.CONTEXT
+
+logger.info(f"Auto-selected Qwen strategy: {strategy} (free memory: {free_mem:.1f}GB)")
+return strategy
+
+
+def get_auto_qwen_batch_size() -> int:
+"""Select Qwen batch size based on available free memory.
+
+Larger batches provide better temporal context but use more memory.
+Each additional frame in a batch adds ~0.5-1GB memory overhead.
+
+| Free Memory | Batch Size | Notes |
+|-------------|------------|--------------------------|
+| <10GB | 2 | Minimal batching |
+| 10-15GB | 3 | Default batch size |
+| 15-25GB | 4 | Good temporal context |
+| 25-40GB | 5 | Rich temporal context |
+| 40GB+ | 6 | Maximum temporal context |
+"""
+free_mem = get_free_memory_gb()
+
+if free_mem >= 40:
+batch_size = 6
+elif free_mem >= 25:
+batch_size = 5
+elif free_mem >= 15:
+batch_size = 4
+elif free_mem >= 10:
+batch_size = 3
+else:
+batch_size = 2
+
+logger.info(f"Auto-selected Qwen batch size: {batch_size} (free memory: {free_mem:.1f}GB)")
+return batch_size
+
+
 def get_auto_yolo_model() -> str:
 """Select YOLO model based on available VRAM.

@@ -546,6 +617,7 @@ def get_vram_summary() -> dict:
 "free_memory_gb": round(free_mem, 1),
 "auto_whisper_model": get_auto_whisper_model(),
 "auto_qwen_model": get_auto_qwen_model() if vram >= 8 else None,
+"auto_qwen_strategy": str(get_auto_qwen_strategy()),
 "auto_yolo_model": get_auto_yolo_model(),
 "auto_clip_model": get_auto_clip_model(),
 "auto_object_detector": str(get_auto_object_detector()),
@@ -654,7 +726,7 @@ def check_memory_before_load(model_name: str, clear_memory_func: Any | None = No
 available = vram if device != DeviceType.CPU else ram

 if available < required_gb:
-logger.warning(f"Low memory ({available:.1f}GB available) for {model_name}
+logger.warning(f"Low memory ({available:.1f}GB available) for {model_name} ({required_gb:.1f}GB required)")

 # Try to free memory
 if clear_memory_func is not None:
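The three auto-selection helpers all key off the same free-memory figure. A standalone sketch of the decision tables above; it reimplements the thresholds purely for illustration (in the package the real implementations are get_auto_qwen_strategy and get_auto_qwen_batch_size, fed by get_free_memory_gb):

```python
from enum import StrEnum


class QwenStrategy(StrEnum):
    SINGLE = "single"
    CONTEXT = "context"
    BATCH = "batch"
    BATCH_CONTEXT = "batch_context"


def pick_strategy(free_gb: float) -> QwenStrategy:
    # <8GB -> text context only; 8-12GB -> multi-frame batches; 12GB+ -> batches + context.
    if free_gb >= 12:
        return QwenStrategy.BATCH_CONTEXT
    if free_gb >= 8:
        return QwenStrategy.BATCH
    return QwenStrategy.CONTEXT


def pick_batch_size(free_gb: float) -> int:
    # Batch size grows with free memory: 2 (<10GB) up to 6 (40GB+).
    for threshold, size in ((40, 6), (25, 5), (15, 4), (10, 3)):
        if free_gb >= threshold:
            return size
    return 2


for free_gb in (6, 9, 14, 30, 48):
    print(f"{free_gb}GB -> {pick_strategy(free_gb)}, batch_size={pick_batch_size(free_gb)}")
```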
media_engine/extractors/faces.py
CHANGED
@@ -200,7 +200,7 @@ def extract_faces(
 unique_faces, unique_estimate = _deduplicate_faces(detections, all_embeddings, frame_size=frame_size)

 needs_review = sum(1 for f in unique_faces if f.needs_review)
-logger.info(f"Detected {len(detections)} faces, {unique_estimate} unique,
+logger.info(f"Detected {len(detections)} faces, {unique_estimate} unique, {needs_review} need review")

 return FacesResult(
 count=len(detections),
media_engine/extractors/frame_buffer.py
CHANGED
@@ -286,7 +286,7 @@ def decode_frames(
 out_width = out_width - (out_width % 2)
 out_height = out_height - (out_height % 2)

-logger.info(f"Decoding {len(timestamps)} frames from {file_path}
+logger.info(f"Decoding {len(timestamps)} frames from {file_path} at {out_width}x{out_height}" + (f" (hwaccel={hwaccel})" if hwaccel else ""))

 frames: dict[float, SharedFrame] = {}

media_engine/extractors/frames.py
CHANGED
@@ -99,7 +99,7 @@ class FrameExtractor:
 pixels = self._width * self._height
 max_dim = max(self._width, self._height)
 if pixels > HIGH_RES_THRESHOLD and max_dim > self.max_dimension:
-logger.info(f"High-res video ({self._width}x{self._height}),
+logger.info(f"High-res video ({self._width}x{self._height}), using FFmpeg decode at {self.max_dimension}px")
 self._use_ffmpeg_decode = True
 # Release opencv capture - we'll use FFmpeg instead
 self.cap.release()
@@ -279,7 +279,7 @@ class FrameExtractor:
 try:
 # Scale filter that maintains aspect ratio
 # scale=W:H:force_original_aspect_ratio=decrease
-scale_filter = f"scale={self.max_dimension}:{self.max_dimension}
+scale_filter = f"scale={self.max_dimension}:{self.max_dimension}:force_original_aspect_ratio=decrease"

 cmd = [
 "ffmpeg",
media_engine/extractors/metadata/sony.py
CHANGED
@@ -416,7 +416,7 @@ class SonyExtractor:
 total_duration=recording.total_duration,
 file_index=file_index,
 )
-logger.info(f"Detected spanned recording: file {file_index + 1} of {len(recording.clips)},
+logger.info(f"Detected spanned recording: file {file_index + 1} of {len(recording.clips)}, total duration {recording.total_duration:.1f}s")

 return Metadata(
 duration=base_metadata.duration,
media_engine/extractors/motion.py
CHANGED
@@ -377,7 +377,7 @@ def analyze_motion(
 total_flow_time += time.perf_counter() - flow_start

 # Log timing breakdown
-logger.info(f"Motion analysis timing: decode={total_load_time:.2f}s,
+logger.info(f"Motion analysis timing: decode={total_load_time:.2f}s, optical_flow={total_flow_time:.2f}s, frames={global_frame_idx}")

 if not frame_motions:
 return MotionAnalysis(
@@ -693,7 +693,7 @@ def get_adaptive_timestamps(
 motion.duration * 0.5,
 motion.duration * 0.85,
 ]
-logger.info(f"Stable video optimization: {len(timestamps)} frames only
+logger.info(f"Stable video optimization: {len(timestamps)} frames only (avg_intensity={motion.avg_intensity:.1f})")
 return timestamps

 if motion.is_stable:
@@ -704,7 +704,7 @@ def get_adaptive_timestamps(
 else:
 step = motion.duration / (num_samples + 1)
 timestamps = [step * (i + 1) for i in range(num_samples)]
-logger.info(f"Stable video: {len(timestamps)} frames
+logger.info(f"Stable video: {len(timestamps)} frames (avg_intensity={motion.avg_intensity:.1f})")
 return timestamps

 if not motion.segments:
@@ -753,7 +753,7 @@ def get_adaptive_timestamps(
 # Ensure timestamps are within video bounds
 timestamps = [max(0.1, min(t, motion.duration - 0.1)) for t in timestamps]

-logger.info(f"Adaptive sampling: {len(timestamps)} frames
+logger.info(f"Adaptive sampling: {len(timestamps)} frames (avg_intensity={motion.avg_intensity:.1f}, stable={motion.is_stable})")

 return timestamps

media_engine/extractors/objects.py
CHANGED
@@ -157,7 +157,7 @@ def extract_objects(
 # Deduplicate - track unique objects
 unique_detections, summary = _deduplicate_objects(raw_detections)

-logger.info(f"Detected {len(raw_detections)} objects,
+logger.info(f"Detected {len(raw_detections)} objects, {len(unique_detections)} unique across {len(summary)} types")

 return ObjectsResult(
 summary=summary,