media-engine 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {media_engine-0.2.0 → media_engine-0.2.1}/PKG-INFO +1 -1
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/_version.py +2 -2
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/objects_qwen.py +123 -51
- {media_engine-0.2.0 → media_engine-0.2.1}/.github/workflows/ci.yml +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/.github/workflows/release.yml +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/.gitignore +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/API.md +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/CLAUDE.md +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/Dockerfile +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/Dockerfile.cuda +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/Dockerfile.mlx +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/LICENSE +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/MANIFEST.in +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/README.md +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/clip.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/faces.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/metadata.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/motion.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/objects.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/ocr.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/scenes.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/telemetry.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/cli/transcript.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/demo/index.html +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/demo/run.sh +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/demo/server.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/docker-compose.yml +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/__init__.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/app.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/batch/__init__.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/batch/models.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/batch/processor.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/batch/queue.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/batch/state.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/batch/timing.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/cli.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/config.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/__init__.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/clip.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/faces.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/frame_buffer.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/frames.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/__init__.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/apple.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/arri.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/avchd.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/avchd_gps.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/base.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/blackmagic.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/camera_360.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/canon.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/dji.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/dv.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/ffmpeg.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/generic.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/gopro.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/red.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/registry.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/sony.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/metadata/tesla.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/motion.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/objects.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/ocr.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/scenes.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/shot_type.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/telemetry.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/transcribe.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/translate.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/extractors/vad.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/main.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/py.typed +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/routers/__init__.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/routers/batch.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/routers/health.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/routers/models.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/routers/settings.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/routers/utils.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/schemas.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/utils/__init__.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/utils/logging.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/media_engine/utils/memory.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/pyproject.toml +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/__init__.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/conftest.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/stress_test.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_api.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_batch_api.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_clip.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_dependencies.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_faces.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_memory.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_metadata.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_model_lifecycle.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_objects.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_ocr.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_scenes.py +0 -0
- {media_engine-0.2.0 → media_engine-0.2.1}/tests/test_transcribe.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: media-engine
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: AI-powered video extraction API for metadata, transcripts, faces, scenes, objects, and more
|
|
5
5
|
Project-URL: Repository, https://github.com/thetrainroom/media-engine
|
|
6
6
|
Project-URL: Issues, https://github.com/thetrainroom/media-engine/issues
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.2.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 0)
|
|
31
|
+
__version__ = version = '0.2.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 2, 1)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -237,24 +237,18 @@ def _get_qwen_model(
|
|
|
237
237
|
|
|
238
238
|
def _build_analysis_prompt(context: dict[str, str] | None = None) -> str:
|
|
239
239
|
"""Build the analysis prompt, optionally including context."""
|
|
240
|
-
base_prompt = """
|
|
240
|
+
base_prompt = """Describe what you see in this image. List main objects and write a short description.
|
|
241
241
|
|
|
242
|
-
|
|
242
|
+
JSON format:
|
|
243
|
+
{"objects": ["object1", "object2"], "description": "scene description"}
|
|
243
244
|
|
|
244
|
-
|
|
245
|
-
{"objects": ["
|
|
246
|
-
|
|
247
|
-
Rules for objects:
|
|
248
|
-
- Be specific: "scissors" not "tool", "laptop" not "device"
|
|
249
|
-
- Include people as "person" or "man"/"woman"
|
|
250
|
-
- Only list clearly visible objects
|
|
245
|
+
If the image is unclear, use:
|
|
246
|
+
{"objects": [], "description": "unknown", "error": "reason why"}
|
|
251
247
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
- Mention the setting/environment
|
|
255
|
-
- Keep it to 1-2 sentences
|
|
248
|
+
Example:
|
|
249
|
+
{"objects": ["mountain", "ocean", "lighthouse"], "description": "A lighthouse on a rocky coast with mountains in the background."}
|
|
256
250
|
|
|
257
|
-
Respond with JSON only
|
|
251
|
+
Respond with JSON only. Describe what you CAN see."""
|
|
258
252
|
|
|
259
253
|
if not context:
|
|
260
254
|
return base_prompt
|
|
@@ -312,11 +306,19 @@ IMPORTANT: This location has these nearby landmarks: {nearby_landmarks}
|
|
|
312
306
|
log_instruction = f"""
|
|
313
307
|
NOTE: {log_footage_note}
|
|
314
308
|
- Focus on describing the content and action, not the color grading
|
|
309
|
+
"""
|
|
310
|
+
|
|
311
|
+
# Add topic/activity instruction if provided
|
|
312
|
+
topic = context.get("topic", "") or context.get("activity", "")
|
|
313
|
+
topic_instruction = ""
|
|
314
|
+
if topic:
|
|
315
|
+
topic_instruction = f"""
|
|
316
|
+
IMPORTANT: This video shows "{topic}". Use this context to interpret the action.
|
|
315
317
|
"""
|
|
316
318
|
|
|
317
319
|
# Enhanced prompt with context
|
|
318
320
|
return f"""{context_section}
|
|
319
|
-
{person_instruction}{landmark_instruction}{log_instruction}
|
|
321
|
+
{person_instruction}{landmark_instruction}{log_instruction}{topic_instruction}
|
|
320
322
|
Look at this image carefully and describe what you see.
|
|
321
323
|
|
|
322
324
|
You MUST respond with ONLY this exact JSON format:
|
|
@@ -379,6 +381,7 @@ def _build_batch_prompt(
|
|
|
379
381
|
|
|
380
382
|
# Build context section if available
|
|
381
383
|
context_section = ""
|
|
384
|
+
topic_hint = ""
|
|
382
385
|
if context:
|
|
383
386
|
context_lines = ["Known context about this video:"]
|
|
384
387
|
labels = {
|
|
@@ -386,6 +389,7 @@ def _build_batch_prompt(
|
|
|
386
389
|
"location": "Location",
|
|
387
390
|
"nearby_landmarks": "Nearby landmarks/POIs",
|
|
388
391
|
"activity": "Activity",
|
|
392
|
+
"topic": "Activity/Subject",
|
|
389
393
|
"language": "Language spoken",
|
|
390
394
|
"device": "Filmed with",
|
|
391
395
|
}
|
|
@@ -393,28 +397,34 @@ def _build_batch_prompt(
|
|
|
393
397
|
if value and key not in ("log_footage_note", "color_transfer"):
|
|
394
398
|
label = labels.get(key, key.replace("_", " ").title())
|
|
395
399
|
context_lines.append(f"- {label}: {value}")
|
|
400
|
+
# Capture topic for special instruction
|
|
401
|
+
if key in ("topic", "activity") and value:
|
|
402
|
+
topic_hint = value
|
|
396
403
|
context_section = "\n".join(context_lines) + "\n\n"
|
|
397
404
|
|
|
398
405
|
person_instruction = ""
|
|
399
406
|
if person_name:
|
|
400
407
|
person_instruction = f'Use "{person_name}" instead of "person" in objects and description.\n'
|
|
401
408
|
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
2. What ACTION or movement occurs across the frames?
|
|
407
|
-
3. How does the scene change from first to last frame?
|
|
409
|
+
# Add topic instruction if provided
|
|
410
|
+
topic_instruction = ""
|
|
411
|
+
if topic_hint:
|
|
412
|
+
topic_instruction = f'IMPORTANT: This video shows "{topic_hint}". Use this context to interpret what you see.\n'
|
|
408
413
|
|
|
409
|
-
|
|
410
|
-
{{
|
|
414
|
+
return f"""{context_section}These {num_frames} frames are from a video.
|
|
415
|
+
{person_instruction}{topic_instruction}
|
|
416
|
+
Describe what you see. List main objects and write a short description.
|
|
411
417
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
- Describe the ACTION that unfolds across frames (e.g., "person walks toward camera", "car turns left")
|
|
415
|
-
- Keep description to 1-2 sentences summarizing the sequence
|
|
418
|
+
JSON format:
|
|
419
|
+
{{"objects": ["object1", "object2"], "action": "what is happening", "description": "scene description"}}
|
|
416
420
|
|
|
417
|
-
|
|
421
|
+
If the image is unclear or you cannot identify content, use:
|
|
422
|
+
{{"objects": [], "action": "unknown", "description": "unknown", "error": "reason why"}}
|
|
423
|
+
|
|
424
|
+
Example:
|
|
425
|
+
{{"objects": ["bus", "road", "mountain"], "action": "bus driving", "description": "A bus on a coastal road with mountains."}}
|
|
426
|
+
|
|
427
|
+
Respond with JSON only. Describe what you CAN see, even if partial."""
|
|
418
428
|
|
|
419
429
|
|
|
420
430
|
def _build_batch_context_prompt(
|
|
@@ -862,9 +872,16 @@ def _analyze_frames_batch_context(
|
|
|
862
872
|
|
|
863
873
|
# Build content with all images in the batch
|
|
864
874
|
content: list[dict[str, str]] = []
|
|
865
|
-
for frame_path,
|
|
875
|
+
for frame_path, ts in batch:
|
|
876
|
+
# Verify frame exists and log size
|
|
877
|
+
if os.path.exists(frame_path):
|
|
878
|
+
size_kb = os.path.getsize(frame_path) / 1024
|
|
879
|
+
logger.info(f"Batch frame {ts:.1f}s: {size_kb:.1f}KB")
|
|
880
|
+
else:
|
|
881
|
+
logger.warning(f"Batch frame missing: {frame_path}")
|
|
866
882
|
content.append({"type": "image", "image": f"file://{frame_path}"})
|
|
867
883
|
content.append({"type": "text", "text": prompt})
|
|
884
|
+
logger.info(f"Batch {batch_idx + 1}: sending {len(batch)} images to Qwen")
|
|
868
885
|
|
|
869
886
|
messages = [{"role": "user", "content": content}]
|
|
870
887
|
|
|
@@ -942,9 +959,18 @@ def _fix_malformed_json(text: str) -> str:
|
|
|
942
959
|
# Remove markdown code blocks
|
|
943
960
|
text = text.replace("```json", "").replace("```", "").strip()
|
|
944
961
|
|
|
962
|
+
# Remove invalid control characters (keep newlines and tabs for readability)
|
|
963
|
+
# Control chars are 0x00-0x1F except \t (0x09), \n (0x0A), \r (0x0D)
|
|
964
|
+
text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text)
|
|
965
|
+
|
|
945
966
|
# Fix escaped quotes before colons: "action\": -> "action":
|
|
946
967
|
text = text.replace('\\":', '":')
|
|
947
968
|
|
|
969
|
+
# Fix markdown bold in JSON keys: "action**: -> "action":
|
|
970
|
+
# Model sometimes outputs "key**: "value" instead of "key": "value"
|
|
971
|
+
text = re.sub(r'"\*+:', '":', text)
|
|
972
|
+
text = re.sub(r"(\w)\*+:", r'\1":', text) # action**: -> action":
|
|
973
|
+
|
|
948
974
|
# Replace single quotes with double quotes for keys and string values
|
|
949
975
|
# But be careful not to replace apostrophes within words
|
|
950
976
|
# First, handle keys: 'key': -> "key":
|
|
@@ -1009,6 +1035,11 @@ def _parse_batch_response(response: str) -> tuple[list[str], str | None]:
|
|
|
1009
1035
|
if isinstance(change, str) and change.strip():
|
|
1010
1036
|
desc_parts.append(f"Change: {change.strip()}")
|
|
1011
1037
|
|
|
1038
|
+
# Check for error field (model couldn't fully analyze)
|
|
1039
|
+
error = data.get("error", "")
|
|
1040
|
+
if isinstance(error, str) and error.strip():
|
|
1041
|
+
logger.warning(f"Qwen reported issue: {error}")
|
|
1042
|
+
|
|
1012
1043
|
if desc_parts:
|
|
1013
1044
|
description = " ".join(desc_parts)
|
|
1014
1045
|
|
|
@@ -1017,6 +1048,33 @@ def _parse_batch_response(response: str) -> tuple[list[str], str | None]:
|
|
|
1017
1048
|
except (json.JSONDecodeError, ValueError) as e:
|
|
1018
1049
|
logger.warning(f"Failed to parse batch JSON from Qwen response: {e}")
|
|
1019
1050
|
|
|
1051
|
+
# Try to extract objects from partial/truncated JSON using regex
|
|
1052
|
+
# Look for "name": "value" patterns in the objects array
|
|
1053
|
+
name_matches = re.findall(r'"name"\s*:\s*"([^"]+)"', response)
|
|
1054
|
+
if name_matches:
|
|
1055
|
+
objects = [n for n in name_matches if len(n) < 100 and n.strip()]
|
|
1056
|
+
logger.info(f"Extracted {len(objects)} objects from partial JSON: {objects}")
|
|
1057
|
+
if objects:
|
|
1058
|
+
return objects, None
|
|
1059
|
+
|
|
1060
|
+
# Look for simple string arrays: ["item1", "item2"]
|
|
1061
|
+
array_match = re.search(r'"objects"\s*:\s*\[([^\]]*)', response)
|
|
1062
|
+
if array_match:
|
|
1063
|
+
items = re.findall(r'"([^"]+)"', array_match.group(1))
|
|
1064
|
+
objects = [i for i in items if len(i) < 100 and i.strip() and i not in ("name", "color", "location")]
|
|
1065
|
+
if objects:
|
|
1066
|
+
logger.info(f"Extracted {len(objects)} objects from array: {objects}")
|
|
1067
|
+
|
|
1068
|
+
# Try to extract description from malformed JSON
|
|
1069
|
+
desc_match = re.search(r'"description["\*]*\s*:\s*"([^"]+)"', response)
|
|
1070
|
+
if desc_match:
|
|
1071
|
+
description = desc_match.group(1).strip()
|
|
1072
|
+
logger.info(f"Extracted description from partial JSON: {description}")
|
|
1073
|
+
return objects, description
|
|
1074
|
+
|
|
1075
|
+
if objects:
|
|
1076
|
+
return objects, None
|
|
1077
|
+
|
|
1020
1078
|
# Fallback to standard parser
|
|
1021
1079
|
return _parse_objects_and_description(response)
|
|
1022
1080
|
|
|
@@ -1061,7 +1119,7 @@ def extract_objects_qwen(
|
|
|
1061
1119
|
Returns:
|
|
1062
1120
|
ObjectsResult with detected objects and contextual descriptions
|
|
1063
1121
|
"""
|
|
1064
|
-
logger.info(f"extract_objects_qwen called: file={file_path},
|
|
1122
|
+
logger.info(f"extract_objects_qwen called: file={file_path}, lut_path={lut_path}, timestamps={timestamps}")
|
|
1065
1123
|
|
|
1066
1124
|
settings = get_settings()
|
|
1067
1125
|
# Resolve model name (handles "auto")
|
|
@@ -1094,24 +1152,22 @@ def extract_objects_qwen(
|
|
|
1094
1152
|
else:
|
|
1095
1153
|
context = context.copy() # Don't modify the original
|
|
1096
1154
|
|
|
1097
|
-
if
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
)
|
|
1108
|
-
logger.info(f"Added log footage context hint (no LUT, color_transfer={color_transfer})")
|
|
1155
|
+
# Determine if we need auto-normalization (LOG footage without LUT)
|
|
1156
|
+
has_lut = lut_path and os.path.exists(lut_path)
|
|
1157
|
+
auto_normalize = is_log_footage and not has_lut
|
|
1158
|
+
|
|
1159
|
+
if has_lut:
|
|
1160
|
+
# LUT applied - colors are corrected
|
|
1161
|
+
logger.info(f"LOG footage detected, applying LUT: {lut_path}")
|
|
1162
|
+
elif auto_normalize:
|
|
1163
|
+
# LOG detected, no LUT - will apply auto-normalization
|
|
1164
|
+
logger.info(f"LOG footage detected ({color_transfer}), applying auto-normalization")
|
|
1109
1165
|
|
|
1110
1166
|
# IMPORTANT: Extract frames BEFORE loading the model!
|
|
1111
1167
|
# ffmpeg can crash (SIGABRT) when forked from a process with MPS/Metal loaded.
|
|
1112
1168
|
if progress_callback:
|
|
1113
1169
|
progress_callback("Extracting frames...", None, None)
|
|
1114
|
-
frame_paths = _extract_frames_at_timestamps(file_path, temp_dir, timestamps, lut_path=lut_path)
|
|
1170
|
+
frame_paths = _extract_frames_at_timestamps(file_path, temp_dir, timestamps, lut_path=lut_path, auto_normalize=auto_normalize)
|
|
1115
1171
|
total_frames = len([p for p in frame_paths if p])
|
|
1116
1172
|
|
|
1117
1173
|
if total_frames == 0:
|
|
@@ -1201,6 +1257,7 @@ def _extract_frames_at_timestamps(
|
|
|
1201
1257
|
timestamps: list[float],
|
|
1202
1258
|
max_width: int = 1280,
|
|
1203
1259
|
lut_path: str | None = None,
|
|
1260
|
+
auto_normalize: bool = False,
|
|
1204
1261
|
) -> list[str]:
|
|
1205
1262
|
"""Extract frames at specific timestamps, resized for VLM inference.
|
|
1206
1263
|
|
|
@@ -1214,6 +1271,8 @@ def _extract_frames_at_timestamps(
|
|
|
1214
1271
|
timestamps: List of timestamps to extract (in seconds)
|
|
1215
1272
|
max_width: Maximum width for scaling (default 1280)
|
|
1216
1273
|
lut_path: Optional path to a .cube LUT file for color correction
|
|
1274
|
+
auto_normalize: If True and no LUT, apply automatic color normalization
|
|
1275
|
+
for LOG footage (boosts contrast and saturation)
|
|
1217
1276
|
"""
|
|
1218
1277
|
import subprocess
|
|
1219
1278
|
|
|
@@ -1223,16 +1282,28 @@ def _extract_frames_at_timestamps(
|
|
|
1223
1282
|
|
|
1224
1283
|
logger.info(f"Extracting {len(timestamps)} frames from {file_path} at timestamps {timestamps}")
|
|
1225
1284
|
|
|
1226
|
-
#
|
|
1227
|
-
|
|
1228
|
-
|
|
1285
|
+
# Use ffmpeg with color correction if LUT provided OR auto-normalize requested
|
|
1286
|
+
use_ffmpeg_color = (lut_path and os.path.exists(lut_path)) or auto_normalize
|
|
1287
|
+
|
|
1288
|
+
if use_ffmpeg_color:
|
|
1289
|
+
# Build color correction filter
|
|
1290
|
+
if lut_path and os.path.exists(lut_path):
|
|
1291
|
+
logger.info(f"Applying LUT: {lut_path}")
|
|
1292
|
+
color_filter = f"lut3d='{lut_path}'"
|
|
1293
|
+
else:
|
|
1294
|
+
# Auto-normalize for LOG footage: apply S-curve + saturation boost
|
|
1295
|
+
# This converts flat LOG footage to a more viewable range for VLM analysis
|
|
1296
|
+
# curves: S-curve to add contrast (lift shadows, compress highlights)
|
|
1297
|
+
# eq: boost saturation since LOG footage is very desaturated
|
|
1298
|
+
logger.info("Applying auto-normalization for LOG footage (no LUT configured)")
|
|
1299
|
+
color_filter = "curves=master='0/0 0.15/0.30 0.5/0.5 0.85/0.70 1/1',eq=saturation=1.4:contrast=1.1"
|
|
1300
|
+
|
|
1229
1301
|
for i, ts in enumerate(timestamps):
|
|
1230
1302
|
output_path = os.path.join(output_dir, f"frame_{i:04d}.jpg")
|
|
1231
1303
|
try:
|
|
1232
|
-
# Build filter chain:
|
|
1304
|
+
# Build filter chain: color correction + scale
|
|
1233
1305
|
scale_filter = f"scale={max_width}:{max_width}:force_original_aspect_ratio=decrease"
|
|
1234
|
-
|
|
1235
|
-
vf = f"{lut_filter},{scale_filter}"
|
|
1306
|
+
vf = f"{color_filter},{scale_filter}"
|
|
1236
1307
|
|
|
1237
1308
|
cmd = [
|
|
1238
1309
|
"ffmpeg",
|
|
@@ -1255,9 +1326,10 @@ def _extract_frames_at_timestamps(
|
|
|
1255
1326
|
|
|
1256
1327
|
if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
|
|
1257
1328
|
frame_paths.append(output_path)
|
|
1258
|
-
|
|
1329
|
+
correction_type = "LUT" if (lut_path and os.path.exists(lut_path)) else "auto-normalized"
|
|
1330
|
+
logger.info(f"Extracted frame {i} at {ts:.2f}s ({correction_type}): {output_path}")
|
|
1259
1331
|
else:
|
|
1260
|
-
logger.warning(f"Frame at {ts:.2f}s: could not extract with
|
|
1332
|
+
logger.warning(f"Frame at {ts:.2f}s: could not extract with color correction")
|
|
1261
1333
|
frame_paths.append("")
|
|
1262
1334
|
except subprocess.CalledProcessError as e:
|
|
1263
1335
|
logger.warning(f"Frame at {ts:.2f}s: ffmpeg failed: {e}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|