media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,211 @@
1
"""Model checking endpoints."""

import logging
import threading
import time
import uuid

from fastapi import APIRouter, HTTPException

from media_engine.config import get_free_memory_gb
from media_engine.extractors import (
    unload_clip_model,
    unload_face_model,
    unload_qwen_model,
    unload_whisper_model,
    unload_yolo_model,
)

router = APIRouter(tags=["models"])
logger = logging.getLogger(__name__)

# Shared state for model checks, keyed by check_id. Written by the
# background thread started in start_model_check and read by the
# endpoints below. NOTE(review): entries are never evicted, so a
# long-lived process accumulates results from every check ever run.
_model_check_results: dict[str, dict] = {}
_model_check_status: dict[str, str] = {}  # "running", "complete", "error"
25
+
26
+
27
def _load_whisper_large() -> None:
    """Load Whisper large-v3 via the platform-appropriate backend."""
    from media_engine.config import has_cuda, is_apple_silicon

    if is_apple_silicon():
        # mlx-whisper exposes no bare "load model" call, so force the load
        # by transcribing a tiny silent WAV file.
        import tempfile
        import wave

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            temp_path = f.name
            # Write a minimal valid WAV file (0.1 second of 16 kHz mono silence).
            with wave.open(f.name, "w") as wav:
                wav.setnchannels(1)
                wav.setsampwidth(2)
                wav.setframerate(16000)
                wav.writeframes(b"\x00" * 3200)  # 0.1s of silence

        try:
            import mlx_whisper  # type: ignore[import-not-found]

            # This will load the model and transcribe the silent audio.
            mlx_whisper.transcribe(
                temp_path,
                path_or_hf_repo="mlx-community/whisper-large-v3-mlx",
            )
        finally:
            import os

            os.unlink(temp_path)
    elif has_cuda():
        from faster_whisper import WhisperModel  # type: ignore[import-not-found]

        WhisperModel("large-v3", device="cuda")
    else:
        import whisper  # type: ignore[import-not-found]

        whisper.load_model("large-v3")


def _load_yolo() -> None:
    """Load the YOLOv8 medium checkpoint (downloads on first use)."""
    from ultralytics import YOLO  # type: ignore[import-not-found]

    YOLO("yolov8m.pt")


def _load_faces() -> None:
    """Load the DeepFace Facenet model used for face detection."""
    from deepface import DeepFace  # type: ignore[import-not-found]

    DeepFace.build_model("Facenet")


def _check_model_load(label: str, load, unload) -> dict:
    """Attempt one model load and return a timed result dict.

    The optional third-party imports happen lazily inside *load* so a
    missing dependency is reported as canLoad=False for that model
    rather than aborting the whole check run. On success the model is
    unloaded again to release memory.
    """
    logger.info(f"Testing {label} model...")
    start = time.time()
    try:
        load()
        result = {
            "canLoad": True,
            "error": None,
            "loadTimeSeconds": round(time.time() - start, 1),
        }
        # If unload itself fails, the except below records the failure,
        # matching the behavior of the per-model try/except blocks this
        # helper replaces.
        unload()
        return result
    except Exception as e:
        return {
            "canLoad": False,
            "error": str(e),
            "loadTimeSeconds": round(time.time() - start, 1),
        }


def _run_model_checks(check_id: str) -> None:
    """Background task to check which models can load.

    Runs each model check in sequence and records the outcome in the
    module-level _model_check_results / _model_check_status dicts
    under *check_id*.
    """
    from media_engine.extractors.clip import get_clip_backend
    from media_engine.extractors.objects_qwen import _get_qwen_model

    results: dict[str, dict] = {}
    _model_check_status[check_id] = "running"

    try:
        # (log label, result key, loader, unloader) for each model to probe.
        checks = [
            ("Qwen 2B", "qwen_2b",
             lambda: _get_qwen_model("Qwen/Qwen2-VL-2B-Instruct"), unload_qwen_model),
            ("Whisper large-v3", "whisper_large", _load_whisper_large, unload_whisper_model),
            ("CLIP", "clip", get_clip_backend, unload_clip_model),
            ("YOLO", "yolo", _load_yolo, unload_yolo_model),
            ("Face detection", "faces", _load_faces, unload_face_model),
        ]
        for label, key, load, unload in checks:
            results[key] = _check_model_load(label, load, unload)

        _model_check_results[check_id] = {
            "results": results,
            "freeMemoryGb": get_free_memory_gb(),
        }
        _model_check_status[check_id] = "complete"
        logger.info(f"Model check {check_id} complete: {results}")

    except Exception as e:
        logger.error(f"Model check {check_id} failed: {e}")
        _model_check_status[check_id] = "error"
        _model_check_results[check_id] = {"error": str(e)}
176
+
177
+
178
@router.post("/check-models")
async def start_model_check():
    """Kick off an asynchronous model-availability check.

    The actual checks run in a daemon thread (they take roughly 30-60
    seconds); this endpoint returns immediately with a short check_id
    that can be polled via GET /check-models/{check_id}.
    """
    # Short random id is enough for correlating poll requests.
    check_id = str(uuid.uuid4())[:8]

    # Run the slow model loads off the event loop.
    worker = threading.Thread(
        target=_run_model_checks,
        args=(check_id,),
        daemon=True,
    )
    worker.start()

    return {"check_id": check_id, "status": "running"}
192
+
193
+
194
@router.get("/check-models/{check_id}")
async def get_model_check_result(check_id: str):
    """Report the state of a previously started model check.

    The response always carries "status" ("running", "complete" or
    "error"); once the check has finished, the stored results (or
    error payload) are merged into the response. Unknown ids yield 404.
    """
    status = _model_check_status.get(check_id, "not_found")

    if status == "not_found":
        raise HTTPException(status_code=404, detail=f"Check ID {check_id} not found")

    response = {"check_id": check_id, "status": status}
    if status != "running":
        # Complete or error - merge in whatever was recorded.
        response.update(_model_check_results.get(check_id, {}))
    return response
@@ -0,0 +1,87 @@
1
"""Settings endpoints: read and update engine configuration over HTTP."""

import logging

from fastapi import APIRouter

from media_engine.config import get_settings, reload_settings, save_config_to_file
from media_engine.schemas import SettingsResponse, SettingsUpdate

router = APIRouter(tags=["settings"])
logger = logging.getLogger(__name__)
12
+
13
+
14
@router.get("/settings", response_model=SettingsResponse)
async def get_settings_endpoint():
    """Return the current settings.

    The stored hf_token is never echoed back; only the boolean
    hf_token_set indicates whether a token is configured.
    """
    settings = get_settings()

    # Fields copied verbatim from the settings object onto the response.
    plain_fields = (
        "api_version",
        "log_level",
        "whisper_model",
        "fallback_language",
        "diarization_model",
        "face_sample_fps",
        "object_sample_fps",
        "min_face_size",
        "object_detector",
        "qwen_model",
        "qwen_frames_per_scene",
        "yolo_model",
        "clip_model",
        "ocr_languages",
        "temp_dir",
    )
    payload = {name: getattr(settings, name) for name in plain_fields}
    # Mask the secret: expose presence only.
    payload["hf_token_set"] = bool(settings.hf_token)

    return SettingsResponse(**payload)
39
+
40
+
41
@router.put("/settings", response_model=SettingsResponse)
async def update_settings(update: SettingsUpdate):
    """Update settings.

    Only fields present in the request body are updated. Changes are
    persisted to the config file and settings are reloaded so the
    response reflects what was actually saved.

    Set hf_token to an empty string to clear the stored token.
    """
    settings = get_settings()

    # Apply only the fields the caller explicitly provided.
    update_data = update.model_dump(exclude_unset=True)

    for field, value in update_data.items():
        # An empty hf_token means "clear the stored token"; previously
        # both branches performed the same setattr, so collapse to one.
        if field == "hf_token" and value == "":
            value = None
        setattr(settings, field, value)

    # Persist, then reload to ensure the response mirrors the on-disk config.
    save_config_to_file(settings)
    new_settings = reload_settings()

    logger.info(f"Settings updated: {list(update_data.keys())}")

    return SettingsResponse(
        api_version=new_settings.api_version,
        log_level=new_settings.log_level,
        whisper_model=new_settings.whisper_model,
        fallback_language=new_settings.fallback_language,
        hf_token_set=bool(new_settings.hf_token),
        diarization_model=new_settings.diarization_model,
        face_sample_fps=new_settings.face_sample_fps,
        object_sample_fps=new_settings.object_sample_fps,
        min_face_size=new_settings.min_face_size,
        object_detector=new_settings.object_detector,
        qwen_model=new_settings.qwen_model,
        qwen_frames_per_scene=new_settings.qwen_frames_per_scene,
        yolo_model=new_settings.yolo_model,
        clip_model=new_settings.clip_model,
        ocr_languages=new_settings.ocr_languages,
        temp_dir=new_settings.temp_dir,
    )
@@ -0,0 +1,135 @@
1
"""Utility endpoints: graceful shutdown, extractor listing, CLIP text encoding."""

import logging
import os
import signal
import threading
import time

from fastapi import APIRouter, HTTPException

router = APIRouter(tags=["utils"])
logger = logging.getLogger(__name__)
13
+
14
+
15
@router.post("/shutdown")
async def shutdown_engine():
    """Shut the engine down cleanly.

    Frees engine resources immediately, then delivers SIGTERM to this
    process from a background thread so the HTTP response can still be
    sent before the process exits.
    """
    from media_engine.app import cleanup_resources

    logger.info("Shutdown requested via API")
    cleanup_resources()

    def delayed_exit():
        # Brief grace period so the response below reaches the client.
        time.sleep(0.5)
        os.kill(os.getpid(), signal.SIGTERM)

    threading.Thread(target=delayed_exit, daemon=True).start()

    return {"status": "shutting_down"}
35
+
36
+
37
@router.get("/extractors")
async def list_extractors():
    """Describe every available extractor and its enable flag."""
    # (name, description, enable_flag) - a None flag means the extractor
    # always runs and has no toggle (telemetry).
    catalog = (
        ("metadata", "Video metadata (duration, resolution, codec, device, GPS)", "enable_metadata"),
        ("transcript", "Audio transcription using Whisper", "enable_transcript"),
        ("scenes", "Scene boundary detection", "enable_scenes"),
        ("faces", "Face detection with embeddings", "enable_faces"),
        ("objects", "Object detection with YOLO (fast, bounding boxes)", "enable_objects"),
        ("visual", "Scene descriptions with Qwen2-VL (slower, richer)", "enable_visual"),
        ("clip", "CLIP visual embeddings per scene", "enable_clip"),
        ("ocr", "Text extraction from video frames", "enable_ocr"),
        ("telemetry", "GPS/flight path (always extracted automatically)", None),
    )

    extractors = []
    for name, description, enable_flag in catalog:
        entry = {"name": name, "description": description}
        if enable_flag is not None:
            entry["enable_flag"] = enable_flag
        extractors.append(entry)

    return {"extractors": extractors}
88
+
89
+
90
@router.post("/encode_text")
async def encode_text(request: dict):
    """Encode a text query to a CLIP embedding for text-to-image search.

    Request body:
        text: str - The text query to encode
        model_name: str (optional) - CLIP model name (e.g., "ViT-B-32")
        translate: bool (optional) - Whether to translate non-English queries to English (default: true)

    Returns:
        embedding: list[float] - The normalized CLIP embedding (512 or 768 dimensions)
        model: str - The model used for encoding
        original_text: str - The original query text
        translated_text: str - The text that was actually encoded (may be translated)
        detected_language: str | None - Detected language of the original text
        was_translated: bool - Whether the text was translated

    Raises:
        HTTPException: 400 when "text" is missing or empty; 500 when
            translation or encoding fails.
    """
    from media_engine.extractors.clip import encode_text_query, get_clip_backend
    from media_engine.extractors.translate import translate_query_for_clip

    text = request.get("text", "")
    if not text:
        raise HTTPException(status_code=400, detail="Text query is required")

    model_name = request.get("model_name")
    enable_translation = request.get("translate", True)

    try:
        # Translate query if needed before encoding.
        translated_text, detected_lang, was_translated = translate_query_for_clip(
            text, enable_translation=enable_translation
        )

        # Encode the (possibly translated) text
        embedding = encode_text_query(translated_text, model_name)
        backend = get_clip_backend(model_name)

        return {
            "embedding": embedding,
            "model": backend.get_model_name(),
            "original_text": text,
            "translated_text": translated_text,
            "detected_language": detected_lang,
            "was_translated": was_translated,
        }
    except Exception as e:
        logger.error(f"Text encoding failed: {e}")
        # Chain the original exception (raise ... from e) so the real
        # cause is preserved in tracebacks instead of being masked.
        raise HTTPException(status_code=500, detail=str(e)) from e