media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,263 @@
1
+ """Voice Activity Detection using WebRTC VAD.
2
+
3
+ Fast detection of speech presence in audio files.
4
+ Used to skip Whisper transcription for silent/ambient clips.
5
+ """
6
+
7
+ import logging
8
+ import subprocess
9
+ import tempfile
10
+ import wave
11
+ from enum import StrEnum
12
+ from pathlib import Path
13
+
14
+ import webrtcvad # type: ignore[import-not-found]
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class AudioContent(StrEnum):
20
+ """Classification of audio content.
21
+
22
+ Simplified categories for UI display:
23
+ - NO_AUDIO: File has no audio track (images, some video files)
24
+ - SPEECH: Audio with speech detected (should run Whisper)
25
+ - AUDIO: Audio present but no speech (ambient/music/silent - skip Whisper)
26
+ - UNKNOWN: Could not determine (extraction failed)
27
+ """
28
+
29
+ NO_AUDIO = "no_audio"
30
+ SPEECH = "speech"
31
+ AUDIO = "audio" # Has audio but no speech (ambient, music, or silent)
32
+ UNKNOWN = "unknown"
33
+
34
+
35
def _extract_audio(video_path: str, output_path: str, sample_rate: int = 16000) -> bool:
    """Transcode the audio of a media file to a mono 16-bit PCM WAV via ffmpeg.

    Args:
        video_path: Path of the input media file.
        output_path: Path where the WAV file is written (overwritten if present).
        sample_rate: Output sample rate in Hz (16000 is what the VAD uses).

    Returns:
        True when ffmpeg exits with status 0. False when it exits non-zero,
        exceeds the 60 second timeout, or cannot be run at all.
    """
    ffmpeg_args = ["ffmpeg", "-i", video_path]
    ffmpeg_args += ["-vn"]  # discard any video stream
    ffmpeg_args += ["-acodec", "pcm_s16le"]  # 16-bit PCM samples
    ffmpeg_args += ["-ar", str(sample_rate)]  # resample
    ffmpeg_args += ["-ac", "1"]  # downmix to mono
    ffmpeg_args += ["-y", output_path]  # overwrite the output without asking

    try:
        completed = subprocess.run(ffmpeg_args, capture_output=True, timeout=60)
    except subprocess.TimeoutExpired:
        logger.warning(f"Audio extraction timed out for {video_path}")
        return False
    except Exception as e:
        logger.warning(f"Audio extraction failed for {video_path}: {e}")
        return False

    return completed.returncode == 0
74
+
75
+
76
+ def _read_wav_frames(
77
+ wav_path: str,
78
+ frame_duration_ms: int = 30,
79
+ max_duration_seconds: float = 120.0,
80
+ ) -> tuple[list[bytes], int, float]:
81
+ """Read WAV file and split into frames for VAD.
82
+
83
+ Args:
84
+ wav_path: Path to WAV file
85
+ frame_duration_ms: Frame duration in milliseconds (10, 20, or 30)
86
+ max_duration_seconds: Maximum audio duration to analyze
87
+
88
+ Returns:
89
+ Tuple of (frames, sample_rate, total_duration)
90
+ """
91
+ with wave.open(wav_path, "rb") as wf:
92
+ sample_rate = wf.getframerate()
93
+ n_channels = wf.getnchannels()
94
+ sample_width = wf.getsampwidth()
95
+
96
+ if sample_rate not in (8000, 16000, 32000, 48000):
97
+ raise ValueError(f"Unsupported sample rate: {sample_rate}")
98
+ if n_channels != 1:
99
+ raise ValueError(f"Expected mono audio, got {n_channels} channels")
100
+ if sample_width != 2:
101
+ raise ValueError(f"Expected 16-bit audio, got {sample_width * 8}-bit")
102
+
103
+ # Calculate frame size
104
+ frame_size = int(sample_rate * frame_duration_ms / 1000) * sample_width
105
+ max_frames = int(max_duration_seconds * 1000 / frame_duration_ms)
106
+
107
+ frames = []
108
+ total_samples = 0
109
+
110
+ while len(frames) < max_frames:
111
+ frame = wf.readframes(int(sample_rate * frame_duration_ms / 1000))
112
+ if len(frame) < frame_size:
113
+ break
114
+ frames.append(frame)
115
+ total_samples += int(sample_rate * frame_duration_ms / 1000)
116
+
117
+ total_duration = total_samples / sample_rate
118
+ return frames, sample_rate, total_duration
119
+
120
+
121
# Extensions that are run through ffmpeg before analysis.
_VIDEO_EXTENSIONS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".m4v", ".mxf"}
_AUDIO_EXTENSIONS = {".wav", ".mp3", ".aac", ".m4a", ".flac", ".ogg"}


def _vad_result(
    audio_content: AudioContent,
    speech_ratio: float = 0.0,
    speech_segments: list[tuple[float, float]] | None = None,
    total_duration: float = 0.0,
) -> dict:
    """Build the result dict returned by ``detect_voice_activity``."""
    return {
        "audio_content": str(audio_content),
        "speech_ratio": speech_ratio,
        "speech_segments": [] if speech_segments is None else speech_segments,
        "total_duration": total_duration,
    }


def _build_speech_segments(
    speech_flags: list[bool],
    frame_duration_ms: int,
    total_duration: float,
) -> list[tuple[float, float]]:
    """Merge runs of consecutive speech frames into (start, end) second pairs."""
    segments: list[tuple[float, float]] = []
    segment_start = None

    for index, is_speech in enumerate(speech_flags):
        time_sec = index * frame_duration_ms / 1000
        if is_speech and segment_start is None:
            segment_start = time_sec
        elif not is_speech and segment_start is not None:
            segments.append((segment_start, time_sec))
            segment_start = None

    # A run that reaches the last frame ends at the end of the analyzed audio.
    if segment_start is not None:
        segments.append((segment_start, total_duration))

    return segments


def detect_voice_activity(
    file_path: str,
    aggressiveness: int = 2,
    min_speech_duration: float = 0.5,
    sample_limit_seconds: float = 120.0,
) -> dict:
    """Detect voice activity in a video/audio file using WebRTC VAD.

    Files with a known video or audio extension are first converted to a
    temporary 16 kHz mono WAV with ffmpeg; any other file is read directly
    as a WAV file. The audio is then classified in 30 ms frames.

    Args:
        file_path: Path to video or audio file.
        aggressiveness: Mode passed to ``webrtcvad.Vad`` (0-3).
        min_speech_duration: Minimum seconds of speech frames required to
            classify the file as "speech".
        sample_limit_seconds: Maximum seconds to analyze (for long files).

    Returns:
        Dict with:
        - audio_content: AudioContent value as a string. This function only
          produces "speech", "audio" or "unknown".
        - speech_ratio: Fraction of analyzed frames classified as speech
          (0.0-1.0).
        - speech_segments: List of (start, end) tuples in seconds.
        - total_duration: Duration in seconds of the audio analyzed.
    """
    path = Path(file_path)
    if not path.exists():
        logger.error(f"File not found: {file_path}")
        return _vad_result(AudioContent.UNKNOWN)

    vad = webrtcvad.Vad(aggressiveness)
    frame_duration_ms = 30  # 30ms frames

    suffix = path.suffix.lower()
    # The temporary WAV is only needed until its frames are in memory.
    with tempfile.TemporaryDirectory() as tmpdir:
        if suffix in _VIDEO_EXTENSIONS or suffix in _AUDIO_EXTENSIONS:
            # Video and audio inputs take the same ffmpeg path; only the
            # wording of the failure message differs.
            action = "extract" if suffix in _VIDEO_EXTENSIONS else "convert"
            wav_path = str(Path(tmpdir) / "audio.wav")
            if not _extract_audio(file_path, wav_path):
                logger.warning(f"Could not {action} audio from {file_path}")
                return _vad_result(AudioContent.UNKNOWN)
        else:
            # Assume it's already a WAV file
            wav_path = file_path

        try:
            frames, sample_rate, total_duration = _read_wav_frames(
                wav_path,
                frame_duration_ms=frame_duration_ms,
                max_duration_seconds=sample_limit_seconds,
            )
        except Exception as e:
            logger.warning(f"Could not read audio from {wav_path}: {e}")
            return _vad_result(AudioContent.UNKNOWN)

    if not frames:
        logger.warning(f"No audio frames extracted from {file_path}")
        # Has audio track but empty/silent
        return _vad_result(AudioContent.AUDIO)

    # Classify each frame. A frame the VAD cannot process counts as
    # non-speech (best effort), but the failures are reported once.
    speech_flags: list[bool] = []
    failed_frames = 0
    for frame in frames:
        try:
            speech_flags.append(vad.is_speech(frame, sample_rate))
        except Exception:
            failed_frames += 1
            speech_flags.append(False)
    if failed_frames:
        logger.debug(f"VAD failed on {failed_frames} of {len(frames)} frames for {path.name}")

    speech_count = sum(speech_flags)
    speech_ratio = speech_count / len(speech_flags)
    total_speech_duration = speech_count * frame_duration_ms / 1000

    speech_segments = _build_speech_segments(speech_flags, frame_duration_ms, total_duration)

    # Speech needs both enough absolute time and more than 10% of the frames.
    if total_speech_duration >= min_speech_duration and speech_ratio > 0.1:
        audio_content = AudioContent.SPEECH
    else:
        # Audio present but no speech detected (silent, ambient, or music)
        audio_content = AudioContent.AUDIO

    logger.info(f"VAD result for {path.name}: {audio_content} " f"(speech_ratio={speech_ratio:.2%}, duration={total_speech_duration:.1f}s)")

    return _vad_result(
        audio_content,
        speech_ratio=round(speech_ratio, 3),
        speech_segments=speech_segments,
        total_duration=round(total_duration, 2),
    )
259
+
260
+
261
def unload_vad_model():
    """Do nothing: WebRTC VAD has no model that could be unloaded."""
    return None
media_engine/main.py ADDED
@@ -0,0 +1,68 @@
1
+ """FastAPI application for Media Engine."""
2
+
3
# Prevent fork crashes on macOS with the Hugging Face tokenizers library.
# The tokenizers library registers atfork handlers that panic when the process forks
# (e.g., to run ffmpeg via subprocess). This must be set BEFORE any other imports.
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# On macOS, use 'spawn' instead of 'fork' for multiprocessing to avoid crashes
# with libraries that aren't fork-safe (tokenizers, PyTorch, etc.)
import multiprocessing
import sys

if sys.platform == "darwin":
    try:
        multiprocessing.set_start_method("spawn", force=True)
    except RuntimeError:
        pass  # Already set

# Set up logging before any other imports
# ruff: noqa: E402 (imports after environment setup is intentional)
from media_engine.utils.logging import setup_logging

setup_logging()

# Create the FastAPI application
from media_engine.app import create_app

app = create_app()

# Re-export batch state for backward compatibility with tests.
# These were previously defined directly in main.py.
# ruff: noqa: F401 (re-exports are intentional)
from media_engine.batch import state as _batch_state
from media_engine.batch.models import JOB_TTL_SECONDS  # noqa: F401
from media_engine.batch.queue import (  # noqa: F401
    cleanup_expired_batch_jobs as _cleanup_expired_batch_jobs,
)
from media_engine.batch.state import (  # noqa: F401
    batch_jobs,
    batch_jobs_lock,
    batch_queue,
    batch_queue_lock,
)
47
# Expose ``batch_running`` as a live, assignable module attribute for tests.
#
# Python only consults a module-level ``__getattr__`` when normal attribute
# lookup fails, and it never consults a module-level ``__setattr__`` function.
# A plain ``batch_running`` global would therefore be a stale snapshot, and
# assigning to it would never reach the shared batch state. Giving the module
# a ``ModuleType`` subclass with a property makes both reads and writes go
# through ``_batch_state._batch_state["running"]``.
import types as _types


class _MainModule(_types.ModuleType):
    """Module type whose ``batch_running`` attribute proxies the batch state."""

    @property
    def batch_running(self):
        """Current value of ``_batch_state._batch_state["running"]``."""
        return _batch_state._batch_state["running"]

    @batch_running.setter
    def batch_running(self, value) -> None:
        _batch_state._batch_state["running"] = value


# Swap the class of this module object so the property above takes effect.
sys.modules[__name__].__class__ = _MainModule
63
+
64
+
65
# When this file is executed directly, serve ``app`` with uvicorn on port 8000.
if __name__ == "__main__":
    import uvicorn

    # NOTE(review): host "0.0.0.0" listens on every network interface —
    # confirm that is intended outside local development.
    uvicorn.run(app, host="0.0.0.0", port=8000)
media_engine/py.typed ADDED
File without changes
@@ -0,0 +1,15 @@
1
+ """API routers for Media Engine."""
2
+
3
+ from media_engine.routers.batch import router as batch_router
4
+ from media_engine.routers.health import router as health_router
5
+ from media_engine.routers.models import router as models_router
6
+ from media_engine.routers.settings import router as settings_router
7
+ from media_engine.routers.utils import router as utils_router
8
+
9
# Public names of the routers package: one router object per endpoint module.
__all__ = [
    "batch_router",
    "health_router",
    "models_router",
    "settings_router",
    "utils_router",
]
@@ -0,0 +1,78 @@
1
+ """Batch processing endpoints."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import threading
6
+ import uuid
7
+ from pathlib import Path
8
+
9
+ from fastapi import APIRouter, HTTPException
10
+
11
+ from media_engine.batch.models import BatchJobStatus, BatchRequest
12
+ from media_engine.batch.processor import run_batch_job
13
+ from media_engine.batch.queue import (
14
+ create_batch_sync,
15
+ delete_batch_sync,
16
+ get_batch_sync,
17
+ )
18
+
19
+ router = APIRouter(tags=["batch"])
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
def _first_missing_file(files):
    """Return the first path in *files* that does not exist, or ``None``."""
    for file_path in files:
        if not Path(file_path).exists():
            return file_path
    return None


@router.post("/batch")
async def create_batch(request: BatchRequest) -> dict[str, str]:
    """Create a new batch extraction job (memory-efficient extractor-first processing).

    Only one batch runs at a time. If a batch is already running, new batches
    are queued and will start automatically when the current batch finishes.

    Args:
        request: Batch request; every path in ``request.files`` must exist.

    Returns:
        Dict with the generated ``batch_id``.

    Raises:
        HTTPException: 404 for the first listed file that does not exist.
    """
    # Filesystem checks can block (many files, slow or network storage), so
    # they run in the thread pool like the lock operations below.
    missing = await asyncio.to_thread(_first_missing_file, request.files)
    if missing is not None:
        raise HTTPException(status_code=404, detail=f"File not found: {missing}")

    batch_id = str(uuid.uuid4())[:8]

    # Run lock operations in thread pool to avoid blocking event loop
    should_start, _, _ = await asyncio.to_thread(create_batch_sync, batch_id, request)

    # Start immediately if no batch running
    if should_start:
        thread = threading.Thread(target=run_batch_job, args=(batch_id, request))
        thread.start()

    return {"batch_id": batch_id}
46
+
47
+
48
@router.get("/batch/{batch_id}")
async def get_batch(batch_id: str, status_only: bool = False) -> BatchJobStatus:
    """Return the status and results of a batch job.

    Args:
        batch_id: Identifier of the batch to look up.
        status_only: When True, ask for status/progress only, without the
            large result data. Intended for progress polling, so that large
            embeddings/transcripts are not transferred on every request.

    Raises:
        HTTPException: 404 when no batch with this id is known.
    """
    # The lookup takes a lock, so it runs in the thread pool rather than on
    # the event loop.
    job_status = await asyncio.to_thread(get_batch_sync, batch_id, status_only)
    if job_status is not None:
        return job_status
    raise HTTPException(status_code=404, detail=f"Batch not found: {batch_id}")
62
+
63
+
64
@router.delete("/batch/{batch_id}")
async def delete_batch(batch_id: str) -> dict[str, str]:
    """Delete a batch job and free its memory.

    A job may be deleted at any time. A queued batch is removed from the
    queue. For a running batch, processing is not stopped; only its status
    tracking is removed.

    Raises:
        HTTPException: 404 when no batch with this id is known.
    """
    # The deletion takes a lock, so it runs in the thread pool rather than on
    # the event loop.
    was_found, _unused = await asyncio.to_thread(delete_batch_sync, batch_id)
    if was_found:
        logger.info(f"Deleted batch job {batch_id}")
        return {"status": "deleted", "batch_id": batch_id}
    raise HTTPException(status_code=404, detail=f"Batch not found: {batch_id}")
@@ -0,0 +1,93 @@
1
+ """Health and monitoring endpoints."""
2
+
3
+ import logging
4
+ import os
5
+ import subprocess
6
+ from typing import Any
7
+
8
+ from fastapi import APIRouter, HTTPException
9
+
10
+ from media_engine import __version__
11
+ from media_engine.config import get_settings, get_vram_summary
12
+ from media_engine.schemas import HealthResponse
13
+
14
+ router = APIRouter(tags=["health"])
15
+ logger = logging.getLogger(__name__)
16
+
17
+ LOG_FILE = "/tmp/media_engine.log"
18
+
19
+
20
@router.get("/health", response_model=HealthResponse)
async def health():
    """Report that the service is up, with its package and API versions."""
    api_version = get_settings().api_version
    return HealthResponse(status="ok", version=__version__, api_version=api_version)
29
+
30
+
31
@router.get("/logs")
async def get_logs(
    lines: int = 100,
    level: str | None = None,
) -> dict[str, Any]:
    """Get recent log entries for debugging.

    Args:
        lines: Number of lines to return (default 100, clamped to 0-1000).
        level: Filter by log level (DEBUG, INFO, WARNING, ERROR). Matching is
            case-insensitive on the argument and looks for the level name
            surrounded by spaces in each line.

    Returns:
        Dict with the log lines, their count, and the log file path. An empty
        result is returned when the log file does not exist.

    Raises:
        HTTPException: 500 when ``tail`` times out, cannot be run, or exits
            with a non-zero status.
    """
    # Clamp to 0-1000: a negative count would change the meaning of both the
    # ``tail -n`` argument and the slice taken after filtering.
    lines = max(0, min(lines, 1000))

    if not os.path.exists(LOG_FILE):
        return {"lines": [], "total": 0, "returned": 0, "file": LOG_FILE}

    # Use tail to efficiently read last N lines without loading entire file.
    # Read more lines if filtering by level (we'll filter down after).
    read_lines = lines * 10 if level else lines

    # Only the subprocess call is guarded, so the explicit "tail failed"
    # error below reaches the client as written instead of being caught and
    # re-wrapped by the generic handler.
    try:
        result = subprocess.run(
            ["tail", "-n", str(read_lines), LOG_FILE],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except subprocess.TimeoutExpired:
        raise HTTPException(status_code=500, detail="Timeout reading logs") from None
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to read logs: {e}") from e

    if result.returncode != 0:
        raise HTTPException(status_code=500, detail=f"tail failed: {result.stderr}")

    all_lines = result.stdout.splitlines()

    # Filter by level if specified
    if level:
        level_upper = level.upper()
        all_lines = [line for line in all_lines if f" {level_upper} " in line]
        # Take only the requested number after filtering; ``[-0:]`` would
        # return the whole list, so zero is handled explicitly.
        all_lines = all_lines[-lines:] if lines else []

    return {
        "lines": all_lines,
        "total": len(all_lines),  # Note: this is approximate when using tail
        "returned": len(all_lines),
        "file": LOG_FILE,
    }
84
+
85
+
86
@router.get("/hardware")
async def hardware():
    """Return the hardware summary produced by ``get_vram_summary()``.

    Per the original description, this covers the available GPU/VRAM and the
    models that will be used with the current "auto" settings.
    """
    summary = get_vram_summary()
    return summary