PyPI - media-engine - Versions diffs - 0.1.0__py3-none-any.whl - Mend

media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

cli/clip.py +79 -0
cli/faces.py +91 -0
cli/metadata.py +68 -0
cli/motion.py +77 -0
cli/objects.py +94 -0
cli/ocr.py +93 -0
cli/scenes.py +57 -0
cli/telemetry.py +65 -0
cli/transcript.py +76 -0
media_engine/__init__.py +7 -0
media_engine/_version.py +34 -0
media_engine/app.py +80 -0
media_engine/batch/__init__.py +56 -0
media_engine/batch/models.py +99 -0
media_engine/batch/processor.py +1131 -0
media_engine/batch/queue.py +232 -0
media_engine/batch/state.py +30 -0
media_engine/batch/timing.py +321 -0
media_engine/cli.py +17 -0
media_engine/config.py +674 -0
media_engine/extractors/__init__.py +75 -0
media_engine/extractors/clip.py +401 -0
media_engine/extractors/faces.py +459 -0
media_engine/extractors/frame_buffer.py +351 -0
media_engine/extractors/frames.py +402 -0
media_engine/extractors/metadata/__init__.py +127 -0
media_engine/extractors/metadata/apple.py +169 -0
media_engine/extractors/metadata/arri.py +118 -0
media_engine/extractors/metadata/avchd.py +208 -0
media_engine/extractors/metadata/avchd_gps.py +270 -0
media_engine/extractors/metadata/base.py +688 -0
media_engine/extractors/metadata/blackmagic.py +139 -0
media_engine/extractors/metadata/camera_360.py +276 -0
media_engine/extractors/metadata/canon.py +290 -0
media_engine/extractors/metadata/dji.py +371 -0
media_engine/extractors/metadata/dv.py +121 -0
media_engine/extractors/metadata/ffmpeg.py +76 -0
media_engine/extractors/metadata/generic.py +119 -0
media_engine/extractors/metadata/gopro.py +256 -0
media_engine/extractors/metadata/red.py +305 -0
media_engine/extractors/metadata/registry.py +114 -0
media_engine/extractors/metadata/sony.py +442 -0
media_engine/extractors/metadata/tesla.py +157 -0
media_engine/extractors/motion.py +765 -0
media_engine/extractors/objects.py +245 -0
media_engine/extractors/objects_qwen.py +754 -0
media_engine/extractors/ocr.py +268 -0
media_engine/extractors/scenes.py +82 -0
media_engine/extractors/shot_type.py +217 -0
media_engine/extractors/telemetry.py +262 -0
media_engine/extractors/transcribe.py +579 -0
media_engine/extractors/translate.py +121 -0
media_engine/extractors/vad.py +263 -0
media_engine/main.py +68 -0
media_engine/py.typed +0 -0
media_engine/routers/__init__.py +15 -0
media_engine/routers/batch.py +78 -0
media_engine/routers/health.py +93 -0
media_engine/routers/models.py +211 -0
media_engine/routers/settings.py +87 -0
media_engine/routers/utils.py +135 -0
media_engine/schemas.py +581 -0
media_engine/utils/__init__.py +5 -0
media_engine/utils/logging.py +54 -0
media_engine/utils/memory.py +49 -0
media_engine-0.1.0.dist-info/METADATA +276 -0
media_engine-0.1.0.dist-info/RECORD +70 -0
media_engine-0.1.0.dist-info/WHEEL +4 -0
media_engine-0.1.0.dist-info/entry_points.txt +11 -0
media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0

media_engine/extractors/vad.py ADDED Viewed

@@ -0,0 +1,263 @@
+"""Voice Activity Detection using WebRTC VAD.
+Fast detection of speech presence in audio files.
+Used to skip Whisper transcription for silent/ambient clips.
+"""
+import logging
+import subprocess
+import tempfile
+import wave
+from enum import StrEnum
+from pathlib import Path
+import webrtcvad  # type: ignore[import-not-found]
+logger = logging.getLogger(__name__)
+class AudioContent(StrEnum):
+    """Classification of audio content.
+    Simplified categories for UI display:
+    - NO_AUDIO: File has no audio track (images, some video files)
+    - SPEECH: Audio with speech detected (should run Whisper)
+    - AUDIO: Audio present but no speech (ambient/music/silent - skip Whisper)
+    - UNKNOWN: Could not determine (extraction failed)
+    """
+    NO_AUDIO = "no_audio"
+    SPEECH = "speech"
+    AUDIO = "audio"  # Has audio but no speech (ambient, music, or silent)
+    UNKNOWN = "unknown"
+def _extract_audio(video_path: str, output_path: str, sample_rate: int = 16000) -> bool:
+    """Extract audio from video file using ffmpeg.
+    Args:
+        video_path: Path to video file
+        output_path: Path for extracted audio (WAV)
+        sample_rate: Target sample rate (16000 for VAD)
+    Returns:
+        True if extraction succeeded
+    """
+    cmd = [
+        "ffmpeg",
+        "-i",
+        video_path,
+        "-vn",  # No video
+        "-acodec",
+        "pcm_s16le",  # 16-bit PCM
+        "-ar",
+        str(sample_rate),  # Sample rate
+        "-ac",
+        "1",  # Mono
+        "-y",  # Overwrite
+        output_path,
+    ]
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            timeout=60,
+        )
+        return result.returncode == 0
+    except subprocess.TimeoutExpired:
+        logger.warning(f"Audio extraction timed out for {video_path}")
+        return False
+    except Exception as e:
+        logger.warning(f"Audio extraction failed for {video_path}: {e}")
+        return False
+def _read_wav_frames(
+    wav_path: str,
+    frame_duration_ms: int = 30,
+    max_duration_seconds: float = 120.0,
+) -> tuple[list[bytes], int, float]:
+    """Read WAV file and split into frames for VAD.
+    Args:
+        wav_path: Path to WAV file
+        frame_duration_ms: Frame duration in milliseconds (10, 20, or 30)
+        max_duration_seconds: Maximum audio duration to analyze
+    Returns:
+        Tuple of (frames, sample_rate, total_duration)
+    """
+    with wave.open(wav_path, "rb") as wf:
+        sample_rate = wf.getframerate()
+        n_channels = wf.getnchannels()
+        sample_width = wf.getsampwidth()
+        if sample_rate not in (8000, 16000, 32000, 48000):
+            raise ValueError(f"Unsupported sample rate: {sample_rate}")
+        if n_channels != 1:
+            raise ValueError(f"Expected mono audio, got {n_channels} channels")
+        if sample_width != 2:
+            raise ValueError(f"Expected 16-bit audio, got {sample_width * 8}-bit")
+        # Calculate frame size
+        frame_size = int(sample_rate * frame_duration_ms / 1000) * sample_width
+        max_frames = int(max_duration_seconds * 1000 / frame_duration_ms)
+        frames = []
+        total_samples = 0
+        while len(frames) < max_frames:
+            frame = wf.readframes(int(sample_rate * frame_duration_ms / 1000))
+            if len(frame) < frame_size:
+                break
+            frames.append(frame)
+            total_samples += int(sample_rate * frame_duration_ms / 1000)
+        total_duration = total_samples / sample_rate
+        return frames, sample_rate, total_duration
+def detect_voice_activity(
+    file_path: str,
+    aggressiveness: int = 2,
+    min_speech_duration: float = 0.5,
+    sample_limit_seconds: float = 120.0,
+) -> dict:
+    """Detect voice activity in a video/audio file using WebRTC VAD.
+    Args:
+        file_path: Path to video or audio file
+        aggressiveness: VAD aggressiveness (0-3, higher = less sensitive to speech)
+        min_speech_duration: Minimum seconds of speech to classify as "speech"
+        sample_limit_seconds: Maximum seconds to analyze (for long files)
+    Returns:
+        Dict with:
+        - audio_content: AudioContent classification
+        - speech_ratio: Percentage of audio that is speech (0.0-1.0)
+        - speech_segments: List of (start, end) tuples for speech
+        - total_duration: Total audio duration analyzed
+    """
+    path = Path(file_path)
+    if not path.exists():
+        logger.error(f"File not found: {file_path}")
+        return {
+            "audio_content": str(AudioContent.UNKNOWN),
+            "speech_ratio": 0.0,
+            "speech_segments": [],
+            "total_duration": 0.0,
+        }
+    # Create VAD instance
+    vad = webrtcvad.Vad(aggressiveness)
+    frame_duration_ms = 30  # 30ms frames
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Check if it's a video file that needs audio extraction
+        video_extensions = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".m4v", ".mxf"}
+        audio_extensions = {".wav", ".mp3", ".aac", ".m4a", ".flac", ".ogg"}
+        if path.suffix.lower() in video_extensions:
+            audio_path = Path(tmpdir) / "audio.wav"
+            if not _extract_audio(file_path, str(audio_path)):
+                logger.warning(f"Could not extract audio from {file_path}")
+                return {
+                    "audio_content": str(AudioContent.UNKNOWN),
+                    "speech_ratio": 0.0,
+                    "speech_segments": [],
+                    "total_duration": 0.0,
+                }
+            wav_path = str(audio_path)
+        elif path.suffix.lower() in audio_extensions:
+            # Convert to WAV format for webrtcvad
+            audio_path = Path(tmpdir) / "audio.wav"
+            if not _extract_audio(file_path, str(audio_path)):
+                logger.warning(f"Could not convert audio from {file_path}")
+                return {
+                    "audio_content": str(AudioContent.UNKNOWN),
+                    "speech_ratio": 0.0,
+                    "speech_segments": [],
+                    "total_duration": 0.0,
+                }
+            wav_path = str(audio_path)
+        else:
+            # Assume it's already a WAV file
+            wav_path = file_path
+        # Read audio frames
+        try:
+            frames, sample_rate, total_duration = _read_wav_frames(
+                wav_path,
+                frame_duration_ms=frame_duration_ms,
+                max_duration_seconds=sample_limit_seconds,
+            )
+        except Exception as e:
+            logger.warning(f"Could not read audio from {wav_path}: {e}")
+            return {
+                "audio_content": str(AudioContent.UNKNOWN),
+                "speech_ratio": 0.0,
+                "speech_segments": [],
+                "total_duration": 0.0,
+            }
+        if not frames:
+            logger.warning(f"No audio frames extracted from {file_path}")
+            return {
+                "audio_content": str(AudioContent.AUDIO),  # Has audio track but empty/silent
+                "speech_ratio": 0.0,
+                "speech_segments": [],
+                "total_duration": 0.0,
+            }
+        # Analyze each frame
+        speech_frames = []
+        for frame in frames:
+            try:
+                is_speech = vad.is_speech(frame, sample_rate)
+                speech_frames.append(is_speech)
+            except Exception:
+                speech_frames.append(False)
+    # Calculate speech statistics
+    speech_count = sum(speech_frames)
+    total_frames = len(speech_frames)
+    speech_ratio = speech_count / total_frames if total_frames > 0 else 0.0
+    total_speech_duration = speech_count * frame_duration_ms / 1000
+    # Build speech segments (consecutive speech frames)
+    speech_segments = []
+    segment_start = None
+    for i, is_speech in enumerate(speech_frames):
+        time_sec = i * frame_duration_ms / 1000
+        if is_speech and segment_start is None:
+            segment_start = time_sec
+        elif not is_speech and segment_start is not None:
+            speech_segments.append((segment_start, time_sec))
+            segment_start = None
+    # Close final segment if needed
+    if segment_start is not None:
+        speech_segments.append((segment_start, total_duration))
+    # Classify audio content
+    if total_speech_duration >= min_speech_duration and speech_ratio > 0.1:
+        audio_content = AudioContent.SPEECH
+    else:
+        # Audio present but no speech detected (silent, ambient, or music)
+        audio_content = AudioContent.AUDIO
+    logger.info(f"VAD result for {path.name}: {audio_content} " f"(speech_ratio={speech_ratio:.2%}, duration={total_speech_duration:.1f}s)")
+    return {
+        "audio_content": str(audio_content),
+        "speech_ratio": round(speech_ratio, 3),
+        "speech_segments": speech_segments,
+        "total_duration": round(total_duration, 2),
+    }
+def unload_vad_model():
+    """No-op for WebRTC VAD (no model to unload)."""
+    pass

media_engine/main.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""FastAPI application for Media Engine."""
+# NOTE: statement order in this module is deliberate. The environment variable,
+# the multiprocessing start method and logging are configured before the
+# application imports further down, so do not regroup the imports.
+# Prevent fork crashes on macOS with Hugging Face tokenizers library.
+# The tokenizers library registers atfork handlers that panic when the process forks
+# (e.g., to run ffmpeg via subprocess). This must be set BEFORE any imports.
+import os
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# On macOS, use 'spawn' instead of 'fork' for multiprocessing to avoid crashes
+# with libraries that aren't fork-safe (tokenizers, PyTorch, etc.)
+import multiprocessing
+import sys
+if sys.platform == "darwin":
+    try:
+        multiprocessing.set_start_method("spawn", force=True)
+    except RuntimeError:
+        pass  # Already set
+# Setup logging before any other imports
+# ruff: noqa: E402 (imports after environment setup is intentional)
+from media_engine.utils.logging import setup_logging
+setup_logging()
+# Create the FastAPI application.
+# This runs at import time, so importing this module builds the app object.
+from media_engine.app import create_app
+app = create_app()
+# Re-export batch state for backward compatibility with tests
+# These were previously defined directly in main.py
+# ruff: noqa: F401 (re-exports are intentional)
+from media_engine.batch import state as _batch_state
+from media_engine.batch.models import JOB_TTL_SECONDS  # noqa: F401
+from media_engine.batch.queue import (  # noqa: F401
+    cleanup_expired_batch_jobs as _cleanup_expired_batch_jobs,
+)
+from media_engine.batch.state import (  # noqa: F401
+    batch_jobs,
+    batch_jobs_lock,
+    batch_queue,
+    batch_queue_lock,
+)
+batch_running = _batch_state._batch_state["running"]
+# Make batch_running assignable at module level for tests
+# ruff: noqa: N807 (module-level __getattr__ and __setattr__ are valid Python)
+def __getattr__(name: str):  # noqa: N807
+    if name == "batch_running":
+        return _batch_state._batch_state["running"]
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+def __setattr__(name: str, value):  # noqa: N807
+    if name == "batch_running":
+        _batch_state._batch_state["running"] = value
+    else:
+        globals()[name] = value
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

media_engine/py.typed ADDED Viewed

File without changes

media_engine/routers/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""API routers for Media Engine."""
+from media_engine.routers.batch import router as batch_router
+from media_engine.routers.health import router as health_router
+from media_engine.routers.models import router as models_router
+from media_engine.routers.settings import router as settings_router
+from media_engine.routers.utils import router as utils_router
+# Public names of this package: the router object of each submodule,
+# re-exported under a "<module>_router" alias by the imports above.
+__all__ = [
+    "batch_router",
+    "health_router",
+    "models_router",
+    "settings_router",
+    "utils_router",
+]

media_engine/routers/batch.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""Batch processing endpoints."""
+import asyncio
+import logging
+import threading
+import uuid
+from pathlib import Path
+from fastapi import APIRouter, HTTPException
+from media_engine.batch.models import BatchJobStatus, BatchRequest
+from media_engine.batch.processor import run_batch_job
+from media_engine.batch.queue import (
+    create_batch_sync,
+    delete_batch_sync,
+    get_batch_sync,
+)
+router = APIRouter(tags=["batch"])
+logger = logging.getLogger(__name__)
+@router.post("/batch")
+async def create_batch(request: BatchRequest) -> dict[str, str]:
+    """Create a new batch extraction job (memory-efficient extractor-first processing).
+    Only one batch runs at a time. If a batch is already running, new batches
+    are queued and will start automatically when the current batch finishes.
+    """
+    # Validate all files exist
+    for file_path in request.files:
+        if not Path(file_path).exists():
+            raise HTTPException(status_code=404, detail=f"File not found: {file_path}")
+    batch_id = str(uuid.uuid4())[:8]
+    # Run lock operations in thread pool to avoid blocking event loop
+    should_start, _, _ = await asyncio.to_thread(create_batch_sync, batch_id, request)
+    # Start immediately if no batch running
+    if should_start:
+        thread = threading.Thread(target=run_batch_job, args=(batch_id, request))
+        thread.start()
+    return {"batch_id": batch_id}
+@router.get("/batch/{batch_id}")
+async def get_batch(batch_id: str, status_only: bool = False) -> BatchJobStatus:
+    """Get batch job status and results.
+    Args:
+        batch_id: The batch ID to look up
+        status_only: If True, return only status/progress without large result data.
+            Use this for polling progress to avoid transferring large embeddings/transcripts.
+    """
+    # Run lock acquisition in thread pool to avoid blocking event loop
+    result = await asyncio.to_thread(get_batch_sync, batch_id, status_only)
+    if result is None:
+        raise HTTPException(status_code=404, detail=f"Batch not found: {batch_id}")
+    return result
+@router.delete("/batch/{batch_id}")
+async def delete_batch(batch_id: str) -> dict[str, str]:
+    """Delete a batch job and free its memory.
+    Jobs can be deleted at any time. If the batch is queued, it will be
+    removed from the queue. If running, deletion will not stop processing
+    - it will just remove the status tracking.
+    """
+    # Run lock acquisition in thread pool to avoid blocking event loop
+    found, _ = await asyncio.to_thread(delete_batch_sync, batch_id)
+    if not found:
+        raise HTTPException(status_code=404, detail=f"Batch not found: {batch_id}")
+    logger.info(f"Deleted batch job {batch_id}")
+    return {"status": "deleted", "batch_id": batch_id}

media_engine/routers/health.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""Health and monitoring endpoints."""
+import logging
+import os
+import subprocess
+from typing import Any
+from fastapi import APIRouter, HTTPException
+from media_engine import __version__
+from media_engine.config import get_settings, get_vram_summary
+from media_engine.schemas import HealthResponse
+router = APIRouter(tags=["health"])
+logger = logging.getLogger(__name__)
+LOG_FILE = "/tmp/media_engine.log"
+@router.get("/health", response_model=HealthResponse)
+async def health():
+    """Health check endpoint."""
+    settings = get_settings()
+    return HealthResponse(
+        status="ok",
+        version=__version__,
+        api_version=settings.api_version,
+    )
+@router.get("/logs")
+async def get_logs(
+    lines: int = 100,
+    level: str | None = None,
+) -> dict[str, Any]:
+    """Get recent log entries for debugging.
+    Args:
+        lines: Number of lines to return (default 100, max 1000)
+        level: Filter by log level (DEBUG, INFO, WARNING, ERROR)
+    Returns:
+        Dict with log lines and metadata
+    """
+    lines = min(lines, 1000)  # Cap at 1000 lines
+    if not os.path.exists(LOG_FILE):
+        return {"lines": [], "total": 0, "returned": 0, "file": LOG_FILE}
+    try:
+        # Use tail to efficiently read last N lines without loading entire file
+        # Read more lines if filtering by level (we'll filter down after)
+        read_lines = lines * 10 if level else lines
+        result = subprocess.run(
+            ["tail", "-n", str(read_lines), LOG_FILE],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+        if result.returncode != 0:
+            raise HTTPException(status_code=500, detail=f"tail failed: {result.stderr}")
+        all_lines = result.stdout.splitlines()
+        # Filter by level if specified
+        if level:
+            level_upper = level.upper()
+            all_lines = [line for line in all_lines if f" {level_upper} " in line]
+            # Take only requested number after filtering
+            all_lines = all_lines[-lines:]
+        return {
+            "lines": all_lines,
+            "total": len(all_lines),  # Note: this is approximate when using tail
+            "returned": len(all_lines),
+            "file": LOG_FILE,
+        }
+    except subprocess.TimeoutExpired:
+        raise HTTPException(status_code=500, detail="Timeout reading logs")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to read logs: {e}")
+@router.get("/hardware")
+async def hardware():
+    """Get hardware capabilities and auto-selected models.
+    Returns information about available GPU/VRAM and which models
+    will be used with the current "auto" settings.
+    """
+    return get_vram_summary()