media-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/clip.py +79 -0
- cli/faces.py +91 -0
- cli/metadata.py +68 -0
- cli/motion.py +77 -0
- cli/objects.py +94 -0
- cli/ocr.py +93 -0
- cli/scenes.py +57 -0
- cli/telemetry.py +65 -0
- cli/transcript.py +76 -0
- media_engine/__init__.py +7 -0
- media_engine/_version.py +34 -0
- media_engine/app.py +80 -0
- media_engine/batch/__init__.py +56 -0
- media_engine/batch/models.py +99 -0
- media_engine/batch/processor.py +1131 -0
- media_engine/batch/queue.py +232 -0
- media_engine/batch/state.py +30 -0
- media_engine/batch/timing.py +321 -0
- media_engine/cli.py +17 -0
- media_engine/config.py +674 -0
- media_engine/extractors/__init__.py +75 -0
- media_engine/extractors/clip.py +401 -0
- media_engine/extractors/faces.py +459 -0
- media_engine/extractors/frame_buffer.py +351 -0
- media_engine/extractors/frames.py +402 -0
- media_engine/extractors/metadata/__init__.py +127 -0
- media_engine/extractors/metadata/apple.py +169 -0
- media_engine/extractors/metadata/arri.py +118 -0
- media_engine/extractors/metadata/avchd.py +208 -0
- media_engine/extractors/metadata/avchd_gps.py +270 -0
- media_engine/extractors/metadata/base.py +688 -0
- media_engine/extractors/metadata/blackmagic.py +139 -0
- media_engine/extractors/metadata/camera_360.py +276 -0
- media_engine/extractors/metadata/canon.py +290 -0
- media_engine/extractors/metadata/dji.py +371 -0
- media_engine/extractors/metadata/dv.py +121 -0
- media_engine/extractors/metadata/ffmpeg.py +76 -0
- media_engine/extractors/metadata/generic.py +119 -0
- media_engine/extractors/metadata/gopro.py +256 -0
- media_engine/extractors/metadata/red.py +305 -0
- media_engine/extractors/metadata/registry.py +114 -0
- media_engine/extractors/metadata/sony.py +442 -0
- media_engine/extractors/metadata/tesla.py +157 -0
- media_engine/extractors/motion.py +765 -0
- media_engine/extractors/objects.py +245 -0
- media_engine/extractors/objects_qwen.py +754 -0
- media_engine/extractors/ocr.py +268 -0
- media_engine/extractors/scenes.py +82 -0
- media_engine/extractors/shot_type.py +217 -0
- media_engine/extractors/telemetry.py +262 -0
- media_engine/extractors/transcribe.py +579 -0
- media_engine/extractors/translate.py +121 -0
- media_engine/extractors/vad.py +263 -0
- media_engine/main.py +68 -0
- media_engine/py.typed +0 -0
- media_engine/routers/__init__.py +15 -0
- media_engine/routers/batch.py +78 -0
- media_engine/routers/health.py +93 -0
- media_engine/routers/models.py +211 -0
- media_engine/routers/settings.py +87 -0
- media_engine/routers/utils.py +135 -0
- media_engine/schemas.py +581 -0
- media_engine/utils/__init__.py +5 -0
- media_engine/utils/logging.py +54 -0
- media_engine/utils/memory.py +49 -0
- media_engine-0.1.0.dist-info/METADATA +276 -0
- media_engine-0.1.0.dist-info/RECORD +70 -0
- media_engine-0.1.0.dist-info/WHEEL +4 -0
- media_engine-0.1.0.dist-info/entry_points.txt +11 -0
- media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
cli/transcript.py
ADDED
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+"""Transcribe audio from video file."""
+
+import argparse
+import json
+import logging
+import sys
+import time
+
+from media_engine.extractors import extract_transcript
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Transcribe audio from video file")
+    parser.add_argument("file", help="Path to video file")
+    parser.add_argument(
+        "--model",
+        type=str,
+        default="auto",
+        choices=["auto", "tiny", "small", "medium", "large-v3"],
+        help="Whisper model size (default: auto)",
+    )
+    parser.add_argument(
+        "--language",
+        type=str,
+        default=None,
+        help="Force language code (e.g., 'en', 'no')",
+    )
+    parser.add_argument(
+        "--fallback-language",
+        type=str,
+        default="en",
+        help="Fallback language for short clips (default: en)",
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    parser.add_argument("--json", action="store_true", help="Output as JSON")
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.WARNING)
+
+    try:
+        start_time = time.perf_counter()
+        result = extract_transcript(
+            args.file,
+            model=args.model,
+            language=args.language,
+            fallback_language=args.fallback_language,
+        )
+        elapsed = time.perf_counter() - start_time
+
+        if args.json:
+            output = result.model_dump()
+            output["elapsed_seconds"] = round(elapsed, 2)
+            print(json.dumps(output, indent=2, default=str))
+        else:
+            print(f"File: {args.file}")
+            print(f"Language: {result.language} (confidence: {result.confidence:.2f})")
+            print(f"Segments: {len(result.segments)}")
+            print()
+            for seg in result.segments:
+                speaker = f"[{seg.speaker}] " if seg.speaker else ""
+                print(f"  [{seg.start:.2f}s - {seg.end:.2f}s] {speaker}{seg.text}")
+            print()
+            print(f"Elapsed: {elapsed:.2f}s")
+
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
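The CLI above is a thin wrapper around the extractor it imports. As a minimal sketch of programmatic use, assuming extract_transcript returns the Pydantic result consumed above (.language, .confidence, and .segments with .start/.end/.speaker/.text); the input path and model choice are placeholders:

# Illustrative only: drives the same extractor that cli/transcript.py wraps.
from media_engine.extractors import extract_transcript

result = extract_transcript(
    "clip.mp4",              # hypothetical input path
    model="small",           # one of: auto, tiny, small, medium, large-v3
    language=None,           # let Whisper auto-detect
    fallback_language="en",  # used for short clips, per the CLI default
)
for seg in result.segments:
    print(f"{seg.start:.2f}-{seg.end:.2f}: {seg.text}")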
media_engine/__init__.py
ADDED
media_engine/_version.py
ADDED
@@ -0,0 +1,34 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
+else:
+    VERSION_TUPLE = object
+    COMMIT_ID = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
+
+__version__ = version = '0.1.0'
+__version_tuple__ = version_tuple = (0, 1, 0)
+
+__commit_id__ = commit_id = None
media_engine/app.py
ADDED
@@ -0,0 +1,80 @@
+"""FastAPI app factory for Media Engine."""
+
+import atexit
+import logging
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+from media_engine import __version__
+from media_engine.batch.timing import save_timing_history
+from media_engine.extractors import (
+    shutdown_ffprobe_pool,
+    unload_clip_model,
+    unload_face_model,
+    unload_ocr_model,
+    unload_qwen_model,
+    unload_vad_model,
+    unload_whisper_model,
+    unload_yolo_model,
+)
+from media_engine.routers import (
+    batch_router,
+    health_router,
+    models_router,
+    settings_router,
+    utils_router,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def cleanup_resources() -> None:
+    """Clean up all resources.
+
+    Note: This runs during Python shutdown via atexit, so we must be careful
+    not to import new modules or use logging (file handlers may be closed).
+    """
+    try:
+        # Save timing history before shutdown
+        save_timing_history()
+        shutdown_ffprobe_pool()
+        unload_whisper_model()
+        unload_qwen_model()
+        unload_yolo_model()
+        unload_clip_model()
+        unload_ocr_model()
+        unload_face_model()
+        unload_vad_model()
+    except Exception:
+        pass  # Suppress errors during shutdown
+
+
+def create_app() -> FastAPI:
+    """Create and configure the FastAPI application."""
+    app = FastAPI(
+        title="Media Engine",
+        description="AI-powered video extraction API",
+        version=__version__,
+    )
+
+    # Add CORS middleware
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # Include routers
+    app.include_router(batch_router)
+    app.include_router(health_router)
+    app.include_router(settings_router)
+    app.include_router(models_router)
+    app.include_router(utils_router)
+
+    # Register cleanup on exit
+    atexit.register(cleanup_resources)
+
+    return app
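The factory above can be served by any ASGI server. A minimal sketch, assuming uvicorn; the package's actual launcher lives in media_engine/main.py and the entry_points.txt (not shown in this diff), so the host and port below are placeholders:

# Illustrative only: serving the app factory with uvicorn.
import uvicorn

from media_engine.app import create_app

app = create_app()

if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)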
media_engine/batch/__init__.py
ADDED
@@ -0,0 +1,56 @@
+"""Batch processing system for Polybos Media Engine."""
+
+from media_engine.batch.models import (
+    BatchFileStatus,
+    BatchJobStatus,
+    BatchRequest,
+    ExtractorTiming,
+    JobProgress,
+)
+from media_engine.batch.processor import run_batch_job
+from media_engine.batch.queue import (
+    cleanup_expired_batch_jobs,
+    start_next_batch,
+    update_queue_positions,
+)
+from media_engine.batch.state import (
+    batch_jobs,
+    batch_jobs_lock,
+    batch_queue,
+    batch_queue_lock,
+    is_batch_running,
+    set_batch_running,
+)
+from media_engine.batch.timing import (
+    calculate_queue_eta,
+    get_enabled_extractors_from_request,
+    load_timing_history,
+    save_timing_history,
+)
+
+__all__ = [
+    # Models
+    "BatchFileStatus",
+    "BatchJobStatus",
+    "BatchRequest",
+    "ExtractorTiming",
+    "JobProgress",
+    # State
+    "batch_jobs",
+    "batch_jobs_lock",
+    "batch_queue",
+    "batch_queue_lock",
+    "is_batch_running",
+    "set_batch_running",
+    # Queue
+    "cleanup_expired_batch_jobs",
+    "start_next_batch",
+    "update_queue_positions",
+    # Timing
+    "calculate_queue_eta",
+    "get_enabled_extractors_from_request",
+    "load_timing_history",
+    "save_timing_history",
+    # Processor
+    "run_batch_job",
+]
media_engine/batch/models.py
ADDED
@@ -0,0 +1,99 @@
+"""Pydantic models for batch processing."""
+
+from datetime import datetime
+from typing import Any
+
+from pydantic import BaseModel
+
+
+class JobProgress(BaseModel):
+    """Progress within current extraction step."""
+
+    message: str  # e.g., "Loading model...", "Processing frame 2/5"
+    current: int | None = None
+    total: int | None = None
+    # ETA tracking
+    stage_elapsed_seconds: float | None = None  # Time spent in current stage
+    eta_seconds: float | None = None  # Estimated seconds remaining for current stage
+    # Total ETA fields (for full batch/queue visibility)
+    total_eta_seconds: float | None = None  # Total time remaining for entire batch
+    queue_eta_seconds: float | None = None  # Total time remaining for all queued batches
+    queued_batches: int | None = None  # Number of batches waiting in queue
+
+
+# Job TTL for automatic cleanup (1 hour)
+JOB_TTL_SECONDS = 3600
+
+
+class BatchRequest(BaseModel):
+    """Request for batch extraction.
+
+    Model selection is configured via global settings (GET/PUT /settings).
+    This keeps batch requests simple and hardware config in one place.
+    """
+
+    files: list[str]
+    enable_metadata: bool = True
+    enable_vad: bool = False  # Voice Activity Detection
+    enable_scenes: bool = False
+    enable_transcript: bool = False
+    enable_faces: bool = False
+    enable_objects: bool = False  # YOLO object detection (fast, bounding boxes)
+    enable_visual: bool = False  # Qwen VLM scene descriptions (slower, richer)
+    enable_clip: bool = False
+    enable_ocr: bool = False
+    enable_motion: bool = False
+
+    # Context for Whisper
+    language: str | None = None  # Force specific language (ISO 639-1 code, e.g., "en", "no")
+    language_hints: list[str] | None = None  # Hints (currently unused by Whisper)
+    context_hint: str | None = None
+    # Context for Qwen VLM - per-file context mapping (file path -> context dict)
+    # Example: {"/path/video1.mp4": {"location": "Oslo"}, "/path/video2.mp4": {"location": "Bergen"}}
+    contexts: dict[str, dict[str, str]] | None = None
+    # Per-file timestamps for visual/VLM analysis (file path -> list of timestamps)
+    # Example: {"/path/video1.mp4": [10.0, 30.0], "/path/video2.mp4": [5.0, 15.0, 25.0]}
+    visual_timestamps: dict[str, list[float]] | None = None
+    # Optional LUT path for visual analysis (e.g., for log footage color correction)
+    # Applied to extracted frames before sending to Qwen
+    lut_path: str | None = None
+
+
+class BatchFileStatus(BaseModel):
+    """Status for a single file in a batch."""
+
+    file: str
+    filename: str
+    status: str  # pending, running, completed, failed
+    results: dict[str, Any] = {}
+    error: str | None = None
+    timings: dict[str, float] = {}  # extractor -> seconds
+    extractor_status: dict[str, str] = {}  # extractor -> pending/active/completed/failed/skipped
+
+
+class ExtractorTiming(BaseModel):
+    """Timing for a single extractor stage."""
+
+    extractor: str
+    started_at: datetime
+    completed_at: datetime | None = None
+    duration_seconds: float | None = None
+    files_processed: int = 0
+
+
+class BatchJobStatus(BaseModel):
+    """Status of a batch extraction job."""
+
+    batch_id: str
+    status: str  # queued, pending, running, completed, failed
+    queue_position: int | None = None  # Position in queue (1 = next to run), None if not queued
+    current_extractor: str | None = None
+    progress: JobProgress | None = None
+    files: list[BatchFileStatus] = []
+    created_at: datetime
+    completed_at: datetime | None = None
+    # Timing and resource metrics
+    extractor_timings: list[ExtractorTiming] = []
+    elapsed_seconds: float | None = None
+    memory_mb: int | None = None  # Current process memory
+    peak_memory_mb: int | None = None  # Peak process memory during batch
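To make the per-file mappings in BatchRequest concrete, a minimal sketch using only the fields defined above; the file paths, context values, and timestamps are invented for illustration:

# Illustrative only: paths, contexts, and timestamps are placeholders.
from media_engine.batch.models import BatchRequest

request = BatchRequest(
    files=["/media/video1.mp4", "/media/video2.mp4"],
    enable_transcript=True,
    enable_visual=True,
    language="no",
    # Per-file context for the Qwen VLM (file path -> context dict)
    contexts={"/media/video1.mp4": {"location": "Oslo"}},
    # Per-file timestamps for visual analysis (file path -> seconds)
    visual_timestamps={"/media/video1.mp4": [10.0, 30.0]},
)
print(request.model_dump_json(indent=2))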
|