media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,232 @@
1
+ """Queue management for batch processing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import threading
7
+ from datetime import datetime, timezone
8
+ from typing import TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING:
11
+ from media_engine.batch.models import BatchRequest
12
+
13
+ from media_engine.batch.models import JOB_TTL_SECONDS
14
+ from media_engine.batch.state import (
15
+ batch_jobs,
16
+ batch_jobs_lock,
17
+ batch_queue,
18
+ batch_queue_lock,
19
+ set_batch_running,
20
+ )
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def cleanup_expired_batch_jobs() -> int:
    """Drop completed/failed batch jobs whose TTL has elapsed.

    Returns:
        Number of batch jobs removed
    """
    cutoff_now = datetime.now(timezone.utc)

    def _is_expired(job) -> bool:
        # Only terminal jobs with a known completion time can expire.
        if job.status not in ("completed", "failed"):
            return False
        if job.completed_at is None:
            return False
        return (cutoff_now - job.completed_at).total_seconds() > JOB_TTL_SECONDS

    with batch_jobs_lock:
        stale_ids = [job_id for job_id, job in batch_jobs.items() if _is_expired(job)]
        for job_id in stale_ids:
            del batch_jobs[job_id]

    removed = len(stale_ids)
    if removed > 0:
        logger.info(f"Cleaned up {removed} expired batch jobs")

    return removed
48
+
49
+
50
def update_queue_positions() -> None:
    """Refresh queue_position (1-indexed) on every queued batch."""
    # Lock order matters: queue lock first, then jobs lock.
    with batch_queue_lock, batch_jobs_lock:
        position = 1
        for queued_id, _request in batch_queue:
            job = batch_jobs.get(queued_id)
            if job is not None:
                job.queue_position = position
            position += 1
57
+
58
+
59
def start_next_batch() -> None:
    """Start the next batch from the queue if one exists.

    Called when a batch completes or fails. Sets batch_running = False
    if no more batches in queue.
    """
    # Imported lazily to avoid a circular import with the processor module.
    from media_engine.batch.processor import run_batch_job

    with batch_queue_lock:
        if not batch_queue:
            set_batch_running(False)
            logger.info("Batch queue empty, no more batches to run")
            return

        # Pop the next batch from queue
        next_batch_id, next_request = batch_queue.pop(0)
        logger.info(f"Starting next batch from queue: {next_batch_id}")

    # Update queue positions for the remaining batches OUTSIDE the queue
    # lock: update_queue_positions() re-acquires batch_queue_lock, which is
    # a non-reentrant threading.Lock, so calling it while the lock is still
    # held would deadlock this thread.
    update_queue_positions()

    # Update batch status from queued to pending
    with batch_jobs_lock:
        if next_batch_id in batch_jobs:
            batch_jobs[next_batch_id].status = "pending"
            batch_jobs[next_batch_id].queue_position = None

    # Start the batch in a new thread
    thread = threading.Thread(target=run_batch_job, args=(next_batch_id, next_request))
    thread.start()
89
+
90
+
91
def create_batch_sync(batch_id: str, request: BatchRequest) -> tuple[bool, int | None, str]:
    """Synchronous helper to create batch (runs in thread pool).

    Registers the batch in ``batch_jobs`` and either claims the running
    slot (start immediately) or appends the batch to the wait queue.

    Args:
        batch_id: Identifier for the new batch.
        request: Batch request listing files and enabled extractors.

    Returns:
        (should_start, queue_position, status) where ``should_start`` tells
        the caller to launch the batch now, ``queue_position`` is the
        1-indexed position when queued (else None), and ``status`` is
        "pending" or "queued".
    """
    from pathlib import Path

    # Lazy imports avoid a circular import at module load time.
    # NOTE: set_batch_running is already imported at module level from
    # media_engine.batch.state, so the previous local re-import was a
    # redundant shadow and has been removed.
    from media_engine.batch.models import BatchFileStatus, BatchJobStatus
    from media_engine.batch.state import is_batch_running

    # Cleanup expired batch jobs
    cleanup_expired_batch_jobs()

    # Check if we should start immediately or queue
    with batch_queue_lock:
        should_start = not is_batch_running()
        if should_start:
            # Claim the running slot while still holding the queue lock so
            # two concurrent creates cannot both decide to start.
            set_batch_running(True)
            queue_position = None
            status = "pending"
            logger.info(f"Starting batch {batch_id} immediately (no batch running)")
        else:
            # Add to queue
            batch_queue.append((batch_id, request))
            queue_position = len(batch_queue)
            status = "queued"
            logger.info(f"Queued batch {batch_id} at position {queue_position}")

    # Build initial extractor status for each file
    # Order matches processing order in run_batch_job
    # frame_decode is enabled if any visual extractor needs it
    frame_decode_needed = any(
        (
            request.enable_objects,
            request.enable_faces,
            request.enable_ocr,
            request.enable_clip,
        )
    )
    extractor_flags = [
        ("metadata", request.enable_metadata),
        ("telemetry", True),  # Always runs
        ("vad", request.enable_vad),
        ("motion", request.enable_motion),
        ("scenes", request.enable_scenes),
        ("frame_decode", frame_decode_needed),
        ("objects", request.enable_objects),
        ("faces", request.enable_faces),
        ("ocr", request.enable_ocr),
        ("clip", request.enable_clip),
        ("visual", request.enable_visual),
        ("transcript", request.enable_transcript),
    ]
    initial_extractor_status = {name: "pending" if enabled else "skipped" for name, enabled in extractor_flags}

    batch = BatchJobStatus(
        batch_id=batch_id,
        status=status,
        queue_position=queue_position,
        files=[
            BatchFileStatus(
                file=f,
                filename=Path(f).name,
                status="pending",
                # copy() so each file can advance its own extractor states.
                extractor_status=initial_extractor_status.copy(),
            )
            for f in request.files
        ],
        created_at=datetime.now(timezone.utc),
    )

    with batch_jobs_lock:
        batch_jobs[batch_id] = batch

    return should_start, queue_position, status
167
+
168
+
169
def get_batch_sync(batch_id: str, status_only: bool = False):
    """Synchronous helper to get batch status (runs in thread pool).

    Args:
        batch_id: The batch ID to look up
        status_only: If True, return status/progress without large result data
    """
    from media_engine.batch.models import BatchFileStatus, BatchJobStatus

    with batch_jobs_lock:
        batch = batch_jobs.get(batch_id)
        if batch is None:
            return None

        if not status_only:
            return batch

        # Lightweight copy: identical status/progress/timing fields, but
        # per-file results are stripped to keep the payload small.
        slim_files = [
            BatchFileStatus(
                file=entry.file,
                filename=entry.filename,
                status=entry.status,
                results={},  # Empty - no large data
                error=entry.error,
                timings=entry.timings,
                extractor_status=entry.extractor_status,
            )
            for entry in batch.files
        ]
        return BatchJobStatus(
            batch_id=batch.batch_id,
            status=batch.status,
            queue_position=batch.queue_position,
            current_extractor=batch.current_extractor,
            progress=batch.progress,
            files=slim_files,
            created_at=batch.created_at,
            completed_at=batch.completed_at,
            extractor_timings=batch.extractor_timings,
            elapsed_seconds=batch.elapsed_seconds,
            memory_mb=batch.memory_mb,
            peak_memory_mb=batch.peak_memory_mb,
        )
212
+
213
+
214
def delete_batch_sync(batch_id: str) -> tuple[bool, bool]:
    """Synchronous helper to delete batch (runs in thread pool).

    Returns:
        (found, was_queued) - whether batch was found and if it was queued
    """
    with batch_jobs_lock:
        if batch_id not in batch_jobs:
            return False, False
        was_queued = batch_jobs[batch_id].status == "queued"
        del batch_jobs[batch_id]

    # If it was queued, remove from queue and update positions
    if was_queued:
        with batch_queue_lock:
            batch_queue[:] = [(bid, req) for bid, req in batch_queue if bid != batch_id]
        # Reposition OUTSIDE the queue lock: update_queue_positions()
        # re-acquires the non-reentrant batch_queue_lock and would deadlock
        # this thread if invoked while the lock is still held.
        update_queue_positions()

    return True, was_queued
@@ -0,0 +1,30 @@
1
+ """Global state for batch processing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from media_engine.batch.models import BatchJobStatus, BatchRequest
10
+
11
# In-memory batch store: batch_id -> BatchJobStatus. Guarded by batch_jobs_lock.
batch_jobs: dict[str, BatchJobStatus] = {}
batch_jobs_lock = threading.Lock()

# Batch queue - only one batch runs at a time, others wait in queue.
# Guarded by batch_queue_lock. NOTE: this is a plain (non-reentrant)
# threading.Lock - holders must not call helpers that re-acquire it.
batch_queue: list[tuple[str, BatchRequest]] = []  # (batch_id, request) tuples
batch_queue_lock = threading.Lock()

# Use mutable container for batch_running state - presumably so that
# modules which imported the accessor functions observe updates without
# needing `global` rebinding; flip it only via set_batch_running().
_batch_state = {"running": False}
21
+
22
+
23
def is_batch_running() -> bool:
    """Report whether a batch job is currently executing."""
    running = _batch_state["running"]
    return running
26
+
27
+
28
def set_batch_running(value: bool) -> None:
    """Record whether a batch job is executing."""
    _batch_state.update(running=value)
@@ -0,0 +1,321 @@
1
+ """ETA prediction system for batch processing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import threading
8
+ import time
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from media_engine.batch.models import BatchRequest
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
# Historical timing data for ETA predictions
# Key: (extractor, resolution_bucket) -> list of processing times in seconds
# (for normalized extractors the values are seconds-per-unit rates; see
# record_timing()). All access must hold _timing_history_lock.
_timing_history: dict[tuple[str, str], list[float]] = {}
_timing_history_lock = threading.Lock()
_timing_history_dirty = False  # Track if we need to save
_timing_history_last_save = 0.0  # Last save timestamp (time.time() epoch seconds)

# Keep last N samples per bucket for rolling average
_MAX_TIMING_SAMPLES = 20

# Timing history persistence (JSON file under the user's config dir)
_TIMING_HISTORY_FILE = Path.home() / ".config" / "polybos" / "timing_history.json"
_TIMING_SAVE_INTERVAL = 30.0  # Save at most every 30 seconds
30
+
31
+
32
def load_timing_history() -> None:
    """Load timing history from disk on startup.

    Best-effort: a missing or unreadable file is logged and ignored so
    startup never fails because of corrupt timing data.
    """
    # No `global` statement needed: _timing_history is mutated in place,
    # never rebound (the original declared one redundantly).
    if not _TIMING_HISTORY_FILE.exists():
        return
    try:
        with open(_TIMING_HISTORY_FILE) as f:
            data = json.load(f)
        # Convert string keys back to tuples
        with _timing_history_lock:
            for key_str, values in data.items():
                # Key format: "extractor|resolution"
                parts = key_str.split("|")
                if len(parts) == 2:
                    # Only keep the most recent samples per bucket.
                    _timing_history[(parts[0], parts[1])] = values[-_MAX_TIMING_SAMPLES:]
        logger.info(f"Loaded timing history: {len(_timing_history)} buckets")
    except Exception as e:
        logger.warning(f"Failed to load timing history: {e}")
50
+
51
+
52
def save_timing_history() -> None:
    """Persist timing history to disk (best-effort; failures are logged)."""
    global _timing_history_dirty, _timing_history_last_save
    with _timing_history_lock:
        if not _timing_history:
            return
        # JSON cannot encode tuple keys, so flatten to "extractor|resolution".
        serializable = {f"{extractor}|{bucket}": samples for (extractor, bucket), samples in _timing_history.items()}
        try:
            _TIMING_HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
            with open(_TIMING_HISTORY_FILE, "w") as fh:
                json.dump(serializable, fh, indent=2)
        except Exception as exc:
            logger.warning(f"Failed to save timing history: {exc}")
        else:
            _timing_history_dirty = False
            _timing_history_last_save = time.time()
            logger.debug(f"Saved timing history: {len(serializable)} buckets")
69
+
70
+
71
def get_resolution_bucket(width: int | None, height: int | None) -> str:
    """Map a frame size to a coarse resolution bucket used as a timing key."""
    if width is None or height is None:
        return "unknown"
    total_pixels = width * height
    # Thresholds are the pixel counts of 1280x720, 1920x1080, 2560x1440,
    # 3840x2160 and 5120x2880 respectively.
    for limit, label in (
        (921600, "720p"),
        (2073600, "1080p"),
        (3686400, "1440p"),
        (8294400, "4k"),
        (14745600, "5k"),
    ):
        if total_pixels <= limit:
            return label
    return "8k+"
88
+
89
+
90
def record_timing(
    extractor: str,
    resolution_bucket: str,
    seconds: float,
    units: float | None = None,
) -> None:
    """Record processing rate for future ETA predictions.

    Args:
        extractor: Name of the extractor (transcript, visual, objects, etc.)
        resolution_bucket: Resolution category (720p, 1080p, 4k, etc.)
        seconds: Wall clock time to process
        units: Normalization units - depends on extractor:
            - transcript: duration in minutes (stores seconds per minute)
            - visual: number of timestamps (stores seconds per timestamp)
            - objects/faces/ocr/clip: number of frames (stores seconds per frame)
            - If None, stores raw seconds (for metadata, telemetry, etc.)
    """
    global _timing_history_dirty

    # Normalize to seconds-per-unit when a positive unit count is supplied;
    # otherwise keep the raw wall-clock seconds.
    rate = seconds / units if units and units > 0 else seconds

    key = (extractor, resolution_bucket)
    with _timing_history_lock:
        samples = _timing_history.setdefault(key, [])
        samples.append(rate)
        # Rolling window: trim in place to the most recent samples.
        if len(samples) > _MAX_TIMING_SAMPLES:
            del samples[:-_MAX_TIMING_SAMPLES]
        sample_count = len(samples)
        avg = sum(samples) / sample_count
        _timing_history_dirty = True

    unit_label = "/unit" if units else "s"
    logger.debug(f"Recorded timing: {extractor}@{resolution_bucket} = {rate:.2f}{unit_label} " f"(avg: {avg:.2f}{unit_label} from {sample_count} samples)")
    # Save periodically (not on every update to reduce disk I/O)
    if _timing_history_dirty and time.time() - _timing_history_last_save > _TIMING_SAVE_INTERVAL:
        save_timing_history()
133
+
134
+
135
def get_predicted_rate(extractor: str, resolution_bucket: str) -> float | None:
    """Average historical rate (seconds per unit) for extractor@resolution.

    Returns None when no samples exist for that bucket; otherwise multiply
    the returned rate by the number of units to get predicted time.
    """
    with _timing_history_lock:
        samples = _timing_history.get((extractor, resolution_bucket))
        if not samples:
            return None
        return sum(samples) / len(samples)
146
+
147
+
148
# Default processing rates (seconds per unit) when no historical data
# Used as fallback for ETA predictions (see predict_extractor_time()).
DEFAULT_RATES: dict[str, float] = {
    "metadata": 1.0,  # ~1 second per file
    "telemetry": 0.5,  # ~0.5 seconds per file
    "vad": 0.5,  # ~0.5 seconds per minute of video
    # Sub-extractors within visual_processing (per frame rates)
    "motion": 0.5,  # ~0.5 seconds per file (analyzes whole video)
    "scenes": 0.3,  # ~0.3 seconds per file
    "frame_decode": 0.05,  # ~0.05 seconds per frame
    "objects": 0.3,  # ~0.3 seconds per frame (YOLO)
    "faces": 0.2,  # ~0.2 seconds per frame
    "ocr": 0.3,  # ~0.3 seconds per frame
    "clip": 0.15,  # ~0.15 seconds per frame
    # Separate stages
    "visual": 5.0,  # ~5 seconds per timestamp (Qwen VLM is slow)
    "transcript": 3.0,  # ~3 seconds per minute of video
}

# Extractor processing order - must match run_batch_job()
EXTRACTOR_ORDER: list[str] = [
    "metadata",
    "telemetry",
    "vad",
    "visual_processing",  # Combined: motion, scenes, frame_decode, objects, faces, ocr, clip
    "visual",  # Qwen VLM
    "transcript",
]
176
+
177
+
178
def predict_extractor_time(
    extractor: str,
    resolution_bucket: str,
    duration_seconds: float,
    num_frames: int | None = None,
    num_timestamps: int | None = None,
    enabled_sub_extractors: set[str] | None = None,
) -> float:
    """Predict processing time for a single extractor on a single file.

    Args:
        extractor: Name of the extractor
        resolution_bucket: Resolution category (720p, 1080p, 4k, etc.)
        duration_seconds: Video duration in seconds
        num_frames: Number of frames to process (for frame-based extractors)
        num_timestamps: Number of timestamps for visual/VLM analysis
        enabled_sub_extractors: For visual_processing, which sub-extractors are enabled

    Returns:
        Predicted processing time in seconds
    """

    def _rate(name: str, default: float) -> float:
        # Prefer a measured historical rate; fall back to the DEFAULT_RATES
        # table (with `default` as a last resort for unknown names).
        measured = get_predicted_rate(name, resolution_bucket)
        if measured is not None:
            return measured
        return DEFAULT_RATES.get(name, default)

    # Duration in minutes for duration-based extractors
    duration_minutes = duration_seconds / 60.0

    # Extractors that scale with duration
    if extractor in ("vad", "transcript"):
        return _rate(extractor, 1.0) * duration_minutes

    # visual_processing: sum up time for each enabled sub-extractor
    if extractor == "visual_processing":
        sub_extractors = enabled_sub_extractors or {"motion", "scenes", "frame_decode", "objects", "faces", "ocr", "clip"}

        # Smart sampling typically uses ~20-50 frames, not duration*2.
        # Use a more conservative estimate when no frame count is known.
        estimated_frames = num_frames if num_frames else min(50, max(10, int(duration_seconds / 2)))

        total_time = 0.0
        for sub in sub_extractors:
            rate = _rate(sub, 0.1)
            # motion, scenes store raw seconds per file;
            # frame_decode, objects, faces, ocr, clip store seconds per frame.
            if sub in ("motion", "scenes"):
                total_time += rate
            else:
                total_time += rate * estimated_frames
        return total_time

    # Visual/Qwen scales with timestamps
    if extractor == "visual":
        timestamps = num_timestamps if num_timestamps else 5
        return _rate(extractor, 5.0) * timestamps

    # Fixed-time extractors (metadata, telemetry)
    return _rate(extractor, 1.0)
245
+
246
+
247
def get_enabled_extractors_from_request(
    request: BatchRequest,
) -> tuple[set[str], set[str]]:
    """Get the set of enabled extractors from a batch request.

    Returns:
        Tuple of (main_extractors, sub_extractors within visual_processing)
    """
    main_extractors: set[str] = {"metadata", "telemetry"}  # Always enabled
    sub_extractors: set[str] = set()

    if request.enable_vad:
        main_extractors.add("vad")

    # Frame-based sub-extractors also require frame decoding.
    for flag, names in (
        (request.enable_motion, {"motion"}),
        (request.enable_scenes, {"scenes"}),
        (request.enable_objects, {"frame_decode", "objects"}),
        (request.enable_faces, {"frame_decode", "faces"}),
        (request.enable_ocr, {"frame_decode", "ocr"}),
        (request.enable_clip, {"frame_decode", "clip"}),
    ):
        if flag:
            sub_extractors |= names

    # visual_processing runs if any sub-extractor is enabled
    if sub_extractors:
        main_extractors.add("visual_processing")

    if request.enable_visual:
        main_extractors.add("visual")
    if request.enable_transcript:
        main_extractors.add("transcript")

    return main_extractors, sub_extractors
285
+
286
+
287
def calculate_queue_eta() -> tuple[float, int]:
    """Calculate total ETA for all queued batches.

    Returns: (total_seconds, batch_count)
    """
    # Imported lazily to avoid a circular import with the state module.
    from media_engine.batch.state import batch_queue, batch_queue_lock

    total_seconds = 0.0
    queued_batches = 0

    with batch_queue_lock:
        for _batch_id, queued_request in batch_queue:
            queued_batches += 1
            main_extractors, subs = get_enabled_extractors_from_request(queued_request)

            # Metadata is not available yet for queued files, so assume a
            # 1-minute 1080p clip per file as a rough default.
            for _file_path in queued_request.files:
                assumed_duration = 60.0
                assumed_resolution = "1080p"

                for name in main_extractors:
                    if name not in EXTRACTOR_ORDER:
                        continue
                    total_seconds += predict_extractor_time(
                        name,
                        assumed_resolution,
                        assumed_duration,
                        enabled_sub_extractors=subs if name == "visual_processing" else None,
                    )

    return total_seconds, queued_batches
318
+
319
+
320
# Load timing history on module import so ETA predictions can use data from
# previous runs immediately. NOTE(review): import-time side effect - reads
# ~/.config/polybos/timing_history.json; load_timing_history() is best-effort
# and swallows/loads errors, so import cannot fail because of it.
load_timing_history()
media_engine/cli.py ADDED
@@ -0,0 +1,17 @@
1
+ """CLI entry point for meng-server."""
2
+
3
+ import uvicorn
4
+
5
+
6
def run_server() -> None:
    """Run the Media Engine API server.

    Blocks until the uvicorn server exits. NOTE(review): binds 0.0.0.0, so
    the API is reachable on all network interfaces - confirm this exposure
    is intended for untrusted networks.
    """
    uvicorn.run(
        "media_engine.main:app",
        host="0.0.0.0",  # listen on all interfaces
        port=8001,
        reload=False,  # no auto-reload; presumably the non-dev entry point - confirm
    )
14
+
15
+
16
# Allow direct execution (python media_engine/cli.py) in addition to the
# packaged entry point.
if __name__ == "__main__":
    run_server()