media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,581 @@
1
+ """Pydantic schemas for request/response models."""
2
+
3
+ from datetime import datetime
4
+ from enum import StrEnum
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class MediaDeviceType(StrEnum):
    """Type of media capture device, as inferred by the metadata extractors.

    Members compare equal to their string values (StrEnum), so these are
    stable API identifiers used directly in responses.
    """

    DRONE = "drone"
    CAMERA = "camera"  # Generic stills/video camera
    CINEMA_CAMERA = "cinema_camera"  # Professional cinema bodies (ARRI/RED/Blackmagic extractors exist)
    PHONE = "phone"
    ACTION_CAMERA = "action_camera"  # e.g. GoPro
    CAMERA_360 = "360_camera"  # Value differs from member name: identifiers cannot start with a digit
    DASHCAM = "dashcam"
    UNKNOWN = "unknown"  # Fallback when detection fails
20
+
21
+
22
class DetectionMethod(StrEnum):
    """Method used for detection.

    Records how a derived fact (device type, color space, 3D mode, ...)
    was determined, so consumers can judge its reliability.
    """

    METADATA = "metadata"  # Read directly from embedded file metadata
    XML_SIDECAR = "xml_sidecar"  # Parsed from a vendor XML sidecar file
    CLIP = "clip"  # Inferred visually via the CLIP model
28
+
29
+
30
class MediaType(StrEnum):
    """Broad media category, derived from a file's extension (see get_media_type)."""

    VIDEO = "video"
    IMAGE = "image"
    AUDIO = "audio"
    UNKNOWN = "unknown"  # Extension not found in any known set
37
+
38
+
39
# File extension sets for media type detection (matches Rust MediaType::from_extension)
VIDEO_EXTENSIONS: set[str] = {
    # Common containers
    ".mp4", ".mov", ".mxf", ".avi", ".mkv", ".m4v", ".webm",
    # MPEG transport / program streams
    ".mts", ".m2ts", ".ts", ".vob", ".mpg", ".mpeg",
    # Legacy / streaming containers
    ".wmv", ".flv",
    # RAW video formats
    ".braw", ".r3d", ".ari",
}

IMAGE_EXTENSIONS: set[str] = {
    # Standard image formats
    ".jpg", ".jpeg", ".png", ".gif", ".webp",
    ".heic", ".heif", ".tiff", ".tif", ".bmp",
    # RAW image formats
    ".arw", ".cr2", ".cr3", ".nef", ".dng", ".raf",
    ".orf", ".rw2", ".pef", ".srw", ".x3f",
}

AUDIO_EXTENSIONS: set[str] = {
    ".wav", ".mp3", ".aac", ".m4a", ".flac", ".ogg",
    ".aiff", ".wma", ".opus", ".ape", ".wv",
}
100
+
101
+
102
def get_media_type(file_path: str) -> MediaType:
    """Determine media type from file extension (case-insensitive)."""
    from pathlib import Path

    suffix = Path(file_path).suffix.lower()
    # Probe each extension set in priority order.
    for extensions, media_type in (
        (VIDEO_EXTENSIONS, MediaType.VIDEO),
        (IMAGE_EXTENSIONS, MediaType.IMAGE),
        (AUDIO_EXTENSIONS, MediaType.AUDIO),
    ):
        if suffix in extensions:
            return media_type
    return MediaType.UNKNOWN
114
+
115
+
116
+ # === Request Models ===
117
+
118
+
119
+ # === Response Models ===
120
+
121
+
122
class Resolution(BaseModel):
    """Video resolution in pixels (see Metadata.resolution)."""

    width: int  # Frame width in pixels
    height: int  # Frame height in pixels
127
+
128
+
129
class VideoCodec(BaseModel):
    """Detailed video codec information (see Metadata.video_codec)."""

    name: str  # h264, hevc, prores, etc.
    profile: str | None = None  # Main 10, High, etc.
    bit_depth: int | None = None  # 8, 10, 12
    pixel_format: str | None = None  # yuv420p, yuv420p10le, etc.
136
+
137
+
138
class AudioInfo(BaseModel):
    """Audio stream information (see Metadata.audio)."""

    codec: str | None = None  # pcm_s16be, aac, etc.
    sample_rate: int | None = None  # 48000, 44100, etc. (Hz)
    channels: int | None = None  # 1, 2, 6, etc.
    bit_depth: int | None = None  # 16, 24, 32
    bitrate: int | None = None  # Audio bitrate in bps
146
+
147
+
148
class Codec(BaseModel):
    """Video/audio codec info (simplified for backwards compat).

    VideoCodec / AudioInfo carry the detailed per-stream variants.
    """

    video: str | None = None  # Video codec name, e.g. "h264"
    audio: str | None = None  # Audio codec name, e.g. "aac"
153
+
154
+
155
class GPS(BaseModel):
    """Single GPS fix for the clip (see Metadata.gps).

    Latitude/longitude are decimal degrees; altitude is presumably meters
    -- TODO confirm units against the producing extractors.
    """

    latitude: float
    longitude: float
    altitude: float | None = None  # None when the device does not record it
161
+
162
+
163
class GPSTrackPoint(BaseModel):
    """Single point in a GPS track."""

    latitude: float
    longitude: float
    altitude: float | None = None  # Meters, when available
    timestamp: float | None = None  # Video timestamp in seconds
170
+
171
+
172
class GPSTrack(BaseModel):
    """GPS track extracted from video."""

    points: list[GPSTrackPoint]
    source: str  # Where the track came from (e.g., "avchd_sei", "srt_sidecar")

    @property
    def count(self) -> int:
        """Number of points in track."""
        return len(self.points)

    @property
    def bounds(self) -> dict[str, float] | None:
        """Bounding box of track (min/max lat/lon)."""
        if not self.points:
            return None
        # Single pass over the points, no intermediate lists.
        head = self.points[0]
        min_lat = max_lat = head.latitude
        min_lon = max_lon = head.longitude
        for point in self.points[1:]:
            if point.latitude < min_lat:
                min_lat = point.latitude
            if point.latitude > max_lat:
                max_lat = point.latitude
            if point.longitude < min_lon:
                min_lon = point.longitude
            if point.longitude > max_lon:
                max_lon = point.longitude
        return {
            "min_lat": min_lat,
            "max_lat": max_lat,
            "min_lon": min_lon,
            "max_lon": max_lon,
        }
196
+
197
+
198
class ColorSpace(BaseModel):
    """Color space information for LOG/HDR footage (see Metadata.color_space)."""

    transfer: str | None = None  # Gamma/transfer function (e.g., "slog3", "bt709", "hlg")
    primaries: str | None = None  # Color primaries (e.g., "sgamut3", "bt709", "bt2020")
    matrix: str | None = None  # Color matrix (e.g., "bt709", "bt2020nc")
    lut_file: str | None = None  # Reference to LUT file for conversion
    detection_method: DetectionMethod = DetectionMethod.METADATA  # How this was determined
206
+
207
+
208
class Stereo3DMode(StrEnum):
    """3D video format/layout (how the two eye views are packed)."""

    MVC = "mvc"  # H.264 Multiview Video Coding (3D Blu-ray, consumer 3D camcorders)
    SIDE_BY_SIDE = "side_by_side"  # Left/right frames side by side (half width each)
    SIDE_BY_SIDE_FULL = "side_by_side_full"  # Full width SBS (doubled width)
    TOP_BOTTOM = "top_bottom"  # Left/right frames stacked (half height each)
    TOP_BOTTOM_FULL = "top_bottom_full"  # Full height TAB (doubled height)
    FRAME_SEQUENTIAL = "frame_sequential"  # Alternating L/R frames
    DUAL_STREAM = "dual_stream"  # Separate files for each eye
218
+
219
+
220
class Stereo3D(BaseModel):
    """Stereoscopic 3D video information (see Metadata.stereo_3d)."""

    mode: Stereo3DMode  # How the eye views are packed
    eye_count: int = 2  # Number of views (usually 2)
    has_left_eye: bool = True
    has_right_eye: bool = True
    detection_method: DetectionMethod = DetectionMethod.METADATA  # How this was determined
228
+
229
+
230
class LensInfo(BaseModel):
    """Lens and camera settings (see Metadata.lens)."""

    model: str | None = None  # Lens model name (e.g., "XT14X5.8")
    focal_length: float | None = None  # Focal length in mm
    focal_length_35mm: float | None = None  # 35mm equivalent focal length
    aperture: float | None = None  # f-number (e.g., 2.8)
    focus_distance: float | None = None  # Focus distance in meters
    iris: str | None = None  # Iris setting as string (e.g., "F2.8")
    detection_method: DetectionMethod = DetectionMethod.METADATA  # How this was determined
240
+
241
+
242
class DeviceInfo(BaseModel):
    """Source device information (see Metadata.device)."""

    make: str | None = None  # Manufacturer name
    model: str | None = None  # Device model string
    serial_number: str | None = None
    software: str | None = None  # Firmware/software version string
    type: MediaDeviceType | None = None  # Classified device category
    detection_method: DetectionMethod = DetectionMethod.METADATA  # How this was determined
    confidence: float = 1.0  # Defaults to 1.0 for direct metadata reads
252
+
253
+
254
class ShotType(BaseModel):
    """Shot type classification (see Metadata.shot_type)."""

    primary: str  # aerial, interview, b-roll, studio, etc.
    confidence: float  # Classifier confidence
    detection_method: str = "clip"  # Defaults to CLIP-based visual classification
260
+
261
+
262
class KeyframeInfo(BaseModel):
    """Keyframe (I-frame) information from video stream.

    Useful for detecting scene cuts: irregular keyframe intervals
    often indicate actual cuts, while fixed intervals (e.g., every 2s)
    indicate standard GOP compression.
    """

    timestamps: list[float]  # Keyframe timestamps in seconds
    count: int  # Number of keyframes
    is_fixed_interval: bool  # True if keyframes are at regular intervals (GOP)
    avg_interval: float | None = None  # Average interval between keyframes, in seconds
274
+
275
+
276
class SpannedRecording(BaseModel):
    """Information about spanned recordings (e.g., AVCHD files split at 2GB).

    When a camera splits a long recording across multiple files, this tracks
    which files belong together and the total recording duration.
    """

    is_continuation: bool  # True if this file is NOT the first of the recording
    sibling_files: list[str]  # Other files in this recording (filenames only, no paths)
    total_duration: float  # Total duration of the complete recording in seconds
    file_index: int  # Position of this file in the recording (0-based)
287
+
288
+
289
class Metadata(BaseModel):
    """Video metadata aggregated by the metadata extractors.

    The core fields (duration, resolution, codec, file_size) are always
    populated; every optional field is filled in only when the corresponding
    extractor finds data for it.
    """

    duration: float  # Duration in seconds
    resolution: Resolution
    codec: Codec  # Simplified codec info for backwards compat
    video_codec: VideoCodec | None = None  # Detailed video codec info
    audio: AudioInfo | None = None  # Audio stream info
    fps: float | None = None  # Frames per second
    bitrate: int | None = None  # Total bitrate in bps
    file_size: int  # File size in bytes
    timecode: str | None = None  # Start timecode (e.g., "01:15:07:17")
    created_at: datetime | None = None  # Recording creation time from metadata
    device: DeviceInfo | None = None
    gps: GPS | None = None  # Single representative fix
    gps_track: GPSTrack | None = None  # Full GPS track if available
    color_space: ColorSpace | None = None
    lens: LensInfo | None = None
    shot_type: ShotType | None = None
    keyframes: KeyframeInfo | None = None
    spanned_recording: SpannedRecording | None = None  # For split recordings (AVCHD)
    stereo_3d: Stereo3D | None = None  # Stereoscopic 3D video info
311
+
312
+
313
class TranscriptSegment(BaseModel):
    """Single transcript segment."""

    start: float  # Segment start time in seconds
    end: float  # Segment end time in seconds
    text: str  # Transcribed text for this span
    speaker: str | None = None  # Speaker ID from diarization (e.g., "SPEAKER_00")
320
+
321
+
322
class TranscriptHints(BaseModel):
    """Language hints used during transcription (echoed back in Transcript)."""

    language_hints: list[str] = Field(default_factory=list)  # Candidate language codes supplied
    context_hint: str | None = None  # Free-text context prompt, if any was given
    fallback_applied: bool = False  # True if the configured fallback language was used
328
+
329
+
330
class Transcript(BaseModel):
    """Full transcript result."""

    language: str  # Detected (or fallback) language code
    confidence: float  # Language-detection/transcription confidence
    duration: float  # Audio duration in seconds
    speaker_count: int | None = None  # Number of speakers detected (None if diarization disabled)
    hints_used: TranscriptHints  # Hints that were in effect for this run
    segments: list[TranscriptSegment]  # Time-ordered transcript segments
339
+
340
+
341
class BoundingBox(BaseModel):
    """Axis-aligned bounding box in pixel coordinates.

    NOTE(review): x/y presumably denote the top-left corner -- confirm
    against the extractors that produce these boxes.
    """

    x: int
    y: int
    width: int
    height: int
348
+
349
+
350
class FaceDetection(BaseModel):
    """Single face detection."""

    timestamp: float  # Seconds from start of video
    bbox: BoundingBox  # Face location in the frame
    confidence: float  # Detector confidence
    embedding: list[float]  # Face embedding vector (used for unique-face estimation)
    image_base64: str | None = None  # Base64-encoded JPEG of cropped face
    needs_review: bool = False  # Flag for uncertain detections
    review_reason: str | None = None  # Why review is needed
360
+
361
+
362
class FacesResult(BaseModel):
    """Face detection results."""

    count: int  # Total number of detections
    unique_estimate: int  # Estimated number of distinct faces
    detections: list[FaceDetection]
368
+
369
+
370
class SceneDetection(BaseModel):
    """Single scene segment."""

    index: int  # Scene sequence number
    start: float  # Scene start in seconds
    end: float  # Scene end in seconds
    duration: float  # Scene length in seconds
377
+
378
+
379
class ScenesResult(BaseModel):
    """Scene detection results."""

    count: int  # Number of scenes found
    detections: list[SceneDetection]  # Time-ordered scene segments
384
+
385
+
386
class ObjectDetection(BaseModel):
    """Single object detection."""

    timestamp: float  # Seconds from start of video
    label: str  # Detected class label
    confidence: float  # Detector confidence
    bbox: BoundingBox  # Object location in the frame
393
+
394
+
395
class ObjectsResult(BaseModel):
    """Object detection results."""

    summary: dict[str, int]  # Label -> occurrence count
    detections: list[ObjectDetection]
    descriptions: list[str] | None = None  # Scene descriptions from VLM
    error: str | None = None  # Error code if extraction failed (e.g., "out_of_memory")
402
+
403
+
404
class ClipSegment(BaseModel):
    """CLIP embedding for a segment."""

    start: float  # Segment start in seconds
    end: float  # Segment end in seconds
    scene_index: int | None = None  # Index of the source scene, when scene-aligned
    embedding: list[float]  # CLIP embedding vector for the segment
411
+
412
+
413
class ClipResult(BaseModel):
    """CLIP embedding results."""

    model: str  # Name of the CLIP model that produced the embeddings
    segments: list[ClipSegment]
418
+
419
+
420
class OcrDetection(BaseModel):
    """Single OCR detection."""

    timestamp: float  # Seconds from start of video
    text: str  # Recognized text
    confidence: float  # Recognition confidence
    bbox: BoundingBox  # Text location in the frame
427
+
428
+
429
class OcrResult(BaseModel):
    """OCR results."""

    detections: list[OcrDetection]  # All recognized text spans, with timestamps
433
+
434
+
435
class MotionSegment(BaseModel):
    """A segment of video with consistent camera motion."""

    start: float  # Segment start in seconds
    end: float  # Segment end in seconds
    motion_type: str  # static, pan_left, pan_right, tilt_up, tilt_down, zoom_in, zoom_out, handheld
    intensity: float  # Average flow magnitude
442
+
443
+
444
class MotionResult(BaseModel):
    """Camera motion analysis results."""

    duration: float  # Analyzed duration in seconds
    fps: float  # Frame rate used for analysis
    primary_motion: str  # Most common motion type
    segments: list[MotionSegment]  # Time-ordered motion segments
    avg_intensity: float  # Mean flow magnitude over the whole clip
    is_stable: bool  # True if mostly static/tripod
453
+
454
+
455
class TelemetryPoint(BaseModel):
    """Single telemetry point from drone/camera."""

    timestamp: float  # Seconds from start of video
    recorded_at: datetime | None = None  # Actual datetime from telemetry
    latitude: float
    longitude: float
    altitude: float | None = None  # Absolute altitude in meters
    relative_altitude: float | None = None  # Altitude above takeoff, in meters
    # Camera settings captured alongside the position
    iso: int | None = None
    shutter: float | None = None  # Shutter speed as fraction (1/100 = 0.01)
    aperture: float | None = None  # f-number
    focal_length: float | None = None  # In mm
    color_mode: str | None = None  # d_log, d_cinelike, etc.
470
+
471
+
472
class TelemetryResult(BaseModel):
    """Telemetry/flight path results."""

    source: str  # Extractor that produced the track ("dji_srt", "gopro", etc.)
    sample_rate: float  # Points per second
    duration: float  # Total duration in seconds
    points: list[TelemetryPoint]

    def to_gpx(self) -> str:
        """Export telemetry as a GPX 1.1 track document.

        Returns the XML as a single string, one <trkpt> per point.
        Elevation is emitted whenever altitude is present; timestamps are
        emitted with millisecond precision when recorded_at is set.
        """
        lines = [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<gpx version="1.1" creator="Polybos Media Engine">',
            " <trk>",
            " <name>Flight Path</name>",
            " <trkseg>",
        ]
        for pt in self.points:
            # Bug fix: the previous truthiness test (`if pt.altitude`)
            # silently dropped a legitimate sea-level altitude of 0.0.
            ele = f"<ele>{pt.altitude}</ele>" if pt.altitude is not None else ""
            time = ""
            if pt.recorded_at:
                # Truncate microseconds to milliseconds. NOTE(review): "Z" is
                # appended without conversion -- assumes recorded_at is
                # already UTC; confirm against the telemetry extractors.
                time = f"<time>{pt.recorded_at.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3]}Z</time>"
            lines.append(f' <trkpt lat="{pt.latitude}" lon="{pt.longitude}">{ele}{time}</trkpt>')
        lines.extend([" </trkseg>", " </trk>", "</gpx>"])
        return "\n".join(lines)
497
+
498
+
499
class HealthResponse(BaseModel):
    """Response from /health endpoint."""

    status: str  # Service status indicator
    version: str  # Package version
    api_version: str  # API schema version
505
+
506
+
507
class SettingsResponse(BaseModel):
    """Response from GET /settings endpoint.

    All settings are returned, with sensitive values (hf_token) masked.
    Field groups mirror the optional fields of SettingsUpdate.
    """

    # API settings
    api_version: str
    log_level: str

    # Whisper settings
    whisper_model: str  # Transcription model name
    fallback_language: str  # Language used when detection fails

    # Speaker diarization
    hf_token_set: bool  # True if token is configured (actual value is masked)
    diarization_model: str

    # Processing settings
    face_sample_fps: float  # Frames per second sampled for face detection
    object_sample_fps: float  # Frames per second sampled for object detection
    min_face_size: int  # Minimum face size accepted, in pixels

    # Object detection
    object_detector: str  # Which detector backend is active
    qwen_model: str
    qwen_frames_per_scene: int
    yolo_model: str

    # CLIP
    clip_model: str

    # OCR
    ocr_languages: list[str]  # Language codes enabled for OCR

    # Temp directory
    temp_dir: str  # Working directory for intermediate files
544
+
545
+
546
class SettingsUpdate(BaseModel):
    """Request body for PUT /settings endpoint.

    All fields are optional - only provided fields are updated. Field
    groups mirror SettingsResponse.
    """

    # API settings
    log_level: str | None = None

    # Whisper settings
    whisper_model: str | None = None
    fallback_language: str | None = None

    # Speaker diarization
    hf_token: str | None = None  # Set to empty string to clear
    diarization_model: str | None = None

    # Processing settings
    face_sample_fps: float | None = None
    object_sample_fps: float | None = None
    min_face_size: int | None = None

    # Object detection
    object_detector: str | None = None
    qwen_model: str | None = None
    qwen_frames_per_scene: int | None = None
    yolo_model: str | None = None

    # CLIP
    clip_model: str | None = None

    # OCR
    ocr_languages: list[str] | None = None

    # Temp directory
    temp_dir: str | None = None
@@ -0,0 +1,5 @@
1
+ """Utility functions for Polybos Media Engine."""
2
+
3
+ from media_engine.utils.memory import clear_memory, get_memory_mb
4
+
5
+ __all__ = ["clear_memory", "get_memory_mb"]
@@ -0,0 +1,54 @@
1
+ """Non-blocking queue-based logging setup."""
2
+
3
+ import atexit
4
+ import logging
5
+ import logging.handlers
6
+ import os
7
+ import queue
8
+ import sys
9
+
10
+
11
def setup_logging() -> logging.handlers.QueueListener:
    """Configure non-blocking logging using a queue.

    All records are pushed onto an in-memory queue; a QueueListener thread
    performs the actual (blocking) I/O so application threads never stall
    on a slow handler.

    Returns:
        The started QueueListener so it can be stopped on shutdown (it is
        also registered with atexit as a safety net).
    """

    def _is_tty(stream: object) -> bool:
        # Robustness fix: sys.stdout/sys.stderr can be None (pythonw,
        # embedded interpreters) or a closed/replacement object without a
        # working isatty(); treat any such case as non-interactive instead
        # of crashing at startup.
        try:
            return stream is not None and bool(stream.isatty())
        except (AttributeError, ValueError):
            return False

    # When running under a parent process that doesn't read our stdout/stderr,
    # writes block when the pipe buffer fills up. Redirect to /dev/null to prevent this.
    is_interactive = _is_tty(sys.stdout) and _is_tty(sys.stderr)
    if not is_interactive:
        # Deliberately left open for the life of the process: it replaces
        # the standard streams, so closing it would break later writes.
        devnull = open(os.devnull, "w")
        sys.stdout = devnull
        sys.stderr = devnull

    # Unbounded queue: logging must never drop records or block producers.
    log_queue: queue.Queue[logging.LogRecord] = queue.Queue(-1)
    queue_handler = logging.handlers.QueueHandler(log_queue)

    # Always log to file (this is the only output when running non-interactively).
    # Explicit UTF-8 so log contents don't depend on the locale encoding.
    # NOTE(review): fixed path under /tmp collides across users/instances --
    # confirm this is acceptable for the deployment.
    file_handler = logging.FileHandler("/tmp/media_engine.log", encoding="utf-8")
    log_formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
    file_handler.setFormatter(log_formatter)

    # Build handler list - only mirror to stderr when attached to a terminal.
    handlers: list[logging.Handler] = [file_handler]
    if is_interactive:
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(log_formatter)
        handlers.append(stream_handler)

    # QueueListener handles the actual I/O in a separate thread.
    queue_listener = logging.handlers.QueueListener(
        log_queue, *handlers, respect_handler_level=True
    )
    queue_listener.start()

    # Ensure the queue is drained and the thread stopped even if the caller
    # forgets to stop the listener explicitly.
    atexit.register(queue_listener.stop)

    # Route the root logger through the non-blocking queue handler.
    logging.basicConfig(
        level=logging.INFO,
        handlers=[queue_handler],
    )

    return queue_listener
@@ -0,0 +1,49 @@
1
+ """Memory management utilities."""
2
+
3
+ import gc
4
+
5
+
6
def clear_memory() -> None:
    """Force garbage collection and clear GPU/MPS caches.

    Call before loading heavy AI models to free up memory. Best-effort and
    safe on any machine: torch/mlx steps are skipped when the library or
    backend is unavailable.
    """
    # Multiple gc passes to handle circular references (and objects whose
    # finalizers release further objects).
    for _ in range(3):
        gc.collect()

    try:
        import torch

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
        # Robustness fix: only touch the MPS cache when the backend is
        # actually available. torch.mps exists on non-Mac builds too, where
        # empty_cache()/synchronize() can raise at runtime; the previous
        # hasattr-only guards did not protect against that.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is not None and mps_backend.is_available():
            if hasattr(torch, "mps"):
                if hasattr(torch.mps, "empty_cache"):
                    torch.mps.empty_cache()
                if hasattr(torch.mps, "synchronize"):
                    torch.mps.synchronize()
    except ImportError:
        pass

    # Also try mlx (Apple-silicon array framework) cleanup.
    try:
        import mlx.core as mx

        mx.metal.clear_cache()
    except (ImportError, AttributeError):
        pass

    # Final gc pass to collect anything released by the GPU cleanup above.
    gc.collect()
39
+
40
+
41
def get_memory_mb() -> int:
    """Get current process memory usage (resident set size) in MB.

    Returns 0 when psutil is not installed, so the value is safe to use
    for informational logging on any machine.
    """
    try:
        import psutil  # type: ignore[import-not-found]
    except ImportError:
        return 0
    rss_bytes = psutil.Process().memory_info().rss
    return rss_bytes // (1024 * 1024)