media_engine-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
media_engine/config.py ADDED
@@ -0,0 +1,674 @@
"""Configuration settings for Polybos Media Engine."""

import json
import logging
import platform
from enum import StrEnum
from functools import lru_cache
from pathlib import Path
from typing import Any

from pydantic import BaseModel

logger = logging.getLogger(__name__)

# =============================================================================
# Default Constants
# =============================================================================

# Config file location
DEFAULT_CONFIG_PATH = Path.home() / ".config" / "polybos" / "config.json"

# API
DEFAULT_API_VERSION = "1.0"
DEFAULT_LOG_LEVEL = "INFO"

# Whisper speech-to-text
DEFAULT_WHISPER_MODEL = "auto"  # Auto-select based on VRAM
DEFAULT_FALLBACK_LANGUAGE = "en"

# Speaker diarization
DEFAULT_DIARIZATION_MODEL = "pyannote/speaker-diarization-3.1"

# Processing
DEFAULT_FACE_SAMPLE_FPS = 1.0
DEFAULT_OBJECT_SAMPLE_FPS = 2.0
DEFAULT_MIN_FACE_SIZE = 80

# Object detection (Qwen VLM)
DEFAULT_QWEN_MODEL = "auto"  # Auto-select based on VRAM
DEFAULT_QWEN_FRAMES_PER_SCENE = 1
DEFAULT_OBJECT_DETECTOR = "auto"  # Auto-select based on VRAM

# OCR - Latin script languages (see https://www.jaided.ai/easyocr/)
# For CJK: ch_sim, ch_tra, ja, ko
# Note: Finnish (fi) not supported by EasyOCR
DEFAULT_OCR_LANGUAGES: list[str] = [
    "en",  # English
    "no",  # Norwegian
    "de",  # German
    "fr",  # French
    "es",  # Spanish
    "it",  # Italian
    "pt",  # Portuguese
    "nl",  # Dutch
    "sv",  # Swedish
    "da",  # Danish
    "pl",  # Polish
]

# Temp directory
DEFAULT_TEMP_DIR = "/tmp/polybos"


# =============================================================================
# Enums
# =============================================================================


class DeviceType(StrEnum):
    """Compute device types."""

    MPS = "mps"
    CUDA = "cuda"
    CPU = "cpu"


class ObjectDetector(StrEnum):
    """Object detection backend."""

    YOLO = "yolo"
    QWEN = "qwen"


# =============================================================================
# Settings (loaded from JSON config file)
# =============================================================================


class Settings(BaseModel):
    """Application settings loaded from JSON config file.

    Config file location: ~/.config/polybos/config.json

    Model settings support "auto" to automatically select based on VRAM:
    - whisper_model: "auto" | "tiny" | "small" | "medium" | "large-v3"
    - qwen_model: "auto" | "Qwen/Qwen2-VL-2B-Instruct" | "Qwen/Qwen2-VL-7B-Instruct"
    - yolo_model: "auto" | "yolov8n.pt" | "yolov8s.pt" | "yolov8m.pt" | "yolov8l.pt" | "yolov8x.pt"
    - clip_model: "auto" | "ViT-B-16" | "ViT-B-32" | "ViT-L-14"
    - object_detector: "auto" | "yolo" | "qwen"
    """

    # API settings
    api_version: str = DEFAULT_API_VERSION
    log_level: str = DEFAULT_LOG_LEVEL

    # Whisper settings ("auto" = select based on VRAM)
    whisper_model: str = DEFAULT_WHISPER_MODEL
    fallback_language: str = DEFAULT_FALLBACK_LANGUAGE

    # Speaker diarization settings
    hf_token: str | None = None  # HuggingFace token for pyannote models
    diarization_model: str = DEFAULT_DIARIZATION_MODEL

    # Processing settings
    face_sample_fps: float = DEFAULT_FACE_SAMPLE_FPS
    object_sample_fps: float = DEFAULT_OBJECT_SAMPLE_FPS
    min_face_size: int = DEFAULT_MIN_FACE_SIZE

    # Object detection settings ("auto" = select based on VRAM)
    object_detector: str = DEFAULT_OBJECT_DETECTOR  # "auto", "yolo", or "qwen"
    qwen_model: str = DEFAULT_QWEN_MODEL
    qwen_frames_per_scene: int = DEFAULT_QWEN_FRAMES_PER_SCENE

    # YOLO model ("auto" = select based on VRAM)
    yolo_model: str = "auto"

    # CLIP model ("auto" = select based on VRAM)
    clip_model: str = "auto"

    # OCR settings
    ocr_languages: list[str] = DEFAULT_OCR_LANGUAGES.copy()

    # Temp directory for processing
    temp_dir: str = DEFAULT_TEMP_DIR

    def get_whisper_model(self) -> str:
        """Get resolved Whisper model (handles 'auto')."""
        if self.whisper_model == "auto":
            return get_auto_whisper_model()
        return self.whisper_model

    def get_qwen_model(self) -> str:
        """Get resolved Qwen model (handles 'auto')."""
        if self.qwen_model == "auto":
            return get_auto_qwen_model()
        return self.qwen_model

    def get_yolo_model(self) -> str:
        """Get resolved YOLO model (handles 'auto')."""
        if self.yolo_model == "auto":
            return get_auto_yolo_model()
        return self.yolo_model

    def get_clip_model(self) -> str:
        """Get resolved CLIP model (handles 'auto')."""
        if self.clip_model == "auto":
            return get_auto_clip_model()
        return self.clip_model

    def get_object_detector(self) -> ObjectDetector:
        """Get resolved object detector (handles 'auto')."""
        if self.object_detector == "auto":
            return get_auto_object_detector()
        return ObjectDetector(self.object_detector)
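
# --- Usage sketch (illustrative; not in the packaged file) ---
# Callers resolve "auto" through the get_*() helpers; the raw attribute keeps
# the literal "auto" so the choice is re-resolved on whatever hardware loads
# the config. The values in the comments below are hypothetical.
def _sketch_resolve_auto() -> None:
    settings = Settings(whisper_model="auto", yolo_model="yolov8s.pt")
    print(settings.whisper_model)        # "auto" -- raw value, as persisted
    print(settings.get_whisper_model())  # e.g. "medium" on a 6-10GB GPU
    print(settings.get_yolo_model())     # "yolov8s.pt" -- explicit values pass through
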
def get_config_path() -> Path:
    """Get the config file path."""
    return DEFAULT_CONFIG_PATH


def load_config_from_file(config_path: Path | None = None) -> dict[str, Any]:
    """Load configuration from JSON file.

    Args:
        config_path: Optional path to config file. Defaults to ~/.config/polybos/config.json

    Returns:
        Dictionary of settings (empty if file doesn't exist)
    """
    path = config_path or DEFAULT_CONFIG_PATH

    if not path.exists():
        return {}

    try:
        with open(path) as f:
            return json.load(f)
    except (json.JSONDecodeError, OSError) as e:
        logger.warning(f"Failed to load config from {path}: {e}")
        return {}


def save_config_to_file(settings: Settings, config_path: Path | None = None) -> None:
    """Save configuration to JSON file.

    Args:
        settings: Settings instance to save
        config_path: Optional path to config file. Defaults to ~/.config/polybos/config.json
    """
    path = config_path or DEFAULT_CONFIG_PATH

    # Create directory if needed
    path.parent.mkdir(parents=True, exist_ok=True)

    with open(path, "w") as f:
        json.dump(settings.model_dump(), f, indent=2)

    logger.info(f"Saved config to {path}")


# Cached settings instance
_settings: Settings | None = None


def get_settings() -> Settings:
    """Get settings instance (loaded from config file on first call)."""
    global _settings

    if _settings is None:
        config_data = load_config_from_file()
        _settings = Settings(**config_data)
        if config_data:
            logger.info(f"Loaded settings from {DEFAULT_CONFIG_PATH}")
        else:
            logger.info("Using default settings")

    return _settings


def reload_settings() -> Settings:
    """Reload settings from config file."""
    global _settings
    _settings = None
    return get_settings()
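
# --- Usage sketch (illustrative; not in the packaged file) ---
# get_settings() caches the first load for the process; after changing the
# JSON on disk (or calling save_config_to_file), reload_settings() re-reads it.
def _sketch_config_roundtrip() -> None:
    settings = get_settings()      # first call reads ~/.config/polybos/config.json
    settings.whisper_model = "large-v3"
    save_config_to_file(settings)  # writes settings.model_dump() as indented JSON
    assert reload_settings().whisper_model == "large-v3"
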
def is_apple_silicon() -> bool:
    """Check if running on Apple Silicon."""
    return platform.system() == "Darwin" and platform.machine() == "arm64"


def has_cuda() -> bool:
    """Check if CUDA is available."""
    try:
        import torch

        return torch.cuda.is_available()
    except ImportError:
        return False


def get_device() -> DeviceType:
    """Get the best available compute device."""
    if is_apple_silicon():
        return DeviceType.MPS
    elif has_cuda():
        return DeviceType.CUDA
    else:
        return DeviceType.CPU
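
# --- Usage sketch (illustrative; not in the packaged file) ---
# Preference order is MPS, then CUDA, then CPU. DeviceType is a StrEnum, so
# members compare equal to plain strings and format as "mps"/"cuda"/"cpu".
def _sketch_pick_device() -> None:
    device = get_device()
    assert device == str(device)  # StrEnum: DeviceType.MPS == "mps"
    print(f"running extractors on {device}")
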
# =============================================================================
# VRAM Detection and Auto Model Selection
# =============================================================================


def get_gpu_name() -> str | None:
    """Get the GPU name/model.

    Returns:
        GPU name string (e.g., "NVIDIA GeForce RTX 4090", "Apple M2 Max")
        or None if no GPU is available.
    """
    if has_cuda():
        try:
            import torch

            return torch.cuda.get_device_name(0)
        except Exception:
            return None
    elif is_apple_silicon():
        try:
            import subprocess

            # Get chip name from the CPU brand string
            result = subprocess.run(
                ["sysctl", "-n", "machdep.cpu.brand_string"],
                capture_output=True,
                text=True,
            )
            cpu_brand = result.stdout.strip()
            # Extract Apple chip name if present
            if "Apple" in cpu_brand:
                return cpu_brand
            # Fallback: try to get it from system_profiler's SPHardwareDataType
            result = subprocess.run(
                ["system_profiler", "SPHardwareDataType"],
                capture_output=True,
                text=True,
            )
            for line in result.stdout.split("\n"):
                if "Chip:" in line:
                    return line.split(":")[-1].strip()
            return "Apple Silicon"
        except Exception:
            return "Apple Silicon"
    return None


@lru_cache(maxsize=1)
def get_available_vram_gb() -> float:
    """Get available GPU memory in GB.

    Returns:
        Available VRAM in GB. For Apple Silicon, returns unified memory.
        Returns 0 if no GPU is available.
    """
    if has_cuda():
        try:
            import torch

            props = torch.cuda.get_device_properties(0)
            total_gb = props.total_memory / (1024**3)
            logger.info(f"CUDA GPU: {props.name}, {total_gb:.1f}GB VRAM")
            return total_gb
        except Exception as e:
            logger.warning(f"Failed to get CUDA memory: {e}")
            return 0.0

    elif is_apple_silicon():
        try:
            import subprocess

            # Get total system memory (unified memory on Apple Silicon)
            result = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
            )
            total_bytes = int(result.stdout.strip())
            total_gb = total_bytes / (1024**3)
            logger.info(f"Apple Silicon: {total_gb:.0f}GB unified memory")
            return total_gb
        except Exception as e:
            logger.warning(f"Failed to get Apple Silicon memory: {e}")
            return 8.0  # Conservative default for M1/M2

    return 0.0
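
# --- Usage sketch (illustrative; not in the packaged file) ---
# Because of @lru_cache(maxsize=1), the VRAM probe runs once per process and
# the result is reused everywhere; cache_clear() (standard functools.lru_cache
# API) forces a fresh probe, e.g. in a test harness.
def _sketch_reprobe_vram() -> None:
    total = get_available_vram_gb()      # probes torch.cuda / sysctl once
    again = get_available_vram_gb()      # served from the cache
    get_available_vram_gb.cache_clear()  # discard the memoized value
    fresh = get_available_vram_gb()      # probes the hardware again
    print(total, again, fresh)
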
def get_free_memory_gb() -> float:
    """Get memory available for ML models without causing excessive swapping.

    Uses psutil for cross-platform memory detection, which correctly accounts
    for free, inactive, and purgeable memory on macOS.

    Returns:
        Estimated GB available for loading models.
    """
    try:
        import psutil

        mem = psutil.virtual_memory()
        # psutil's "available" is the memory that can be given to processes
        # without swapping - this is what we want
        available_gb = mem.available / (1024**3)

        # Leave a 1GB buffer for system processes
        available_for_models = max(0.0, available_gb - 1.0)

        logger.info(
            f"Memory: {mem.total / (1024**3):.0f}GB total, "
            f"{mem.available / (1024**3):.1f}GB available, "
            f"{available_for_models:.1f}GB for models"
        )
        return available_for_models

    except ImportError:
        logger.warning("psutil not installed, using fallback memory detection")

    # Fallback without psutil
    try:
        import sys

        if sys.platform == "darwin":
            import subprocess

            # macOS fallback: use total memory * 0.5 as a conservative estimate
            result = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            total_bytes = int(result.stdout.strip())
            total_gb = total_bytes / (1024**3)
            return total_gb * 0.5  # Conservative: assume 50% available
        else:
            # Linux: try /proc/meminfo
            with open("/proc/meminfo") as f:
                for line in f:
                    if line.startswith("MemAvailable:"):
                        kb = int(line.split()[1])
                        return kb / (1024**2)

    except Exception as e:
        logger.warning(f"Failed to get free memory: {e}")

    return 8.0  # Conservative fallback


def get_auto_whisper_model() -> str:
    """Select Whisper model based on available VRAM.

    | VRAM     | Model    | Size   | Quality |
    |----------|----------|--------|---------|
    | <4GB     | tiny     | 75MB   | Basic   |
    | 4-6GB    | small    | 488MB  | Good    |
    | 6-10GB   | medium   | 1.5GB  | Better  |
    | 10GB+    | large-v3 | 3GB    | Best    |
    """
    vram = get_available_vram_gb()

    if vram >= 10:
        model = "large-v3"
    elif vram >= 6:
        model = "medium"
    elif vram >= 4:
        model = "small"
    else:
        model = "tiny"

    logger.info(f"Auto-selected Whisper model: {model} (VRAM: {vram:.1f}GB)")
    return model


def get_auto_qwen_model() -> str:
    """Select Qwen2-VL model based on available VRAM.

    | VRAM     | Model          | Size  | Quality |
    |----------|----------------|-------|---------|
    | <8GB     | (use YOLO)     | -     | Basic   |
    | 8-16GB   | Qwen2-VL-2B    | ~5GB  | Good    |
    | 16GB+    | Qwen2-VL-7B    | ~15GB | Best    |
    """
    vram = get_available_vram_gb()

    if vram >= 16:
        model = "Qwen/Qwen2-VL-7B-Instruct"
    elif vram >= 8:
        model = "Qwen/Qwen2-VL-2B-Instruct"
    else:
        # Not enough VRAM for Qwen; the caller should use YOLO instead
        model = "Qwen/Qwen2-VL-2B-Instruct"
        logger.warning(f"Low VRAM ({vram:.1f}GB) - consider using YOLO instead of Qwen")

    logger.info(f"Auto-selected Qwen model: {model} (VRAM: {vram:.1f}GB)")
    return model


def get_auto_object_detector() -> ObjectDetector:
    """Select object detector based on available VRAM.

    YOLO is faster and uses less memory.
    Qwen provides better scene understanding but needs more VRAM.
    """
    vram = get_available_vram_gb()

    if vram >= 8:
        detector = ObjectDetector.QWEN
    else:
        detector = ObjectDetector.YOLO

    logger.info(f"Auto-selected object detector: {detector} (VRAM: {vram:.1f}GB)")
    return detector


def get_auto_yolo_model() -> str:
    """Select YOLO model based on available VRAM.

    | VRAM     | Model     | Size   | Speed   |
    |----------|-----------|--------|---------|
    | <2GB     | yolov8n   | 6MB    | Fastest |
    | 2-4GB    | yolov8s   | 22MB   | Fast    |
    | 4-8GB    | yolov8m   | 52MB   | Medium  |
    | 8-16GB   | yolov8l   | 87MB   | Slow    |
    | 16GB+    | yolov8x   | 136MB  | Slowest |
    """
    vram = get_available_vram_gb()

    if vram >= 16:
        model = "yolov8x.pt"
    elif vram >= 8:
        model = "yolov8l.pt"
    elif vram >= 4:
        model = "yolov8m.pt"
    elif vram >= 2:
        model = "yolov8s.pt"
    else:
        model = "yolov8n.pt"

    logger.info(f"Auto-selected YOLO model: {model} (VRAM: {vram:.1f}GB)")
    return model


def get_auto_clip_model() -> str:
    """Select CLIP model based on available VRAM.

    | VRAM     | Model     | Size    | Quality |
    |----------|-----------|---------|---------|
    | <2GB     | ViT-B-16  | 335MB   | Good    |
    | 2-4GB    | ViT-B-32  | 338MB   | Good    |
    | 4GB+     | ViT-L-14  | 933MB   | Best    |

    Note: ViT-B-16 and ViT-B-32 are similar in size but use different patch
    sizes. ViT-B-32 is slightly faster; ViT-B-16 has better detail recognition.
    """
    vram = get_available_vram_gb()

    if vram >= 4:
        model = "ViT-L-14"
    elif vram >= 2:
        model = "ViT-B-32"
    else:
        model = "ViT-B-16"

    logger.info(f"Auto-selected CLIP model: {model} (VRAM: {vram:.1f}GB)")
    return model
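
# --- Usage sketch (illustrative; not in the packaged file) ---
# All auto-selectors key off the same cached VRAM figure, so one machine gets
# a consistent tier. On a hypothetical 12GB GPU: whisper "large-v3" (>=10),
# detector QWEN with the 2B model (>=8, <16), "yolov8l.pt" (>=8), "ViT-L-14" (>=4).
def _sketch_auto_tier() -> None:
    for pick in (
        get_auto_whisper_model(),
        get_auto_object_detector(),
        get_auto_yolo_model(),
        get_auto_clip_model(),
    ):
        print(pick)
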
527
+
528
+ def get_vram_summary() -> dict:
529
+ """Get a summary of VRAM and auto-selected models.
530
+
531
+ Useful for frontend to display hardware capabilities.
532
+ """
533
+ vram = get_available_vram_gb() # Total VRAM (cached)
534
+ free_mem = get_free_memory_gb() # Currently available (not cached)
535
+ device = get_device()
536
+
537
+ # Model memory requirements
538
+ qwen_2b_needs = 5.0
539
+ qwen_7b_needs = 15.0
540
+ whisper_large_needs = 6.0
541
+
542
+ return {
543
+ "device": str(device),
544
+ "gpu_name": get_gpu_name(),
545
+ "vram_gb": round(vram, 1),
546
+ "free_memory_gb": round(free_mem, 1),
547
+ "auto_whisper_model": get_auto_whisper_model(),
548
+ "auto_qwen_model": get_auto_qwen_model() if vram >= 8 else None,
549
+ "auto_yolo_model": get_auto_yolo_model(),
550
+ "auto_clip_model": get_auto_clip_model(),
551
+ "auto_object_detector": str(get_auto_object_detector()),
552
+ # What the hardware CAN support (based on total VRAM)
553
+ "recommendations": {
554
+ "can_use_large_whisper": vram >= 10,
555
+ "can_use_qwen": vram >= 8,
556
+ "can_use_qwen_7b": vram >= 16,
557
+ "can_use_clip_l14": vram >= 4,
558
+ "can_use_yolo_xlarge": vram >= 16,
559
+ },
560
+ # What can load RIGHT NOW (based on free memory)
561
+ "available_now": {
562
+ "qwen_2b": free_mem >= qwen_2b_needs,
563
+ "qwen_7b": free_mem >= qwen_7b_needs,
564
+ "whisper_large": free_mem >= whisper_large_needs,
565
+ "whisper_medium": free_mem >= 4.0,
566
+ "whisper_small": free_mem >= 2.0,
567
+ "yolo": True, # YOLO is always available (~100MB)
568
+ "clip": free_mem >= 1.0,
569
+ },
570
+ }
571
+
572
+
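
# --- Usage sketch (illustrative; not in the packaged file) ---
# The summary is plain JSON-serializable data, so a health/status endpoint or
# the CLI can return it unchanged for a frontend to render.
def _sketch_report_capabilities() -> None:
    print(json.dumps(get_vram_summary(), indent=2))
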
# =============================================================================
# Memory Monitoring
# =============================================================================

# Approximate memory requirements for models (in GB)
MODEL_MEMORY_REQUIREMENTS: dict[str, float] = {
    # Whisper models
    "tiny": 1.0,
    "small": 2.0,
    "medium": 4.0,
    "large-v3": 6.0,
    # YOLO models
    "yolov8n.pt": 0.2,
    "yolov8s.pt": 0.3,
    "yolov8m.pt": 0.5,
    "yolov8l.pt": 0.8,
    "yolov8x.pt": 1.2,
    # Qwen VLM
    "Qwen/Qwen2-VL-2B-Instruct": 6.0,
    "Qwen/Qwen2-VL-7B-Instruct": 16.0,
    # CLIP
    "ViT-B-16": 0.4,
    "ViT-B-32": 0.4,
    "ViT-L-14": 1.0,
    # OCR
    "easyocr": 3.0,
    # Face detection
    "deepface": 0.5,
}


def get_available_memory_gb() -> tuple[float, float]:
    """Get available system RAM and GPU memory in GB.

    Returns:
        (available_ram_gb, available_vram_gb)
    """
    try:
        import psutil  # type: ignore[import-not-found]

        mem = psutil.virtual_memory()
        available_ram = mem.available / (1024**3)
    except ImportError:
        logger.warning("psutil not installed, cannot monitor RAM")
        available_ram = 8.0  # Conservative default

    # GPU memory
    available_vram = 0.0
    if has_cuda():
        try:
            import torch

            total = torch.cuda.get_device_properties(0).total_memory
            allocated = torch.cuda.memory_allocated(0)
            available_vram = (total - allocated) / (1024**3)
        except Exception:
            available_vram = get_available_vram_gb()
    elif is_apple_silicon():
        # Unified memory - estimate from system available
        available_vram = available_ram * 0.75

    return available_ram, available_vram


def check_memory_before_load(model_name: str, clear_memory_func: Any | None = None) -> bool:
    """Check if enough memory is available before loading a model.

    If memory is low, attempts to free memory by calling the clear function.

    Args:
        model_name: Name of the model to load (looked up in
            MODEL_MEMORY_REQUIREMENTS; unknown models assume 2GB)
        clear_memory_func: Optional function to call to free memory (e.g., gc.collect)

    Returns:
        True if enough memory is available, False otherwise
    """
    required_gb = MODEL_MEMORY_REQUIREMENTS.get(model_name, 2.0)  # Default 2GB
    ram, vram = get_available_memory_gb()

    # Use VRAM for GPU models, RAM for CPU
    device = get_device()
    available = vram if device != DeviceType.CPU else ram

    if available < required_gb:
        logger.warning(
            f"Low memory ({available:.1f}GB available) for {model_name} "
            f"({required_gb:.1f}GB required)"
        )

        # Try to free memory
        if clear_memory_func is not None:
            logger.info("Attempting to free memory...")
            clear_memory_func()

            # Re-check
            ram, vram = get_available_memory_gb()
            available = vram if device != DeviceType.CPU else ram

            if available < required_gb:
                logger.error(f"Still insufficient memory ({available:.1f}GB) for {model_name}")
                return False
            else:
                logger.info(f"Memory freed, now {available:.1f}GB available")

    return True
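
# --- Usage sketch (illustrative; not in the packaged file) ---
# Typical call site before loading a heavy model: pass a cleanup callback and
# fall back to a lighter model on failure. The names below are hypothetical.
def _sketch_guarded_load() -> None:
    import gc

    def _clear() -> None:
        gc.collect()  # on CUDA one would also call torch.cuda.empty_cache()

    if check_memory_before_load("Qwen/Qwen2-VL-7B-Instruct", clear_memory_func=_clear):
        model_name = "Qwen/Qwen2-VL-7B-Instruct"
    else:
        model_name = "Qwen/Qwen2-VL-2B-Instruct"  # lighter fallback
    print(f"loading {model_name}")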