media_engine-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
media_engine/extractors/objects.py
@@ -0,0 +1,245 @@
+"""Object detection using YOLO."""
+
+import gc
+import logging
+from pathlib import Path
+from typing import Any
+
+from media_engine.config import DeviceType, get_device
+from media_engine.extractors.frame_buffer import SharedFrameBuffer
+from media_engine.schemas import (
+    BoundingBox,
+    ObjectDetection,
+    ObjectsResult,
+)
+
+logger = logging.getLogger(__name__)
+
+# Singleton YOLO model (lazy loaded)
+_yolo_model: Any = None
+_yolo_model_name: str | None = None
+
+
+def unload_yolo_model() -> None:
+    """Unload the YOLO model to free memory."""
+    global _yolo_model, _yolo_model_name
+
+    if _yolo_model is None:
+        return
+
+    logger.info("Unloading YOLO model to free memory")
+
+    try:
+        # Clear CUDA/MPS cache
+        import torch
+
+        del _yolo_model
+        _yolo_model = None
+        _yolo_model_name = None
+
+        gc.collect()
+
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
+            torch.cuda.empty_cache()
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            if hasattr(torch.mps, "synchronize"):
+                torch.mps.synchronize()
+            if hasattr(torch.mps, "empty_cache"):
+                torch.mps.empty_cache()
+
+        gc.collect()
+        logger.info("YOLO model unloaded")
+    except Exception as e:
+        logger.warning(f"Error unloading YOLO model: {e}")
+        _yolo_model = None
+        _yolo_model_name = None
+
+
+def _get_yolo_model(model_name: str) -> Any:
+    """Get or create the YOLO model (singleton with model switching)."""
+    global _yolo_model, _yolo_model_name
+
+    # If model name changed, unload old model
+    if _yolo_model is not None and _yolo_model_name != model_name:
+        logger.info(f"Switching YOLO model from {_yolo_model_name} to {model_name}")
+        unload_yolo_model()
+
+    if _yolo_model is None:
+        from ultralytics import YOLO  # type: ignore[import-not-found]
+
+        logger.info(f"Loading YOLO model: {model_name}")
+        _yolo_model = YOLO(model_name)
+        _yolo_model_name = model_name
+
+    return _yolo_model
+
+
+def extract_objects(
+    file_path: str,
+    frame_buffer: SharedFrameBuffer,
+    min_confidence: float = 0.6,
+    min_size: int = 50,
+    model_name: str = "yolov8m.pt",
+) -> ObjectsResult:
+    """Extract objects from video frames using YOLO.
+
+    Args:
+        file_path: Path to video file (used for logging)
+        frame_buffer: Pre-decoded frames from SharedFrameBuffer
+        min_confidence: Minimum detection confidence (0.6 recommended)
+        min_size: Minimum object size in pixels (filters noise)
+        model_name: YOLO model (yolov8m.pt recommended for accuracy)
+
+    Returns:
+        ObjectsResult with unique objects and summary
+    """
+    path = Path(file_path)
+    if not path.exists():
+        raise FileNotFoundError(f"Video file not found: {file_path}")
+
+    # Determine device for GPU acceleration
+    device = get_device()
+    device_str = "mps" if device == DeviceType.MPS else "cuda" if device == DeviceType.CUDA else "cpu"
+
+    # Load model (singleton)
+    model = _get_yolo_model(model_name)
+
+    # Process frames from shared buffer
+    raw_detections: list[ObjectDetection] = []
+
+    logger.info(f"Processing {len(frame_buffer.frames)} frames for object detection")
+    for ts in sorted(frame_buffer.frames.keys()):
+        shared_frame = frame_buffer.frames[ts]
+        try:
+            results = model(shared_frame.bgr, verbose=False, device=device_str)
+
+            for result in results:
+                boxes = result.boxes
+                if boxes is None:
+                    continue
+
+                for i in range(len(boxes)):
+                    confidence = float(boxes.conf[i])
+                    if confidence < min_confidence:
+                        continue
+
+                    # Get bounding box
+                    x1, y1, x2, y2 = boxes.xyxy[i].tolist()
+                    width = int(x2 - x1)
+                    height = int(y2 - y1)
+
+                    # Filter small detections
+                    if width < min_size or height < min_size:
+                        continue
+
+                    # Get class label
+                    class_id = int(boxes.cls[i])
+                    label = model.names[class_id] if model.names else str(class_id)
+
+                    raw_detections.append(
+                        ObjectDetection(
+                            timestamp=round(ts, 2),
+                            label=label,
+                            confidence=round(confidence, 3),
+                            bbox=BoundingBox(
+                                x=int(x1),
+                                y=int(y1),
+                                width=width,
+                                height=height,
+                            ),
+                        )
+                    )
+
+        except Exception as e:
+            logger.warning(f"Failed to process frame at {ts}s: {e}")
+
+    # Deduplicate - track unique objects
+    unique_detections, summary = _deduplicate_objects(raw_detections)
+
+    logger.info(f"Detected {len(raw_detections)} objects, " f"{len(unique_detections)} unique across {len(summary)} types")
+
+    return ObjectsResult(
+        summary=summary,
+        detections=unique_detections,
+    )
+
+
+def _bbox_iou(box1: BoundingBox, box2: BoundingBox) -> float:
+    """Calculate IoU of two bounding boxes."""
+    x1 = max(box1.x, box2.x)
+    y1 = max(box1.y, box2.y)
+    x2 = min(box1.x + box1.width, box2.x + box2.width)
+    y2 = min(box1.y + box1.height, box2.y + box2.height)
+
+    if x2 <= x1 or y2 <= y1:
+        return 0.0
+
+    intersection = (x2 - x1) * (y2 - y1)
+    area1 = box1.width * box1.height
+    area2 = box2.width * box2.height
+    union = area1 + area2 - intersection
+
+    return intersection / union if union > 0 else 0.0
+
+
+def _deduplicate_objects(
+    detections: list[ObjectDetection],
+    max_time_gap: float = 5.0,
+    min_iou: float = 0.3,
+) -> tuple[list[ObjectDetection], dict[str, int]]:
+    """Deduplicate objects using position tracking.
+
+    Groups detections of same object type that overlap across frames.
+    Returns unique objects (best detection per tracked object).
+    """
+    if not detections:
+        return [], {}
+
+    # Group by label first
+    by_label: dict[str, list[ObjectDetection]] = {}
+    for det in detections:
+        if det.label not in by_label:
+            by_label[det.label] = []
+        by_label[det.label].append(det)
+
+    unique_objects: list[ObjectDetection] = []
+    summary: dict[str, int] = {}
+
+    for label, label_dets in by_label.items():
+        # Sort by timestamp
+        sorted_dets = sorted(label_dets, key=lambda d: d.timestamp)
+
+        # Track unique instances of this object type
+        tracked: list[list[ObjectDetection]] = []
+
+        for det in sorted_dets:
+            matched_track = None
+
+            # Find matching track (same position in recent frames)
+            for track_idx, track in enumerate(tracked):
+                last_det = track[-1]
+                time_diff = det.timestamp - last_det.timestamp
+
+                if time_diff <= max_time_gap:
+                    iou = _bbox_iou(det.bbox, last_det.bbox)
+                    if iou >= min_iou:
+                        matched_track = track_idx
+                        break
+
+            if matched_track is not None:
+                tracked[matched_track].append(det)
+            else:
+                tracked.append([det])
+
+        # Keep best detection per track
+        for track in tracked:
+            best = max(track, key=lambda d: d.confidence)
+            unique_objects.append(best)
+
+        summary[label] = len(tracked)
+
+    # Sort by timestamp
+    unique_objects.sort(key=lambda d: d.timestamp)
+
+    return unique_objects, summary
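For context, a minimal usage sketch (not part of this release) follows: it drives extract_objects with a stand-in frame buffer and prints the result. The real SharedFrameBuffer lives in media_engine/extractors/frame_buffer.py and is built by the batch pipeline; its constructor does not appear in this diff, so the _StubBuffer and _sample_frames helpers below are hypothetical placeholders that only mimic what the extractor actually reads: a frames dict keyed by timestamp, each entry exposing a .bgr image array. OpenCV is assumed here purely to decode sample frames.

# Illustrative sketch only; assumes OpenCV and the ultralytics dependency are installed.
from dataclasses import dataclass, field
from typing import Any

import cv2

from media_engine.extractors.objects import extract_objects


@dataclass
class _Frame:
    bgr: Any  # BGR image array, the only per-frame attribute objects.py reads


@dataclass
class _StubBuffer:
    # Mimics SharedFrameBuffer.frames: {timestamp_seconds: frame}
    frames: dict[float, _Frame] = field(default_factory=dict)


def _sample_frames(video_path: str, every_seconds: float = 1.0) -> _StubBuffer:
    # Hypothetical helper: decode roughly one frame per every_seconds into the stub buffer.
    buf = _StubBuffer()
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    step = max(int(fps * every_seconds), 1)
    idx = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if idx % step == 0:
            buf.frames[idx / fps] = _Frame(bgr=frame)
        idx += 1
    cap.release()
    return buf


if __name__ == "__main__":
    buffer = _sample_frames("clip.mp4")
    result = extract_objects("clip.mp4", frame_buffer=buffer)
    print(result.summary)           # per-label count of tracked instances, e.g. {"person": 2}
    for det in result.detections:   # best detection per tracked object, sorted by timestamp
        print(det.timestamp, det.label, det.confidence)

Duck typing is enough here because extract_objects only iterates frame_buffer.frames and hands each .bgr array to the YOLO model; after _deduplicate_objects runs, summary maps each label to the number of tracked instances and detections keeps the single best-confidence detection per track.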