media-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/clip.py +79 -0
- cli/faces.py +91 -0
- cli/metadata.py +68 -0
- cli/motion.py +77 -0
- cli/objects.py +94 -0
- cli/ocr.py +93 -0
- cli/scenes.py +57 -0
- cli/telemetry.py +65 -0
- cli/transcript.py +76 -0
- media_engine/__init__.py +7 -0
- media_engine/_version.py +34 -0
- media_engine/app.py +80 -0
- media_engine/batch/__init__.py +56 -0
- media_engine/batch/models.py +99 -0
- media_engine/batch/processor.py +1131 -0
- media_engine/batch/queue.py +232 -0
- media_engine/batch/state.py +30 -0
- media_engine/batch/timing.py +321 -0
- media_engine/cli.py +17 -0
- media_engine/config.py +674 -0
- media_engine/extractors/__init__.py +75 -0
- media_engine/extractors/clip.py +401 -0
- media_engine/extractors/faces.py +459 -0
- media_engine/extractors/frame_buffer.py +351 -0
- media_engine/extractors/frames.py +402 -0
- media_engine/extractors/metadata/__init__.py +127 -0
- media_engine/extractors/metadata/apple.py +169 -0
- media_engine/extractors/metadata/arri.py +118 -0
- media_engine/extractors/metadata/avchd.py +208 -0
- media_engine/extractors/metadata/avchd_gps.py +270 -0
- media_engine/extractors/metadata/base.py +688 -0
- media_engine/extractors/metadata/blackmagic.py +139 -0
- media_engine/extractors/metadata/camera_360.py +276 -0
- media_engine/extractors/metadata/canon.py +290 -0
- media_engine/extractors/metadata/dji.py +371 -0
- media_engine/extractors/metadata/dv.py +121 -0
- media_engine/extractors/metadata/ffmpeg.py +76 -0
- media_engine/extractors/metadata/generic.py +119 -0
- media_engine/extractors/metadata/gopro.py +256 -0
- media_engine/extractors/metadata/red.py +305 -0
- media_engine/extractors/metadata/registry.py +114 -0
- media_engine/extractors/metadata/sony.py +442 -0
- media_engine/extractors/metadata/tesla.py +157 -0
- media_engine/extractors/motion.py +765 -0
- media_engine/extractors/objects.py +245 -0
- media_engine/extractors/objects_qwen.py +754 -0
- media_engine/extractors/ocr.py +268 -0
- media_engine/extractors/scenes.py +82 -0
- media_engine/extractors/shot_type.py +217 -0
- media_engine/extractors/telemetry.py +262 -0
- media_engine/extractors/transcribe.py +579 -0
- media_engine/extractors/translate.py +121 -0
- media_engine/extractors/vad.py +263 -0
- media_engine/main.py +68 -0
- media_engine/py.typed +0 -0
- media_engine/routers/__init__.py +15 -0
- media_engine/routers/batch.py +78 -0
- media_engine/routers/health.py +93 -0
- media_engine/routers/models.py +211 -0
- media_engine/routers/settings.py +87 -0
- media_engine/routers/utils.py +135 -0
- media_engine/schemas.py +581 -0
- media_engine/utils/__init__.py +5 -0
- media_engine/utils/logging.py +54 -0
- media_engine/utils/memory.py +49 -0
- media_engine-0.1.0.dist-info/METADATA +276 -0
- media_engine-0.1.0.dist-info/RECORD +70 -0
- media_engine-0.1.0.dist-info/WHEEL +4 -0
- media_engine-0.1.0.dist-info/entry_points.txt +11 -0
- media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"""Object detection using YOLO."""
|
|
2
|
+
|
|
3
|
+
import gc
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from media_engine.config import DeviceType, get_device
|
|
9
|
+
from media_engine.extractors.frame_buffer import SharedFrameBuffer
|
|
10
|
+
from media_engine.schemas import (
|
|
11
|
+
BoundingBox,
|
|
12
|
+
ObjectDetection,
|
|
13
|
+
ObjectsResult,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
# Singleton YOLO model (lazy loaded)
|
|
19
|
+
_yolo_model: Any = None
|
|
20
|
+
_yolo_model_name: str | None = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def unload_yolo_model() -> None:
    """Drop the cached YOLO model and release accelerator memory.

    No-op when no model is currently loaded. Any failure during cleanup is
    logged as a warning and the module-level cache is still cleared.
    """
    global _yolo_model, _yolo_model_name

    # Nothing cached -> nothing to do.
    if _yolo_model is None:
        return

    logger.info("Unloading YOLO model to free memory")

    try:
        import torch

        # Drop the only strong reference before collecting, so the model's
        # tensors become reclaimable.
        del _yolo_model
        _yolo_model = None
        _yolo_model_name = None

        gc.collect()

        # Flush the device allocator cache for whichever backend is active.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
            torch.cuda.empty_cache()
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            # These helpers only exist on newer torch builds.
            mps_sync = getattr(torch.mps, "synchronize", None)
            if mps_sync is not None:
                mps_sync()
            mps_empty = getattr(torch.mps, "empty_cache", None)
            if mps_empty is not None:
                mps_empty()

        gc.collect()
        logger.info("YOLO model unloaded")
    except Exception as e:
        # Best-effort cleanup: even on failure, forget the cached model.
        logger.warning(f"Error unloading YOLO model: {e}")
        _yolo_model = None
        _yolo_model_name = None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _get_yolo_model(model_name: str) -> Any:
    """Return the process-wide YOLO model, loading or swapping it as needed.

    Keeps a single model instance alive at a time; requesting a different
    ``model_name`` unloads the current one before loading the new one.
    """
    global _yolo_model, _yolo_model_name

    if _yolo_model is not None:
        # Cache hit: same model already loaded.
        if _yolo_model_name == model_name:
            return _yolo_model
        # Different model requested -> evict the cached one first.
        logger.info(f"Switching YOLO model from {_yolo_model_name} to {model_name}")
        unload_yolo_model()

    # Imported lazily so ultralytics is only required when detection runs.
    from ultralytics import YOLO  # type: ignore[import-not-found]

    logger.info(f"Loading YOLO model: {model_name}")
    _yolo_model = YOLO(model_name)
    _yolo_model_name = model_name
    return _yolo_model
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def extract_objects(
    file_path: str,
    frame_buffer: SharedFrameBuffer,
    min_confidence: float = 0.6,
    min_size: int = 50,
    model_name: str = "yolov8m.pt",
) -> ObjectsResult:
    """Extract objects from video frames using YOLO.

    Args:
        file_path: Path to video file (used for logging)
        frame_buffer: Pre-decoded frames from SharedFrameBuffer
        min_confidence: Minimum detection confidence (0.6 recommended)
        min_size: Minimum object size in pixels (filters noise)
        model_name: YOLO model (yolov8m.pt recommended for accuracy)

    Returns:
        ObjectsResult with unique objects and summary

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    video_path = Path(file_path)
    if not video_path.exists():
        raise FileNotFoundError(f"Video file not found: {file_path}")

    # Map the configured device to the string form ultralytics expects.
    device = get_device()
    if device == DeviceType.MPS:
        device_str = "mps"
    elif device == DeviceType.CUDA:
        device_str = "cuda"
    else:
        device_str = "cpu"

    # Singleton model, loaded on first use.
    detector = _get_yolo_model(model_name)

    raw_detections: list[ObjectDetection] = []

    logger.info(f"Processing {len(frame_buffer.frames)} frames for object detection")
    # Walk frames in timestamp order so tracking downstream sees time-sorted input.
    for timestamp in sorted(frame_buffer.frames):
        frame = frame_buffer.frames[timestamp]
        try:
            for result in detector(frame.bgr, verbose=False, device=device_str):
                boxes = result.boxes
                if boxes is None:
                    continue

                for idx in range(len(boxes)):
                    score = float(boxes.conf[idx])
                    if score < min_confidence:
                        continue

                    # Corner coordinates -> width/height for size filtering.
                    x1, y1, x2, y2 = boxes.xyxy[idx].tolist()
                    box_w = int(x2 - x1)
                    box_h = int(y2 - y1)

                    # Tiny detections are usually noise.
                    if box_w < min_size or box_h < min_size:
                        continue

                    cls_id = int(boxes.cls[idx])
                    label = detector.names[cls_id] if detector.names else str(cls_id)

                    raw_detections.append(
                        ObjectDetection(
                            timestamp=round(timestamp, 2),
                            label=label,
                            confidence=round(score, 3),
                            bbox=BoundingBox(
                                x=int(x1),
                                y=int(y1),
                                width=box_w,
                                height=box_h,
                            ),
                        )
                    )
        except Exception as e:
            # One bad frame should not abort the whole extraction.
            logger.warning(f"Failed to process frame at {timestamp}s: {e}")

    # Collapse repeated sightings of the same physical object.
    unique_detections, summary = _deduplicate_objects(raw_detections)

    logger.info(f"Detected {len(raw_detections)} objects, " f"{len(unique_detections)} unique across {len(summary)} types")

    return ObjectsResult(
        summary=summary,
        detections=unique_detections,
    )
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _bbox_iou(box1: BoundingBox, box2: BoundingBox) -> float:
    """Return the intersection-over-union of two axis-aligned boxes (0.0..1.0)."""
    # Intersection rectangle edges.
    left = max(box1.x, box2.x)
    top = max(box1.y, box2.y)
    right = min(box1.x + box1.width, box2.x + box2.width)
    bottom = min(box1.y + box1.height, box2.y + box2.height)

    overlap_w = right - left
    overlap_h = bottom - top
    # Disjoint (or merely touching) boxes have zero overlap.
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    intersection = overlap_w * overlap_h
    union = box1.width * box1.height + box2.width * box2.height - intersection

    # Guard against degenerate zero-area boxes.
    return intersection / union if union > 0 else 0.0
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _deduplicate_objects(
    detections: list[ObjectDetection],
    max_time_gap: float = 5.0,
    min_iou: float = 0.3,
) -> tuple[list[ObjectDetection], dict[str, int]]:
    """Deduplicate objects using position tracking.

    Detections of the same label whose boxes overlap (IoU >= ``min_iou``)
    within ``max_time_gap`` seconds are treated as one tracked object; only
    the highest-confidence detection of each track is kept.

    Returns:
        Tuple of (unique detections sorted by timestamp,
        per-label count of tracked instances).
    """
    if not detections:
        return [], {}

    # Bucket detections by class label.
    by_label: dict[str, list[ObjectDetection]] = {}
    for det in detections:
        by_label.setdefault(det.label, []).append(det)

    unique_objects: list[ObjectDetection] = []
    summary: dict[str, int] = {}

    for label, group in by_label.items():
        # Each inner list is one tracked instance of this label.
        tracks: list[list[ObjectDetection]] = []

        # Greedy assignment in time order: attach to the first track whose
        # latest detection is recent enough and spatially overlapping.
        for det in sorted(group, key=lambda d: d.timestamp):
            assigned = None
            for track_idx, track in enumerate(tracks):
                prev = track[-1]
                if (
                    det.timestamp - prev.timestamp <= max_time_gap
                    and _bbox_iou(det.bbox, prev.bbox) >= min_iou
                ):
                    assigned = track_idx
                    break

            if assigned is None:
                tracks.append([det])
            else:
                tracks[assigned].append(det)

        # Keep only the most confident sighting per track.
        unique_objects.extend(max(track, key=lambda d: d.confidence) for track in tracks)
        summary[label] = len(tracks)

    # Present results in chronological order.
    unique_objects.sort(key=lambda d: d.timestamp)

    return unique_objects, summary
|