matrice-streaming 0.1.14__py3-none-any.whl → 0.1.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_streaming/__init__.py +44 -32
- matrice_streaming/streaming_gateway/camera_streamer/__init__.py +68 -1
- matrice_streaming/streaming_gateway/camera_streamer/async_camera_worker.py +1388 -0
- matrice_streaming/streaming_gateway/camera_streamer/async_ffmpeg_worker.py +966 -0
- matrice_streaming/streaming_gateway/camera_streamer/camera_streamer.py +188 -24
- matrice_streaming/streaming_gateway/camera_streamer/device_detection.py +507 -0
- matrice_streaming/streaming_gateway/camera_streamer/encoding_pool_manager.py +136 -0
- matrice_streaming/streaming_gateway/camera_streamer/ffmpeg_camera_streamer.py +1048 -0
- matrice_streaming/streaming_gateway/camera_streamer/ffmpeg_config.py +192 -0
- matrice_streaming/streaming_gateway/camera_streamer/ffmpeg_worker_manager.py +470 -0
- matrice_streaming/streaming_gateway/camera_streamer/gstreamer_camera_streamer.py +1368 -0
- matrice_streaming/streaming_gateway/camera_streamer/gstreamer_worker.py +1063 -0
- matrice_streaming/streaming_gateway/camera_streamer/gstreamer_worker_manager.py +546 -0
- matrice_streaming/streaming_gateway/camera_streamer/message_builder.py +60 -15
- matrice_streaming/streaming_gateway/camera_streamer/nvdec.py +1330 -0
- matrice_streaming/streaming_gateway/camera_streamer/nvdec_worker_manager.py +412 -0
- matrice_streaming/streaming_gateway/camera_streamer/platform_pipelines.py +680 -0
- matrice_streaming/streaming_gateway/camera_streamer/stream_statistics.py +111 -4
- matrice_streaming/streaming_gateway/camera_streamer/video_capture_manager.py +223 -27
- matrice_streaming/streaming_gateway/camera_streamer/worker_manager.py +694 -0
- matrice_streaming/streaming_gateway/debug/__init__.py +27 -2
- matrice_streaming/streaming_gateway/debug/benchmark.py +727 -0
- matrice_streaming/streaming_gateway/debug/debug_gstreamer_gateway.py +599 -0
- matrice_streaming/streaming_gateway/debug/debug_streaming_gateway.py +245 -95
- matrice_streaming/streaming_gateway/debug/debug_utils.py +29 -0
- matrice_streaming/streaming_gateway/debug/test_videoplayback.py +318 -0
- matrice_streaming/streaming_gateway/dynamic_camera_manager.py +656 -39
- matrice_streaming/streaming_gateway/metrics_reporter.py +676 -139
- matrice_streaming/streaming_gateway/streaming_action.py +71 -20
- matrice_streaming/streaming_gateway/streaming_gateway.py +1026 -78
- matrice_streaming/streaming_gateway/streaming_gateway_utils.py +175 -20
- matrice_streaming/streaming_gateway/streaming_status_listener.py +89 -0
- {matrice_streaming-0.1.14.dist-info → matrice_streaming-0.1.65.dist-info}/METADATA +1 -1
- matrice_streaming-0.1.65.dist-info/RECORD +56 -0
- matrice_streaming-0.1.14.dist-info/RECORD +0 -38
- {matrice_streaming-0.1.14.dist-info → matrice_streaming-0.1.65.dist-info}/WHEEL +0 -0
- {matrice_streaming-0.1.14.dist-info → matrice_streaming-0.1.65.dist-info}/licenses/LICENSE.txt +0 -0
- {matrice_streaming-0.1.14.dist-info → matrice_streaming-0.1.65.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,546 @@
|
|
|
1
|
+
"""Worker manager for coordinating multiple GStreamer async camera workers.
|
|
2
|
+
|
|
3
|
+
This module manages a pool of GStreamer worker processes, distributing cameras
|
|
4
|
+
across them for hardware-accelerated video encoding.
|
|
5
|
+
"""
|
|
6
|
+
import logging
|
|
7
|
+
import multiprocessing
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
import time
|
|
11
|
+
import signal
|
|
12
|
+
from typing import List, Dict, Any, Optional
|
|
13
|
+
|
|
14
|
+
from .gstreamer_worker import run_gstreamer_worker, is_gstreamer_available
|
|
15
|
+
from .camera_streamer import CameraStreamer
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GStreamerWorkerManager:
|
|
19
|
+
"""Manages multiple GStreamer async camera worker processes.
|
|
20
|
+
|
|
21
|
+
This manager coordinates worker processes using GStreamer pipelines
|
|
22
|
+
for efficient hardware/software video encoding. It follows the same
|
|
23
|
+
API as WorkerManager for drop-in replacement.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __init__(
    self,
    camera_configs: List[Dict[str, Any]],
    stream_config: Dict[str, Any],
    num_workers: Optional[int] = None,
    cpu_percentage: float = 0.9,
    max_cameras_per_worker: int = 100,
    gstreamer_encoder: str = "auto",
    gstreamer_codec: str = "h264",
    gstreamer_preset: str = "low-latency",
    gpu_id: int = 0,
    # Platform-specific parameters
    platform: str = "auto",
    use_hardware_decode: bool = True,
    use_hardware_jpeg: bool = True,
    jetson_use_nvmm: bool = True,
    frame_optimizer_mode: str = "hash-only",
    fallback_on_error: bool = True,
    verbose_pipeline_logging: bool = False,
):
    """Initialize the GStreamer worker manager.

    Args:
        camera_configs: All camera configurations to distribute.
        stream_config: Streaming configuration (Redis, Kafka, etc.).
        num_workers: Worker process count; auto-sized per platform when None.
        cpu_percentage: Fraction of CPU cores used for CPU-bound sizing.
        max_cameras_per_worker: Hard cap on cameras a single worker may own.
        gstreamer_encoder: Encoder type (auto, nvenc, x264, openh264, jpeg).
        gstreamer_codec: Codec (h264, h265).
        gstreamer_preset: NVENC preset.
        gpu_id: GPU device ID for NVENC.
        platform: Platform override (auto, jetson, desktop-gpu, intel, amd, cpu).
        use_hardware_decode: Enable hardware decode.
        use_hardware_jpeg: Enable hardware JPEG encoding.
        jetson_use_nvmm: Use NVMM zero-copy on Jetson.
        frame_optimizer_mode: Frame optimization mode (hash-only, dual-appsink, disabled).
        fallback_on_error: Fall back to CPU pipeline on errors.
        verbose_pipeline_logging: Enable verbose pipeline logging.

    Raises:
        RuntimeError: If GStreamer (PyGObject + plugins) is not installed.
    """
    if not is_gstreamer_available():
        raise RuntimeError(
            "GStreamer not available. Install with: "
            "pip install PyGObject && apt-get install gstreamer1.0-plugins-*"
        )

    self.camera_configs = camera_configs
    self.stream_config = stream_config
    self.gstreamer_encoder = gstreamer_encoder
    self.gstreamer_codec = gstreamer_codec
    self.gstreamer_preset = gstreamer_preset
    self.gpu_id = gpu_id

    # Platform-specific settings.
    self.platform = platform
    self.use_hardware_decode = use_hardware_decode
    self.use_hardware_jpeg = use_hardware_jpeg
    self.jetson_use_nvmm = jetson_use_nvmm
    self.frame_optimizer_mode = frame_optimizer_mode
    self.fallback_on_error = fallback_on_error
    self.verbose_pipeline_logging = verbose_pipeline_logging

    # Detect the hardware we are running on; this drives worker sizing.
    from .device_detection import PlatformDetector, PlatformType
    self.platform_detector = PlatformDetector.get_instance()
    self.platform_info = self.platform_detector.detect()

    self.logger = logging.getLogger(__name__)
    self.logger.info(
        f"Detected platform: {self.platform_info.platform_type.value}, "
        f"Model: {self.platform_info.model}"
    )

    if num_workers is not None:
        self.num_workers = num_workers
    else:
        cores = os.cpu_count() or 4
        camera_total = len(camera_configs)
        detected = self.platform_info.platform_type

        if detected == PlatformType.JETSON:
            # Few cores, but hardware encoding lets each worker carry ~30 cameras.
            sized = max(4, min(camera_total // 30, 8))
            self.logger.info("Jetson platform: limiting to max 8 workers")
        elif detected in (PlatformType.INTEL_GPU, PlatformType.AMD_GPU):
            # VAAPI acceleration: moderate parallelism, ~25 cameras per worker.
            sized = max(4, min(camera_total // 25, 16))
        elif detected == PlatformType.DESKTOP_NVIDIA_GPU:
            # Desktop GPU: scale by camera count on big hosts, by cores otherwise.
            if cores >= 16 or camera_total >= 100:
                sized = max(4, min(camera_total // 25, 50))
            else:
                sized = max(4, int(cores * cpu_percentage))
        else:
            # CPU-only encoding needs more workers for parallelism.
            sized = max(4, int(cores * cpu_percentage))

        # Never spin up more workers than there are cameras.
        self.num_workers = min(sized, camera_total) if camera_total > 0 else sized

    self.max_cameras_per_worker = max_cameras_per_worker

    self.logger.info(
        f"GStreamerWorkerManager: {self.num_workers} workers for {len(camera_configs)} cameras, "
        f"encoder={gstreamer_encoder}, codec={gstreamer_codec}, gpu={gpu_id}"
    )

    # GStreamer keeps global state that misbehaves under fork-after-init,
    # so worker processes are always created with the spawn context.
    self._mp_ctx = multiprocessing.get_context('spawn')

    # Shared multiprocessing primitives (spawn context).
    self.stop_event = self._mp_ctx.Event()
    self.health_queue = self._mp_ctx.Queue()

    # Worker processes and their static camera assignments.
    self.workers: List[multiprocessing.Process] = []
    self.worker_camera_assignments: Dict[int, List[Dict[str, Any]]] = {}

    # Most recent health report per worker id.
    self.last_health_reports: Dict[int, Dict[str, Any]] = {}

    # Dynamic camera management state.
    self.command_queues: Dict[int, multiprocessing.Queue] = {}
    self.response_queue = self._mp_ctx.Queue()
    self.camera_to_worker: Dict[str, int] = {}
    self.worker_camera_count: Dict[int, int] = {}
|
|
159
|
+
|
|
160
|
+
def start(self):
    """Distribute cameras across workers and launch every worker process.

    Raises:
        Exception: Re-raises any startup failure after stopping workers
            that already launched.
    """
    try:
        self._distribute_cameras()

        self.logger.info(f"Starting {self.num_workers} GStreamer workers...")
        for wid in range(self.num_workers):
            self._start_worker(wid)

        self.logger.info(
            f"All GStreamer workers started! "
            f"Streaming {len(self.camera_configs)} cameras"
        )
    except Exception as exc:
        # Roll back any partially-started workers before re-raising.
        self.logger.error(f"Failed to start workers: {exc}")
        self.stop()
        raise
|
|
178
|
+
|
|
179
|
+
def _distribute_cameras(self):
|
|
180
|
+
"""Distribute cameras across workers."""
|
|
181
|
+
total = len(self.camera_configs)
|
|
182
|
+
per_worker = total // self.num_workers
|
|
183
|
+
remainder = total % self.num_workers
|
|
184
|
+
|
|
185
|
+
self.logger.info(f"Distributing {total} cameras: ~{per_worker} per worker")
|
|
186
|
+
|
|
187
|
+
idx = 0
|
|
188
|
+
for worker_id in range(self.num_workers):
|
|
189
|
+
count = per_worker + (1 if worker_id < remainder else 0)
|
|
190
|
+
worker_cameras = self.camera_configs[idx:idx + count]
|
|
191
|
+
self.worker_camera_assignments[worker_id] = worker_cameras
|
|
192
|
+
|
|
193
|
+
self.logger.info(
|
|
194
|
+
f"GStreamer Worker {worker_id}: {len(worker_cameras)} cameras"
|
|
195
|
+
)
|
|
196
|
+
idx += count
|
|
197
|
+
|
|
198
|
+
def _start_worker(self, worker_id: int):
    """Spawn one GStreamer worker process for its assigned cameras.

    Creates the worker's command queue, records camera ownership, tunes
    batching for the worker's camera load, and launches the process with
    the spawn multiprocessing context.

    Args:
        worker_id: Index of the worker to start.

    Raises:
        Exception: Re-raises any failure while creating/starting the process.
    """
    assigned = self.worker_camera_assignments.get(worker_id, [])

    # Per-worker command queue for dynamic add/remove/update (spawn context).
    cmd_queue = self._mp_ctx.Queue()
    self.command_queues[worker_id] = cmd_queue

    # Record which worker owns each stream key.
    self.worker_camera_count[worker_id] = len(assigned)
    for cfg in assigned:
        key = cfg.get('stream_key')
        if key:
            self.camera_to_worker[key] = worker_id

    # Copy the stream config so per-worker batching tweaks stay local.
    cfg_for_worker = self.stream_config.copy()

    cam_count = len(assigned)
    if cam_count > 0 and cfg_for_worker.get('enable_batching', True):
        # Size batches for this worker's actual camera load.
        params = CameraStreamer.calculate_batch_parameters(cam_count)
        cfg_for_worker.update({
            'enable_batching': True,
            'batch_size': params['batch_size'],
            'batch_timeout': params['batch_timeout']
        })
        self.logger.info(
            f"Worker {worker_id}: Optimized batching for {cam_count} cameras - "
            f"batch_size={params['batch_size']}, "
            f"batch_timeout={params['batch_timeout']*1000:.1f}ms"
        )

    try:
        process = self._mp_ctx.Process(
            target=run_gstreamer_worker,
            args=(
                worker_id,
                assigned,
                cfg_for_worker,
                self.stop_event,
                self.health_queue,
                cmd_queue,
                self.response_queue,
                self.gstreamer_encoder,
                self.gstreamer_codec,
                self.gstreamer_preset,
                self.gpu_id,
                self.platform,
                self.use_hardware_decode,
                self.use_hardware_jpeg,
                self.jetson_use_nvmm,
                self.frame_optimizer_mode,
                self.fallback_on_error,
                self.verbose_pipeline_logging,
            ),
            name=f"GStreamerWorker-{worker_id}",
            daemon=False
        )
        process.start()
        self.workers.append(process)

        self.logger.info(
            f"Started GStreamer worker {worker_id} (PID: {process.pid}) "
            f"with {len(assigned)} cameras (context: spawn)"
        )
    except Exception as exc:
        self.logger.error(f"Failed to start worker {worker_id}: {exc}")
        raise
|
|
269
|
+
|
|
270
|
+
def monitor(self, duration: Optional[float] = None):
    """Monitor workers, collecting health reports until stopped.

    Loops every 0.5 s: drains the health queue into last_health_reports,
    warns on error/stopped reports, logs (once) when a worker process
    dies, and prints a summary every 10 s.

    Args:
        duration: Optional time limit in seconds; runs until the stop
            event (or Ctrl-C) when None.
    """
    self.logger.info("Starting GStreamer health monitoring...")

    start_time = time.time()
    last_summary_time = start_time
    dead_reported = set()  # worker indices already logged as dead

    try:
        while not self.stop_event.is_set():
            if duration and (time.time() - start_time) >= duration:
                break

            # Collect health reports.
            while not self.health_queue.empty():
                try:
                    report = self.health_queue.get_nowait()
                except Exception:
                    # empty() is only advisory for multiprocessing queues:
                    # a racing consumer can make get_nowait() raise Empty.
                    # That is not an error; just stop draining.
                    break

                try:
                    worker_id = report['worker_id']
                    self.last_health_reports[worker_id] = report

                    if report['status'] in ['error', 'stopped']:
                        self.logger.warning(
                            f"GStreamer Worker {worker_id}: {report['status']} "
                            f"(error: {report.get('error')})"
                        )
                except Exception as exc:
                    self.logger.error(f"Health report error: {exc}")

            # Check workers; log each death only once instead of every cycle.
            for i, worker in enumerate(self.workers):
                if (not worker.is_alive() and not self.stop_event.is_set()
                        and i not in dead_reported):
                    dead_reported.add(i)
                    self.logger.error(
                        f"GStreamer Worker {i} (PID: {worker.pid}) died! "
                        f"Exit code: {worker.exitcode}"
                    )

            # Print summary every 10 seconds.
            if time.time() - last_summary_time >= 10.0:
                self._print_health_summary()
                last_summary_time = time.time()

            time.sleep(0.5)

    except KeyboardInterrupt:
        self.logger.info("Monitoring interrupted")
|
|
314
|
+
|
|
315
|
+
def _print_health_summary(self):
|
|
316
|
+
"""Print health summary."""
|
|
317
|
+
running = sum(1 for w in self.workers if w.is_alive())
|
|
318
|
+
total_cameras = sum(
|
|
319
|
+
r.get('active_cameras', 0) for r in self.last_health_reports.values()
|
|
320
|
+
)
|
|
321
|
+
|
|
322
|
+
self.logger.info(
|
|
323
|
+
f"GStreamer Health: {running}/{len(self.workers)} workers, "
|
|
324
|
+
f"{total_cameras} cameras"
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
# Log encoder info
|
|
328
|
+
for wid, report in sorted(self.last_health_reports.items()):
|
|
329
|
+
encoder = report.get('encoder', 'unknown')
|
|
330
|
+
metrics = report.get('metrics', {})
|
|
331
|
+
self.logger.debug(
|
|
332
|
+
f" Worker {wid}: encoder={encoder}, "
|
|
333
|
+
f"frames={metrics.get('frames_encoded', 0)}, "
|
|
334
|
+
f"errors={metrics.get('encoding_errors', 0)}"
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
def stop(self, timeout: float = 15.0):
    """Signal every worker to stop and wait for them to exit.

    Workers still alive after `timeout` seconds are terminated and given
    a further 5 seconds to die.

    Args:
        timeout: Seconds to wait per worker before terminating it.
    """
    self.logger.info("Stopping GStreamer workers...")

    self.stop_event.set()

    for idx, proc in enumerate(self.workers):
        if not proc.is_alive():
            continue
        self.logger.info(f"Waiting for GStreamer worker {idx}...")
        proc.join(timeout=timeout)

        if proc.is_alive():
            # Graceful shutdown failed; force-kill and allow a short grace.
            self.logger.warning(f"Terminating worker {idx}")
            proc.terminate()
            proc.join(timeout=5.0)

    banner = "=" * 60
    self.logger.info(banner)
    self.logger.info("GSTREAMER SHUTDOWN COMPLETE")
    self.logger.info(banner)
    self._print_final_summary()
|
|
357
|
+
|
|
358
|
+
def _print_final_summary(self):
|
|
359
|
+
"""Print final summary."""
|
|
360
|
+
total = sum(len(c) for c in self.worker_camera_assignments.values())
|
|
361
|
+
|
|
362
|
+
self.logger.info(f"Total cameras assigned: {total}")
|
|
363
|
+
self.logger.info(f"Workers started: {len(self.workers)}")
|
|
364
|
+
self.logger.info(f"Encoder: {self.gstreamer_encoder}, Codec: {self.gstreamer_codec}")
|
|
365
|
+
|
|
366
|
+
normal = sum(1 for w in self.workers if w.exitcode == 0)
|
|
367
|
+
errors = sum(1 for w in self.workers if w.exitcode and w.exitcode != 0)
|
|
368
|
+
|
|
369
|
+
self.logger.info(f"Exit status: {normal} normal, {errors} errors")
|
|
370
|
+
|
|
371
|
+
def run(self, duration: Optional[float] = None):
    """Start all workers, monitor until done or interrupted, then stop.

    Installs SIGINT/SIGTERM handlers so a Ctrl-C or kill triggers a
    clean shutdown; workers are always stopped on the way out.

    Args:
        duration: Optional monitoring time limit in seconds.
    """
    try:
        # Route shutdown signals through our handler.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

        self.start()
        self.monitor(duration=duration)
    except Exception as exc:
        self.logger.error(f"Error in run loop: {exc}", exc_info=True)
    finally:
        self.stop()
|
|
385
|
+
|
|
386
|
+
def _signal_handler(self, signum, frame):
|
|
387
|
+
"""Handle shutdown signals."""
|
|
388
|
+
signal_name = signal.Signals(signum).name
|
|
389
|
+
self.logger.info(f"Received {signal_name}, shutting down...")
|
|
390
|
+
self.stop_event.set()
|
|
391
|
+
|
|
392
|
+
# ========================================================================
|
|
393
|
+
# Dynamic Camera Management (same API as WorkerManager)
|
|
394
|
+
# ========================================================================
|
|
395
|
+
|
|
396
|
+
def add_camera(self, camera_config: Dict[str, Any]) -> bool:
    """Route a new camera to the least-loaded live worker.

    Args:
        camera_config: Camera configuration; must contain 'stream_key'.

    Returns:
        True if the add command was queued, False when the key is missing,
        already assigned, all workers are full, or queueing fails.
    """
    stream_key = camera_config.get('stream_key')

    if not stream_key:
        return False

    if stream_key in self.camera_to_worker:
        self.logger.warning(f"Camera {stream_key} already exists")
        return False

    target = self._find_least_loaded_worker()
    if target is None:
        self.logger.error("All workers at capacity")
        return False

    try:
        self.command_queues[target].put(
            {
                'type': 'add_camera',
                'camera_config': camera_config,
                'timestamp': time.time()
            },
            timeout=5.0,
        )
    except Exception as exc:
        self.logger.error(f"Failed to add camera: {exc}")
        return False

    # Only record ownership once the command is actually queued.
    self.camera_to_worker[stream_key] = target
    self.worker_camera_count[target] += 1
    self.logger.info(f"Sent add_camera for {stream_key} to worker {target}")
    return True
|
|
427
|
+
|
|
428
|
+
def remove_camera(self, stream_key: str) -> bool:
    """Ask the owning worker to drop a camera.

    Args:
        stream_key: Identifier of the camera to remove.

    Returns:
        True if the remove command was queued, False when the camera is
        unknown or queueing fails.
    """
    owner = self.camera_to_worker.get(stream_key)
    if owner is None:
        return False

    try:
        self.command_queues[owner].put(
            {
                'type': 'remove_camera',
                'stream_key': stream_key,
                'timestamp': time.time()
            },
            timeout=5.0,
        )
    except Exception as exc:
        self.logger.error(f"Failed to remove camera: {exc}")
        return False

    # Forget the camera only after the command is actually queued.
    del self.camera_to_worker[stream_key]
    self.worker_camera_count[owner] -= 1
    self.logger.info(f"Sent remove_camera for {stream_key}")
    return True
|
|
450
|
+
|
|
451
|
+
def update_camera(self, camera_config: Dict[str, Any]) -> bool:
    """Push a new configuration for a camera, adding it if unknown.

    Args:
        camera_config: Camera configuration; must contain 'stream_key'.

    Returns:
        True if the update (or fallback add) was queued, False otherwise.
    """
    stream_key = camera_config.get('stream_key')

    if not stream_key:
        return False

    owner = self.camera_to_worker.get(stream_key)
    if owner is None:
        # Unknown camera: treat the update as an add.
        return self.add_camera(camera_config)

    try:
        self.command_queues[owner].put(
            {
                'type': 'update_camera',
                'camera_config': camera_config,
                'stream_key': stream_key,
                'timestamp': time.time()
            },
            timeout=5.0,
        )
        return True
    except Exception as exc:
        self.logger.error(f"Failed to update camera: {exc}")
        return False
|
|
476
|
+
|
|
477
|
+
def _find_least_loaded_worker(self) -> Optional[int]:
|
|
478
|
+
"""Find worker with least cameras."""
|
|
479
|
+
available = []
|
|
480
|
+
for wid, count in self.worker_camera_count.items():
|
|
481
|
+
if count < self.max_cameras_per_worker and wid in self.command_queues:
|
|
482
|
+
if wid < len(self.workers) and self.workers[wid].is_alive():
|
|
483
|
+
available.append((wid, count))
|
|
484
|
+
|
|
485
|
+
if not available:
|
|
486
|
+
return None
|
|
487
|
+
|
|
488
|
+
return min(available, key=lambda x: x[1])[0]
|
|
489
|
+
|
|
490
|
+
def get_camera_assignments(self) -> Dict[str, int]:
    """Return a snapshot of the stream-key to worker-id mapping."""
    return dict(self.camera_to_worker)
|
|
493
|
+
|
|
494
|
+
def _flush_health_queue(self):
|
|
495
|
+
"""Consume all pending health reports from the queue."""
|
|
496
|
+
while not self.health_queue.empty():
|
|
497
|
+
try:
|
|
498
|
+
report = self.health_queue.get_nowait()
|
|
499
|
+
worker_id = report.get('worker_id')
|
|
500
|
+
if worker_id is not None:
|
|
501
|
+
self.last_health_reports[worker_id] = report
|
|
502
|
+
except Exception:
|
|
503
|
+
break
|
|
504
|
+
|
|
505
|
+
def get_worker_statistics(self) -> Dict[str, Any]:
    """Build an aggregate statistics snapshot across all workers.

    Returns:
        Dict with worker counts, camera assignments, encoder settings,
        merged per-camera stats, and a condensed health report per worker.
    """
    # Pull any queued health reports first so the snapshot is fresh.
    self._flush_health_queue()

    # Merge each worker's per-camera stats into one flat mapping.
    merged_camera_stats: Dict[str, Any] = {}
    for report in self.last_health_reports.values():
        merged_camera_stats.update(report.get('per_camera_stats', {}))

    condensed_health = {
        wid: {
            'status': rep.get('status'),
            'active_cameras': rep.get('active_cameras', 0),
            'encoder': rep.get('encoder'),
            'metrics': rep.get('metrics', {}),
        }
        for wid, rep in self.last_health_reports.items()
    }

    return {
        'worker_type': 'gstreamer',
        'num_workers': len(self.workers),
        'running_workers': sum(1 for w in self.workers if w.is_alive()),
        'total_cameras': sum(self.worker_camera_count.values()),
        'camera_assignments': dict(self.camera_to_worker),
        'worker_camera_counts': dict(self.worker_camera_count),
        'encoder': self.gstreamer_encoder,
        'codec': self.gstreamer_codec,
        'gpu_id': self.gpu_id,
        'per_camera_stats': merged_camera_stats,
        'health_reports': condensed_health,
    }
|
|
537
|
+
|
|
538
|
+
def __enter__(self):
|
|
539
|
+
"""Context manager entry."""
|
|
540
|
+
self.start()
|
|
541
|
+
return self
|
|
542
|
+
|
|
543
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
544
|
+
"""Context manager exit."""
|
|
545
|
+
self.stop()
|
|
546
|
+
|
|
@@ -6,6 +6,15 @@ from datetime import datetime, timezone
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Dict, Union, Optional, Any
|
|
8
8
|
|
|
9
|
+
# memoryview is built-in, no import needed
|
|
10
|
+
|
|
11
|
+
# Try to import xxhash for 10x faster hashing (optional dependency)
|
|
12
|
+
try:
|
|
13
|
+
import xxhash
|
|
14
|
+
HAS_XXHASH = True
|
|
15
|
+
except ImportError:
|
|
16
|
+
HAS_XXHASH = False
|
|
17
|
+
|
|
9
18
|
|
|
10
19
|
class StreamMessageBuilder:
|
|
11
20
|
"""Builds stream messages with proper structure and metadata."""
|
|
@@ -90,15 +99,18 @@ class StreamMessageBuilder:
|
|
|
90
99
|
input_order: int,
|
|
91
100
|
last_read_time: float,
|
|
92
101
|
last_write_time: float,
|
|
93
|
-
last_process_time: float
|
|
102
|
+
last_process_time: float,
|
|
103
|
+
cached_frame_id: Optional[str] = None,
|
|
94
104
|
) -> Dict[str, Any]:
|
|
95
105
|
"""Build complete stream message.
|
|
96
|
-
|
|
106
|
+
|
|
107
|
+
Supports both normal frames (with content) and cached frames (empty content + cached_frame_id).
|
|
108
|
+
|
|
97
109
|
Args:
|
|
98
|
-
frame_data: Encoded frame bytes
|
|
110
|
+
frame_data: Encoded frame bytes (can be empty for cached frames)
|
|
99
111
|
stream_key: Stream identifier
|
|
100
112
|
stream_group_key: Stream group identifier
|
|
101
|
-
codec: Video codec
|
|
113
|
+
codec: Video codec (use "cached" for cached frames)
|
|
102
114
|
metadata: Frame metadata
|
|
103
115
|
topic: Topic name
|
|
104
116
|
broker_config: Broker configuration string
|
|
@@ -106,15 +118,18 @@ class StreamMessageBuilder:
|
|
|
106
118
|
last_read_time: Last read time
|
|
107
119
|
last_write_time: Last write time
|
|
108
120
|
last_process_time: Last process time
|
|
109
|
-
|
|
121
|
+
cached_frame_id: Frame ID to use cached results from (None for normal frames)
|
|
122
|
+
|
|
110
123
|
Returns:
|
|
111
124
|
Complete message dictionary
|
|
112
125
|
"""
|
|
113
|
-
#
|
|
126
|
+
# Store content as raw bytes (NO base64 encoding for performance)
|
|
127
|
+
# Redis/Kafka will handle binary data directly
|
|
128
|
+
# For cached frames, frame_data will be empty bytes
|
|
114
129
|
if frame_data and not self.strip_input_content:
|
|
115
|
-
|
|
130
|
+
content_data = frame_data # Raw bytes
|
|
116
131
|
else:
|
|
117
|
-
|
|
132
|
+
content_data = b""
|
|
118
133
|
|
|
119
134
|
# Build input stream
|
|
120
135
|
input_stream = {
|
|
@@ -157,14 +172,17 @@ class StreamMessageBuilder:
|
|
|
157
172
|
"last_write_time_sec": last_write_time,
|
|
158
173
|
"last_process_time_sec": last_process_time,
|
|
159
174
|
},
|
|
160
|
-
"content":
|
|
161
|
-
"input_hash": (
|
|
162
|
-
hashlib.md5(frame_data, usedforsecurity=False).hexdigest()
|
|
163
|
-
if frame_data
|
|
164
|
-
else None
|
|
165
|
-
),
|
|
175
|
+
"content": content_data, # Raw binary data (no base64) - empty for cached frames
|
|
176
|
+
"input_hash": self._compute_fast_hash(frame_data) if frame_data else None,
|
|
166
177
|
}
|
|
167
178
|
|
|
179
|
+
# Add cached_frame_id if this is a cached frame
|
|
180
|
+
if cached_frame_id:
|
|
181
|
+
input_stream["cached_frame_id"] = cached_frame_id
|
|
182
|
+
# Include similarity score for debugging/metrics
|
|
183
|
+
if "similarity_score" in metadata:
|
|
184
|
+
input_stream["similarity_score"] = metadata["similarity_score"]
|
|
185
|
+
|
|
168
186
|
# Add passthrough metadata
|
|
169
187
|
passthrough_keys = {
|
|
170
188
|
"similarity_score", "skip_reason",
|
|
@@ -219,8 +237,35 @@ class StreamMessageBuilder:
|
|
|
219
237
|
@staticmethod
|
|
220
238
|
def _get_high_precision_timestamp() -> str:
|
|
221
239
|
"""Get high precision timestamp with microsecond granularity.
|
|
222
|
-
|
|
240
|
+
|
|
223
241
|
Returns:
|
|
224
242
|
Timestamp string
|
|
225
243
|
"""
|
|
226
244
|
return datetime.now(timezone.utc).strftime("%Y-%m-%d-%H:%M:%S.%f UTC")
|
|
245
|
+
|
|
246
|
+
@staticmethod
|
|
247
|
+
def _compute_fast_hash(data: Union[bytes, memoryview]) -> str:
|
|
248
|
+
"""Compute fast hash of frame data for deduplication.
|
|
249
|
+
|
|
250
|
+
Uses xxhash if available (10x faster than MD5), otherwise falls back
|
|
251
|
+
to blake2b (3x faster than MD5).
|
|
252
|
+
|
|
253
|
+
Supports both bytes and memoryview (zero-copy optimization).
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
data: Frame data bytes or memoryview
|
|
257
|
+
|
|
258
|
+
Returns:
|
|
259
|
+
Hexadecimal hash string
|
|
260
|
+
"""
|
|
261
|
+
# Convert memoryview to bytes if needed (xxhash and blake2b support both)
|
|
262
|
+
# Both hash functions support buffer protocol, so no extra copy needed
|
|
263
|
+
if HAS_XXHASH:
|
|
264
|
+
# xxhash: ~5μs for 50KB (10x faster than MD5)
|
|
265
|
+
# Supports buffer protocol directly (no copy)
|
|
266
|
+
return xxhash.xxh64(data).hexdigest()
|
|
267
|
+
else:
|
|
268
|
+
# blake2b fallback: ~15μs for 50KB (3x faster than MD5)
|
|
269
|
+
# Note: blake2b is available in Python 3.6+ standard library
|
|
270
|
+
# Also supports buffer protocol directly (no copy)
|
|
271
|
+
return hashlib.blake2b(data, digest_size=16).hexdigest()
|