matrice-streaming 0.1.60__py3-none-any.whl → 0.1.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_streaming/streaming_gateway/camera_streamer/__init__.py +26 -2
- matrice_streaming/streaming_gateway/camera_streamer/nvdec.py +961 -0
- matrice_streaming/streaming_gateway/camera_streamer/nvdec_worker_manager.py +380 -0
- matrice_streaming/streaming_gateway/streaming_gateway.py +182 -11
- {matrice_streaming-0.1.60.dist-info → matrice_streaming-0.1.61.dist-info}/METADATA +1 -1
- {matrice_streaming-0.1.60.dist-info → matrice_streaming-0.1.61.dist-info}/RECORD +9 -7
- {matrice_streaming-0.1.60.dist-info → matrice_streaming-0.1.61.dist-info}/WHEEL +0 -0
- {matrice_streaming-0.1.60.dist-info → matrice_streaming-0.1.61.dist-info}/licenses/LICENSE.txt +0 -0
- {matrice_streaming-0.1.60.dist-info → matrice_streaming-0.1.61.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""NVDEC Worker Manager for StreamingGateway integration.
|
|
2
|
+
|
|
3
|
+
This module provides a simplified manager for the NVDEC hardware decoding backend.
|
|
4
|
+
Unlike other backends, NVDEC uses static camera configuration at startup and outputs
|
|
5
|
+
to CUDA IPC ring buffers (NV12 format) for zero-copy GPU inference pipelines.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import multiprocessing as mp
|
|
10
|
+
import time
|
|
11
|
+
from typing import Dict, List, Optional, Any
|
|
12
|
+
|
|
13
|
+
from .nvdec import (
|
|
14
|
+
nvdec_pool_process,
|
|
15
|
+
StreamConfig,
|
|
16
|
+
CUPY_AVAILABLE,
|
|
17
|
+
PYNVCODEC_AVAILABLE,
|
|
18
|
+
RING_BUFFER_AVAILABLE,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def is_nvdec_available() -> bool:
    """Report whether every dependency of the NVDEC backend was importable.

    The backend needs all three of:
    - CuPy with CUDA support
    - PyNvVideoCodec for NVDEC hardware decode
    - cuda_shm_ring_buffer module for CUDA IPC

    Returns:
        The availability flags combined with logical AND (the first falsy
        flag, otherwise the last flag), exactly as ``a and b and c`` would.
    """
    # Guard clauses: bail out on the first missing dependency.
    if not CUPY_AVAILABLE:
        return CUPY_AVAILABLE
    if not PYNVCODEC_AVAILABLE:
        return PYNVCODEC_AVAILABLE
    return RING_BUFFER_AVAILABLE
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_available_gpu_count() -> int:
    """Ask the CUDA runtime (through CuPy) how many GPUs are visible.

    Returns:
        The device count reported by ``cp.cuda.runtime.getDeviceCount()``.
        Falls back to 1 when CuPy is not available or the query raises.
    """
    if CUPY_AVAILABLE:
        try:
            # Imported lazily so this module still loads on hosts without CuPy.
            import cupy as cp
            return cp.cuda.runtime.getDeviceCount()
        except Exception as e:
            logger.warning(f"Failed to detect GPU count: {e}, defaulting to 1")

    # Either CuPy is missing or detection failed: assume a single GPU.
    return 1
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class NVDECWorkerManager:
    """Manager for NVDEC worker processes - static camera configuration.

    This manager wraps the existing nvdec_pool_process function to integrate
    with StreamingGateway. Key differences from other worker managers:

    - Static camera configuration (no dynamic add/remove)
    - Outputs to CUDA IPC ring buffers (not Redis/Kafka)
    - NV12 format output (50% smaller than RGB)
    - One worker process per GPU

    The manager can also be used as a context manager: entering calls
    ``start()`` and leaving calls ``stop()``.
    """

    # Upper bound on the number of GPUs (and therefore worker processes) used.
    _MAX_GPUS = 8

    def __init__(
        self,
        camera_configs: List[Dict[str, Any]],
        stream_config: Dict[str, Any],  # Unused but kept for interface consistency
        gpu_id: int = 0,
        num_gpus: int = 0,  # 0 = auto-detect all available GPUs
        nvdec_pool_size: int = 8,
        nvdec_burst_size: int = 4,
        frame_width: int = 640,
        frame_height: int = 640,
        num_slots: int = 32,
        target_fps: int = 0,  # 0 = use per-camera FPS from config
        duration_sec: float = 0,  # 0 = infinite
    ):
        """Initialize NVDEC Worker Manager.

        Args:
            camera_configs: List of camera configuration dicts with keys:
                - camera_id or stream_key: Unique identifier (used for ring buffer naming)
                - source (or video_path): Video file path or RTSP URL
                - width: Optional frame width (default: frame_width)
                - height: Optional frame height (default: frame_height)
                - fps: FPS limit for this camera (used by default)
            stream_config: Stream configuration (unused, for interface consistency)
            gpu_id: Primary GPU device ID (starting GPU for round-robin assignment)
            num_gpus: Number of GPUs to use (0 = auto-detect all available GPUs)
            nvdec_pool_size: Number of NVDEC decoders per GPU
            nvdec_burst_size: Frames per stream before rotating to next
            frame_width: Default output frame width (used if camera config doesn't specify)
            frame_height: Default output frame height (used if camera config doesn't specify)
            num_slots: Ring buffer slots per camera
            target_fps: Global FPS override (0 = use per-camera FPS from config)
            duration_sec: Duration to run (0 = infinite until stop)

        Raises:
            RuntimeError: If CuPy, PyNvVideoCodec or cuda_shm_ring_buffer is missing.
        """
        if not is_nvdec_available():
            raise RuntimeError(
                "NVDEC not available. Requires CuPy, PyNvVideoCodec, and cuda_shm_ring_buffer"
            )

        self.camera_configs = camera_configs
        self.stream_config = stream_config
        self.gpu_id = gpu_id

        # Auto-detect GPUs if num_gpus is 0
        if num_gpus <= 0:
            detected_gpus = get_available_gpu_count()
            # Clamp to [1, _MAX_GPUS]: a detected count of 0 would otherwise make
            # the round-robin modulo in _prepare_camera_configs divide by zero.
            self.num_gpus = max(1, min(detected_gpus, self._MAX_GPUS))
            logger.info(f"Auto-detected {detected_gpus} GPU(s), using {self.num_gpus}")
        else:
            self.num_gpus = min(num_gpus, self._MAX_GPUS)
        self.nvdec_pool_size = nvdec_pool_size
        self.nvdec_burst_size = nvdec_burst_size
        self.frame_width = frame_width
        self.frame_height = frame_height
        self.num_slots = num_slots
        self.target_fps = target_fps
        # A non-positive duration means "run until stop() is called".
        self.duration_sec = duration_sec if duration_sec > 0 else float('inf')

        # Runtime state; the multiprocessing primitives are created in start().
        self._workers: List[mp.Process] = []
        self._stop_event: Optional[mp.Event] = None
        self._result_queue: Optional[mp.Queue] = None
        self._shared_frame_count: Optional[mp.Value] = None
        self._start_time: Optional[float] = None
        self._is_running = False

        # Convert camera configs to StreamConfig objects and assign to GPUs
        self._stream_configs: List[StreamConfig] = []
        self._gpu_camera_assignments: Dict[int, List[StreamConfig]] = {
            i: [] for i in range(self.num_gpus)
        }
        self._camera_to_gpu: Dict[str, int] = {}

        self._prepare_camera_configs()

        logger.info(
            f"NVDECWorkerManager initialized: {len(camera_configs)} cameras, "
            f"{self.num_gpus} GPU(s), pool_size={nvdec_pool_size}"
        )

    def _prepare_camera_configs(self):
        """Convert dict configs to StreamConfig and distribute across GPUs.

        Ring buffers are named using camera_id for SHM identification.
        Per-camera FPS from config is used by default (target_fps=0 means use config FPS).
        Cameras without a source are skipped with a warning.
        """
        for i, config in enumerate(self.camera_configs):
            # Extract camera ID (support both camera_id and stream_key)
            # This ID is used for naming the CUDA IPC ring buffer
            camera_id = config.get('camera_id') or config.get('stream_key') or f"cam_{i:04d}"

            # Extract video source
            source = config.get('source') or config.get('video_path')
            if not source:
                logger.warning(f"Camera {camera_id} has no source, skipping")
                continue

            # Extract dimensions (use per-camera config or fallback to defaults)
            width = config.get('width') or self.frame_width
            height = config.get('height') or self.frame_height

            # Determine FPS: use global override if set, otherwise per-camera FPS from config
            if self.target_fps > 0:
                fps = self.target_fps
            else:
                # Default to 10 FPS when the key is missing or explicitly None.
                fps = config.get('fps')
                if fps is None:
                    fps = 10

            # Assign to GPU (round-robin starting from gpu_id)
            # NOTE(review): the modulo maps onto indices 0..num_gpus-1, so a
            # non-zero gpu_id only rotates the starting index - confirm intended.
            gpu_id = (self.gpu_id + i) % self.num_gpus

            stream_config = StreamConfig(
                camera_id=camera_id,
                video_path=source,
                width=width,
                height=height,
                target_fps=fps,
                gpu_id=gpu_id,
            )

            self._stream_configs.append(stream_config)
            self._gpu_camera_assignments[gpu_id].append(stream_config)
            self._camera_to_gpu[camera_id] = gpu_id

            logger.debug(f"Camera {camera_id}: source={source}, {width}x{height}@{fps}fps, GPU{gpu_id}")

    def start(self) -> None:
        """Start NVDEC worker processes (one per GPU that has cameras).

        Does nothing (beyond a warning) if already running or if no camera
        produced a usable StreamConfig. If spawning a worker raises, workers
        already started are shut down and the exception is re-raised.
        """
        if self._is_running:
            logger.warning("NVDECWorkerManager is already running")
            return

        if not self._stream_configs:
            logger.warning("No cameras configured, nothing to start")
            return

        # Workers are created from the "spawn" multiprocessing context.
        ctx = mp.get_context("spawn")
        self._stop_event = ctx.Event()
        self._result_queue = ctx.Queue()
        self._shared_frame_count = ctx.Value('i', 0)
        self._start_time = time.perf_counter()

        try:
            # Start one process per GPU that has cameras
            for gpu_id in range(self.num_gpus):
                gpu_cameras = self._gpu_camera_assignments[gpu_id]
                if not gpu_cameras:
                    continue

                p = ctx.Process(
                    target=nvdec_pool_process,
                    args=(
                        gpu_id,                    # process_id
                        gpu_cameras,               # camera_configs (List[StreamConfig])
                        self.nvdec_pool_size,      # pool_size
                        self.duration_sec,         # duration_sec
                        self._result_queue,        # result_queue
                        self._stop_event,          # stop_event
                        self.nvdec_burst_size,     # burst_size
                        self.num_slots,            # num_slots
                        self.target_fps,           # target_fps
                        self._shared_frame_count,  # shared_frame_count
                    ),
                    name=f"NVDECWorker-GPU{gpu_id}",
                    daemon=False,
                )
                p.start()
                self._workers.append(p)
                logger.info(f"Started NVDEC worker on GPU {gpu_id} with {len(gpu_cameras)} cameras")
        except Exception:
            # Workers are non-daemon: do not leave a partial set running.
            logger.error("Failed to start all NVDEC workers, stopping those already started")
            self._shutdown_workers(timeout=2.0)
            raise

        self._is_running = True
        logger.info(f"NVDECWorkerManager started: {len(self._workers)} workers")

    def _shutdown_workers(self, timeout: float) -> None:
        """Signal, join and (if needed) terminate every tracked worker."""
        if self._stop_event is not None:
            self._stop_event.set()

        for p in self._workers:
            p.join(timeout=timeout)
            if p.is_alive():
                logger.warning(f"Worker {p.name} did not stop gracefully, terminating")
                p.terminate()
                p.join(timeout=2.0)
                if p.is_alive():
                    logger.error(f"Worker {p.name} is still alive after terminate()")

        self._workers.clear()

    def stop(self, timeout: float = 15.0) -> None:
        """Stop all worker processes.

        Args:
            timeout: Maximum time to wait for each worker to stop gracefully
        """
        if not self._is_running:
            logger.warning("NVDECWorkerManager is not running")
            return

        logger.info("Stopping NVDECWorkerManager...")
        self._shutdown_workers(timeout)
        self._is_running = False
        logger.info("NVDECWorkerManager stopped")

    def get_worker_statistics(self) -> Dict[str, Any]:
        """Return statistics from workers.

        Results waiting on the result queue are consumed by this call, so each
        queued result is reported once.

        Returns:
            Dict with keys:
                - backend: 'nvdec'
                - num_workers: Number of worker processes
                - running_workers: Number of currently running workers
                - total_cameras: Total cameras across all workers
                - gpu_assignments: Cameras per GPU
                - nvdec_config: Configuration the manager was created with
                - total_frames: Total frames processed (from shared counter;
                  only present once start() has created the counter)
                - elapsed_sec: Time since start
                - aggregate_fps: Overall FPS
                - per_stream_fps: Average FPS per camera
                - gpu_results: Per-GPU results from result queue
        """
        from queue import Empty  # raised by Queue.get_nowait() when drained

        stats = {
            'backend': 'nvdec',
            'num_workers': len(self._workers),
            'running_workers': sum(1 for p in self._workers if p.is_alive()),
            'total_cameras': len(self._stream_configs),
            'gpu_assignments': {
                gpu_id: len(cameras)
                for gpu_id, cameras in self._gpu_camera_assignments.items()
            },
            'nvdec_config': {
                'gpu_id': self.gpu_id,
                'num_gpus': self.num_gpus,
                'pool_size': self.nvdec_pool_size,
                'burst_size': self.nvdec_burst_size,
                'frame_size': f"{self.frame_width}x{self.frame_height}",
                'num_slots': self.num_slots,
                'target_fps': self.target_fps,
            },
        }

        # Add frame count and FPS
        if self._shared_frame_count is not None:
            total_frames = self._shared_frame_count.value
            stats['total_frames'] = total_frames

            if self._start_time is not None:
                elapsed = time.perf_counter() - self._start_time
                stats['elapsed_sec'] = elapsed
                stats['aggregate_fps'] = total_frames / elapsed if elapsed > 0 else 0
                stats['per_stream_fps'] = (
                    stats['aggregate_fps'] / len(self._stream_configs)
                    if self._stream_configs else 0
                )

        # Collect any available results from queue (non-blocking)
        gpu_results = []
        if self._result_queue is not None:
            while True:
                try:
                    gpu_results.append(self._result_queue.get_nowait())
                except Empty:
                    break
                except (OSError, ValueError, EOFError) as exc:
                    # Closed/broken queue: statistics are best-effort.
                    logger.debug(f"Could not read NVDEC result queue: {exc}")
                    break
        stats['gpu_results'] = gpu_results

        return stats

    def get_camera_assignments(self) -> Dict[str, int]:
        """Return mapping of camera_id to GPU ID.

        Returns:
            Dict mapping camera_id -> gpu_id (a copy; safe for callers to mutate)
        """
        return self._camera_to_gpu.copy()

    def add_camera(self, camera_config: Dict[str, Any]) -> bool:
        """Not supported - NVDEC uses static camera configuration.

        Raises:
            NotImplementedError: Always raised
        """
        raise NotImplementedError(
            "NVDEC backend uses static camera configuration. "
            "Cameras must be configured at initialization."
        )

    def remove_camera(self, stream_key: str) -> bool:
        """Not supported - NVDEC uses static camera configuration.

        Raises:
            NotImplementedError: Always raised
        """
        raise NotImplementedError(
            "NVDEC backend uses static camera configuration. "
            "Cameras cannot be removed at runtime."
        )

    def update_camera(self, camera_config: Dict[str, Any]) -> bool:
        """Not supported - NVDEC uses static camera configuration.

        Raises:
            NotImplementedError: Always raised
        """
        raise NotImplementedError(
            "NVDEC backend uses static camera configuration. "
            "Cameras cannot be updated at runtime."
        )

    @property
    def is_running(self) -> bool:
        """Check if the manager is currently running."""
        return self._is_running

    def __enter__(self):
        """Context manager entry: start the workers and return the manager."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: stop the workers; exceptions are not suppressed."""
        self.stop()
|
|
@@ -17,6 +17,7 @@ from .dynamic_camera_manager import DynamicCameraManager, DynamicCameraManagerFo
|
|
|
17
17
|
|
|
18
18
|
USE_FFMPEG = os.getenv("USE_FFMPEG", "false").lower() == "true"
|
|
19
19
|
USE_GSTREAMER = os.getenv("USE_GSTREAMER", "false").lower() == "true"
|
|
20
|
+
USE_NVDEC = os.getenv("USE_NVDEC", "false").lower() == "true"
|
|
20
21
|
|
|
21
22
|
# GStreamer imports (optional - graceful degradation)
|
|
22
23
|
GSTREAMER_AVAILABLE = False
|
|
@@ -44,6 +45,15 @@ except (ImportError, FileNotFoundError):
|
|
|
44
45
|
# FFmpeg not available or not installed
|
|
45
46
|
pass
|
|
46
47
|
|
|
48
|
+
# NVDEC imports (optional - graceful degradation)
|
|
49
|
+
NVDEC_AVAILABLE = False
|
|
50
|
+
try:
|
|
51
|
+
from .camera_streamer.nvdec_worker_manager import NVDECWorkerManager, is_nvdec_available
|
|
52
|
+
NVDEC_AVAILABLE = is_nvdec_available()
|
|
53
|
+
except ImportError:
|
|
54
|
+
# NVDEC not available (requires CuPy, PyNvVideoCodec)
|
|
55
|
+
pass
|
|
56
|
+
|
|
47
57
|
|
|
48
58
|
class StreamingGateway:
|
|
49
59
|
"""Simplified streaming gateway for managing camera streams."""
|
|
@@ -87,6 +97,16 @@ class StreamingGateway:
|
|
|
87
97
|
ffmpeg_threads: int = 1, # FFmpeg decode threads per stream
|
|
88
98
|
ffmpeg_low_latency: bool = True, # Enable low-latency flags
|
|
89
99
|
ffmpeg_pixel_format: str = "bgr24",# Output pixel format
|
|
100
|
+
# NVDEC options (CUDA IPC ring buffer output)
|
|
101
|
+
use_nvdec: bool = USE_NVDEC, # Use NVDEC hardware decode + CUDA IPC output
|
|
102
|
+
nvdec_gpu_id: int = 0, # Primary GPU device ID (starting GPU)
|
|
103
|
+
nvdec_num_gpus: int = 0, # Number of GPUs (0=auto-detect all available)
|
|
104
|
+
nvdec_pool_size: int = 8, # NVDEC decoders per GPU
|
|
105
|
+
nvdec_burst_size: int = 4, # Frames per stream before rotating
|
|
106
|
+
nvdec_frame_width: int = 640, # Output frame width
|
|
107
|
+
nvdec_frame_height: int = 640, # Output frame height
|
|
108
|
+
nvdec_num_slots: int = 32, # Ring buffer slots per camera
|
|
109
|
+
nvdec_target_fps: int = 0, # FPS override (0=use per-camera FPS from config)
|
|
90
110
|
# SHM configuration (centralized)
|
|
91
111
|
shm_slot_count: int = 1000, # Ring buffer size per camera (increased for consumer lag)
|
|
92
112
|
):
|
|
@@ -123,6 +143,15 @@ class StreamingGateway:
|
|
|
123
143
|
ffmpeg_threads: Number of FFmpeg decode threads per stream
|
|
124
144
|
ffmpeg_low_latency: Enable FFmpeg low-latency flags
|
|
125
145
|
ffmpeg_pixel_format: Output pixel format (bgr24, rgb24, nv12)
|
|
146
|
+
use_nvdec: Use NVDEC hardware decode with CUDA IPC output (requires CuPy, PyNvVideoCodec)
|
|
147
|
+
nvdec_gpu_id: Primary/starting GPU device ID for round-robin camera assignment
|
|
148
|
+
nvdec_num_gpus: Number of GPUs to use (0=auto-detect all available GPUs)
|
|
149
|
+
nvdec_pool_size: Number of NVDEC decoders per GPU
|
|
150
|
+
nvdec_burst_size: Frames per stream before rotating to next stream
|
|
151
|
+
nvdec_frame_width: Default output frame width (used if camera config doesn't specify)
|
|
152
|
+
nvdec_frame_height: Default output frame height (used if camera config doesn't specify)
|
|
153
|
+
nvdec_num_slots: Ring buffer slots per camera (named by camera_id)
|
|
154
|
+
nvdec_target_fps: Global FPS override (0=use per-camera FPS from camera config)
|
|
126
155
|
shm_slot_count: Number of frame slots per camera ring buffer for SHM mode (default: 300)
|
|
127
156
|
"""
|
|
128
157
|
if not session:
|
|
@@ -168,6 +197,17 @@ class StreamingGateway:
|
|
|
168
197
|
self.ffmpeg_low_latency = ffmpeg_low_latency
|
|
169
198
|
self.ffmpeg_pixel_format = ffmpeg_pixel_format
|
|
170
199
|
|
|
200
|
+
# NVDEC configuration
|
|
201
|
+
self.use_nvdec = use_nvdec
|
|
202
|
+
self.nvdec_gpu_id = nvdec_gpu_id
|
|
203
|
+
self.nvdec_num_gpus = nvdec_num_gpus
|
|
204
|
+
self.nvdec_pool_size = nvdec_pool_size
|
|
205
|
+
self.nvdec_burst_size = nvdec_burst_size
|
|
206
|
+
self.nvdec_frame_width = nvdec_frame_width
|
|
207
|
+
self.nvdec_frame_height = nvdec_frame_height
|
|
208
|
+
self.nvdec_num_slots = nvdec_num_slots
|
|
209
|
+
self.nvdec_target_fps = nvdec_target_fps
|
|
210
|
+
|
|
171
211
|
# SHM configuration (centralized for all workers)
|
|
172
212
|
self.shm_slot_count = shm_slot_count
|
|
173
213
|
|
|
@@ -178,9 +218,17 @@ class StreamingGateway:
|
|
|
178
218
|
"Install FFmpeg from https://ffmpeg.org/download.html"
|
|
179
219
|
)
|
|
180
220
|
|
|
221
|
+
# Validate NVDEC availability if requested
|
|
222
|
+
if use_nvdec and not NVDEC_AVAILABLE:
|
|
223
|
+
raise RuntimeError(
|
|
224
|
+
"NVDEC requested but not available. "
|
|
225
|
+
"Requires CuPy, PyNvVideoCodec, and cuda_shm_ring_buffer module."
|
|
226
|
+
)
|
|
227
|
+
|
|
181
228
|
# Validate exclusive backend selection
|
|
182
|
-
|
|
183
|
-
|
|
229
|
+
backends_enabled = sum([use_gstreamer, use_ffmpeg, use_nvdec])
|
|
230
|
+
if backends_enabled > 1:
|
|
231
|
+
raise ValueError("Cannot enable multiple backends (GStreamer, FFmpeg, NVDEC) simultaneously")
|
|
184
232
|
|
|
185
233
|
# Initialize utility for API interactions
|
|
186
234
|
self.gateway_util = StreamingGatewayUtil(session, streaming_gateway_id, server_id, action_id=action_id)
|
|
@@ -228,15 +276,54 @@ class StreamingGateway:
|
|
|
228
276
|
raise ValueError(f"Input config {i} must be an InputStream instance")
|
|
229
277
|
|
|
230
278
|
# Initialize streaming backend based on configuration
|
|
231
|
-
# Options: use_ffmpeg, use_gstreamer, use_async_workers (AsyncCameraWorker), or CameraStreamer
|
|
279
|
+
# Options: use_nvdec, use_ffmpeg, use_gstreamer, use_async_workers (AsyncCameraWorker), or CameraStreamer
|
|
232
280
|
self.camera_streamer: Optional[CameraStreamer] = None
|
|
233
281
|
self.worker_manager: Optional[WorkerManager] = None
|
|
234
282
|
self.gstreamer_streamer: Optional[Any] = None # GStreamerCameraStreamer
|
|
235
283
|
self.gstreamer_worker_manager: Optional[Any] = None # GStreamerWorkerManager
|
|
236
284
|
self.ffmpeg_streamer: Optional[Any] = None # FFmpegCameraStreamer
|
|
237
285
|
self.ffmpeg_worker_manager: Optional[Any] = None # FFmpegWorkerManager
|
|
286
|
+
self.nvdec_worker_manager: Optional[Any] = None # NVDECWorkerManager
|
|
238
287
|
|
|
239
|
-
if self.
|
|
288
|
+
if self.use_nvdec:
|
|
289
|
+
# NVDEC-based streaming flow (CUDA IPC output, static camera config)
|
|
290
|
+
logging.info(
|
|
291
|
+
f"Initializing NVDEC worker flow - GPUs: {nvdec_num_gpus}, "
|
|
292
|
+
f"pool_size: {nvdec_pool_size}, output: NV12 ({nvdec_frame_width}x{nvdec_frame_height})"
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
# Build stream config (unused by NVDEC but needed for interface consistency)
|
|
296
|
+
stream_config = build_stream_config(
|
|
297
|
+
gateway_util=self.gateway_util,
|
|
298
|
+
server_type=server_type,
|
|
299
|
+
service_id=streaming_gateway_id,
|
|
300
|
+
stream_maxlen=self.shm_slot_count,
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
# Convert InputStream configs to camera_config dicts
|
|
304
|
+
camera_configs = [
|
|
305
|
+
input_stream_to_camera_config(inp) for inp in self.inputs_config
|
|
306
|
+
]
|
|
307
|
+
|
|
308
|
+
self.nvdec_worker_manager = NVDECWorkerManager(
|
|
309
|
+
camera_configs=camera_configs,
|
|
310
|
+
stream_config=stream_config,
|
|
311
|
+
gpu_id=nvdec_gpu_id,
|
|
312
|
+
num_gpus=nvdec_num_gpus,
|
|
313
|
+
nvdec_pool_size=nvdec_pool_size,
|
|
314
|
+
nvdec_burst_size=nvdec_burst_size,
|
|
315
|
+
frame_width=nvdec_frame_width,
|
|
316
|
+
frame_height=nvdec_frame_height,
|
|
317
|
+
num_slots=nvdec_num_slots,
|
|
318
|
+
target_fps=nvdec_target_fps,
|
|
319
|
+
)
|
|
320
|
+
|
|
321
|
+
# NVDEC uses static camera configuration - no dynamic camera manager
|
|
322
|
+
# Set camera_manager to None to indicate static mode
|
|
323
|
+
self.camera_manager = None
|
|
324
|
+
logging.info("NVDEC backend initialized (static camera configuration)")
|
|
325
|
+
|
|
326
|
+
elif self.use_ffmpeg:
|
|
240
327
|
# FFmpeg-based streaming flow
|
|
241
328
|
# Build stream config for workers
|
|
242
329
|
stream_config = build_stream_config(
|
|
@@ -444,12 +531,15 @@ class StreamingGateway:
|
|
|
444
531
|
)
|
|
445
532
|
|
|
446
533
|
# Initialize with current camera configurations
|
|
447
|
-
|
|
534
|
+
# (skip for NVDEC which uses static configuration)
|
|
535
|
+
if self.camera_manager is not None:
|
|
536
|
+
self.camera_manager.initialize_from_config(self.inputs_config)
|
|
448
537
|
|
|
449
|
-
# Initialize event system (if enabled)
|
|
538
|
+
# Initialize event system (if enabled and camera_manager exists)
|
|
539
|
+
# NVDEC doesn't support dynamic cameras, so event listening is disabled
|
|
450
540
|
self.event_listener: Optional[EventListener] = None
|
|
451
|
-
|
|
452
|
-
if self.enable_event_listening:
|
|
541
|
+
|
|
542
|
+
if self.enable_event_listening and self.camera_manager is not None:
|
|
453
543
|
try:
|
|
454
544
|
self.event_listener = EventListener(
|
|
455
545
|
session=self.session,
|
|
@@ -459,6 +549,8 @@ class StreamingGateway:
|
|
|
459
549
|
except Exception as e:
|
|
460
550
|
logging.warning(f"Could not initialize event system: {e}")
|
|
461
551
|
logging.info("Continuing without event listening")
|
|
552
|
+
elif self.enable_event_listening and self.use_nvdec:
|
|
553
|
+
logging.info("Event listening disabled for NVDEC backend (static camera configuration)")
|
|
462
554
|
|
|
463
555
|
# State management
|
|
464
556
|
self.is_streaming = False
|
|
@@ -537,7 +629,9 @@ class StreamingGateway:
|
|
|
537
629
|
self._register_as_active()
|
|
538
630
|
|
|
539
631
|
try:
|
|
540
|
-
if self.
|
|
632
|
+
if self.use_nvdec:
|
|
633
|
+
success = self._start_nvdec_worker_streaming()
|
|
634
|
+
elif self.use_ffmpeg:
|
|
541
635
|
if self.use_async_workers:
|
|
542
636
|
success = self._start_ffmpeg_worker_streaming()
|
|
543
637
|
else:
|
|
@@ -754,6 +848,45 @@ class StreamingGateway:
|
|
|
754
848
|
|
|
755
849
|
return True
|
|
756
850
|
|
|
851
|
+
def _start_nvdec_worker_streaming(self) -> bool:
|
|
852
|
+
"""Start streaming using NVDEC hardware decode with CUDA IPC output.
|
|
853
|
+
|
|
854
|
+
NVDEC outputs NV12 frames to CUDA IPC ring buffers for zero-copy
|
|
855
|
+
GPU inference pipelines. Unlike other backends, NVDEC:
|
|
856
|
+
- Uses static camera configuration (no dynamic add/remove)
|
|
857
|
+
- Outputs to CUDA IPC ring buffers (not Redis/Kafka)
|
|
858
|
+
- Outputs NV12 format (50% smaller than RGB)
|
|
859
|
+
|
|
860
|
+
Returns:
|
|
861
|
+
bool: True if started successfully, False otherwise
|
|
862
|
+
"""
|
|
863
|
+
num_cameras = len(self.inputs_config) if self.inputs_config else 0
|
|
864
|
+
logging.info(
|
|
865
|
+
f"Starting NVDEC worker streaming with {num_cameras} cameras "
|
|
866
|
+
f"(GPUs: {self.nvdec_num_gpus}, pool_size: {self.nvdec_pool_size}, "
|
|
867
|
+
f"output: NV12 {self.nvdec_frame_width}x{self.nvdec_frame_height})"
|
|
868
|
+
)
|
|
869
|
+
|
|
870
|
+
# Build stream key mappings for tracking
|
|
871
|
+
if self.inputs_config:
|
|
872
|
+
for i, input_config in enumerate(self.inputs_config):
|
|
873
|
+
stream_key = input_config.camera_key or f"stream_{i}"
|
|
874
|
+
camera_id = input_config.camera_id or stream_key
|
|
875
|
+
self._stream_key_to_camera_id[stream_key] = camera_id
|
|
876
|
+
self._my_stream_keys.add(stream_key)
|
|
877
|
+
|
|
878
|
+
# Start the NVDEC worker manager
|
|
879
|
+
try:
|
|
880
|
+
self.nvdec_worker_manager.start()
|
|
881
|
+
logging.info(
|
|
882
|
+
f"Started NVDECWorkerManager with {self.nvdec_num_gpus} GPU(s), "
|
|
883
|
+
f"{num_cameras} cameras"
|
|
884
|
+
)
|
|
885
|
+
return True
|
|
886
|
+
except Exception as exc:
|
|
887
|
+
logging.error(f"Failed to start NVDECWorkerManager: {exc}", exc_info=True)
|
|
888
|
+
return False
|
|
889
|
+
|
|
757
890
|
def _start_ffmpeg_worker_streaming(self) -> bool:
|
|
758
891
|
"""Start streaming using FFmpeg worker processes.
|
|
759
892
|
|
|
@@ -867,7 +1000,16 @@ class StreamingGateway:
|
|
|
867
1000
|
logging.error(f"Error stopping event listener: {exc}")
|
|
868
1001
|
|
|
869
1002
|
# Stop streaming backend based on which flow is active
|
|
870
|
-
if self.
|
|
1003
|
+
if self.use_nvdec:
|
|
1004
|
+
# Stop NVDEC backend
|
|
1005
|
+
if self.nvdec_worker_manager:
|
|
1006
|
+
try:
|
|
1007
|
+
logging.info("Stopping NVDECWorkerManager")
|
|
1008
|
+
self.nvdec_worker_manager.stop()
|
|
1009
|
+
logging.info("NVDEC worker manager stopped")
|
|
1010
|
+
except Exception as exc:
|
|
1011
|
+
logging.error(f"Error stopping NVDECWorkerManager: {exc}")
|
|
1012
|
+
elif self.use_ffmpeg:
|
|
871
1013
|
# Stop FFmpeg backends
|
|
872
1014
|
if self.use_async_workers:
|
|
873
1015
|
# Stop FFmpegWorkerManager
|
|
@@ -1000,9 +1142,28 @@ class StreamingGateway:
|
|
|
1000
1142
|
stats["use_async_workers"] = self.use_async_workers
|
|
1001
1143
|
stats["use_gstreamer"] = self.use_gstreamer
|
|
1002
1144
|
stats["use_ffmpeg"] = self.use_ffmpeg
|
|
1145
|
+
stats["use_nvdec"] = self.use_nvdec
|
|
1003
1146
|
|
|
1004
1147
|
# Add backend-specific statistics
|
|
1005
|
-
if self.
|
|
1148
|
+
if self.use_nvdec:
|
|
1149
|
+
# NVDEC statistics
|
|
1150
|
+
stats["nvdec_config"] = {
|
|
1151
|
+
"gpu_id": self.nvdec_gpu_id,
|
|
1152
|
+
"num_gpus": self.nvdec_num_gpus,
|
|
1153
|
+
"pool_size": self.nvdec_pool_size,
|
|
1154
|
+
"burst_size": self.nvdec_burst_size,
|
|
1155
|
+
"frame_width": self.nvdec_frame_width,
|
|
1156
|
+
"frame_height": self.nvdec_frame_height,
|
|
1157
|
+
"num_slots": self.nvdec_num_slots,
|
|
1158
|
+
"target_fps": self.nvdec_target_fps,
|
|
1159
|
+
}
|
|
1160
|
+
if self.nvdec_worker_manager:
|
|
1161
|
+
try:
|
|
1162
|
+
stats["worker_stats"] = self.nvdec_worker_manager.get_worker_statistics()
|
|
1163
|
+
stats["camera_assignments"] = self.nvdec_worker_manager.get_camera_assignments()
|
|
1164
|
+
except Exception as exc:
|
|
1165
|
+
logging.warning(f"Failed to get NVDEC worker stats: {exc}")
|
|
1166
|
+
elif self.use_ffmpeg:
|
|
1006
1167
|
# FFmpeg statistics
|
|
1007
1168
|
stats["ffmpeg_config"] = {
|
|
1008
1169
|
"hwaccel": self.ffmpeg_hwaccel,
|
|
@@ -1130,6 +1291,16 @@ class StreamingGateway:
|
|
|
1130
1291
|
"gstreamer_frame_optimizer_mode": self.gstreamer_frame_optimizer_mode,
|
|
1131
1292
|
"gstreamer_fallback_on_error": self.gstreamer_fallback_on_error,
|
|
1132
1293
|
"gstreamer_verbose_logging": self.gstreamer_verbose_logging,
|
|
1294
|
+
# NVDEC configuration
|
|
1295
|
+
"use_nvdec": self.use_nvdec,
|
|
1296
|
+
"nvdec_gpu_id": self.nvdec_gpu_id,
|
|
1297
|
+
"nvdec_num_gpus": self.nvdec_num_gpus,
|
|
1298
|
+
"nvdec_pool_size": self.nvdec_pool_size,
|
|
1299
|
+
"nvdec_burst_size": self.nvdec_burst_size,
|
|
1300
|
+
"nvdec_frame_width": self.nvdec_frame_width,
|
|
1301
|
+
"nvdec_frame_height": self.nvdec_frame_height,
|
|
1302
|
+
"nvdec_num_slots": self.nvdec_num_slots,
|
|
1303
|
+
"nvdec_target_fps": self.nvdec_target_fps,
|
|
1133
1304
|
}
|
|
1134
1305
|
|
|
1135
1306
|
def _emergency_cleanup(self):
|