matrice-streaming 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,6 @@
2
2
 
3
3
  This module implements an async event loop worker that handles multiple cameras
4
4
  in a single process using asyncio for efficient I/O-bound operations.
5
-
6
- Supports two capture architectures (controlled by use_blocking_threads flag):
7
- 1. Legacy: Per-camera asyncio tasks with ThreadPoolExecutor for capture
8
- 2. Optimized: Dedicated blocking capture thread per camera with shared frame queue
9
5
  """
10
6
  import asyncio
11
7
  import logging
@@ -13,15 +9,11 @@ import time
13
9
  import multiprocessing
14
10
  import os
15
11
  import psutil
16
- import threading
17
- import queue
18
12
  from concurrent.futures import ThreadPoolExecutor
19
- from dataclasses import dataclass, field
20
13
  from typing import Dict, Any, Optional, List, Union, Tuple
21
14
  from collections import deque
22
15
  import cv2
23
16
  from pathlib import Path
24
- import numpy as np
25
17
 
26
18
 
27
19
  # =========================
@@ -80,425 +72,6 @@ cv2.setUseOptimized(True)
80
72
  cv2.ocl.setUseOpenCL(False)
81
73
 
82
74
 
83
- # =========================
84
- # FEATURE FLAGS
85
- # =========================
86
- USE_BLOCKING_THREADS = os.getenv("USE_BLOCKING_THREADS", "true").lower() == "true"
87
-
88
-
89
- # =========================
90
- # OPTIMIZED CAPTURE ARCHITECTURE
91
- # =========================
92
-
93
- @dataclass
94
- class CapturedFrame:
95
- """Represents a captured frame ready for processing.
96
-
97
- Used for thread-to-async communication in blocking capture mode.
98
- Contains frame data and metadata needed for processing.
99
- """
100
- stream_key: str
101
- frame: np.ndarray
102
- timestamp_ns: int
103
- frame_counter: int
104
- width: int
105
- height: int
106
- camera_config: Dict[str, Any]
107
- capture_time_ms: float
108
- # SHM mode fields (populated by capture thread if SHM enabled)
109
- shm_frame_idx: Optional[int] = None
110
- shm_slot: Optional[int] = None
111
- is_similar: bool = False
112
- similarity_score: float = 0.0
113
- reference_frame_idx: Optional[int] = None
114
-
115
-
116
- @dataclass
117
- class ShmMetadataItem:
118
- """Lightweight metadata item for batched Redis writes in SHM mode.
119
-
120
- When SHM is enabled, capture threads write frames directly to SHM
121
- and enqueue only this metadata for the async loop to batch and send to Redis.
122
- """
123
- stream_key: str
124
- stream_group_key: str
125
- topic: str
126
- shm_name: str
127
- frame_idx: int
128
- slot: int
129
- ts_ns: int
130
- width: int
131
- height: int
132
- format: str
133
- is_similar: bool = False
134
- reference_frame_idx: Optional[int] = None
135
- similarity_score: Optional[float] = None
136
- camera_location: str = "Unknown"
137
- frame_counter: int = 0
138
-
139
-
140
- class CameraCapture:
141
- """Blocking capture thread for a single camera.
142
-
143
- Runs in a dedicated thread, captures frames at target FPS,
144
- and either writes to SHM (with metadata enqueue) or enqueues
145
- full frames for async processing.
146
-
147
- Key design decisions:
148
- - Uses time.sleep() for FPS throttling (not async) - no coroutine overhead
149
- - Uses stop_event.wait() for interruptible sleep - fast shutdown
150
- - Infinite retry with exponential backoff for camera reconnection
151
- - Directly writes to SHM when enabled - minimal latency
152
- """
153
-
154
- # Retry settings
155
- MIN_RETRY_COOLDOWN = 5 # 5 second minimum backoff
156
- MAX_RETRY_COOLDOWN = 30 # 30 second maximum backoff
157
- MAX_CONSECUTIVE_FAILURES = 10 # Max failures before reconnect
158
-
159
- def __init__(
160
- self,
161
- camera_config: Dict[str, Any],
162
- frame_queue: queue.Queue,
163
- stop_event: threading.Event,
164
- capture_manager: 'VideoCaptureManager',
165
- frame_optimizer: Optional['FrameOptimizer'] = None,
166
- # SHM support
167
- use_shm: bool = False,
168
- shm_buffer: Optional['ShmRingBuffer'] = None,
169
- shm_frame_format: str = "BGR",
170
- # Performance options
171
- drop_stale_frames: bool = True,
172
- buffer_size: int = 1,
173
- ):
174
- """Initialize capture thread for a single camera.
175
-
176
- Args:
177
- camera_config: Camera configuration dictionary
178
- frame_queue: Queue for sending frames/metadata to async loop
179
- stop_event: Event to signal thread shutdown
180
- capture_manager: VideoCaptureManager for source handling
181
- frame_optimizer: FrameOptimizer for similarity detection (optional)
182
- use_shm: If True, write frames to SHM and enqueue metadata only
183
- shm_buffer: ShmRingBuffer instance for this camera (if use_shm)
184
- shm_frame_format: Frame format for SHM storage
185
- drop_stale_frames: Use grab/grab/retrieve pattern for latest frame
186
- buffer_size: VideoCapture buffer size
187
- """
188
- self.camera_config = camera_config
189
- self.stream_key = camera_config['stream_key']
190
- self.stream_group_key = camera_config.get('stream_group_key', 'default')
191
- self.topic = camera_config['topic']
192
- self.source = camera_config['source']
193
- self.target_fps = camera_config.get('fps', 30)
194
- self.width = camera_config.get('width')
195
- self.height = camera_config.get('height')
196
- self.camera_location = camera_config.get('camera_location', 'Unknown')
197
- self.simulate_video_file_stream = camera_config.get('simulate_video_file_stream', False)
198
-
199
- self.frame_queue = frame_queue
200
- self.stop_event = stop_event
201
- self.capture_manager = capture_manager
202
- self.frame_optimizer = frame_optimizer
203
-
204
- # SHM configuration
205
- self.use_shm = use_shm
206
- self.shm_buffer = shm_buffer
207
- self.shm_frame_format = shm_frame_format
208
-
209
- # Performance settings
210
- self.drop_stale_frames = drop_stale_frames
211
- self.buffer_size = buffer_size
212
- self.frame_interval = 1.0 / self.target_fps
213
-
214
- # State
215
- self._thread: Optional[threading.Thread] = None
216
- self._frame_counter = 0
217
- self._last_shm_frame_idx: Optional[int] = None
218
- self._is_running = False
219
-
220
- self.logger = logging.getLogger(f"CameraCapture-{self.stream_key}")
221
-
222
- def start(self) -> None:
223
- """Start the capture thread."""
224
- if self._thread and self._thread.is_alive():
225
- self.logger.warning(f"Capture thread for {self.stream_key} already running")
226
- return
227
-
228
- self._is_running = True
229
- self._thread = threading.Thread(
230
- target=self._capture_loop,
231
- name=f"capture-{self.stream_key}",
232
- daemon=True
233
- )
234
- self._thread.start()
235
- self.logger.info(f"Started capture thread for {self.stream_key}")
236
-
237
- def stop(self, timeout: float = 5.0) -> None:
238
- """Stop the capture thread gracefully."""
239
- self._is_running = False
240
- if self._thread and self._thread.is_alive():
241
- self._thread.join(timeout=timeout)
242
- if self._thread.is_alive():
243
- self.logger.warning(f"Capture thread for {self.stream_key} did not stop in time")
244
- else:
245
- self.logger.info(f"Capture thread for {self.stream_key} stopped")
246
-
247
- def is_alive(self) -> bool:
248
- """Check if capture thread is running."""
249
- return self._thread is not None and self._thread.is_alive()
250
-
251
- def _capture_loop(self) -> None:
252
- """Main blocking capture loop with infinite retry.
253
-
254
- Structure mirrors the original _camera_handler but runs in a blocking thread:
255
- - Outer loop: Infinite retry for camera reconnection
256
- - Inner loop: Frame capture and processing
257
- """
258
- retry_cycle = 0
259
- source_type = None
260
-
261
- # OUTER LOOP: Infinite retry for reconnection
262
- while not self.stop_event.is_set() and self._is_running:
263
- cap = None
264
- consecutive_failures = 0
265
-
266
- try:
267
- # Prepare source (download if URL)
268
- prepared_source = self.capture_manager.prepare_source(
269
- self.source, self.stream_key
270
- )
271
-
272
- # Open capture (blocking)
273
- cap, source_type = self.capture_manager.open_capture(
274
- prepared_source, self.width, self.height
275
- )
276
-
277
- # Get actual dimensions
278
- actual_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
279
- actual_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
280
-
281
- if self.width or self.height:
282
- actual_width, actual_height = FrameProcessor.calculate_actual_dimensions(
283
- actual_width, actual_height, self.width, self.height
284
- )
285
-
286
- # Reset retry on success
287
- retry_cycle = 0
288
-
289
- self.logger.info(
290
- f"Camera {self.stream_key} connected - "
291
- f"{actual_width}x{actual_height} @ {self.target_fps} FPS (type: {source_type})"
292
- )
293
-
294
- # INNER LOOP: Capture frames
295
- while not self.stop_event.is_set() and self._is_running:
296
- frame_start = time.time()
297
-
298
- # Read latest frame
299
- ret, frame = self._read_latest_frame(cap)
300
- read_time = time.time() - frame_start
301
-
302
- if not ret:
303
- consecutive_failures += 1
304
-
305
- # Handle video file end
306
- if source_type == "video_file":
307
- if self.simulate_video_file_stream:
308
- self.logger.info(
309
- f"Video {self.stream_key} ended, restarting "
310
- f"(simulate_video_file_stream=True)"
311
- )
312
- self.stop_event.wait(1.0) # Brief pause
313
- break # Restart video
314
- else:
315
- self.logger.info(f"Video {self.stream_key} ended (no loop)")
316
- return # Exit completely
317
-
318
- # For cameras, check failure threshold
319
- if consecutive_failures >= self.MAX_CONSECUTIVE_FAILURES:
320
- self.logger.warning(
321
- f"Camera {self.stream_key} - {self.MAX_CONSECUTIVE_FAILURES} "
322
- f"consecutive failures, reconnecting..."
323
- )
324
- break # Reconnect
325
-
326
- self.stop_event.wait(0.1)
327
- continue
328
-
329
- # Reset failure counter
330
- consecutive_failures = 0
331
- self._frame_counter += 1
332
-
333
- # Resize if needed
334
- if self.width or self.height:
335
- frame = FrameProcessor.resize_frame(frame, self.width, self.height)
336
-
337
- # Process and enqueue frame
338
- self._process_and_enqueue_frame(
339
- frame, actual_width, actual_height, read_time
340
- )
341
-
342
- # FPS throttling (blocking sleep)
343
- elapsed = time.time() - frame_start
344
- sleep_time = self.frame_interval - elapsed
345
- if sleep_time > 0:
346
- # Use wait() for interruptible sleep
347
- self.stop_event.wait(sleep_time)
348
-
349
- except Exception as exc:
350
- self.logger.error(f"Camera {self.stream_key} error: {exc}", exc_info=True)
351
- finally:
352
- if cap:
353
- try:
354
- cap.release()
355
- except Exception:
356
- pass
357
-
358
- # Check if we should retry
359
- if self.stop_event.is_set() or not self._is_running:
360
- break
361
-
362
- # For video files with looping, restart immediately
363
- if source_type == "video_file" and self.simulate_video_file_stream:
364
- continue
365
-
366
- # Exponential backoff for camera reconnection
367
- cooldown = min(
368
- self.MAX_RETRY_COOLDOWN,
369
- self.MIN_RETRY_COOLDOWN + retry_cycle
370
- )
371
- self.logger.info(
372
- f"Retrying camera {self.stream_key} in {cooldown}s (retry {retry_cycle})"
373
- )
374
- self.stop_event.wait(cooldown)
375
- retry_cycle += 1
376
-
377
- self.logger.info(f"Capture thread for {self.stream_key} exiting")
378
-
379
- def _read_latest_frame(self, cap: cv2.VideoCapture) -> Tuple[bool, Optional[np.ndarray]]:
380
- """Read latest frame, dropping stale buffered frames.
381
-
382
- Uses grab/grab/retrieve pattern when drop_stale_frames is True.
383
- """
384
- if self.drop_stale_frames:
385
- cap.grab() # Clear stale frame
386
- ret = cap.grab() # Get current frame
387
- else:
388
- ret = cap.grab()
389
-
390
- if not ret:
391
- return False, None
392
-
393
- ret, frame = cap.retrieve()
394
- return ret, frame
395
-
396
- def _process_and_enqueue_frame(
397
- self,
398
- frame: np.ndarray,
399
- width: int,
400
- height: int,
401
- read_time: float
402
- ) -> None:
403
- """Process frame and add to queue for async loop.
404
-
405
- In SHM mode: Write to SHM, enqueue metadata only
406
- In JPEG mode: Enqueue full frame for encoding in async loop
407
- """
408
- ts_ns = int(time.time() * 1e9)
409
-
410
- # Check frame similarity if optimizer available
411
- is_similar = False
412
- similarity_score = 0.0
413
- reference_frame_idx = None
414
-
415
- if self.frame_optimizer:
416
- is_similar, similarity_score = self.frame_optimizer.is_similar(
417
- frame, self.stream_key
418
- )
419
- if is_similar:
420
- reference_frame_idx = self._last_shm_frame_idx
421
-
422
- if self.use_shm and self.shm_buffer:
423
- # SHM MODE: Write frame to SHM, enqueue metadata only
424
- if is_similar and reference_frame_idx is not None:
425
- # Similar frame - just enqueue metadata with reference
426
- metadata = ShmMetadataItem(
427
- stream_key=self.stream_key,
428
- stream_group_key=self.stream_group_key,
429
- topic=self.topic,
430
- shm_name=self.shm_buffer.shm_name,
431
- frame_idx=reference_frame_idx,
432
- slot=-1, # No new slot for similar frame
433
- ts_ns=ts_ns,
434
- width=width,
435
- height=height,
436
- format=self.shm_frame_format,
437
- is_similar=True,
438
- reference_frame_idx=reference_frame_idx,
439
- similarity_score=similarity_score,
440
- camera_location=self.camera_location,
441
- frame_counter=self._frame_counter,
442
- )
443
- else:
444
- # Different frame - write to SHM
445
- raw_bytes = self._convert_frame_for_shm(frame)
446
- frame_idx, slot = self.shm_buffer.write_frame(raw_bytes)
447
- self._last_shm_frame_idx = frame_idx
448
-
449
- metadata = ShmMetadataItem(
450
- stream_key=self.stream_key,
451
- stream_group_key=self.stream_group_key,
452
- topic=self.topic,
453
- shm_name=self.shm_buffer.shm_name,
454
- frame_idx=frame_idx,
455
- slot=slot,
456
- ts_ns=ts_ns,
457
- width=width,
458
- height=height,
459
- format=self.shm_frame_format,
460
- is_similar=False,
461
- camera_location=self.camera_location,
462
- frame_counter=self._frame_counter,
463
- )
464
-
465
- # Enqueue metadata (non-blocking, drop if queue full)
466
- try:
467
- self.frame_queue.put_nowait(metadata)
468
- except queue.Full:
469
- self.logger.warning(f"Frame queue full, dropping metadata for {self.stream_key}")
470
- else:
471
- # JPEG MODE: Enqueue full frame for async processing
472
- captured = CapturedFrame(
473
- stream_key=self.stream_key,
474
- frame=frame,
475
- timestamp_ns=ts_ns,
476
- frame_counter=self._frame_counter,
477
- width=width,
478
- height=height,
479
- camera_config=self.camera_config,
480
- capture_time_ms=read_time * 1000,
481
- is_similar=is_similar,
482
- similarity_score=similarity_score,
483
- reference_frame_idx=reference_frame_idx,
484
- )
485
-
486
- try:
487
- self.frame_queue.put_nowait(captured)
488
- except queue.Full:
489
- self.logger.warning(f"Frame queue full, dropping frame for {self.stream_key}")
490
-
491
- def _convert_frame_for_shm(self, frame: np.ndarray) -> bytes:
492
- """Convert frame to target format for SHM storage."""
493
- if self.shm_frame_format == "RGB":
494
- return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).tobytes()
495
- elif self.shm_frame_format == "NV12":
496
- from matrice_common.stream.shm_ring_buffer import bgr_to_nv12
497
- return bgr_to_nv12(frame)
498
- else: # BGR (default)
499
- return frame.tobytes()
500
-
501
-
502
75
  class AsyncCameraWorker:
503
76
  """Async worker process that handles multiple cameras concurrently.
504
77
 
@@ -530,10 +103,6 @@ class AsyncCameraWorker:
530
103
  pin_cpu_affinity: bool = True, # Pin worker to specific CPU cores
531
104
  total_workers: int = 1, # Total worker count for CPU affinity calculation
532
105
  buffer_size: int = 1, # Minimal buffer for low latency (cv2_bench uses 1)
533
- # ================================================================
534
- # BLOCKING THREADS: Optimized capture architecture (Phase 1)
535
- # ================================================================
536
- use_blocking_threads: bool = USE_BLOCKING_THREADS, # Use blocking capture threads per camera
537
106
  ):
538
107
  """Initialize async camera worker.
539
108
 
@@ -554,7 +123,6 @@ class AsyncCameraWorker:
554
123
  pin_cpu_affinity: Pin worker process to specific CPU cores for cache locality
555
124
  total_workers: Total number of workers (for CPU affinity calculation)
556
125
  buffer_size: VideoCapture buffer size (1 = minimal latency)
557
- use_blocking_threads: Use blocking capture threads instead of asyncio tasks (optimized mode)
558
126
  """
559
127
  self.worker_id = worker_id
560
128
  self.camera_configs = camera_configs
@@ -691,39 +259,6 @@ class AsyncCameraWorker:
691
259
  )
692
260
  self._log_system_resources("INIT")
693
261
 
694
- # ========================================================================
695
- # BLOCKING THREADS: Data structures for optimized capture architecture
696
- # ========================================================================
697
- self.use_blocking_threads = use_blocking_threads
698
-
699
- if use_blocking_threads:
700
- # Frame queue for thread-to-async communication
701
- # Sized to handle burst: ~2 seconds of frames per camera
702
- queue_size = max(1000, len(camera_configs) * 60)
703
- self._frame_queue: queue.Queue = queue.Queue(maxsize=queue_size)
704
-
705
- # Thread stop event (separate from process stop_event for cleaner shutdown)
706
- self._thread_stop_event = threading.Event()
707
-
708
- # Camera capture threads (stream_key -> CameraCapture)
709
- self._capture_threads: Dict[str, CameraCapture] = {}
710
-
711
- # Pending SHM metadata for batched Redis writes
712
- self._shm_metadata_batch: List[ShmMetadataItem] = []
713
- self._metadata_batch_lock = threading.Lock()
714
-
715
- self.logger.info(
716
- f"Worker {worker_id}: Blocking threads mode ENABLED "
717
- f"(queue_size={queue_size}, per-camera capture threads)"
718
- )
719
- else:
720
- self._frame_queue = None
721
- self._thread_stop_event = None
722
- self._capture_threads = {}
723
- self._shm_metadata_batch = []
724
- self._metadata_batch_lock = None
725
- self.logger.info(f"Worker {worker_id}: Legacy asyncio tasks mode")
726
-
727
262
  async def _log_metrics(self) -> None:
728
263
  """Log comprehensive worker metrics periodically."""
729
264
  try:
@@ -811,118 +346,50 @@ class AsyncCameraWorker:
811
346
  raise
812
347
 
813
348
  async def run(self):
814
- """Main worker loop - starts capture threads or async tasks based on mode."""
349
+ """Main worker loop - starts async tasks for all cameras and handles commands."""
815
350
  try:
816
- # Initialize async resources (Redis client, etc.)
351
+ # Initialize async resources
817
352
  await self.initialize()
818
353
 
819
- if self.use_blocking_threads:
820
- # ================================================================
821
- # OPTIMIZED MODE: Blocking capture threads + single async processor
822
- # ================================================================
823
- await self._run_blocking_threads_mode()
824
- else:
825
- # ================================================================
826
- # LEGACY MODE: Per-camera asyncio tasks
827
- # ================================================================
828
- await self._run_legacy_async_mode()
829
-
830
- except Exception as exc:
831
- self.logger.error(f"Worker {self.worker_id}: Fatal error in run loop: {exc}", exc_info=True)
832
- self._report_health("error", error=str(exc))
833
- raise
834
-
835
- async def _run_legacy_async_mode(self):
836
- """Legacy mode: Per-camera asyncio tasks with ThreadPoolExecutor for capture."""
837
- self.logger.info(f"Worker {self.worker_id}: Starting legacy asyncio mode")
838
-
839
- # Start initial camera tasks using internal method
840
- for camera_config in self.camera_configs:
841
- await self._add_camera_internal(camera_config)
842
-
843
- # Report initial health
844
- self._report_health("running", len(self.camera_tasks))
354
+ # Start initial camera tasks using internal method
355
+ for camera_config in self.camera_configs:
356
+ await self._add_camera_internal(camera_config)
845
357
 
846
- # Start command handler task if command queue is provided
847
- command_task = None
848
- if self.command_queue:
849
- command_task = asyncio.create_task(
850
- self._command_handler(),
851
- name="command-handler"
852
- )
853
- self.logger.info(f"Worker {self.worker_id}: Command handler started")
854
-
855
- # Monitor tasks and stop event
856
- while not self.stop_event.is_set():
857
- # Check for completed/failed tasks
858
- for stream_key, task in list(self.camera_tasks.items()):
859
- if task.done():
860
- try:
861
- # Check if task raised exception
862
- task.result()
863
- self.logger.warning(f"Worker {self.worker_id}: Camera {stream_key} task completed")
864
- except Exception as exc:
865
- self.logger.error(f"Worker {self.worker_id}: Camera {stream_key} task failed: {exc}")
866
-
867
- # Remove completed task
868
- del self.camera_tasks[stream_key]
869
-
870
- # Report health periodically
358
+ # Report initial health
871
359
  self._report_health("running", len(self.camera_tasks))
872
360
 
873
- # Sleep briefly
874
- await asyncio.sleep(1.0)
875
-
876
- # Stop event set - graceful shutdown
877
- self.logger.info(f"Worker {self.worker_id}: Stop event detected, shutting down...")
878
-
879
- # Cancel command handler if running
880
- if command_task and not command_task.done():
881
- command_task.cancel()
882
- try:
883
- await command_task
884
- except asyncio.CancelledError:
885
- pass
886
-
887
- await self._shutdown()
888
-
889
- async def _run_blocking_threads_mode(self):
890
- """Optimized mode: Blocking capture threads + single async frame processor.
891
-
892
- Architecture:
893
- - One CameraCapture thread per camera (blocking I/O, time.sleep for FPS)
894
- - Threads write to SHM (if enabled) and enqueue metadata to frame_queue
895
- - Single async loop polls frame_queue and batches Redis writes
896
- - Eliminates per-camera asyncio task overhead
897
- """
898
- self.logger.info(f"Worker {self.worker_id}: Starting blocking threads mode")
899
-
900
- # Start capture threads for all cameras
901
- self._start_capture_threads()
361
+ # Start command handler task if command queue is provided
362
+ command_task = None
363
+ if self.command_queue:
364
+ command_task = asyncio.create_task(
365
+ self._command_handler(),
366
+ name="command-handler"
367
+ )
368
+ self.logger.info(f"Worker {self.worker_id}: Command handler started")
369
+
370
+ # Monitor tasks and stop event
371
+ while not self.stop_event.is_set():
372
+ # Check for completed/failed tasks
373
+ for stream_key, task in list(self.camera_tasks.items()):
374
+ if task.done():
375
+ try:
376
+ # Check if task raised exception
377
+ task.result()
378
+ self.logger.warning(f"Worker {self.worker_id}: Camera {stream_key} task completed")
379
+ except Exception as exc:
380
+ self.logger.error(f"Worker {self.worker_id}: Camera {stream_key} task failed: {exc}")
381
+
382
+ # Remove completed task
383
+ del self.camera_tasks[stream_key]
902
384
 
903
- # Report initial health
904
- active_cameras = len(self._capture_threads)
905
- self._report_health("running", active_cameras)
385
+ # Report health periodically
386
+ self._report_health("running", len(self.camera_tasks))
906
387
 
907
- # Start command handler task if command queue is provided
908
- command_task = None
909
- if self.command_queue:
910
- command_task = asyncio.create_task(
911
- self._command_handler(),
912
- name="command-handler"
913
- )
914
- self.logger.info(f"Worker {self.worker_id}: Command handler started")
388
+ # Sleep briefly
389
+ await asyncio.sleep(1.0)
915
390
 
916
- # Run the frame processor loop
917
- try:
918
- await self._run_frame_processor()
919
- finally:
920
391
  # Stop event set - graceful shutdown
921
- self.logger.info(f"Worker {self.worker_id}: Stop event detected, shutting down threads...")
922
-
923
- # Signal threads to stop
924
- if self._thread_stop_event:
925
- self._thread_stop_event.set()
392
+ self.logger.info(f"Worker {self.worker_id}: Stop event detected, shutting down...")
926
393
 
927
394
  # Cancel command handler if running
928
395
  if command_task and not command_task.done():
@@ -934,276 +401,10 @@ class AsyncCameraWorker:
934
401
 
935
402
  await self._shutdown()
936
403
 
937
- def _start_capture_threads(self) -> None:
938
- """Start blocking capture threads for all cameras."""
939
- for camera_config in self.camera_configs:
940
- self._start_capture_thread(camera_config)
941
-
942
- self.logger.info(
943
- f"Worker {self.worker_id}: Started {len(self._capture_threads)} capture threads"
944
- )
945
-
946
- def _start_capture_thread(self, camera_config: Dict[str, Any]) -> None:
947
- """Start a capture thread for a single camera."""
948
- stream_key = camera_config.get('stream_key')
949
- if not stream_key:
950
- self.logger.error("Camera config missing stream_key")
951
- return
952
-
953
- if stream_key in self._capture_threads:
954
- self.logger.warning(f"Capture thread for {stream_key} already exists")
955
- return
956
-
957
- # Get or create SHM buffer if needed
958
- shm_buffer = None
959
- if self.use_shm:
960
- width = camera_config.get('width', 1920)
961
- height = camera_config.get('height', 1080)
962
- shm_buffer = self._get_or_create_shm_buffer(stream_key, width, height)
963
-
964
- # Create and start capture thread
965
- capture = CameraCapture(
966
- camera_config=camera_config,
967
- frame_queue=self._frame_queue,
968
- stop_event=self._thread_stop_event,
969
- capture_manager=self.capture_manager,
970
- frame_optimizer=self.frame_optimizer if self.frame_optimizer.enabled else None,
971
- use_shm=self.use_shm,
972
- shm_buffer=shm_buffer,
973
- shm_frame_format=self.shm_frame_format,
974
- drop_stale_frames=self.drop_stale_frames,
975
- buffer_size=self.buffer_size,
976
- )
977
- capture.start()
978
- self._capture_threads[stream_key] = capture
979
-
980
- async def _run_frame_processor(self) -> None:
981
- """Main async loop that polls frame queue and batches Redis writes.
982
-
983
- This replaces per-camera asyncio tasks with a single efficient loop:
984
- - Polls frame queue (non-blocking)
985
- - Batches metadata for Redis writes
986
- - Flushes batches periodically or when full
987
- - Reports health
988
- """
989
- poll_interval_ms = 1 # 1ms polling when idle
990
- batch_timeout_ms = 25 # 25ms max batch wait (allows 4 flushes in 100ms SLA)
991
- batch_size_limit = 100 # Max items per batch
992
- health_report_interval = 1.0 # Report health every second
993
-
994
- last_health_report = time.time()
995
- last_batch_flush = time.time()
996
-
997
- self.logger.info(
998
- f"Worker {self.worker_id}: Frame processor started "
999
- f"(batch_timeout={batch_timeout_ms}ms, batch_size={batch_size_limit})"
1000
- )
1001
-
1002
- while not self.stop_event.is_set():
1003
- try:
1004
- frames_processed = 0
1005
- process_start = time.time()
1006
-
1007
- # Poll queue for available items (non-blocking)
1008
- while True:
1009
- try:
1010
- item = self._frame_queue.get_nowait()
1011
- await self._process_queue_item(item)
1012
- frames_processed += 1
1013
-
1014
- # Check if batch is full or timeout reached
1015
- if len(self._shm_metadata_batch) >= batch_size_limit:
1016
- await self._flush_metadata_batch()
1017
- last_batch_flush = time.time()
1018
- elif (time.time() - last_batch_flush) * 1000 > batch_timeout_ms:
1019
- await self._flush_metadata_batch()
1020
- last_batch_flush = time.time()
1021
-
1022
- # Limit items per iteration to prevent starvation
1023
- if frames_processed >= batch_size_limit * 2:
1024
- break
1025
-
1026
- except queue.Empty:
1027
- break
1028
-
1029
- # Flush any remaining items if timeout reached
1030
- if self._shm_metadata_batch and (time.time() - last_batch_flush) * 1000 > batch_timeout_ms:
1031
- await self._flush_metadata_batch()
1032
- last_batch_flush = time.time()
1033
-
1034
- # Report health periodically
1035
- current_time = time.time()
1036
- if current_time - last_health_report >= health_report_interval:
1037
- active_threads = sum(1 for c in self._capture_threads.values() if c.is_alive())
1038
- self._report_health("running", active_threads)
1039
- last_health_report = current_time
1040
-
1041
- # Brief yield to prevent busy-waiting when queue is empty
1042
- if frames_processed == 0:
1043
- await asyncio.sleep(poll_interval_ms / 1000)
1044
-
1045
- except Exception as exc:
1046
- self.logger.error(
1047
- f"Worker {self.worker_id}: Error in frame processor: {exc}",
1048
- exc_info=True
1049
- )
1050
- await asyncio.sleep(0.1) # Brief pause on error
1051
-
1052
- # Final flush of remaining items
1053
- if self._shm_metadata_batch:
1054
- await self._flush_metadata_batch()
1055
-
1056
- self.logger.info(f"Worker {self.worker_id}: Frame processor stopped")
1057
-
1058
- async def _process_queue_item(self, item: Union[ShmMetadataItem, CapturedFrame]) -> None:
1059
- """Process an item from the frame queue.
1060
-
1061
- Args:
1062
- item: Either ShmMetadataItem (SHM mode) or CapturedFrame (JPEG mode)
1063
- """
1064
- if isinstance(item, ShmMetadataItem):
1065
- # SHM mode: Collect metadata for batched write
1066
- self._shm_metadata_batch.append(item)
1067
- # Update statistics
1068
- if item.is_similar:
1069
- self.statistics.increment_frames_skipped()
1070
- else:
1071
- self.statistics.increment_frames_sent()
1072
-
1073
- elif isinstance(item, CapturedFrame):
1074
- # JPEG mode: Process and send frame
1075
- await self._process_captured_frame(item)
1076
-
1077
- async def _process_captured_frame(self, captured: CapturedFrame) -> None:
1078
- """Process a captured frame in JPEG mode.
1079
-
1080
- Args:
1081
- captured: CapturedFrame from queue
1082
- """
1083
- stream_key = captured.stream_key
1084
- config = captured.camera_config
1085
-
1086
- # Get timing stats
1087
- last_read, last_write, last_process = self.statistics.get_timing(stream_key)
1088
- input_order = self.statistics.get_next_input_order(stream_key)
1089
-
1090
- if captured.is_similar and captured.reference_frame_idx is not None:
1091
- # Similar frame - send cached reference
1092
- reference_frame_id = self._last_sent_frame_ids.get(stream_key)
1093
- if reference_frame_id:
1094
- metadata = self.message_builder.build_frame_metadata(
1095
- config['source'], {}, config.get('fps', 30), config.get('quality', 90),
1096
- captured.width, captured.height, "camera",
1097
- captured.frame_counter, False, None, None, config.get('camera_location', 'Unknown')
1098
- )
1099
- metadata["similarity_score"] = captured.similarity_score
1100
-
1101
- message = self.message_builder.build_message(
1102
- frame_data=b"",
1103
- stream_key=stream_key,
1104
- stream_group_key=config.get('stream_group_key', 'default'),
1105
- codec="cached",
1106
- metadata=metadata,
1107
- topic=config['topic'],
1108
- broker_config=self.stream_config.get('bootstrap_servers', 'localhost:9092'),
1109
- input_order=input_order,
1110
- last_read_time=last_read,
1111
- last_write_time=last_write,
1112
- last_process_time=last_process,
1113
- cached_frame_id=reference_frame_id,
1114
- )
1115
-
1116
- write_start = time.time()
1117
- await self.redis_client.add_message(config['topic'], message)
1118
- write_time = time.time() - write_start
1119
-
1120
- self.statistics.increment_frames_skipped()
1121
- self.statistics.update_timing(stream_key, captured.capture_time_ms / 1000, write_time, write_time, 0, 0)
1122
- return
1123
-
1124
- # Encode frame
1125
- quality = config.get('quality', 90)
1126
- frame_data, codec = await self._encode_frame_async(captured.frame, quality)
1127
-
1128
- # Build and send message
1129
- metadata = self.message_builder.build_frame_metadata(
1130
- config['source'], {}, config.get('fps', 30), quality,
1131
- captured.width, captured.height, "camera",
1132
- captured.frame_counter, False, None, None, config.get('camera_location', 'Unknown')
1133
- )
1134
- metadata["encoding_type"] = "jpeg"
1135
-
1136
- message = self.message_builder.build_message(
1137
- frame_data, stream_key, config.get('stream_group_key', 'default'),
1138
- codec, metadata, config['topic'],
1139
- self.stream_config.get('bootstrap_servers', 'localhost:9092'),
1140
- input_order, last_read, last_write, last_process,
1141
- cached_frame_id=None,
1142
- )
1143
-
1144
- write_start = time.time()
1145
- await self.redis_client.add_message(config['topic'], message)
1146
- write_time = time.time() - write_start
1147
-
1148
- # Track frame_id for future cached references
1149
- new_frame_id = message.get("frame_id")
1150
- if new_frame_id:
1151
- self._last_sent_frame_ids[stream_key] = new_frame_id
1152
-
1153
- # Update statistics
1154
- self.statistics.increment_frames_sent()
1155
- frame_size = len(frame_data) if frame_data else 0
1156
- self.statistics.update_timing(
1157
- stream_key, captured.capture_time_ms / 1000, write_time,
1158
- write_time + captured.capture_time_ms / 1000, frame_size, 0
1159
- )
1160
-
1161
- async def _flush_metadata_batch(self) -> None:
1162
- """Flush pending SHM metadata to Redis as a batch.
1163
-
1164
- Uses Redis pipeline for efficient multi-message writes.
1165
- """
1166
- if not self._shm_metadata_batch:
1167
- return
1168
-
1169
- batch = self._shm_metadata_batch
1170
- self._shm_metadata_batch = []
1171
-
1172
- batch_start = time.time()
1173
-
1174
- try:
1175
- # Send each metadata item (batching is handled by redis_client)
1176
- for item in batch:
1177
- await self.redis_client.add_shm_metadata(
1178
- stream_name=item.topic,
1179
- cam_id=item.stream_key,
1180
- shm_name=item.shm_name,
1181
- frame_idx=item.frame_idx,
1182
- slot=item.slot if item.slot >= 0 else None,
1183
- ts_ns=item.ts_ns,
1184
- width=item.width,
1185
- height=item.height,
1186
- format=item.format,
1187
- is_similar=item.is_similar,
1188
- reference_frame_idx=item.reference_frame_idx,
1189
- similarity_score=item.similarity_score,
1190
- stream_group_key=item.stream_group_key,
1191
- camera_location=item.camera_location,
1192
- frame_counter=item.frame_counter,
1193
- )
1194
-
1195
- batch_time = (time.time() - batch_start) * 1000
1196
- if batch_time > 50: # Log slow batches
1197
- self.logger.warning(
1198
- f"Worker {self.worker_id}: Slow batch flush - "
1199
- f"{len(batch)} items in {batch_time:.1f}ms"
1200
- )
1201
-
1202
404
  except Exception as exc:
1203
- self.logger.error(
1204
- f"Worker {self.worker_id}: Failed to flush metadata batch: {exc}",
1205
- exc_info=True
1206
- )
405
+ self.logger.error(f"Worker {self.worker_id}: Fatal error in run loop: {exc}", exc_info=True)
406
+ self._report_health("error", error=str(exc))
407
+ raise
1207
408
 
1208
409
  async def _read_latest_frame(
1209
410
  self,
@@ -1801,22 +1002,8 @@ class AsyncCameraWorker:
1801
1002
  # ========================================================================
1802
1003
 
1803
1004
  async def _command_handler(self):
1804
- """Process commands from the manager (runs in async loop).
1805
-
1806
- Phase 6: Adaptive backoff polling to reduce overhead for rare commands.
1807
- - Start at 100ms poll interval
1808
- - Exponential backoff when idle (up to 1s)
1809
- - Speed up after command received (down to 50ms)
1810
- """
1811
- self.logger.info(f"Worker {self.worker_id}: Command handler started (adaptive polling)")
1812
-
1813
- # Adaptive polling parameters (Phase 6)
1814
- MIN_POLL_INTERVAL = 0.05 # 50ms - fast when active
1815
- MAX_POLL_INTERVAL = 1.0 # 1s - slow when idle
1816
- INITIAL_POLL_INTERVAL = 0.1 # 100ms - starting point
1817
- BACKOFF_MULTIPLIER = 1.5 # Exponential backoff factor
1818
-
1819
- poll_interval = INITIAL_POLL_INTERVAL
1005
+ """Process commands from the manager (runs in async loop)."""
1006
+ self.logger.info(f"Worker {self.worker_id}: Command handler started")
1820
1007
 
1821
1008
  while not self.stop_event.is_set():
1822
1009
  try:
@@ -1828,12 +1015,9 @@ class AsyncCameraWorker:
1828
1015
 
1829
1016
  if command:
1830
1017
  await self._process_command(command)
1831
- # Speed up polling after receiving command (more likely to get another)
1832
- poll_interval = MIN_POLL_INTERVAL
1833
1018
  else:
1834
- # Exponential backoff when idle to reduce polling overhead
1835
- poll_interval = min(poll_interval * BACKOFF_MULTIPLIER, MAX_POLL_INTERVAL)
1836
- await asyncio.sleep(poll_interval)
1019
+ # Small sleep when no commands to avoid busy-waiting
1020
+ await asyncio.sleep(0.1)
1837
1021
 
1838
1022
  except asyncio.CancelledError:
1839
1023
  self.logger.info(f"Worker {self.worker_id}: Command handler cancelled")
@@ -1984,35 +1168,7 @@ class AsyncCameraWorker:
1984
1168
  """Gracefully shutdown worker - cancel tasks and cleanup."""
1985
1169
  self.logger.info(f"Worker {self.worker_id}: Starting graceful shutdown")
1986
1170
 
1987
- # ================================================================
1988
- # BLOCKING THREADS: Stop capture threads first
1989
- # ================================================================
1990
- if self.use_blocking_threads and self._thread_stop_event:
1991
- self.logger.info(f"Worker {self.worker_id}: Stopping capture threads...")
1992
- self._thread_stop_event.set()
1993
-
1994
- # Stop each capture thread
1995
- for stream_key, capture in list(self._capture_threads.items()):
1996
- try:
1997
- capture.stop()
1998
- self.logger.debug(f"Worker {self.worker_id}: Stopped capture thread for {stream_key}")
1999
- except Exception as e:
2000
- self.logger.warning(f"Worker {self.worker_id}: Error stopping capture {stream_key}: {e}")
2001
- self._capture_threads.clear()
2002
-
2003
- # Drain frame queue to prevent blocking
2004
- if self._frame_queue:
2005
- drained = 0
2006
- while not self._frame_queue.empty():
2007
- try:
2008
- self._frame_queue.get_nowait()
2009
- drained += 1
2010
- except queue.Empty:
2011
- break
2012
- if drained > 0:
2013
- self.logger.debug(f"Worker {self.worker_id}: Drained {drained} items from frame queue")
2014
-
2015
- # Cancel all camera tasks (legacy mode)
1171
+ # Cancel all camera tasks
2016
1172
  for stream_key, task in self.camera_tasks.items():
2017
1173
  if not task.done():
2018
1174
  task.cancel()
@@ -2022,7 +1178,7 @@ class AsyncCameraWorker:
2022
1178
  if self.camera_tasks:
2023
1179
  await asyncio.gather(*self.camera_tasks.values(), return_exceptions=True)
2024
1180
 
2025
- # Release all captures (legacy mode uses self.captures)
1181
+ # Release all captures
2026
1182
  for stream_key, cap in list(self.captures.items()):
2027
1183
  cap.release()
2028
1184
  self.logger.info(f"Worker {self.worker_id}: Released capture {stream_key}")
@@ -2161,10 +1317,6 @@ def run_async_worker(
2161
1317
  pin_cpu_affinity: bool = True,
2162
1318
  total_workers: int = 1,
2163
1319
  buffer_size: int = 1,
2164
- # ================================================================
2165
- # BLOCKING THREADS: Optimized capture architecture (Phase 1)
2166
- # ================================================================
2167
- use_blocking_threads: bool = USE_BLOCKING_THREADS,
2168
1320
  ):
2169
1321
  """Entry point for async worker process.
2170
1322
 
@@ -2185,7 +1337,6 @@ def run_async_worker(
2185
1337
  pin_cpu_affinity: Pin worker process to specific CPU cores
2186
1338
  total_workers: Total number of workers for CPU affinity calculation
2187
1339
  buffer_size: VideoCapture buffer size (1 = minimal latency)
2188
- use_blocking_threads: Use blocking capture threads instead of asyncio tasks
2189
1340
  """
2190
1341
  # Setup logging for this process
2191
1342
  logging.basicConfig(
@@ -2227,8 +1378,6 @@ def run_async_worker(
2227
1378
  pin_cpu_affinity=pin_cpu_affinity,
2228
1379
  total_workers=total_workers,
2229
1380
  buffer_size=buffer_size,
2230
- # BLOCKING THREADS: Pass through optimized capture architecture parameter
2231
- use_blocking_threads=use_blocking_threads,
2232
1381
  )
2233
1382
 
2234
1383
  # Run event loop