matrice-inference 0.1.22__py3-none-any.whl → 0.1.23__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.

Potentially problematic release.



matrice_inference/server/server.py

@@ -131,6 +131,7 @@ class MatriceDeployServer:
         self.streaming_pipeline = None
         self.app_deployment = None
         self.stream_manager = None
+        self.camera_config_monitor = None
 
         # Initialize utilities
         self.utils = None
@@ -401,6 +402,9 @@ class MatriceDeployServer:
 
             logging.info("Streaming pipeline initialized successfully")
 
+            # Start camera config monitor if app deployment is available
+            self._start_camera_config_monitor()
+
         except Exception as e:
             logging.error(f"Failed to initialize streaming pipeline: {str(e)}")
             raise
@@ -415,6 +419,37 @@ class MatriceDeployServer:
         finally:
             loop.close()
 
+    def _start_camera_config_monitor(self):
+        """Start the camera config monitor if app deployment is available."""
+        try:
+            if not self.app_deployment:
+                logging.info("No app deployment configured, skipping camera config monitor")
+                return
+
+            if not self.streaming_pipeline:
+                logging.warning("Streaming pipeline not initialized, skipping camera config monitor")
+                return
+
+            # Get check interval from job params (default: 60 seconds)
+            check_interval = self.job_params.get("camera_config_check_interval", 60)
+
+            # Import and create the monitor
+            from matrice_inference.server.stream.camera_config_monitor import CameraConfigMonitor
+
+            self.camera_config_monitor = CameraConfigMonitor(
+                app_deployment=self.app_deployment,
+                streaming_pipeline=self.streaming_pipeline,
+                check_interval=check_interval
+            )
+
+            # Start monitoring
+            self.camera_config_monitor.start()
+            logging.info(f"Camera config monitor started (check interval: {check_interval}s)")
+
+        except Exception as e:
+            logging.error(f"Failed to start camera config monitor: {str(e)}")
+            # Don't raise - monitor is optional
+
     def _stop_pipeline_in_new_loop(self):
         """Stop the pipeline in a new event loop (for use when already in an event loop)."""
         import asyncio
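
Note: the check interval is read from job params with a default of 60 seconds. A hedged sketch of how the value might be supplied; apart from the "camera_config_check_interval" key shown in the diff above, the job params content here is hypothetical.

    # Hypothetical job params payload - only camera_config_check_interval is taken from the diff above.
    job_params = {
        # ... other deployment parameters ...
        "camera_config_check_interval": 30,  # poll the camera config API every 30 seconds
    }
    # With this value, _start_camera_config_monitor() would build
    # CameraConfigMonitor(app_deployment=..., streaming_pipeline=..., check_interval=30).
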
@@ -462,6 +497,14 @@ class MatriceDeployServer:
         # Signal shutdown to all components
         self._shutdown_event.set()
 
+        # Stop camera config monitor
+        if self.camera_config_monitor:
+            try:
+                self.camera_config_monitor.stop()
+                logging.info("Camera config monitor stopped")
+            except Exception as exc:
+                logging.error("Error stopping camera config monitor: %s", str(exc))
+
         # Stop streaming pipeline
         if self.streaming_pipeline:
             try:

matrice_inference/server/stream/camera_config_monitor.py (new file)

@@ -0,0 +1,221 @@
+"""Background monitor for camera configuration updates."""
+
+import hashlib
+import json
+import logging
+import threading
+import time
+from typing import Dict, Optional
+
+from matrice_inference.server.stream.utils import CameraConfig
+
+
+class CameraConfigMonitor:
+    """Monitors and syncs camera configurations from app deployment API."""
+
+    DEFAULT_CHECK_INTERVAL = 120  # seconds
+
+    def __init__(
+        self,
+        app_deployment,
+        streaming_pipeline,
+        check_interval: int = DEFAULT_CHECK_INTERVAL
+    ):
+        """Initialize the camera config monitor.
+
+        Args:
+            app_deployment: AppDeployment instance to fetch configs
+            streaming_pipeline: StreamingPipeline instance to update
+            check_interval: Seconds between config checks
+        """
+        self.app_deployment = app_deployment
+        self.streaming_pipeline = streaming_pipeline
+        self.check_interval = max(10, int(check_interval))  # Minimum 10 seconds
+        self.running = False
+        self.thread: Optional[threading.Thread] = None
+        self.logger = logging.getLogger(__name__)
+
+        # Track camera configs by hash to detect changes (thread-safe access)
+        self.camera_hashes: Dict[str, str] = {}
+        self._hashes_lock = threading.Lock()
+
+    def start(self) -> None:
+        """Start the background monitoring thread."""
+        if self.running:
+            self.logger.warning("Camera config monitor already running")
+            return
+
+        self.running = True
+        self.thread = threading.Thread(
+            target=self._monitor_loop,
+            name="CameraConfigMonitor",
+            daemon=False
+        )
+        self.thread.start()
+        self.logger.info(f"Started camera config monitor (check interval: {self.check_interval}s)")
+
+    def stop(self) -> None:
+        """Stop the background monitoring thread."""
+        if not self.running:
+            return
+
+        self.running = False
+        if self.thread:
+            self.thread.join(timeout=5.0)
+        self.logger.info("Stopped camera config monitor")
+
+    def _monitor_loop(self) -> None:
+        """Main monitoring loop - periodically sync camera configs."""
+        while self.running:
+            try:
+                self._sync_camera_configs()
+            except Exception as e:
+                self.logger.error(f"Error syncing camera configs: {e}")
+
+            # Sleep in small intervals to allow quick shutdown
+            for _ in range(self.check_interval):
+                if not self.running:
+                    break
+                time.sleep(1)
+
+    def _sync_camera_configs(self) -> None:
+        """Fetch latest configs from API and sync with pipeline."""
+        try:
+            # Fetch current configs from app deployment API
+            latest_configs = self.app_deployment.get_camera_configs()
+
+            if not latest_configs:
+                self.logger.debug("No camera configs returned from API")
+                return
+
+            # Process each camera config
+            for camera_id, camera_config in latest_configs.items():
+                self._process_camera_config(camera_id, camera_config)
+
+            # Optional: Remove cameras that are no longer in API
+            # Uncomment if you want to auto-remove deleted cameras
+            # self._remove_deleted_cameras(latest_configs)
+
+        except Exception as e:
+            self.logger.error(f"Failed to sync camera configs: {e}")
+
+    def _process_camera_config(self, camera_id: str, camera_config: CameraConfig) -> None:
+        """Process a single camera config - add new or update changed."""
+        try:
+            # Calculate config hash to detect changes
+            config_hash = self._hash_camera_config(camera_config)
+
+            # Thread-safe read of previous hash
+            with self._hashes_lock:
+                previous_hash = self.camera_hashes.get(camera_id)
+
+            # Check if this is a new camera or config changed
+            if previous_hash is None:
+                # New camera - add it
+                self._add_new_camera(camera_id, camera_config, config_hash)
+            elif previous_hash != config_hash:
+                # Config changed - update it
+                self._update_changed_camera(camera_id, camera_config, config_hash)
+            else:
+                # No change - skip
+                self.logger.debug(f"Camera {camera_id} config unchanged")
+
+        except Exception as e:
+            self.logger.error(f"Error processing camera {camera_id}: {e}")
+
+    def _add_new_camera(self, camera_id: str, camera_config: CameraConfig, config_hash: str) -> None:
+        """Add a new camera to the pipeline."""
+        try:
+            # Use asyncio to add camera config
+            import asyncio
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            try:
+                success = loop.run_until_complete(
+                    self.streaming_pipeline.add_camera_config(camera_config)
+                )
+                if success:
+                    # Thread-safe write
+                    with self._hashes_lock:
+                        self.camera_hashes[camera_id] = config_hash
+                    self.logger.info(f"Added new camera: {camera_id}")
+                else:
+                    self.logger.warning(f"Failed to add camera: {camera_id}")
+            finally:
+                loop.close()
+
+        except Exception as e:
+            self.logger.error(f"Error adding camera {camera_id}: {e}")
+
+    def _update_changed_camera(self, camera_id: str, camera_config: CameraConfig, config_hash: str) -> None:
+        """Update an existing camera with changed config."""
+        try:
+            # Use asyncio to update camera config
+            import asyncio
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            try:
+                success = loop.run_until_complete(
+                    self.streaming_pipeline.update_camera_config(camera_config)
+                )
+                if success:
+                    # Thread-safe write
+                    with self._hashes_lock:
+                        self.camera_hashes[camera_id] = config_hash
+                    self.logger.info(f"Updated camera config: {camera_id}")
+                else:
+                    self.logger.warning(f"Failed to update camera: {camera_id}")
+            finally:
+                loop.close()
+
+        except Exception as e:
+            self.logger.error(f"Error updating camera {camera_id}: {e}")
+
+    def _remove_deleted_cameras(self, latest_configs: Dict[str, CameraConfig]) -> None:
+        """Remove cameras that are no longer in the API response."""
+        # Thread-safe read
+        with self._hashes_lock:
+            current_camera_ids = set(self.camera_hashes.keys())
+
+        latest_camera_ids = set(latest_configs.keys())
+        deleted_camera_ids = current_camera_ids - latest_camera_ids
+
+        for camera_id in deleted_camera_ids:
+            try:
+                import asyncio
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+                try:
+                    success = loop.run_until_complete(
+                        self.streaming_pipeline.remove_camera_config(camera_id)
+                    )
+                    if success:
+                        # Thread-safe delete
+                        with self._hashes_lock:
+                            del self.camera_hashes[camera_id]
+                        self.logger.info(f"Removed deleted camera: {camera_id}")
+                finally:
+                    loop.close()
+            except Exception as e:
+                self.logger.error(f"Error removing camera {camera_id}: {e}")
+
+    def _hash_camera_config(self, camera_config: CameraConfig) -> str:
+        """Generate a hash of the camera config to detect changes."""
+        try:
+            # Create a dict with relevant config fields
+            config_dict = {
+                "camera_id": camera_config.camera_id,
+                "input_topic": camera_config.input_topic,
+                "output_topic": camera_config.output_topic,
+                "stream_config": camera_config.stream_config,
+                "enabled": camera_config.enabled
+            }
+
+            # Convert to JSON string (sorted for consistency) and hash
+            config_str = json.dumps(config_dict, sort_keys=True)
+            return hashlib.md5(config_str.encode()).hexdigest()
+
+        except Exception as e:
+            self.logger.error(f"Error hashing camera config: {e}")
+            return ""
+
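
Note: the monitor above detects configuration changes by hashing a sorted-JSON projection of each CameraConfig with MD5 and comparing it against the previously seen hash. A self-contained sketch of that detection logic (not part of the diff), using plain dicts in place of CameraConfig; the field names come from _hash_camera_config above, everything else is illustrative.

    import hashlib
    import json

    def config_hash(config: dict) -> str:
        # Same idea as CameraConfigMonitor._hash_camera_config: stable JSON, then MD5.
        return hashlib.md5(json.dumps(config, sort_keys=True).encode()).hexdigest()

    old = {"camera_id": "cam-1", "input_topic": "in", "output_topic": "out",
           "stream_config": {"fps": 15}, "enabled": True}
    new = dict(old, stream_config={"fps": 30})  # simulated update from the API

    if config_hash(new) != config_hash(old):
        print("config changed -> update_camera_config() would be called")
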
matrice_inference/server/stream/consumer_worker.py

@@ -13,7 +13,32 @@ from typing import Dict, Any, Optional
 from matrice_inference.server.stream.utils import CameraConfig, StreamMessage
 
 class ConsumerWorker:
-    """Handles message consumption from streams with optimized processing."""
+    """Handles message consumption from streams with optimized processing.
+
+    Frame ID Management:
+    -------------------
+    This worker ensures frame_id uniqueness and consistency throughout the pipeline:
+
+    1. Frame ID Source Priority:
+       - Upstream frame_id (from message data) - preferred
+       - Message key (if UUID-like)
+       - Generated unique ID (camera_id + worker_id + uuid4)
+
+    2. Frame Caching:
+       - Frames are cached to Redis using: stream:frames:{frame_id}
+       - frame_id MUST be unique across all apps and cameras
+       - The same frame_id is used throughout the entire pipeline
+
+    3. Multi-App Safety:
+       - Each app deployment has unique camera_ids
+       - Generated IDs include camera_id + worker_id + uuid4 for uniqueness
+       - Redis prefix ensures isolation between different frame types
+
+    4. Frame ID Flow:
+       Consumer → Inference → Post-Processing → Producer
+       The frame_id is preserved in task_data["frame_id"] at each stage
+       and included in the final output message for client retrieval.
+    """
 
     DEFAULT_PRIORITY = 1
     DEFAULT_DB = 0
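
Note: the class docstring above states that frames are cached under stream:frames:{frame_id}, and _store_frame_data later in this diff writes the base64 JPEG into the Redis hash field "frame". Below is a hedged sketch (not part of the diff) of how a downstream client might read a cached frame back; the connection details and function name are assumptions, only the key layout and field name come from this diff.

    import base64
    from typing import Optional

    import redis  # assumes the same redis-py client the cache itself uses

    def fetch_cached_frame(frame_id: str, host: str = "localhost", port: int = 6379) -> Optional[bytes]:
        """Read a cached frame by frame_id; returns raw JPEG bytes, or None if missing/expired."""
        client = redis.Redis(host=host, port=port)
        # Key layout and field name mirror RedisFrameCache._store_frame_data:
        # HSET stream:frames:{frame_id} frame <base64 jpeg>, with a TTL on the key.
        payload = client.hget(f"stream:frames:{frame_id}", "frame")
        return base64.b64decode(payload) if payload else None
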
@@ -209,7 +234,25 @@ class ConsumerWorker:
         return b""
 
     def _process_message(self, message_data: Dict[str, Any]) -> None:
-        """Process incoming message and add to inference queue."""
+        """Process incoming message and add to inference queue.
+
+        This method:
+        1. Extracts/generates a unique frame_id
+        2. Handles codec-specific processing (H.264, H.265, JPEG, etc.)
+        3. Caches the frame content to Redis with the frame_id
+        4. Enqueues the task for inference with frame_id preserved
+
+        Frame ID Consistency:
+        - The frame_id is determined once at the start of processing
+        - The same frame_id is used for cache writes and task data
+        - frame_id is propagated through the entire pipeline
+        - Output messages include the frame_id for client retrieval
+
+        Multi-App Safety:
+        - frame_id uniqueness ensures no collisions between apps
+        - Redis prefix (stream:frames:) provides namespace isolation
+        - Cache writes are non-blocking to prevent pipeline delays
+        """
         try:
             message_key = self._extract_message_key(message_data)
             data = self._parse_message_data(message_data)
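
Note: _determine_frame_id in the next hunk accepts a message key as the frame_id when it "looks like a UUID", using a dashes-plus-length heuristic. If stricter validation were ever preferred, one illustrative alternative (not what the package ships) is to parse the candidate with the standard library:

    import uuid

    def looks_like_uuid(value: str) -> bool:
        # Stricter alternative to the dash/length heuristic used in _determine_frame_id.
        try:
            return str(uuid.UUID(value)) == value.lower()
        except (ValueError, AttributeError, TypeError):
            return False

    print(looks_like_uuid("1b4e28ba-2fa1-11d2-883f-0016d3cca427"))  # True
    print(looks_like_uuid("cam-1_worker-0_not-a-uuid"))             # False
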
@@ -394,16 +437,36 @@ class ConsumerWorker:
         return {}
 
     def _determine_frame_id(self, data: Dict[str, Any], message_data: Dict[str, Any]) -> str:
-        """Determine frame ID from message data."""
+        """Determine frame ID from message data with guaranteed uniqueness.
+
+        Priority:
+        1. Existing frame_id from upstream (UUID expected)
+        2. Message key (if it looks like a UUID)
+        3. Generate unique ID with camera context
+        """
+        # First priority: explicit frame_id from upstream
         frame_id = data.get("frame_id")
-        if frame_id:
-            return frame_id
+        if frame_id and isinstance(frame_id, str) and frame_id.strip():
+            self.logger.debug(f"Using upstream frame_id: {frame_id}")
+            return str(frame_id).strip()
 
+        # Second priority: message key (if it's a UUID-like string)
         fallback_key = message_data.get("key") or data.get("input_name")
         if fallback_key:
-            return str(fallback_key)
-
-        return f"{self.camera_id}_{int(time.time() * 1000)}"
+            key_str = str(fallback_key)
+            # Check if it looks like a UUID (contains dashes and right length)
+            if "-" in key_str and len(key_str) >= 32:
+                self.logger.debug(f"Using message key as frame_id: {key_str}")
+                return key_str
+
+        # Last resort: generate unique ID with camera, worker, and high-precision timestamp
+        import uuid
+        generated_id = f"{self.camera_id}_{self.worker_id}_{uuid.uuid4()}"
+        self.logger.warning(
+            f"No upstream frame_id found, generated unique ID: {generated_id} "
+            f"(message_key: {fallback_key})"
+        )
+        return generated_id
 
     def _enrich_input_stream(self, input_stream: Dict[str, Any], frame_id: str) -> None:
         """Add frame_id to input_stream if not present."""
@@ -444,15 +507,44 @@ class ConsumerWorker:
             frame_id: Unique frame identifier (uuid expected)
             content_b64: Base64-encoded JPEG string
         """
+        if not self.frame_cache:
+            self.logger.debug("Frame cache not configured, skipping cache write")
+            return
+
+        # Validate frame_id
+        if not frame_id or not isinstance(frame_id, str):
+            self.logger.warning(
+                f"Invalid frame_id for caching: {frame_id!r} (type: {type(frame_id).__name__})"
+            )
+            return
+
+        frame_id = frame_id.strip()
+        if not frame_id:
+            self.logger.warning("Empty frame_id after stripping, skipping cache")
+            return
+
+        # Validate content
+        if not content_b64 or not isinstance(content_b64, str):
+            self.logger.warning(
+                f"Invalid content for frame_id={frame_id}: "
+                f"type={type(content_b64).__name__}, "
+                f"len={len(content_b64) if content_b64 else 0}"
+            )
+            return
+
         try:
-            if not self.frame_cache:
-                return
-            if not frame_id or not isinstance(frame_id, str):
-                return
-            if not content_b64 or not isinstance(content_b64, str):
-                return
+            content_len = len(content_b64)
+            self.logger.debug(
+                f"Caching frame: frame_id={frame_id}, camera={self.camera_id}, "
+                f"worker={self.worker_id}, content_size={content_len} bytes"
+            )
             self.frame_cache.put(frame_id, content_b64)
+            self.logger.debug(f"Successfully queued frame {frame_id} for caching")
         except Exception as e:
             # Do not block pipeline on cache errors
-            self.logger.debug(f"Frame cache put failed for frame_id={frame_id}: {e}")
+            self.logger.error(
+                f"Frame cache put failed: frame_id={frame_id}, camera={self.camera_id}, "
+                f"worker={self.worker_id}, error={e}",
+                exc_info=True
+            )
 
matrice_inference/server/stream/frame_cache.py

@@ -1,7 +1,8 @@
 import logging
 import queue
 import threading
-from typing import Optional
+import time
+from typing import Optional, Dict, Any
 
 try:
     import redis  # type: ignore
@@ -47,6 +48,17 @@ class RedisFrameCache:
         self.queue: queue.Queue = queue.Queue(maxsize=max_queue)
         self.threads: list = []
         self._client: Optional[redis.Redis] = None
+
+        # Metrics for monitoring and debugging
+        self._metrics = {
+            "frames_queued": 0,
+            "frames_cached": 0,
+            "frames_failed": 0,
+            "frames_dropped": 0,
+            "last_cache_time": None,
+            "last_frame_id": None,
+        }
+        self._metrics_lock = threading.Lock()
 
         if not self._is_redis_available():
             return
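
Note: these counters are exposed through the get_metrics() method added further down in this diff. A hedged sketch (not part of the diff) of a monitoring loop that polls them; the cache object and logging setup are assumed, the key names come from this diff.

    import logging
    import time

    def watch_frame_cache(cache, interval_sec: int = 30) -> None:
        """Periodically log frame-cache health using RedisFrameCache.get_metrics()."""
        while cache.running:
            m = cache.get_metrics()
            logging.info(
                "frame cache: queued=%s cached=%s failed=%s dropped=%s queue=%s/%s",
                m["frames_queued"], m["frames_cached"], m["frames_failed"],
                m["frames_dropped"], m["queue_size"], m["queue_maxsize"],
            )
            time.sleep(interval_sec)
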
@@ -92,11 +104,21 @@ class RedisFrameCache:
 
     def start(self) -> None:
         """Start the frame cache with worker threads."""
-        if not self._client or self.running:
+        if not self._client:
+            self.logger.warning("Cannot start frame cache: Redis client not initialized")
+            return
+
+        if self.running:
+            self.logger.warning("Frame cache already running")
             return
 
         self.running = True
         self._start_worker_threads()
+
+        self.logger.info(
+            f"Started RedisFrameCache: prefix={self.prefix}, ttl={self.ttl_seconds}s, "
+            f"workers={self._worker_threads}, queue_size={self.queue.maxsize}"
+        )
 
     def _start_worker_threads(self) -> None:
         """Start worker threads for processing cache operations."""
@@ -138,18 +160,36 @@ class RedisFrameCache:
         """Enqueue a cache write for the given frame.
 
         Args:
-            frame_id: unique identifier for the frame
+            frame_id: unique identifier for the frame (must be unique across all apps)
             base64_content: base64-encoded image string
         """
         if not self._is_cache_ready():
+            self.logger.debug(
+                f"Cache not ready for frame_id={frame_id}, skipping "
+                f"(running={self.running}, client={self._client is not None})"
+            )
             return
 
         if not self._validate_input(frame_id, base64_content):
             return
 
         try:
+            # Build Redis key with prefix to avoid collisions
             key = f"{self.prefix}{frame_id}"
-            self.queue.put_nowait((key, base64_content))
+            content_len = len(base64_content)
+
+            self.queue.put_nowait((key, base64_content, frame_id))
+
+            # Update metrics
+            with self._metrics_lock:
+                self._metrics["frames_queued"] += 1
+                self._metrics["last_frame_id"] = frame_id
+
+            self.logger.debug(
+                f"Queued frame for caching: frame_id={frame_id}, "
+                f"redis_key={key}, content_size={content_len}, "
+                f"queue_size={self.queue.qsize()}"
+            )
         except queue.Full:
             self._handle_queue_full(frame_id)
 
@@ -159,17 +199,28 @@ class RedisFrameCache:
 
     def _validate_input(self, frame_id: str, base64_content: str) -> bool:
         """Validate input parameters."""
-        if not frame_id:
-            self.logger.warning("Empty frame_id provided")
+        if not frame_id or not isinstance(frame_id, str) or not frame_id.strip():
+            self.logger.warning(
+                f"Invalid frame_id: {frame_id!r} (type: {type(frame_id).__name__})"
+            )
             return False
-        if not base64_content:
-            self.logger.warning("Empty base64_content provided")
+        if not base64_content or not isinstance(base64_content, str):
+            self.logger.warning(
+                f"Invalid base64_content for frame_id={frame_id}: "
+                f"type={type(base64_content).__name__}, "
+                f"len={len(base64_content) if base64_content else 0}"
+            )
             return False
         return True
 
     def _handle_queue_full(self, frame_id: str) -> None:
         """Handle queue full condition."""
-        self.logger.debug(f"Frame cache queue full; dropping frame_id={frame_id}")
+        with self._metrics_lock:
+            self._metrics["frames_dropped"] += 1
+        self.logger.warning(
+            f"Frame cache queue full (size={self.queue.maxsize}); "
+            f"dropping frame_id={frame_id}. Consider increasing max_queue or worker_threads."
+        )
 
     def _worker(self) -> None:
         """Worker thread for processing cache operations."""
@@ -195,22 +246,72 @@ class RedisFrameCache:
 
     def _process_cache_item(self, item: tuple) -> None:
         """Process a single cache item."""
+        frame_id = "unknown"
         try:
-            key, base64_content = item
-            self._store_frame_data(key, base64_content)
+            key, base64_content, frame_id = item
+            self._store_frame_data(key, base64_content, frame_id)
+        except ValueError as e:
+            # Handle old tuple format without frame_id for backwards compatibility
+            try:
+                key, base64_content = item
+                frame_id = key.replace(self.prefix, "") if key.startswith(self.prefix) else key
+                self._store_frame_data(key, base64_content, frame_id)
+            except Exception as inner_e:
+                self.logger.error(f"Failed to unpack cache item: {inner_e}")
+                with self._metrics_lock:
+                    self._metrics["frames_failed"] += 1
         except Exception as e:
-            self.logger.debug(f"Failed to process cache item: {e}")
+            self.logger.error(f"Failed to process cache item for frame_id={frame_id}: {e}")
+            with self._metrics_lock:
+                self._metrics["frames_failed"] += 1
         finally:
             self._mark_task_done()
 
-    def _store_frame_data(self, key: str, base64_content: str) -> None:
-        """Store frame data in Redis with TTL."""
+    def _store_frame_data(self, key: str, base64_content: str, frame_id: str) -> None:
+        """Store frame data in Redis with TTL.
+
+        Uses Redis HSET + EXPIRE for atomic TTL management.
+        Multiple apps can safely write to different frame_ids without conflicts.
+        """
+        start_time = time.time()
         try:
+            content_len = len(base64_content)
+            self.logger.debug(
+                f"Writing to Redis: frame_id={frame_id}, key={key}, "
+                f"content_size={content_len}, ttl={self.ttl_seconds}s"
+            )
+
             # Store base64 string in Redis hash field 'frame', then set TTL
             self._client.hset(key, "frame", base64_content)
             self._client.expire(key, self.ttl_seconds)
+
+            elapsed = time.time() - start_time
+
+            # Update metrics
+            with self._metrics_lock:
+                self._metrics["frames_cached"] += 1
+                self._metrics["last_cache_time"] = time.time()
+                self._metrics["last_frame_id"] = frame_id
+
+            self.logger.info(
+                f"Successfully cached frame: frame_id={frame_id}, key={key}, "
+                f"content_size={content_len}, ttl={self.ttl_seconds}s, "
+                f"elapsed={elapsed:.3f}s"
+            )
+        except redis.RedisError as e:
+            with self._metrics_lock:
+                self._metrics["frames_failed"] += 1
+            self.logger.error(
+                f"Redis error caching frame: frame_id={frame_id}, key={key}, "
+                f"error={e.__class__.__name__}: {e}"
+            )
         except Exception as e:
-            self.logger.debug(f"Failed to cache frame {key}: {e}")
+            with self._metrics_lock:
+                self._metrics["frames_failed"] += 1
+            self.logger.error(
+                f"Unexpected error caching frame: frame_id={frame_id}, key={key}, "
+                f"error={e}", exc_info=True
+            )
 
     def _mark_task_done(self) -> None:
         """Mark queue task as done."""
@@ -218,5 +319,32 @@ class RedisFrameCache:
             self.queue.task_done()
         except Exception:
             pass
+
+    def get_metrics(self) -> Dict[str, Any]:
+        """Get cache performance metrics for monitoring and debugging.
+
+        Returns:
+            Dictionary containing cache metrics including:
+            - frames_queued: Total frames queued for caching
+            - frames_cached: Total frames successfully cached
+            - frames_failed: Total frames that failed to cache
+            - frames_dropped: Total frames dropped due to queue full
+            - queue_size: Current queue size
+            - last_cache_time: Timestamp of last successful cache
+            - last_frame_id: Last frame_id cached
+        """
+        with self._metrics_lock:
+            metrics = dict(self._metrics)
+
+        metrics.update({
+            "running": self.running,
+            "queue_size": self.queue.qsize(),
+            "queue_maxsize": self.queue.maxsize,
+            "worker_threads": self._worker_threads,
+            "prefix": self.prefix,
+            "ttl_seconds": self.ttl_seconds,
+        })
+
+        return metrics
 
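
Note: _store_frame_data above issues HSET and EXPIRE as two separate round trips, although its docstring describes the pair as atomic. If single-round-trip (or transactional) behaviour were wanted, a redis-py pipeline could batch the two commands. This is an illustrative alternative, not the shipped implementation; the function name is hypothetical.

    import redis

    def store_frame_pipelined(client: "redis.Redis", key: str, base64_content: str, ttl_seconds: int) -> None:
        # Batches HSET + EXPIRE into a single round trip via redis-py's pipeline;
        # with transaction=True the two commands are also wrapped in MULTI/EXEC.
        pipe = client.pipeline(transaction=True)
        pipe.hset(key, "frame", base64_content)
        pipe.expire(key, ttl_seconds)
        pipe.execute()
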
matrice_inference/server/stream/inference_worker.py

@@ -107,8 +107,8 @@ class InferenceWorker:
     def _create_postprocessing_task(
         self, task_data: Dict[str, Any], result: Dict[str, Any], processing_time: float
     ) -> Dict[str, Any]:
-        """Create post-processing task from inference result."""
-        return {
+        """Create post-processing task from inference result, preserving frame_id."""
+        postproc_task = {
             "original_message": task_data["message"],
             "model_result": result["model_result"],
             "metadata": result["metadata"],
@@ -117,6 +117,15 @@ class InferenceWorker:
             "stream_key": task_data["stream_key"],
             "camera_config": task_data["camera_config"]
         }
+
+        # Preserve frame_id from task_data (critical for cache retrieval)
+        if "frame_id" in task_data:
+            postproc_task["frame_id"] = task_data["frame_id"]
+            self.logger.debug(f"Preserved frame_id in postproc task: {task_data['frame_id']}")
+        else:
+            self.logger.warning("No frame_id in task_data to preserve")
+
+        return postproc_task
 
     def _run_inference(self, task_data: Dict[str, Any]) -> Dict[str, Any]:
         """Run inference in thread pool with proper error handling and cleanup."""
matrice_inference/server/stream/post_processing_worker.py

@@ -126,6 +126,13 @@ class PostProcessingWorker:
             "processing_time_sec": task_data["processing_time"],
             "metadata": task_data.get("metadata", {})
         }
+
+        # Verify frame_id is present in output
+        if not frame_id:
+            self.logger.warning(
+                f"Output task missing frame_id for camera={task_data['original_message'].camera_id}, "
+                f"message_key={task_data['original_message'].message_key}"
+            )
 
         return {
             "camera_id": task_data["original_message"].camera_id,
matrice_inference/server/stream/stream_pipeline.py

@@ -353,8 +353,8 @@ class StreamingPipeline:
             del self.consumer_workers[camera_id]
 
     def get_metrics(self) -> Dict[str, Any]:
-        """Get pipeline metrics."""
-        return {
+        """Get pipeline metrics including frame cache statistics."""
+        metrics = {
             "running": self.running,
             "camera_count": len(self.camera_configs),
             "enabled_cameras": sum(1 for config in self.camera_configs.values() if config.enabled),
@@ -383,6 +383,18 @@ class StreamingPipeline:
                 for camera_id, config in self.camera_configs.items()
             }
         }
+
+        # Add frame cache metrics if available
+        if self.frame_cache:
+            try:
+                metrics["frame_cache"] = self.frame_cache.get_metrics()
+            except Exception as e:
+                self.logger.warning(f"Failed to get frame cache metrics: {e}")
+                metrics["frame_cache"] = {"error": str(e)}
+        else:
+            metrics["frame_cache"] = {"enabled": False}
+
+        return metrics
 
     def _initialize_frame_cache(self) -> None:
         """Initialize RedisFrameCache with TTL 10 minutes, deriving connection from Redis cameras if available."""
matrice_inference-0.1.23.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: matrice_inference
-Version: 0.1.22
+Version: 0.1.23
 Summary: Common server utilities for Matrice.ai services
 Author-email: "Matrice.ai" <dipendra@matrice.ai>
 License-Expression: MIT
matrice_inference-0.1.23.dist-info/RECORD

@@ -3,7 +3,7 @@ matrice_inference/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 matrice_inference/server/__init__.py,sha256=dzLoM3Ma5DjZj2QP6hTXt9dlPQR6QHzNsKkqFahTnZ8,799
 matrice_inference/server/inference_interface.py,sha256=FOoR_5EPqJItgiDyWPQeLFYayAK15dYQAvcbArs2PVc,7842
 matrice_inference/server/proxy_interface.py,sha256=lZ1Yq77JmahHLSqksmQs-Bl0WQ3QmTiWVgRZeQw6oBo,14911
-matrice_inference/server/server.py,sha256=es6dJua2hyz7bwDNmObMiHERXqQCnAh8i7rzK5FkWeg,41806
+matrice_inference/server/server.py,sha256=qAF3TF0wGCBkXcyCAcqQQrbNyWyfRG-e85SYSH_WKzQ,43754
 matrice_inference/server/model/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 matrice_inference/server/model/model_manager.py,sha256=RbJi9EKCTiW49mCqd7gfEOY1a1sXQXclN7kpmiZVA2E,11725
 matrice_inference/server/model/model_manager_wrapper.py,sha256=b0stRwLuoXnwC2AP2HRULa3Azgcm__PIQv7mwRQDcSo,23119
@@ -11,12 +11,13 @@ matrice_inference/server/model/triton_model_manager.py,sha256=rxDj7gFCBDHPbk-dtj
 matrice_inference/server/model/triton_server.py,sha256=KtauLqgRchfRFtRYnzpjJCbRaBHdDNWLaHwSBv5DvVY,55560
 matrice_inference/server/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 matrice_inference/server/stream/app_deployment.py,sha256=b6-VDmI9xYry-6_law68Q5r-d8Fo7EpY9oW6r-260Wo,10631
-matrice_inference/server/stream/consumer_worker.py,sha256=5YzK05K8X8_HG01HHWRsIqSSCdCoFweo7SgVm73QCqs,20842
-matrice_inference/server/stream/frame_cache.py,sha256=YWkpMytiKRLebh6jmLMzfaQOQawpHBMsTltQyMxkdqo,7183
-matrice_inference/server/stream/inference_worker.py,sha256=jIQ6FTNXJSp95gpofpc-gbYoIthfJMCVl7Tcv1i9ta0,10161
-matrice_inference/server/stream/post_processing_worker.py,sha256=99ieOnXCbj3m_Sx7C8WpXf3ca1wEKkU3rKyeqKhmMCk,12407
+matrice_inference/server/stream/camera_config_monitor.py,sha256=GFdSgOJ6hg5IszKqkQ3VJ2wPrPciAvIUQj7_L7xh9TU,8813
+matrice_inference/server/stream/consumer_worker.py,sha256=YPHFdSIBHnBuQK0zgwUuxYalwyFGb7N0gv-Dvi0jG_0,24895
+matrice_inference/server/stream/frame_cache.py,sha256=R3AdXNZsJTE3OSEhIRJdy7Q-AXWZRhX9d5hbTzud92o,12563
+matrice_inference/server/stream/inference_worker.py,sha256=udFbXhAH_fJeqxzZh2MsG85UE62qC0qQCnftiqKtuSk,10591
+matrice_inference/server/stream/post_processing_worker.py,sha256=Tt-ymq2dHpY_v-dzG4zKQAAhHUGhE7OhWuQ1BR_7K3k,12713
 matrice_inference/server/stream/producer_worker.py,sha256=wjS5c3zNhdA5TCFVINnAkJmaVCxHpx_y6GoszEdeDaY,8220
-matrice_inference/server/stream/stream_pipeline.py,sha256=KopK-_irjtgOvC_VY-cvExsWeCSaTEoJVk05d5y8EZk,16968
+matrice_inference/server/stream/stream_pipeline.py,sha256=5eiUyKEgFuSYrLa0o3HuYg4XOvTeWmTGmJjStEloq9o,17453
 matrice_inference/server/stream/utils.py,sha256=z2qKhnyk_6MpQfPvOWHkRltnB2oVH6MW_9jwecxqAlI,507
 matrice_inference/tmp/abstract_model_manager.py,sha256=_UUWAa0pjld5SfOWNGG-9WuMaOiT5PlrRWPoAzeYg-M,1733
 matrice_inference/tmp/batch_manager.py,sha256=r6TU_UFZoDGYAfEhBT3WGXf8NmpT-h3UvRyFJmL9Uew,9639
@@ -30,8 +31,8 @@ matrice_inference/tmp/aggregator/latency.py,sha256=ObRb_2gZZTqoT2ezHd2G9I4tcz3wp
 matrice_inference/tmp/aggregator/pipeline.py,sha256=qleXlCvPbqidOd12Hy0MvVzr1LqIMIazK1xc_YpjpBc,44825
 matrice_inference/tmp/aggregator/publisher.py,sha256=MrFXqM8w_cXVMLYEGZoAX4huLjKrZsjC_yXKuxIGh-M,19305
 matrice_inference/tmp/aggregator/synchronizer.py,sha256=HiGvWIsABxWQZ1-jT-9uBexmDCgN7GJnWkvKGfuVoUA,26899
-matrice_inference-0.1.22.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
-matrice_inference-0.1.22.dist-info/METADATA,sha256=Brjkeqi-SID-zNj0QGQgZiAc02Gj-G2W5qluTUCFST4,1031
-matrice_inference-0.1.22.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-matrice_inference-0.1.22.dist-info/top_level.txt,sha256=h7i-nE-ZEKLmslZGWAdfa78m5oY8Bes04TTL8TtBvjM,18
-matrice_inference-0.1.22.dist-info/RECORD,,
+matrice_inference-0.1.23.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
+matrice_inference-0.1.23.dist-info/METADATA,sha256=BDk8QRdcRZEG-nO_emElH8W4yzloNIkJUoHaP05TOLw,1031
+matrice_inference-0.1.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+matrice_inference-0.1.23.dist-info/top_level.txt,sha256=h7i-nE-ZEKLmslZGWAdfa78m5oY8Bes04TTL8TtBvjM,18
+matrice_inference-0.1.23.dist-info/RECORD,,