matrice-inference 0.1.2__py3-none-any.whl → 0.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of matrice-inference might be problematic. Click here for more details.

@@ -1,50 +1,92 @@
1
1
  import logging
2
- import threading
3
2
  import queue
4
-
3
+ import threading
4
+ import time
5
+ from typing import Optional, Dict, Any
5
6
 
6
7
  try:
7
8
  import redis # type: ignore
8
- except Exception: # pragma: no cover
9
+ except ImportError: # pragma: no cover
9
10
  redis = None # type: ignore
10
11
 
11
12
 
12
13
class RedisFrameCache:
    """Non-blocking Redis cache for frames keyed by frame_id.

    Stores the base64 string content under key '{prefix}{frame_id}' in hash
    field 'frame'. Each insert sets or refreshes the key's TTL. Writes are
    enqueued and performed by daemon worker threads so callers never block;
    when the queue is full, frames are dropped (and counted) rather than
    stalling the pipeline.
    """

    DEFAULT_TTL_SECONDS = 300
    DEFAULT_MAX_QUEUE = 10000
    DEFAULT_WORKER_THREADS = 2
    DEFAULT_CONNECT_TIMEOUT = 2.0
    DEFAULT_SOCKET_TIMEOUT = 0.5
    DEFAULT_HEALTH_CHECK_INTERVAL = 30
    DEFAULT_PREFIX = "stream:frames:"

    def __init__(
        self,
        host: str = "localhost",
        port: int = 6379,
        db: int = 0,
        password: Optional[str] = None,
        username: Optional[str] = None,
        ttl_seconds: int = DEFAULT_TTL_SECONDS,
        prefix: str = DEFAULT_PREFIX,
        max_queue: int = DEFAULT_MAX_QUEUE,
        worker_threads: int = DEFAULT_WORKER_THREADS,
        connect_timeout: float = DEFAULT_CONNECT_TIMEOUT,
        socket_timeout: float = DEFAULT_SOCKET_TIMEOUT,
    ) -> None:
        """Initialize the cache; Redis connectivity failures never raise.

        Args:
            host: Redis server hostname.
            port: Redis server port.
            db: Redis database index.
            password: Optional Redis password.
            username: Optional Redis username (ACL).
            ttl_seconds: Per-key TTL; clamped to a minimum of 1 second.
            prefix: Key prefix used to namespace frame keys.
            max_queue: Maximum number of pending writes before dropping.
            worker_threads: Number of background writer threads (min 1).
            connect_timeout: Socket connect timeout in seconds.
            socket_timeout: Socket read/write timeout in seconds.
        """
        self.logger = logging.getLogger(f"{__name__}.frame_cache")
        self.ttl_seconds = max(1, int(ttl_seconds))
        self.prefix = prefix
        self.running = False
        self._worker_threads = max(1, int(worker_threads))

        self.queue: queue.Queue = queue.Queue(maxsize=max_queue)
        self.threads: list = []
        # Annotation is quoted: when the optional redis package is missing,
        # the module sets redis = None, and an unquoted redis.Redis would
        # raise AttributeError at class-definition time.
        self._client: Optional["redis.Redis"] = None

        # Metrics for monitoring and debugging; guarded by _metrics_lock
        # because workers and callers update them concurrently.
        self._metrics: Dict[str, Any] = {
            "frames_queued": 0,
            "frames_cached": 0,
            "frames_failed": 0,
            "frames_dropped": 0,
            "last_cache_time": None,
            "last_frame_id": None,
        }
        self._metrics_lock = threading.Lock()

        if not self._is_redis_available():
            return

        self._client = self._create_redis_client(
            host, port, db, password, username, connect_timeout, socket_timeout
        )

    def _is_redis_available(self) -> bool:
        """Return True if the optional redis package is importable."""
        if redis is None:
            self.logger.warning("redis package not installed; frame caching disabled")
            return False
        return True

    def _create_redis_client(
        self,
        host: str,
        port: int,
        db: int,
        password: Optional[str],
        username: Optional[str],
        connect_timeout: float,
        socket_timeout: float,
    ) -> Optional["redis.Redis"]:
        """Create a Redis client, returning None on any initialization error."""
        try:
            return redis.Redis(
                host=host,
                port=port,
                db=db,
                password=password,
                username=username,
                socket_connect_timeout=connect_timeout,
                socket_timeout=socket_timeout,
                health_check_interval=self.DEFAULT_HEALTH_CHECK_INTERVAL,
                retry_on_timeout=True,
                decode_responses=True,  # store/read strings, not bytes
            )
        except Exception as e:
            self.logger.warning(f"Failed to initialize Redis client: {e}")
            return None

    def start(self) -> None:
        """Start the frame cache worker threads (no-op if not initialized)."""
        if not self._client:
            self.logger.warning("Cannot start frame cache: Redis client not initialized")
            return

        if self.running:
            self.logger.warning("Frame cache already running")
            return

        self.running = True
        self._start_worker_threads()

        self.logger.info(
            f"Started RedisFrameCache: prefix={self.prefix}, ttl={self.ttl_seconds}s, "
            f"workers={self._worker_threads}, queue_size={self.queue.maxsize}"
        )

    def _start_worker_threads(self) -> None:
        """Spawn daemon worker threads that drain the write queue."""
        for i in range(self._worker_threads):
            thread = threading.Thread(
                target=self._worker,
                name=f"FrameCache-{i}",
                daemon=True,
            )
            thread.start()
            self.threads.append(thread)

    def stop(self) -> None:
        """Stop the frame cache and join worker threads."""
        if not self.running:
            return

        self.running = False
        self._stop_worker_threads()
        self.threads.clear()

    def _stop_worker_threads(self) -> None:
        """Signal workers with None sentinels, then join them with a timeout."""
        # One sentinel per thread; queue.Full just means workers will instead
        # notice self.running == False on their next poll.
        for _ in self.threads:
            try:
                self.queue.put_nowait(None)
            except queue.Full:
                pass

        for thread in self.threads:
            try:
                thread.join(timeout=2.0)
            except Exception as e:
                self.logger.warning(f"Error joining thread {thread.name}: {e}")

    def put(self, frame_id: str, base64_content: str) -> None:
        """Enqueue a cache write for the given frame; never blocks the caller.

        Args:
            frame_id: unique identifier for the frame (must be unique across all apps)
            base64_content: base64-encoded image string
        """
        if not self._is_cache_ready():
            self.logger.debug(
                f"Cache not ready for frame_id={frame_id}, skipping "
                f"(running={self.running}, client={self._client is not None})"
            )
            return

        if not self._validate_input(frame_id, base64_content):
            return

        # Build Redis key with prefix to avoid collisions.
        key = f"{self.prefix}{frame_id}"

        # Keep the try body minimal: only the enqueue can raise queue.Full.
        try:
            self.queue.put_nowait((key, base64_content, frame_id))
        except queue.Full:
            self._handle_queue_full(frame_id)
            return

        with self._metrics_lock:
            self._metrics["frames_queued"] += 1
            self._metrics["last_frame_id"] = frame_id

        self.logger.debug(
            f"Queued frame for caching: frame_id={frame_id}, "
            f"redis_key={key}, content_size={len(base64_content)}, "
            f"queue_size={self.queue.qsize()}"
        )

    def _is_cache_ready(self) -> bool:
        """Return True when a client exists and the cache has been started."""
        return bool(self._client and self.running)

    def _validate_input(self, frame_id: str, base64_content: str) -> bool:
        """Validate put() parameters; log and reject anything unusable."""
        if not frame_id or not isinstance(frame_id, str) or not frame_id.strip():
            self.logger.warning(
                f"Invalid frame_id: {frame_id!r} (type: {type(frame_id).__name__})"
            )
            return False
        if not base64_content or not isinstance(base64_content, str):
            self.logger.warning(
                f"Invalid base64_content for frame_id={frame_id}: "
                f"type={type(base64_content).__name__}, "
                f"len={len(base64_content) if base64_content else 0}"
            )
            return False
        return True

    def _handle_queue_full(self, frame_id: str) -> None:
        """Record and log a dropped frame when the queue is saturated."""
        with self._metrics_lock:
            self._metrics["frames_dropped"] += 1
        self.logger.warning(
            f"Frame cache queue full (size={self.queue.maxsize}); "
            f"dropping frame_id={frame_id}. Consider increasing max_queue or worker_threads."
        )

    def _worker(self) -> None:
        """Worker loop: drain the queue and write frames to Redis.

        Polls with a short timeout so a cleared ``running`` flag is noticed
        promptly even when no sentinel is delivered.
        """
        while self.running:
            try:
                item = self.queue.get(timeout=0.5)
            except queue.Empty:
                # Timed out with nothing to do; re-check running and poll again.
                continue
            if item is None:
                # Stop sentinel from stop(). Previously the sentinel fell
                # through the same path as a queue.Empty timeout, so the
                # break was unreachable and task_done() was never called
                # for it (which would wedge queue.join()).
                self._mark_task_done()
                break
            self._process_cache_item(item)

    def _process_cache_item(self, item: tuple) -> None:
        """Unpack and store a single queued item, tracking failures."""
        frame_id = "unknown"
        try:
            # Scope the ValueError fallback to the unpack ONLY: a ValueError
            # raised inside the store path must not be mistaken for the
            # legacy (key, content) tuple format.
            try:
                key, base64_content, frame_id = item
            except ValueError:
                # Backwards compatibility with old two-element tuples.
                key, base64_content = item
                # Strip only the leading prefix (str.replace would remove
                # every occurrence, corrupting ids that embed the prefix).
                frame_id = key[len(self.prefix):] if key.startswith(self.prefix) else key
            self._store_frame_data(key, base64_content, frame_id)
        except Exception as e:
            self.logger.error(f"Failed to process cache item for frame_id={frame_id}: {e}")
            with self._metrics_lock:
                self._metrics["frames_failed"] += 1
        finally:
            self._mark_task_done()

    def _store_frame_data(self, key: str, base64_content: str, frame_id: str) -> None:
        """Store frame data in Redis with TTL.

        Uses Redis HSET + EXPIRE for TTL management. Multiple apps can safely
        write to different frame_ids without conflicts.
        """
        start_time = time.time()
        try:
            content_len = len(base64_content)
            self.logger.debug(
                f"Writing to Redis: frame_id={frame_id}, key={key}, "
                f"content_size={content_len}, ttl={self.ttl_seconds}s"
            )

            # Store base64 string in Redis hash field 'frame', then set TTL
            # (mimics the Go backend behavior).
            self._client.hset(key, "frame", base64_content)
            self._client.expire(key, self.ttl_seconds)

            elapsed = time.time() - start_time

            with self._metrics_lock:
                self._metrics["frames_cached"] += 1
                self._metrics["last_cache_time"] = time.time()
                self._metrics["last_frame_id"] = frame_id

            self.logger.info(
                f"Successfully cached frame: frame_id={frame_id}, key={key}, "
                f"content_size={content_len}, ttl={self.ttl_seconds}s, "
                f"elapsed={elapsed:.3f}s"
            )
        except redis.RedisError as e:
            with self._metrics_lock:
                self._metrics["frames_failed"] += 1
            self.logger.error(
                f"Redis error caching frame: frame_id={frame_id}, key={key}, "
                f"error={e.__class__.__name__}: {e}"
            )
        except Exception as e:
            with self._metrics_lock:
                self._metrics["frames_failed"] += 1
            self.logger.error(
                f"Unexpected error caching frame: frame_id={frame_id}, key={key}, "
                f"error={e}", exc_info=True
            )

    def _mark_task_done(self) -> None:
        """Mark the current queue task as done; tolerate over-reporting."""
        try:
            self.queue.task_done()
        except Exception:
            pass

    def get_metrics(self) -> Dict[str, Any]:
        """Get cache performance metrics for monitoring and debugging.

        Returns:
            Dictionary containing cache metrics including:
            - frames_queued: Total frames queued for caching
            - frames_cached: Total frames successfully cached
            - frames_failed: Total frames that failed to cache
            - frames_dropped: Total frames dropped due to queue full
            - queue_size: Current queue size
            - last_cache_time: Timestamp of last successful cache
            - last_frame_id: Last frame_id cached
        """
        with self._metrics_lock:
            metrics = dict(self._metrics)

        metrics.update({
            "running": self.running,
            "queue_size": self.queue.qsize(),
            "queue_maxsize": self.queue.maxsize,
            "worker_threads": self._worker_threads,
            "prefix": self.prefix,
            "ttl_seconds": self.ttl_seconds,
        })

        return metrics