matrice_inference-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of matrice-inference might be problematic.

Files changed (37)
  1. matrice_inference/__init__.py +72 -0
  2. matrice_inference/py.typed +0 -0
  3. matrice_inference/server/__init__.py +23 -0
  4. matrice_inference/server/inference_interface.py +176 -0
  5. matrice_inference/server/model/__init__.py +1 -0
  6. matrice_inference/server/model/model_manager.py +274 -0
  7. matrice_inference/server/model/model_manager_wrapper.py +550 -0
  8. matrice_inference/server/model/triton_model_manager.py +290 -0
  9. matrice_inference/server/model/triton_server.py +1248 -0
  10. matrice_inference/server/proxy_interface.py +371 -0
  11. matrice_inference/server/server.py +1004 -0
  12. matrice_inference/server/stream/__init__.py +0 -0
  13. matrice_inference/server/stream/app_deployment.py +228 -0
  14. matrice_inference/server/stream/consumer_worker.py +201 -0
  15. matrice_inference/server/stream/frame_cache.py +127 -0
  16. matrice_inference/server/stream/inference_worker.py +163 -0
  17. matrice_inference/server/stream/post_processing_worker.py +230 -0
  18. matrice_inference/server/stream/producer_worker.py +147 -0
  19. matrice_inference/server/stream/stream_pipeline.py +451 -0
  20. matrice_inference/server/stream/utils.py +23 -0
  21. matrice_inference/tmp/abstract_model_manager.py +58 -0
  22. matrice_inference/tmp/aggregator/__init__.py +18 -0
  23. matrice_inference/tmp/aggregator/aggregator.py +330 -0
  24. matrice_inference/tmp/aggregator/analytics.py +906 -0
  25. matrice_inference/tmp/aggregator/ingestor.py +438 -0
  26. matrice_inference/tmp/aggregator/latency.py +597 -0
  27. matrice_inference/tmp/aggregator/pipeline.py +968 -0
  28. matrice_inference/tmp/aggregator/publisher.py +431 -0
  29. matrice_inference/tmp/aggregator/synchronizer.py +594 -0
  30. matrice_inference/tmp/batch_manager.py +239 -0
  31. matrice_inference/tmp/overall_inference_testing.py +338 -0
  32. matrice_inference/tmp/triton_utils.py +638 -0
  33. matrice_inference-0.1.2.dist-info/METADATA +28 -0
  34. matrice_inference-0.1.2.dist-info/RECORD +37 -0
  35. matrice_inference-0.1.2.dist-info/WHEEL +5 -0
  36. matrice_inference-0.1.2.dist-info/licenses/LICENSE.txt +21 -0
  37. matrice_inference-0.1.2.dist-info/top_level.txt +1 -0
matrice_inference/tmp/aggregator/ingestor.py
@@ -0,0 +1,438 @@
+ import logging
+ import time
+ import threading
+ from typing import Dict, Optional, List, Tuple
+ from queue import Empty, PriorityQueue, Full
+ from matrice_common.session import Session
+ from matrice_common.stream.kafka_stream import MatriceKafkaDeployment
+ import itertools
+
+
+
+ class ResultsIngestor:
+     """
+     Streams and manages results from multiple deployments.
+     Handles result collection, queuing, and distribution with a consistent result structure.
+     """
+
+     def __init__(
+         self, deployment_ids: List[str], session: Session, consumer_timeout: float = 300, action_id: str = ""
+     ):
+         """
+         Initialize the results streamer.
+
+         Args:
+             deployment_ids: List of deployment IDs
+             session: Session object for authentication
+             consumer_timeout: Timeout in seconds for consuming results from deployments
+             action_id: Identifier appended to each Kafka consumer group instance ID
+         """
+         self.deployment_ids = deployment_ids
+         self.session = session
+         self.deployments_stream_utils: Dict[str, MatriceKafkaDeployment] = {}
+         for deployment_id in self.deployment_ids:
+             self.deployments_stream_utils[deployment_id] = MatriceKafkaDeployment(
+                 self.session,
+                 deployment_id,
+                 type="client",
+                 consumer_group_id=f"aggregator-{deployment_id}",
+                 consumer_group_instance_id=f"aggregator-{deployment_id}-{action_id}",
+             )
+
+         self.consumer_timeout = consumer_timeout
+
+         # Result queues for each deployment (PriorityQueue, ordered by input_order)
+         self.results_queues: Dict[str, PriorityQueue] = {}
+
+         # Streaming threads
+         self.results_streaming_threads: Dict[str, threading.Thread] = {}
+
+         # Control flags
+         self._stop_streaming = threading.Event()
+         self._is_streaming = False
+         self._lock = threading.Lock()  # Main state lock
+         self._stats_lock = threading.Lock()  # Separate lock so stats updates don't contend with state changes
+
+         # Counter for ordering within (deployment_id, stream_key, stream_group_key)
+         self._counters: Dict[Tuple[str, str, str], itertools.count] = {}
+         # Global per-deployment sequence for PriorityQueue tie-breaking across streams
+         self._queue_seq_counters: Dict[str, itertools.count] = {}
+
+         # Track last seen input_order for reset detection
+         self._last_input_order: Dict[Tuple[str, str], int] = {}
+
+         # Track session/epoch for each (deployment_id, stream_key) to handle resets
+         self._session_counters: Dict[Tuple[str, str], int] = {}
+
+         # Statistics
+         self.stats = {
+             "results_consumed": 0,
+             "results_processed": 0,
+             "errors": 0,
+             "last_error": None,
+             "last_error_time": None,
+             "queue_sizes": {},
+             "start_time": None,
+             "consumer_timeout": consumer_timeout,
+         }
+
+         # Initialize queues
+         self._initialize_queues()
+
+     def _initialize_queues(self):
+         """Initialize result priority queues for each deployment."""
+         for deployment_id in self.deployment_ids:
+             self.results_queues[deployment_id] = PriorityQueue()
+             self.stats["queue_sizes"][deployment_id] = 0
+             # Initialize per-deployment sequence counter
+             self._queue_seq_counters[deployment_id] = itertools.count()
+
+     def start_streaming(self) -> bool:
+         """
+         Start streaming results from all deployments.
+
+         Returns:
+             bool: True if streaming started successfully, False otherwise
+         """
+         with self._lock:
+             if self._is_streaming:
+                 logging.warning("Results streaming is already running")
+                 return True
+
+             self._stop_streaming.clear()
+             self._is_streaming = True
+             self.stats["start_time"] = time.time()
+
+         try:
+             # Start a result streaming thread for each deployment
+             for deployment_id in self.deployment_ids:
+                 thread = threading.Thread(
+                     target=self._stream_results_to_queue,
+                     args=(deployment_id, self.results_queues[deployment_id]),
+                     name=f"ResultStreamer-{deployment_id}",
+                     daemon=True,
+                 )
+                 thread.start()
+                 self.results_streaming_threads[deployment_id] = thread
+                 logging.info(
+                     f"Started result streaming for deployment: {deployment_id}"
+                 )
+
+             logging.info(
+                 f"Started results streaming for {len(self.deployment_ids)} deployments"
+             )
+             return True
+
+         except Exception as exc:
+             logging.error(f"Failed to start results streaming: {exc}")
+             self.stop_streaming()
+             return False
+
+     def stop_streaming(self):
+         """Stop all streaming operations."""
+         with self._lock:
+             if not self._is_streaming:
+                 logging.info("Results streaming is not running")
+                 return
+
+             self._is_streaming = False
+             self._stop_streaming.set()
+
+         logging.info("Stopping results streaming...")
+
+         # Close each deployment's Kafka consumer
+         for deployment_id in self.deployment_ids:
+             try:
+                 self.deployments_stream_utils[deployment_id].sync_kafka.close()
+             except Exception as exc:
+                 logging.error(
+                     f"Error stopping streaming for deployment {deployment_id}: {exc}"
+                 )
+
+         # Wait for streaming threads to complete
+         for deployment_id, thread in self.results_streaming_threads.items():
+             try:
+                 if thread.is_alive():
+                     thread.join(timeout=5.0)
+                     if thread.is_alive():
+                         logging.warning(
+                             f"Result streaming thread for {deployment_id} did not stop gracefully"
+                         )
+             except Exception as exc:
+                 logging.error(
+                     f"Error joining thread for deployment {deployment_id}: {exc}"
+                 )
+
+         # Clear threads
+         self.results_streaming_threads.clear()
+
+         logging.info("Results streaming stopped")
+
+     def _get_priority_counter(self, deployment_id: str, stream_key: str, stream_group_key: str) -> int:
+         """
+         Get a monotonically increasing counter per (deployment_id, stream_key, stream_group_key) for ordering.
+
+         Returns:
+             int: Priority counter for ordering within the same stream
+         """
+         key = (deployment_id, stream_key, stream_group_key)
+
+         # Initialize the counter if needed
+         if key not in self._counters:
+             self._counters[key] = itertools.count()
+
+         return next(self._counters[key])
+
+     def _get_queue_sequence(self, deployment_id: str) -> int:
+         """
+         Get a global per-deployment sequence number for tie-breaking across different streams
+         placed in the same PriorityQueue. This prevents Python from trying to compare dicts
+         when primary priorities are equal.
+         """
+         if deployment_id not in self._queue_seq_counters:
+             self._queue_seq_counters[deployment_id] = itertools.count()
+         return next(self._queue_seq_counters[deployment_id])
+
+     def _stream_results_to_queue(
+         self, deployment_id: str, results_queue: PriorityQueue
+     ):
+         """
+         Stream results from a deployment to its result queue.
+
+         Args:
+             deployment_id: ID of the deployment
+             results_queue: Priority queue that stores results ordered by input_order
+         """
+         logging.info(f"Starting result streaming for deployment: {deployment_id}")
+
+         while not self._stop_streaming.is_set():
+             try:
+                 # Consume result with timeout
+                 result = self.deployments_stream_utils[deployment_id].consume_message(
+                     self.consumer_timeout
+                 )
+
+                 if result is not None:
+                     # Handle the structured response format from stream_worker.py
+                     result_value = result.get("value", {})
+                     input_streams = result_value.get("input_streams", [])
+                     # Use the nested "input_stream" dict when present; otherwise treat the entry itself as the stream payload
+                     input_data = input_streams[0] if input_streams else {}
+                     input_stream = input_data.get("input_stream", input_data)
+                     # input_order = input_stream.get("input_order")
+                     camera_info = input_stream.get("camera_info") or {}
+                     stream_key = camera_info.get("camera_name")
+                     stream_group_key = camera_info.get("camera_group") or "default_group"
+
+                     if not stream_key:
+                         logging.warning(
+                             f"Missing stream_key for deployment {deployment_id}, skipping result. "
+                             f"Stream key: {stream_key}, Stream group: {stream_group_key}"
+                         )
+                         continue
+
+                     order = self._get_priority_counter(deployment_id, stream_key, stream_group_key)
+                     seq = self._get_queue_sequence(deployment_id)
+                     # Create an enhanced result object wrapping the structured response
+                     enhanced_result = {
+                         "deployment_id": deployment_id,
+                         "stream_key": stream_key,
+                         "stream_group_key": stream_group_key,
+                         "input_order": order,
+                         "timestamp": time.time(),
+                         "result": result_value,  # TODO: check if should send this or just agg_summary
+                     }
+
+                     try:
+                         # Include a sequence tie-breaker to avoid comparing dicts when priorities are equal
+                         results_queue.put((order, seq, enhanced_result), block=False)
+
+                         with self._stats_lock:
+                             self.stats["results_consumed"] += 1
+                             self.stats["queue_sizes"][deployment_id] = results_queue.qsize()
+
+                     except Full:
+                         # Queue is full (only possible if a maxsize is set); log every 10th occurrence to limit noise
+                         with self._stats_lock:
+                             self.stats["errors"] += 1
+                             self.stats["last_error"] = "Queue full"
+                             self.stats["last_error_time"] = time.time()
+                             if self.stats["errors"] % 10 == 1:
+                                 logging.warning(f"Result queue full for deployment {deployment_id}, dropping result (#{self.stats['errors']})")
+                     except Exception as exc:
+                         # Other enqueue errors; log every 10th occurrence
+                         with self._stats_lock:
+                             self.stats["errors"] += 1
+                             self.stats["last_error"] = str(exc)
+                             self.stats["last_error_time"] = time.time()
+                             if self.stats["errors"] % 10 == 1:
+                                 logging.error(f"Failed to enqueue result for deployment {deployment_id}: {exc} (#{self.stats['errors']})")
+
+             except Exception as exc:
+                 if not self._stop_streaming.is_set():
+                     with self._stats_lock:
+                         self.stats["errors"] += 1
+                         self.stats["last_error"] = str(exc)
+                         self.stats["last_error_time"] = time.time()
+                         # Log every 10th occurrence to limit noise
+                         if self.stats["errors"] % 10 == 1:
+                             logging.error(f"Error streaming results for deployment {deployment_id}: {exc} (#{self.stats['errors']})", exc_info=True)
+
+                     # Small delay to prevent tight error loops
+                     time.sleep(0.1)
+
+         logging.info(f"Stopped result streaming for deployment: {deployment_id}")
+
+     def get_results(self, deployment_id: str, timeout: float = 1.0) -> Optional[Dict]:
+         """
+         Get a result from a specific deployment's priority queue.
+
+         Args:
+             deployment_id: ID of the deployment
+             timeout: Timeout in seconds for getting the result
+
+         Returns:
+             Dict: Result dictionary, or None on timeout or error
+         """
+         if deployment_id not in self.results_queues:
+             logging.error(f"No queue found for deployment: {deployment_id}")
+             return None
+
+         try:
+             priority_result = self.results_queues[deployment_id].get(timeout=timeout)
+             self.results_queues[deployment_id].task_done()
+
+             with self._stats_lock:
+                 self.stats["queue_sizes"][deployment_id] = self.results_queues[deployment_id].qsize()
+
+             # Handle both 2-tuple (legacy) and 3-tuple (with seq) queue items
+             if isinstance(priority_result, tuple):
+                 if len(priority_result) >= 3:
+                     return priority_result[2]
+                 elif len(priority_result) == 2:
+                     return priority_result[1]
+             return priority_result
+         except Empty:
+             return None
+         except Exception as exc:
+             logging.error(
+                 f"Error getting result from deployment {deployment_id}: {exc}"
+             )
+             return None
+
+     def get_all_results(self, timeout: float = 1.0) -> List[Dict]:
+         """
+         Get one result from each deployment queue.
+
+         Args:
+             timeout: Timeout in seconds per deployment queue
+
+         Returns:
+             List[Dict]: List of result dictionaries
+         """
+         results = []
+
+         for deployment_id in self.results_queues.keys():
+             result = self.get_results(deployment_id, timeout=timeout)
+             if result is not None:
+                 results.append(result)
+
+         return results
+
+     def get_stats(self) -> Dict:
+         """Get current statistics."""
+         with self._stats_lock:
+             stats = self.stats.copy()
+
+         # Add runtime statistics
+         if stats["start_time"]:
+             stats["runtime_seconds"] = time.time() - stats["start_time"]
+
+         # Add queue statistics
+         stats["total_queue_size"] = sum(stats["queue_sizes"].values())
+
+         return stats
+
+     def get_health_status(self) -> Dict:
+         """Get health status of the results streamer."""
+         health = {
+             "status": "healthy",
+             "is_streaming": self._is_streaming,
+             "deployments": len(self.deployment_ids),
+             "active_threads": len(
+                 [t for t in self.results_streaming_threads.values() if t.is_alive()]
+             ),
+             "queue_sizes": {},
+             "errors": self.stats["errors"],
+         }
+
+         # Check queue sizes
+         total_queue_size = 0
+         for deployment_id, queue in self.results_queues.items():
+             queue_size = queue.qsize()
+             health["queue_sizes"][deployment_id] = queue_size
+             total_queue_size += queue_size
+
+             # Mark as degraded if a queue is getting full
+             if queue_size > 1000:
+                 health["status"] = "degraded"
+                 health["reason"] = f"Queue for {deployment_id} nearly full ({queue_size})"
+                 logging.warning(f"Ingestor degraded: {deployment_id} queue has {queue_size} items")
+
+         # Check for recent errors (within the last 60 seconds)
+         if (
+             self.stats["last_error_time"]
+             and (time.time() - self.stats["last_error_time"]) < 60
+         ):
+             health["status"] = "degraded"
+             health["reason"] = f"Recent error: {self.stats['last_error']}"
+             logging.warning(f"Ingestor degraded due to recent error: {self.stats['last_error']}")
+
+         # Check that threads are alive while streaming
+         if self._is_streaming:
+             dead_threads = []
+             for deployment_id, thread in self.results_streaming_threads.items():
+                 if not thread.is_alive():
+                     dead_threads.append(deployment_id)
+
+             if dead_threads:
+                 health["status"] = "degraded"
+                 health["reason"] = f"Dead threads for deployments: {', '.join(dead_threads)}"
+                 logging.warning(f"Ingestor degraded: dead threads for {len(dead_threads)} deployments: {', '.join(dead_threads)}")
+
+         # Not streaming at all is unhealthy
+         if not self._is_streaming:
+             health["status"] = "unhealthy"
+             health["reason"] = "Ingestor is not streaming"
+             logging.error("Ingestor is not streaming")
+
+         return health
+
+     def cleanup(self):
+         """Clean up all resources."""
+         logging.info("Cleaning up results streamer...")
+
+         # Stop streaming if running
+         if self._is_streaming:
+             self.stop_streaming()
+
+         # Drain all queues
+         for deployment_id, queue in self.results_queues.items():
+             try:
+                 while not queue.empty():
+                     queue.get_nowait()
+                     queue.task_done()
+             except Exception:
+                 pass
+
+         self.results_queues.clear()
+
+         # Clear tracking data
+         self._counters.clear()
+         self._last_input_order.clear()
+         self._session_counters.clear()
+
+         logging.info("Results streamer cleanup completed")