matrice-inference 0.1.33__tar.gz → 0.1.58__tar.gz

This diff shows the content of publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two versions.
Files changed (62)
  1. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/PKG-INFO +1 -1
  2. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/matrice_inference.egg-info/PKG-INFO +1 -1
  3. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/matrice_inference.egg-info/SOURCES.txt +10 -3
  4. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/__init__.py +11 -1
  5. matrice_inference-0.1.58/src/matrice_inference/server/inference_interface.py +459 -0
  6. matrice_inference-0.1.58/src/matrice_inference/server/model/model_manager.py +530 -0
  7. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/model/model_manager_wrapper.py +31 -10
  8. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/proxy_interface.py +4 -1
  9. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/server.py +49 -18
  10. matrice_inference-0.1.58/src/matrice_inference/server/stream/DATA_FLOW_DIAGRAM.md +360 -0
  11. matrice_inference-0.1.58/src/matrice_inference/server/stream/STREAMING_PIPELINE_ARCHITECTURE.md +1162 -0
  12. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/analytics_publisher.py +51 -24
  13. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/app_deployment.py +389 -14
  14. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/camera_config_monitor.py +41 -21
  15. matrice_inference-0.1.58/src/matrice_inference/server/stream/consumer_manager.py +474 -0
  16. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/frame_cache.py +59 -28
  17. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/inference_metric_logger.py +176 -82
  18. matrice_inference-0.1.58/src/matrice_inference/server/stream/inference_worker.py +561 -0
  19. matrice_inference-0.1.58/src/matrice_inference/server/stream/post_processing_manager.py +287 -0
  20. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/producer_worker.py +124 -16
  21. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/stream_pipeline.py +476 -134
  22. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/worker_metrics.py +20 -28
  23. matrice_inference-0.1.58/tests/test_frame_cache_optimizations.py +275 -0
  24. matrice_inference-0.1.58/tests/test_integration_real_components.py +311 -0
  25. matrice_inference-0.1.58/tests/test_msgpack_simple.py +107 -0
  26. matrice_inference-0.1.58/tests/test_msgpack_unpacking.py +137 -0
  27. matrice_inference-0.1.58/tests/test_streaming_pipeline_e2e.py +516 -0
  28. matrice_inference-0.1.33/src/matrice_inference/server/inference_interface.py +0 -176
  29. matrice_inference-0.1.33/src/matrice_inference/server/model/model_manager.py +0 -274
  30. matrice_inference-0.1.33/src/matrice_inference/server/stream/consumer_worker.py +0 -574
  31. matrice_inference-0.1.33/src/matrice_inference/server/stream/inference_worker.py +0 -285
  32. matrice_inference-0.1.33/src/matrice_inference/server/stream/post_processing_worker.py +0 -429
  33. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/LICENSE.txt +0 -0
  34. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/README.md +0 -0
  35. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/matrice_inference.egg-info/dependency_links.txt +0 -0
  36. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/matrice_inference.egg-info/not-zip-safe +0 -0
  37. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/matrice_inference.egg-info/top_level.txt +0 -0
  38. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/pyproject.toml +0 -0
  39. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/setup.cfg +0 -0
  40. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/setup.py +0 -0
  41. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/__init__.py +0 -0
  42. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/py.typed +0 -0
  43. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/model/__init__.py +0 -0
  44. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/model/triton_model_manager.py +0 -0
  45. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/model/triton_server.py +0 -0
  46. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/__init__.py +0 -0
  47. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/app_event_listener.py +0 -0
  48. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/deployment_refresh_listener.py +0 -0
  49. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/metric_publisher.py +0 -0
  50. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/stream/utils.py +0 -0
  51. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/abstract_model_manager.py +0 -0
  52. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/aggregator/__init__.py +0 -0
  53. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/aggregator/aggregator.py +0 -0
  54. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/aggregator/analytics.py +0 -0
  55. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/aggregator/ingestor.py +0 -0
  56. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/aggregator/latency.py +0 -0
  57. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/aggregator/pipeline.py +0 -0
  58. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/aggregator/publisher.py +0 -0
  59. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/aggregator/synchronizer.py +0 -0
  60. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/batch_manager.py +0 -0
  61. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/overall_inference_testing.py +0 -0
  62. {matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/tmp/triton_utils.py +0 -0
{matrice_inference-0.1.33 → matrice_inference-0.1.58}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: matrice_inference
- Version: 0.1.33
+ Version: 0.1.58
  Summary: Common server utilities for Matrice.ai services
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
  License-Expression: MIT
{matrice_inference-0.1.33 → matrice_inference-0.1.58}/matrice_inference.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: matrice_inference
- Version: 0.1.33
+ Version: 0.1.58
  Summary: Common server utilities for Matrice.ai services
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
  License-Expression: MIT
{matrice_inference-0.1.33 → matrice_inference-0.1.58}/matrice_inference.egg-info/SOURCES.txt
@@ -18,18 +18,20 @@ src/matrice_inference/server/model/model_manager.py
  src/matrice_inference/server/model/model_manager_wrapper.py
  src/matrice_inference/server/model/triton_model_manager.py
  src/matrice_inference/server/model/triton_server.py
+ src/matrice_inference/server/stream/DATA_FLOW_DIAGRAM.md
+ src/matrice_inference/server/stream/STREAMING_PIPELINE_ARCHITECTURE.md
  src/matrice_inference/server/stream/__init__.py
  src/matrice_inference/server/stream/analytics_publisher.py
  src/matrice_inference/server/stream/app_deployment.py
  src/matrice_inference/server/stream/app_event_listener.py
  src/matrice_inference/server/stream/camera_config_monitor.py
- src/matrice_inference/server/stream/consumer_worker.py
+ src/matrice_inference/server/stream/consumer_manager.py
  src/matrice_inference/server/stream/deployment_refresh_listener.py
  src/matrice_inference/server/stream/frame_cache.py
  src/matrice_inference/server/stream/inference_metric_logger.py
  src/matrice_inference/server/stream/inference_worker.py
  src/matrice_inference/server/stream/metric_publisher.py
- src/matrice_inference/server/stream/post_processing_worker.py
+ src/matrice_inference/server/stream/post_processing_manager.py
  src/matrice_inference/server/stream/producer_worker.py
  src/matrice_inference/server/stream/stream_pipeline.py
  src/matrice_inference/server/stream/utils.py
@@ -45,4 +47,9 @@ src/matrice_inference/tmp/aggregator/ingestor.py
  src/matrice_inference/tmp/aggregator/latency.py
  src/matrice_inference/tmp/aggregator/pipeline.py
  src/matrice_inference/tmp/aggregator/publisher.py
- src/matrice_inference/tmp/aggregator/synchronizer.py
+ src/matrice_inference/tmp/aggregator/synchronizer.py
+ tests/test_frame_cache_optimizations.py
+ tests/test_integration_real_components.py
+ tests/test_msgpack_simple.py
+ tests/test_msgpack_unpacking.py
+ tests/test_streaming_pipeline_e2e.py
{matrice_inference-0.1.33 → matrice_inference-0.1.58}/src/matrice_inference/server/__init__.py
@@ -1,12 +1,22 @@
  import os
  import logging
+ from logging.handlers import RotatingFileHandler

  # Define paths
  log_path = os.path.join(os.getcwd(), "deploy_server.log")

  # Create handlers explicitly
  console_handler = logging.StreamHandler()
- file_handler = logging.FileHandler(log_path)
+
+ # Use RotatingFileHandler with 0.5 GB max size and 3 backup files.
+ # When the log reaches 0.5 GB, it is rotated to deploy_server.log.1, deploy_server.log.2, etc.
+ # The oldest log is automatically deleted when the backup count is exceeded.
+ file_handler = RotatingFileHandler(
+     log_path,
+     maxBytes=500 * 1024 * 1024,  # 0.5 GB
+     backupCount=3,  # keep 3 backup files (~2 GB max: 0.5 GB current + 3 x 0.5 GB backups)
+     encoding='utf-8'
+ )

  # Set levels
  console_handler.setLevel(logging.INFO)
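For reference, the rotation behaviour configured above can be observed in isolation using only the standard library. The snippet below is an illustrative sketch, not part of the package; it uses a deliberately tiny maxBytes so the rollover is quick to trigger, whereas the package uses 500 * 1024 * 1024 bytes and backupCount=3.

import logging
from logging.handlers import RotatingFileHandler

demo_logger = logging.getLogger("rotation_demo")
demo_logger.setLevel(logging.INFO)

handler = RotatingFileHandler(
    "demo.log",
    maxBytes=1024,   # tiny size so rotation happens quickly (illustrative only)
    backupCount=3,   # keeps demo.log.1 .. demo.log.3; older backups are deleted
    encoding="utf-8",
)
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
demo_logger.addHandler(handler)

for i in range(200):
    demo_logger.info("log line %d", i)
# Afterwards the directory holds demo.log plus at most three rotated backups.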
matrice_inference-0.1.58/src/matrice_inference/server/inference_interface.py (new file)
@@ -0,0 +1,459 @@
+ """
+ InferenceInterface: Thread-safe inference with unified event loop management.
+
+ THREAD SAFETY & CONCURRENT REQUEST HANDLING:
+ ============================================
+
+ This module solves the greenlet thread context switching problem that occurs when:
+ 1. Streaming frames are being processed continuously through the StreamingPipeline
+ 2. Direct API calls (e.g., identity images for face recognition) arrive simultaneously
+
+ The Problem:
+ -----------
+ - Streaming frames use the StreamingPipeline's dedicated event loop
+ - Direct API calls may try to use a different event loop or thread context
+ - Models loaded with gevent/greenlet cannot switch between different thread contexts
+ - This causes: "Cannot switch to a different thread" errors
+
+ The Solution:
+ ------------
+ 1. StreamingPipeline creates a single dedicated event loop at startup
+ 2. All model instances are loaded in this event loop
+ 3. InferenceInterface stores a reference to this pipeline event loop
+ 4. ALL inference requests (streaming + direct API) use asyncio.run_coroutine_threadsafe()
+    to execute in the pipeline's event loop, regardless of which thread they originate from
+ 5. High-priority requests (identity images) get longer timeouts and always complete
+
+ Benefits:
+ --------
+ - No greenlet thread context errors
+ - Identity images work during streaming
+ - Natural frame skipping: If identity processing takes time, streaming frames may be
+   dropped from the queue, which is acceptable for continuous video streams
+ - Simple, robust, maintainable solution
+
+ Usage:
+ -----
+ 1. StreamingPipeline calls: inference_interface.set_pipeline_event_loop(event_loop)
+ 2. All inference calls automatically use this event loop via run_coroutine_threadsafe()
+ 3. Direct API calls set is_high_priority=True for guaranteed execution
+ """
+
+ from matrice_inference.server.model.model_manager_wrapper import ModelManagerWrapper
+ from typing import Dict, Any, Optional, Tuple, Union
+ from datetime import datetime, timezone
+ import logging
+ import time
+ import asyncio
+ from matrice_analytics.post_processing.post_processor import PostProcessor
+
+ class InferenceInterface:
+     """Interface for proxying requests to model servers with optional post-processing."""
+
+     def __init__(
+         self,
+         model_manager_wrapper: ModelManagerWrapper,
+         post_processor: Optional[PostProcessor] = None,
+     ):
+         """
+         Initialize the inference interface.
+
+         Args:
+             model_manager_wrapper: Model manager wrapper for model inference
+             post_processor: Post processor for post-processing
+         """
+         self.logger = logging.getLogger(__name__)
+         self.model_manager_wrapper = model_manager_wrapper
+         self.post_processor = post_processor
+         self.latest_inference_time = datetime.now(timezone.utc)
+         self.pipeline_event_loop: Optional[asyncio.AbstractEventLoop] = None
+
+         # Track concurrent inference requests for monitoring
+         self._active_inference_count = 0
+         self._inference_count_lock = asyncio.Lock() if asyncio else None
+
+     def get_latest_inference_time(self) -> datetime:
+         """Get the latest inference time."""
+         return self.latest_inference_time
+
+     def set_pipeline_event_loop(self, event_loop: asyncio.AbstractEventLoop) -> None:
+         """Set the pipeline event loop for thread-safe async operations.
+
+         Args:
+             event_loop: Event loop from StreamingPipeline
+         """
+         self.pipeline_event_loop = event_loop
+         self.logger.info("Pipeline event loop registered for thread-safe inference")
+
+     def has_async_predict(self) -> bool:
+         """Check if async_predict is available in the underlying model manager.
+
+         Returns:
+             bool: True if async_predict is available, False otherwise
+         """
+         try:
+             # Check if model_manager_wrapper has model_manager attribute
+             if not hasattr(self.model_manager_wrapper, 'model_manager'):
+                 return False
+
+             model_manager = self.model_manager_wrapper.model_manager
+
+             # Check if model_manager has async_predict and it's not None
+             if hasattr(model_manager, 'async_predict') and model_manager.async_predict is not None:
+                 return True
+
+             return False
+         except Exception as e:
+             self.logger.warning(f"Error checking async_predict availability: {e}")
+             return False
+
+     async def inference(
+         self,
+         input: Any,
+         extra_params: Optional[Dict[str, Any]] = None,
+         apply_post_processing: bool = False,
+         post_processing_config: Optional[Union[Dict[str, Any], str]] = None,
+         stream_key: Optional[str] = None,
+         stream_info: Optional[Dict[str, Any]] = None,
+         camera_info: Optional[Dict[str, Any]] = None,
+         pipeline_event_loop: Optional[asyncio.AbstractEventLoop] = None,
+         is_high_priority: bool = False,
+     ) -> Tuple[Any, Optional[Dict[str, Any]]]:
+         """Perform inference using the appropriate client with optional post-processing.
+
+         Args:
+             input: Primary input data (e.g., image bytes, numpy array)
+             extra_params: Additional parameters for inference (optional)
+             apply_post_processing: Whether to apply post-processing
+             post_processing_config: Configuration for post-processing
+             stream_key: Unique identifier for the input stream
+             stream_info: Additional metadata about the stream (optional)
+             camera_info: Additional metadata about the camera/source (optional)
+             pipeline_event_loop: Event loop from StreamingPipeline (if available)
+             is_high_priority: If True, this is a high-priority request (e.g., identity image)
+
+         Returns:
+             A tuple containing:
+             - The inference results (raw or post-processed)
+             - Metadata about the inference and post-processing (if applicable)
+
+         Note:
+             High-priority requests (like identity images for face recognition) will
+             always execute successfully. During their execution, streaming frames may
+             be naturally skipped if the inference queue fills up, which is acceptable
+             for continuous streaming scenarios.
+         """
+         if input is None:
+             raise ValueError("Input cannot be None")
+
+         # Log high-priority requests for monitoring
+         if is_high_priority:
+             self.logger.info(f"Processing high-priority inference request (stream_key={stream_key})")
+
+         # Measure model inference time
+         model_start_time = time.time()
+
+         # Update latest inference time
+         self.latest_inference_time = datetime.now(timezone.utc)
+
+         # Run model inference with proper thread-safety
+         try:
+             # Use provided event loop or fall back to stored pipeline event loop
+             event_loop_to_use = pipeline_event_loop or self.pipeline_event_loop
+
+             # If event loop is available and has async_predict, use thread-safe async inference
+             if event_loop_to_use and self.has_async_predict():
+                 # Run async inference in pipeline's event loop from any thread.
+                 # This ensures identity images and streaming frames use the same event loop
+                 # and prevents greenlet/gevent thread context switching errors.
+                 self.logger.debug(
+                     f"Using thread-safe async inference via pipeline event loop "
+                     f"(priority={'high' if is_high_priority else 'normal'})"
+                 )
+                 future = asyncio.run_coroutine_threadsafe(
+                     self.model_manager_wrapper.async_inference(
+                         input=input,
+                         extra_params=extra_params,
+                         stream_key=stream_key,
+                         stream_info=stream_info
+                     ),
+                     event_loop_to_use
+                 )
+                 # High-priority requests get longer timeout
+                 timeout = 120.0 if is_high_priority else 60.0
+                 raw_results, success = future.result(timeout=timeout)
+             else:
+                 # Fall back to sync inference (no async support or no event loop)
+                 self.logger.debug("Using synchronous inference (no async support or event loop)")
+                 raw_results, success = self.model_manager_wrapper.inference(
+                     input=input,
+                     extra_params=extra_params,
+                     stream_key=stream_key,
+                     stream_info=stream_info
+                 )
+
+             model_inference_time = time.time() - model_start_time
+
+             if not success:
+                 raise RuntimeError("Model inference failed")
+
+             self.logger.debug(
+                 f"Model inference executed stream_key={stream_key} "
+                 f"time={model_inference_time:.4f}s priority={'high' if is_high_priority else 'normal'}"
+             )
+
+         except Exception as exc:
+             # Add context about greenlet errors
+             error_msg = str(exc)
+             if "greenlet" in error_msg.lower() or "cannot switch" in error_msg.lower():
+                 self.logger.error(
+                     f"Greenlet thread context error detected. This typically means the model "
+                     f"is being accessed from multiple threads without proper event loop coordination. "
+                     f"Error: {error_msg}",
+                     exc_info=True
+                 )
+             else:
+                 self.logger.error(f"Model inference failed: {error_msg}", exc_info=True)
+             raise RuntimeError(f"Model inference failed: {error_msg}") from exc
+
+         # If no post-processing requested, return raw results
+         if not apply_post_processing or not self.post_processor:
+             return raw_results, {
+                 "timing_metadata": {
+                     "model_inference_time_sec": model_inference_time,
+                     "post_processing_time_sec": 0.0,
+                     "total_time_sec": model_inference_time,
+                 }
+             }
+
+         # Apply post-processing using PostProcessor
+         try:
+             post_processing_start_time = time.time()
+
+             # Use PostProcessor.process() method directly
+             result = await self.post_processor.process(
+                 data=raw_results,
+                 config=post_processing_config,
+                 input_bytes=input if isinstance(input, bytes) else None,
+                 stream_key=stream_key,
+                 stream_info=stream_info
+             )
+
+             post_processing_time = time.time() - post_processing_start_time
+
+             # Format the response based on PostProcessor result
+             if result.is_success():
+                 # For face recognition use case, return empty raw results
+                 processed_raw_results = [] if (
+                     hasattr(result, 'usecase') and result.usecase == 'face_recognition'
+                 ) else raw_results
+
+                 # Extract agg_summary from result data if available
+                 agg_summary = {}
+                 if hasattr(result, 'data') and isinstance(result.data, dict):
+                     agg_summary = result.data.get("agg_summary", {})
+
+                 post_processing_result = {
+                     "status": "success",
+                     "processing_time": result.processing_time,
+                     "usecase": getattr(result, 'usecase', ''),
+                     "category": getattr(result, 'category', ''),
+                     "summary": getattr(result, 'summary', ''),
+                     "insights": getattr(result, 'insights', []),
+                     "metrics": getattr(result, 'metrics', {}),
+                     "predictions": getattr(result, 'predictions', []),
+                     "agg_summary": agg_summary,
+                     "stream_key": stream_key or "default_stream",
+                     "timing_metadata": {
+                         "model_inference_time_sec": model_inference_time,
+                         "post_processing_time_sec": post_processing_time,
+                         "total_time_sec": model_inference_time + post_processing_time,
+                     }
+                 }
+
+                 return processed_raw_results, post_processing_result
+             else:
+                 # Post-processing failed
+                 self.logger.error(f"Post-processing failed: {result.error_message}")
+                 return raw_results, {
+                     "status": "post_processing_failed",
+                     "error": result.error_message,
+                     "error_type": getattr(result, 'error_type', 'ProcessingError'),
+                     "processing_time": result.processing_time,
+                     "processed_data": raw_results,
+                     "stream_key": stream_key or "default_stream",
+                     "timing_metadata": {
+                         "model_inference_time_sec": model_inference_time,
+                         "post_processing_time_sec": post_processing_time,
+                         "total_time_sec": model_inference_time + post_processing_time,
+                     }
+                 }
+
+         except Exception as e:
+             post_processing_time = time.time() - post_processing_start_time
+             self.logger.error(f"Post-processing exception: {str(e)}", exc_info=True)
+
+             return raw_results, {
+                 "status": "post_processing_failed",
+                 "error": str(e),
+                 "error_type": type(e).__name__,
+                 "processed_data": raw_results,
+                 "stream_key": stream_key or "default_stream",
+                 "timing_metadata": {
+                     "model_inference_time_sec": model_inference_time,
+                     "post_processing_time_sec": post_processing_time,
+                     "total_time_sec": model_inference_time + post_processing_time,
+                 }
+             }
+
+     async def async_inference(
+         self,
+         input: Any,
+         extra_params: Optional[Dict[str, Any]] = None,
+         apply_post_processing: bool = False,
+         post_processing_config: Optional[Union[Dict[str, Any], str]] = None,
+         stream_key: Optional[str] = None,
+         stream_info: Optional[Dict[str, Any]] = None,
+         camera_info: Optional[Dict[str, Any]] = None,
+         pipeline_event_loop: Optional[asyncio.AbstractEventLoop] = None,
+     ) -> Tuple[Any, Optional[Dict[str, Any]]]:
+         """Perform asynchronous inference using async_predict when available.
+
+         This method MUST be called within the pipeline's event loop.
+         For calls from other threads, use the regular inference() method, which
+         handles thread-safety automatically.
+
+         Args:
+             input: Primary input data (e.g., image bytes, numpy array)
+             extra_params: Additional parameters for inference (optional)
+             apply_post_processing: Whether to apply post-processing
+             post_processing_config: Configuration for post-processing
+             stream_key: Unique identifier for the input stream
+             stream_info: Additional metadata about the stream (optional)
+             camera_info: Additional metadata about the camera/source (optional)
+             pipeline_event_loop: Event loop from StreamingPipeline (optional, for validation)
+
+         Returns:
+             A tuple containing:
+             - The inference results (raw or post-processed)
+             - Metadata about the inference and post-processing (if applicable)
+         """
+         if input is None:
+             raise ValueError("Input cannot be None")
+
+         # Measure model inference time
+         model_start_time = time.time()
+
+         # Update latest inference time
+         self.latest_inference_time = datetime.now(timezone.utc)
+
+         # Run asynchronous model inference
+         try:
+             raw_results, success = await self.model_manager_wrapper.async_inference(
+                 input=input,
+                 extra_params=extra_params,
+                 stream_key=stream_key,
+                 stream_info=stream_info
+             )
+             model_inference_time = time.time() - model_start_time
+
+             if not success:
+                 raise RuntimeError("Model inference failed")
+
+             self.logger.debug(
+                 f"Async model inference executed stream_key={stream_key} time={model_inference_time:.4f}s"
+             )
+
+         except Exception as exc:
+             self.logger.error(f"Async model inference failed: {str(exc)}", exc_info=True)
+             raise RuntimeError(f"Async model inference failed: {str(exc)}") from exc
+
+         # If no post-processing requested, return raw results
+         if not apply_post_processing or not self.post_processor:
+             return raw_results, {
+                 "timing_metadata": {
+                     "model_inference_time_sec": model_inference_time,
+                     "post_processing_time_sec": 0.0,
+                     "total_time_sec": model_inference_time,
+                 }
+             }
+
+         # Apply post-processing using PostProcessor
+         try:
+             post_processing_start_time = time.time()
+
+             # Use PostProcessor.process() method directly (async)
+             result = await self.post_processor.process(
+                 data=raw_results,
+                 config=post_processing_config,
+                 input_bytes=input if isinstance(input, bytes) else None,
+                 stream_key=stream_key,
+                 stream_info=stream_info
+             )
+
+             post_processing_time = time.time() - post_processing_start_time
+
+             # Format the response based on PostProcessor result
+             if result.is_success():
+                 # For face recognition use case, return empty raw results
+                 processed_raw_results = [] if (
+                     hasattr(result, 'usecase') and result.usecase == 'face_recognition'
+                 ) else raw_results
+
+                 # Extract agg_summary from result data if available
+                 agg_summary = {}
+                 if hasattr(result, 'data') and isinstance(result.data, dict):
+                     agg_summary = result.data.get("agg_summary", {})
+
+                 post_processing_result = {
+                     "status": "success",
+                     "processing_time": result.processing_time,
+                     "usecase": getattr(result, 'usecase', ''),
+                     "category": getattr(result, 'category', ''),
+                     "summary": getattr(result, 'summary', ''),
+                     "insights": getattr(result, 'insights', []),
+                     "metrics": getattr(result, 'metrics', {}),
+                     "predictions": getattr(result, 'predictions', []),
+                     "agg_summary": agg_summary,
+                     "stream_key": stream_key or "default_stream",
+                     "timing_metadata": {
+                         "model_inference_time_sec": model_inference_time,
+                         "post_processing_time_sec": post_processing_time,
+                         "total_time_sec": model_inference_time + post_processing_time,
+                     }
+                 }
+
+                 return processed_raw_results, post_processing_result
+             else:
+                 # Post-processing failed
+                 self.logger.error(f"Post-processing failed: {result.error_message}")
+                 return raw_results, {
+                     "status": "post_processing_failed",
+                     "error": result.error_message,
+                     "error_type": getattr(result, 'error_type', 'ProcessingError'),
+                     "processing_time": result.processing_time,
+                     "processed_data": raw_results,
+                     "stream_key": stream_key or "default_stream",
+                     "timing_metadata": {
+                         "model_inference_time_sec": model_inference_time,
+                         "post_processing_time_sec": post_processing_time,
+                         "total_time_sec": model_inference_time + post_processing_time,
+                     }
+                 }
+
+         except Exception as e:
+             post_processing_time = time.time() - post_processing_start_time
+             self.logger.error(f"Post-processing exception: {str(e)}", exc_info=True)
+
+             return raw_results, {
+                 "status": "post_processing_failed",
+                 "error": str(e),
+                 "error_type": type(e).__name__,
+                 "processed_data": raw_results,
+                 "stream_key": stream_key or "default_stream",
+                 "timing_metadata": {
+                     "model_inference_time_sec": model_inference_time,
+                     "post_processing_time_sec": post_processing_time,
+                     "total_time_sec": model_inference_time + post_processing_time,
+                 }
+             }
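To make the event-loop contract described in the module docstring concrete, here is a minimal, self-contained sketch of the same pattern using only the standard library. It is not taken from matrice_inference; names such as start_pipeline_loop and fake_async_predict are illustrative stand-ins for the StreamingPipeline loop and ModelManagerWrapper.async_inference.

import asyncio
import threading

def start_pipeline_loop() -> asyncio.AbstractEventLoop:
    # One dedicated event loop, run forever in its own thread (the role the
    # StreamingPipeline loop plays for InferenceInterface).
    loop = asyncio.new_event_loop()
    threading.Thread(target=loop.run_forever, daemon=True).start()
    return loop

async def fake_async_predict(frame: bytes) -> dict:
    # Stand-in for the model call; it always executes in the loop's thread,
    # so greenlet/thread-affinity problems cannot arise.
    await asyncio.sleep(0.01)
    return {"detections": [], "frame_size": len(frame)}

pipeline_loop = start_pipeline_loop()

# Any other thread (e.g., an API handler receiving an identity image) submits
# its coroutine to the pipeline loop and blocks on the result, mirroring how
# InferenceInterface.inference() uses asyncio.run_coroutine_threadsafe().
future = asyncio.run_coroutine_threadsafe(fake_async_predict(b"\x00" * 1024), pipeline_loop)
print(future.result(timeout=120.0))  # the longer timeout used for high-priority requests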