matrice-inference 0.1.2 (matrice_inference-0.1.2-py3-none-any.whl)

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of matrice-inference might be problematic.

Files changed (37)
  1. matrice_inference/__init__.py +72 -0
  2. matrice_inference/py.typed +0 -0
  3. matrice_inference/server/__init__.py +23 -0
  4. matrice_inference/server/inference_interface.py +176 -0
  5. matrice_inference/server/model/__init__.py +1 -0
  6. matrice_inference/server/model/model_manager.py +274 -0
  7. matrice_inference/server/model/model_manager_wrapper.py +550 -0
  8. matrice_inference/server/model/triton_model_manager.py +290 -0
  9. matrice_inference/server/model/triton_server.py +1248 -0
  10. matrice_inference/server/proxy_interface.py +371 -0
  11. matrice_inference/server/server.py +1004 -0
  12. matrice_inference/server/stream/__init__.py +0 -0
  13. matrice_inference/server/stream/app_deployment.py +228 -0
  14. matrice_inference/server/stream/consumer_worker.py +201 -0
  15. matrice_inference/server/stream/frame_cache.py +127 -0
  16. matrice_inference/server/stream/inference_worker.py +163 -0
  17. matrice_inference/server/stream/post_processing_worker.py +230 -0
  18. matrice_inference/server/stream/producer_worker.py +147 -0
  19. matrice_inference/server/stream/stream_pipeline.py +451 -0
  20. matrice_inference/server/stream/utils.py +23 -0
  21. matrice_inference/tmp/abstract_model_manager.py +58 -0
  22. matrice_inference/tmp/aggregator/__init__.py +18 -0
  23. matrice_inference/tmp/aggregator/aggregator.py +330 -0
  24. matrice_inference/tmp/aggregator/analytics.py +906 -0
  25. matrice_inference/tmp/aggregator/ingestor.py +438 -0
  26. matrice_inference/tmp/aggregator/latency.py +597 -0
  27. matrice_inference/tmp/aggregator/pipeline.py +968 -0
  28. matrice_inference/tmp/aggregator/publisher.py +431 -0
  29. matrice_inference/tmp/aggregator/synchronizer.py +594 -0
  30. matrice_inference/tmp/batch_manager.py +239 -0
  31. matrice_inference/tmp/overall_inference_testing.py +338 -0
  32. matrice_inference/tmp/triton_utils.py +638 -0
  33. matrice_inference-0.1.2.dist-info/METADATA +28 -0
  34. matrice_inference-0.1.2.dist-info/RECORD +37 -0
  35. matrice_inference-0.1.2.dist-info/WHEEL +5 -0
  36. matrice_inference-0.1.2.dist-info/licenses/LICENSE.txt +21 -0
  37. matrice_inference-0.1.2.dist-info/top_level.txt +1 -0
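
The bulk of the additions live under matrice_inference/server/ (model management, the server itself, and the streaming pipeline), with experimental aggregation utilities under matrice_inference/tmp/. The hunk reproduced below is the new Triton model manager; going by the layout above, the class it defines would be imported as follows (illustrative only, not taken from the package's own documentation):

from matrice_inference.server.model.triton_model_manager import TritonModelManager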
matrice_inference/server/model/triton_model_manager.py
@@ -0,0 +1,290 @@
+import logging
+import numpy as np
+import requests
+import time
+from typing import Tuple, Any, List, Union
+from matrice_inference.server.model.triton_server import TritonServer, TritonInference
+
+class TritonModelManager:
+    """Model manager for Triton Inference Server, aligned with pipeline and inference interface."""
+
+    def __init__(
+        self,
+        model_name: str,
+        model_path: str,
+        runtime_framework: str,
+        internal_server_type: str,
+        internal_port: int,
+        internal_host: str,
+        input_size: Union[int, List[int]] = 640,  # Priority Obj det
+        num_classes: int = 10,
+        num_model_instances: int = 1,
+        use_dynamic_batching: bool = False,
+        max_batch_size: int = 8,
+        is_yolo: bool = False,
+        is_ocr: bool = False,
+        use_trt_accelerator: bool = False,
+    ):
+        try:
+            if internal_server_type not in ["rest", "grpc"]:
+                logging.warning(f"Invalid internal_server_type '{internal_server_type}', defaulting to 'rest'")
+                internal_server_type = "rest"
+            self.internal_server_type = internal_server_type
+            self.internal_port = internal_port
+            self.internal_host = internal_host
+            self.use_dynamic_batching = use_dynamic_batching
+            self.max_batch_size = max_batch_size
+
+            self.triton_server = TritonServer(
+                model_name=model_name,
+                model_path=model_path,
+                runtime_framework=runtime_framework,
+                input_size=input_size,
+                num_classes=num_classes,
+                dynamic_batching=use_dynamic_batching,
+                num_model_instances=num_model_instances,
+                max_batch_size=max_batch_size,
+                connection_protocol=internal_server_type,
+                is_yolo=is_yolo,
+                is_ocr=is_ocr,
+                use_trt_accelerator=use_trt_accelerator,
+            )
+
+            logging.info(f"Starting Triton server on {internal_host}:{internal_port}...")
+            self.triton_server_process = self.triton_server.setup(internal_port)
+
+            logging.info("Waiting for Triton server to be ready...")
+            self._wait_for_ready()
+
+            self.client = TritonInference(
+                server_type=self.triton_server.connection_protocol,
+                model_name=model_name,
+                internal_port=internal_port,
+                internal_host=internal_host,
+                runtime_framework=self.triton_server.runtime_framework,
+                is_yolo=self.triton_server.is_yolo,
+                input_size=input_size,
+            )
+
+            logging.info(f"Initialized TritonModelManager with {num_model_instances} client instances, protocol: {self.triton_server.connection_protocol}")
+
+        except Exception as e:
+            logging.error(f"Failed to initialize TritonModelManager: {str(e)}", exc_info=True)
+            raise
+
+    def _wait_for_ready(self):
+        """Wait for Triton server to be ready with fixed retries and 5s sleep."""
+        max_attempts = 30  # 150 seconds wait time
+        for attempt in range(max_attempts):
+            try:
+                if self.internal_server_type == "rest":
+                    response = requests.get(
+                        f"http://{self.internal_host}:{self.internal_port}/v2/health/ready",
+                        timeout=5
+                    )
+                    if response.status_code == 200:
+                        logging.info("========= Triton server is ready (REST) =========")
+                        break
+                    else:
+                        logging.info(f"Attempt {attempt + 1}/{max_attempts} - server not ready, retrying in 5 seconds...")
+                        time.sleep(5)
+
+                else:  # gRPC
+                    try:
+                        import tritonclient.grpc as grpcclient
+                    except ImportError:
+                        grpcclient = None
+
+                    if grpcclient is None:
+                        raise ImportError("tritonclient.grpc required for gRPC")
+
+                    with grpcclient.InferenceServerClient(f"{self.internal_host}:{self.internal_port}") as client:
+                        if client.is_server_ready():
+                            logging.info("========= Triton server is ready (gRPC) =========")
+                            break
+                        else:
+                            logging.info(f"Attempt {attempt + 1}/{max_attempts} - server not ready, retrying in 5 seconds...")
+                            time.sleep(5)
+
+            except Exception as e:
+                if attempt < max_attempts - 1:
+                    logging.info(f"Attempt {attempt + 1}/{max_attempts} failed, retrying in 5 seconds... (Error: {str(e)})")
+                    time.sleep(5)
+                else:
+                    logging.error("Triton server failed to become ready after maximum attempts")
+                    raise
+
+    def inference(
+        self,
+        input: bytes,
+    ) -> Tuple[Any, bool]:
+        """Perform synchronous single inference using TritonInference client.
+
+        Args:
+            input: Primary input data (e.g., image bytes).
+
+        Returns:
+            Tuple of (results, success_flag).
+        """
+        if input is None:
+            raise ValueError("Input data cannot be None")
+        try:
+            client = self.client
+            if not client:
+                raise RuntimeError("No Triton client available")
+            results = client.inference(input)
+            results = client.format_response(results)
+            return results, True
+        except Exception as e:
+            logging.error(f"Triton sync inference failed: {str(e)}", exc_info=True)
+            return None, False
+
+    async def async_inference(
+        self,
+        input: Union[bytes, np.ndarray],
+    ) -> Tuple[Any, bool]:
+        """Perform asynchronous single inference using TritonInference client.
+
+        Args:
+            input: Primary input data (image bytes or numpy array).
+
+        Returns:
+            Tuple of (results, success_flag).
+        """
+
+        if input is None:
+            logging.error("Input data cannot be None")
+            raise ValueError("Input data cannot be None")
+        try:
+            client = self.client
+            if not client:
+                logging.error("No Triton client available")
+                raise RuntimeError("No Triton client available")
+            results = await client.async_inference(input)
+            results = client.format_response(results)
+            logging.info(f"Async inference result: {results}")
+            return results, True
+        except Exception as e:
+            logging.error(f"Triton async inference failed: {e}")
+            return {"error": str(e), "predictions": None}, False
+
+    def batch_inference(
+        self,
+        input: List[bytes],
+    ) -> Tuple[List[Any], bool]:
+        """Perform synchronous batch inference using TritonInference client.
+
+        Args:
+            input: List of primary input data (e.g., image bytes).
+
+        Returns:
+            Tuple of (results_list, success_flag).
+        """
+        if not input:
+            raise ValueError("Batch input cannot be None or empty")
+        try:
+            client = self.client
+            if not client:
+                raise RuntimeError("No Triton client available")
+            results = []
+
+            if self.use_dynamic_batching:
+                input_array = self._preprocess_batch_inputs(input, client)
+                batch_results = client.inference(input_array)
+                results = self._split_batch_results(batch_results, len(input))
+            else:
+                for inp in input:
+                    result = client.inference(inp)
+                    results.append(result)
+
+            results = [client.format_response(result) for result in results]
+            return results, True
+        except Exception as e:
+            logging.error(f"Triton sync batch inference failed: {str(e)}", exc_info=True)
+            return None, False
+
+    async def async_batch_inference(
+        self,
+        input: List[bytes],
+    ) -> Tuple[List[Any], bool]:
+        """Perform asynchronous batch inference using TritonInference client.
+
+        Args:
+            input: List of primary input data (e.g., image bytes).
+
+        Returns:
+            Tuple of (results_list, success_flag).
+        """
+        if not input:
+            raise ValueError("Batch input cannot be None or empty")
+        try:
+            client = self.client
+            if not client:
+                raise RuntimeError("No Triton client available")
+            results = []
+
+            if self.use_dynamic_batching:
+                input_array = self._preprocess_batch_inputs(input, client)
+                batch_results = await client.async_inference(input_array)
+                split_results = self._split_batch_results(batch_results, len(input))
+                results = [client.format_response(r) for r in split_results]
+            else:
+                for inp in input:
+                    res = await client.async_inference(inp)
+                    results.append(client.format_response(res))
+
+            return results, True
+        except Exception as e:
+            logging.error(f"Triton async batch inference failed: {str(e)}", exc_info=True)
+            return None, False
+
+    def _preprocess_batch_inputs(self, input: List[bytes], client: TritonInference) -> np.ndarray:
+        """Preprocess batch inputs for Triton dynamic batching.
+
+        Args:
+            input: List of input data (e.g., image bytes).
+            client: TritonInference client for shape and data type information.
+
+        Returns:
+            Preprocessed NumPy array for batch inference.
+        """
+        try:
+            batch_inputs = []
+            for inp in input:
+                arr = client._preprocess_input(inp)
+
+                if arr.ndim == 4 and arr.shape[0] == 1:
+                    arr = np.squeeze(arr, axis=0)
+
+                if arr.ndim != 3:
+                    logging.warning(f"Unexpected input shape {arr.shape}, expected (C,H,W) after preprocessing")
+
+                batch_inputs.append(arr)
+
+            # Stack into final batch (B, C, H, W)
+            stacked = np.stack(batch_inputs, axis=0)
+            # Ensure C-contiguous (important for Triton)
+            return np.ascontiguousarray(stacked)
+
+        except Exception as e:
+            logging.error(f"Failed to preprocess batch inputs: {str(e)}", exc_info=True)
+            raise
+
+
+    def _split_batch_results(self, batch_results: np.ndarray, batch_size: int) -> List[Any]:
+        """Split batch results into individual results.
+
+        Args:
+            batch_results: NumPy array of batch inference results.
+            batch_size: Number of inputs in the batch.
+
+        Returns:
+            List of individual results.
+        """
+        try:
+            if batch_results.ndim == 1:
+                return [batch_results] * batch_size
+            return [batch_results[i] for i in range(batch_size)]
+        except Exception as e:
+            logging.error(f"Failed to split batch results: {str(e)}", exc_info=True)
+            raise
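
For orientation, the sketch below shows how the TritonModelManager defined in this hunk would typically be driven. Every concrete value (model name, model path, runtime framework, ports, the sample image) is a hypothetical placeholder, and the constructor itself launches a Triton server via TritonServer.setup() and blocks in _wait_for_ready(), so the snippet only runs in an environment where Triton and the model artifacts are actually available.

import asyncio

from matrice_inference.server.model.triton_model_manager import TritonModelManager

# All values below are placeholders for illustration only.
manager = TritonModelManager(
    model_name="example_detector",
    model_path="/models/example_detector.onnx",
    runtime_framework="onnx",
    internal_server_type="grpc",
    internal_port=8001,
    internal_host="127.0.0.1",
    input_size=640,
    num_classes=80,
    use_dynamic_batching=True,
    max_batch_size=8,
    is_yolo=True,
)

with open("sample.jpg", "rb") as f:
    image_bytes = f.read()

# Synchronous single and batch inference; each call returns (results, success_flag).
single_result, ok = manager.inference(image_bytes)
batch_results, ok = manager.batch_inference([image_bytes, image_bytes])

# Asynchronous variant of the same call.
async def run_async():
    result, ok = await manager.async_inference(image_bytes)
    return result

asyncio.run(run_async())

With use_dynamic_batching=True, batch_inference stacks the preprocessed inputs into one contiguous (B, C, H, W) array and sends a single request, relying on the server-side batch dimension; with it disabled, the same method simply loops over the inputs one request at a time.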