imb 1.0.1.tar.gz → 1.0.3.tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,37 +1,22 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.1
 Name: imb
-Version: 1.0.1
+Version: 1.0.3
 Summary: Python library for run inference of deep learning models in different backends
 Home-page: https://github.com/TheConstant3/InferenceMultiBackend
 Author: p-constant
 Author-email: nikshorop@gmail.com
+License: UNKNOWN
+Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3.8
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: numpy
 Provides-Extra: triton
-Requires-Dist: tritonclient[all]>=2.38.0; extra == "triton"
 Provides-Extra: onnxcpu
-Requires-Dist: onnxruntime>=1.16.0; extra == "onnxcpu"
 Provides-Extra: onnxgpu
-Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "onnxgpu"
 Provides-Extra: all
-Requires-Dist: tritonclient[all]>=2.38.0; extra == "all"
-Requires-Dist: onnxruntime>=1.16.0; extra == "all"
-Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "all"
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: provides-extra
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
+License-File: LICENSE
 
 # InferenceMultiBackend
 
@@ -55,6 +40,8 @@ For support all implemented clients:
 
 OnnxClient usage example
 ```
+from imb.onnx import OnnxClient
+
 onnx_client = OnnxClient(
     model_path='model.onnx',
     model_name='any name',
@@ -64,15 +51,19 @@ onnx_client = OnnxClient(
     fixed_batch=True,
     warmup=True
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = onnx_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = onnx_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
 
 TritonClient usage example
 ```
+from imb.triton import TritonClient
+
 triton_client = TritonClient(
     url='localhost:8000',
     model_name='arcface',
@@ -87,9 +78,11 @@ triton_client = TritonClient(
     return_dict=True,
     warmup=False
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = triton_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = triton_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
@@ -103,3 +96,4 @@ fixed_batch - if fixed batch is True, then each batch will have fixed size (padd
 warmup - if True, model will run several calls on sample_inputs while initialization.
 
 return_dict - if True, __call__ return dict {'output_name1': output_value1, ...}, else [output_value1, ...]
+
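The parameter notes above describe `fixed_batch`, `warmup`, and `return_dict` in prose. As a minimal, hedged illustration of the `return_dict` switch (not code shipped in the package), a caller can branch on the returned type; the helper name and dummy arrays below are purely illustrative:

```
from typing import Dict, List, Union

import numpy as np


def describe_outputs(outputs: Union[Dict[str, np.ndarray], List[np.ndarray]]) -> None:
    # return_dict=True: {'output_name1': output_value1, ...}
    if isinstance(outputs, dict):
        for o_name, o_value in outputs.items():
            print(o_name, o_value.shape)
    # return_dict=False: [output_value1, ...] in the model's output order
    else:
        for o_value in outputs:
            print(o_value.shape)


# Dummy arrays standing in for a real client call such as client(*client.sample_inputs)
describe_outputs({'embedding': np.zeros((1, 512), dtype=np.float32)})
describe_outputs([np.zeros((1, 512), dtype=np.float32)])
```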
@@ -20,6 +20,8 @@ For support all implemented clients:
 
 OnnxClient usage example
 ```
+from imb.onnx import OnnxClient
+
 onnx_client = OnnxClient(
     model_path='model.onnx',
     model_name='any name',
@@ -29,15 +31,19 @@ onnx_client = OnnxClient(
     fixed_batch=True,
     warmup=True
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = onnx_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = onnx_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
 
 TritonClient usage example
 ```
+from imb.triton import TritonClient
+
 triton_client = TritonClient(
     url='localhost:8000',
     model_name='arcface',
@@ -52,9 +58,11 @@ triton_client = TritonClient(
     return_dict=True,
     warmup=False
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = triton_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = triton_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple
 import tritonclient.http as httpclient
 import tritonclient.grpc as grpcclient
 import tritonclient.utils.cuda_shared_memory as cudashm
+import tritonclient.utils.shared_memory as shm
 from google.protobuf.json_format import MessageToJson
 from tritonclient import utils
 from .base import BaseClient
@@ -11,30 +12,53 @@ import json
 import time
 
 
+class ShmHandlerWrapper:
+    def __init__(self, handler: Any, name: str, size: int):
+        self.handler = handler
+        self.name = name
+        self.size = size
+
+
+
 class TritonClient(BaseClient):
     def __init__(self, url: str,
                        model_name: str,
                        max_batch_size: int = 0,
                        sample_inputs: Optional[List[np.ndarray]] = None,
-                       timeout: int = 10,
-                       resend_count: int = 10,
                        fixed_batch: bool = True,
                        is_async: bool = False,
-                       cuda_shm: bool = False,
-                       max_shm_regions: int = 2,
+                       use_cuda_shm: bool = False,
+                       use_system_shm: bool = False,
+                       max_shm_regions: int = 0,
                        scheme: Literal["http", "grpc"] = "http",
                        return_dict: bool = True,
                        warmup: bool = False
                        ):
+        """_summary_
+
+        Args:
+            url (str): url of the triton server
+            model_name (str): name of the model endpoint
+            max_batch_size (int, optional): max batch size. Defaults to 0 (get value from triton config).
+            sample_inputs (Optional[List[np.ndarray]], optional): samples for warmup. Defaults to None (zeros array).
+            fixed_batch (bool, optional): use fixed batch size, using padding for smaller batch. Defaults to True.
+            is_async (bool, optional): async inference. Defaults to False.
+            use_cuda_shm (bool, optional): use cuda shared memory. Defaults to False.
+            use_system_shm (bool, optional): use system shared memory. Defaults to False.
+            max_shm_regions (int, optional): max clients for shared memory. Will unregister old regions. Defaults to 0.
+            scheme (Literal["http", "grpc"], optional): scheme for triton client. Defaults to "http".
+            return_dict (bool, optional): return dict or list of values. Defaults to True.
+            warmup (bool, optional): warmup model. Defaults to False.
+        """
         super().__init__()
+        assert not (use_cuda_shm and use_system_shm), 'shm and cuda_shm are mutually exclusive'
         self.model_name = model_name
         self.scheme = scheme
         self.client_module = httpclient if scheme == "http" else grpcclient
         self.url = url
         self.is_async = is_async
-        self.cuda_shm = cuda_shm
-        self.triton_timeout = timeout
-        self.resend_count = resend_count
+        self.use_cuda_shm = use_cuda_shm
+        self.use_system_shm = use_system_shm
         self.max_shm_regions = max_shm_regions
         self.return_dict = return_dict
 
@@ -57,14 +81,16 @@ class TritonClient(BaseClient):
         if warmup:
             self.warmup_model()
 
-        self.input_shm_handles = [None for _ in range(len(self.inputs_names))]
-        self.output_shm_handles = [None for _ in range(len(self.outputs_names))]
+        self.input_shm_handlers: List[Optional[ShmHandlerWrapper]] = \
+            [None for _ in range(len(self.inputs_names))]
+        self.output_shm_handlers: List[Optional[ShmHandlerWrapper]] = \
+            [None for _ in range(len(self.outputs_names))]
 
-        if self.cuda_shm:
+        if self.use_cuda_shm or self.use_system_shm:
             assert is_async == False and fixed_batch == True
             self._fill_output_dynamic_axis()
             self._create_input_output_shm_handles()
-            self._register_cuda_shm_regions()
+            self._register_shm_regions()
 
     def io_summary(self):
         return {
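The new constructor docstring and the initialization code above gate the shared-memory modes: `use_cuda_shm` and `use_system_shm` are mutually exclusive, and enabling either one requires `is_async=False` and `fixed_batch=True`. A minimal construction sketch under those constraints follows; the URL and model name are placeholders taken from the usage example, and a running Triton server is assumed:

```
from imb.triton import TritonClient

# Sketch only: 'localhost:8000' and 'arcface' are placeholder values.
triton_client = TritonClient(
    url='localhost:8000',
    model_name='arcface',
    use_system_shm=True,   # system (CPU) shared memory; use_cuda_shm stays False
    is_async=False,        # required when a shared-memory mode is enabled
    fixed_batch=True,      # required when a shared-memory mode is enabled
    scheme='http',
    return_dict=True,
)

outputs = triton_client(*triton_client.sample_inputs)
print({o_name: o_value.shape for o_name, o_value in outputs.items()})
```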
@@ -84,7 +110,8 @@
 
             "fixed_batch": self.fixed_batch,
             "async": self.is_async,
-            "cuda_shm": self.cuda_shm,
+            "cuda_shm": self.use_cuda_shm,
+            "shm": self.use_system_shm,
             "max_shm_regions": self.max_shm_regions,
         }
 
@@ -97,9 +124,7 @@
         self.triton_client = self.client_module.InferenceServerClient(
             url=self.url,
             verbose=False,
-            ssl=False,
-            network_timeout=self.triton_timeout,
-            connection_timeout=self.triton_timeout
+            ssl=False
         )
 
     def _load_model_params(self, user_max_batch_size: int) -> None:
@@ -150,13 +175,16 @@ class TritonClient(BaseClient):
             -1 in output_shape for output_shape in self.outputs_shapes
         )
         if has_dynamic_shapes:
-            start_cuda_shm_flag = self.cuda_shm
-            self.cuda_shm = False
+            start_cuda_shm_flag = self.use_cuda_shm
+            start_system_shm_flag = self.use_system_shm
+            self.use_cuda_shm = False
+            self.use_system_shm = False
             outputs = self.forward(*self.sample_inputs)
             self.outputs_shapes = [
                 list(outputs[output_name].shape) for output_name in self.outputs_names
             ]
-            self.cuda_shm = start_cuda_shm_flag
+            self.use_cuda_shm = start_cuda_shm_flag
+            self.use_system_shm = start_system_shm_flag
 
     @staticmethod
     def _parse_io_params(io_params: List[Dict]) -> Tuple[List[str], List[np.dtype], List[List[int]], List[str]]:
@@ -212,12 +240,14 @@ class TritonClient(BaseClient):
         Get old regions names for unregister
 
         Args:
-            regions_statuses (list): responce of get_cuda_shared_memory_status from triton
+            regions_statuses (list): responce of get_shared_memory_status from triton
             new_triton_shm_name (str): name of new region
 
         Returns:
             List[str]: old regions names for unregister
         """
+        if self.max_shm_regions < 1:
+            return []
         i_sep = len(new_triton_shm_name) - new_triton_shm_name[::-1].index('_') - 1
         region_name = new_triton_shm_name[:i_sep]
         registrated_regions = [
@@ -231,44 +261,35 @@ class TritonClient(BaseClient):
         old_regions = [name for name, _ in registrated_regions[:count_old_regions]]
         return old_regions
 
-    def _register_cuda_shm_regions(self):
-        """
-        Register CUDA shared memory regions in Triton
+    def _create_shm_handle(self, shape: List[int], dtype: np.dtype, name: str) -> ShmHandlerWrapper:
         """
-        if self.scheme == "grpc":
-            regions_statuses = self.triton_client.get_cuda_shared_memory_status(as_json=True)['regions']
-        else:
-            regions_statuses = self.triton_client.get_cuda_shared_memory_status()
-
-        for shm_handle in self.input_shm_handles + self.output_shm_handles:
-            old_regions_names = self._get_old_regions_names(regions_statuses, shm_handle._triton_shm_name)
-            for old_region_name in old_regions_names:
-                self.triton_client.unregister_cuda_shared_memory(old_region_name)
-            self.triton_client.register_cuda_shared_memory(
-                shm_handle._triton_shm_name, cudashm.get_raw_handle(shm_handle), 0, shm_handle._byte_size
-            )
-
-    def _create_cuda_shm_handle(self, shape: List[int], dtype: np.dtype, name: str) -> Any:
-        """
-        Create CUDA shared memory handle
+        Create shared memory handle
 
         Args:
-            shape (List[int]): Shape of cuda shared memory region
+            shape (List[int]): Shape of shared memory region
             dtype (np.dtype): Data type of input/output data
             name (str): Input/output name
 
         Returns:
-            Any: CUDA shared memory handle
+            Any: shared memory handle
         """
         byte_size = int(np.prod(shape) * np.dtype(dtype).itemsize)
         shm_name = self._generate_shm_name(name)
-        return cudashm.create_shared_memory_region(shm_name, byte_size, 0)
+        if self.use_cuda_shm:
+            shm_handle = cudashm.create_shared_memory_region(
+                shm_name, byte_size, 0
+            )
+        else:
+            shm_handle = shm.create_shared_memory_region(
+                shm_name, shm_name, byte_size
+            )
+        return ShmHandlerWrapper(shm_handle, shm_name, byte_size)
 
-    def _create_cuda_shm_handles_for_io(self, shapes: List[List[int]],
+    def _create_shm_handles_for_io(self, shapes: List[List[int]],
                                         dtypes: List[np.dtype],
-                                        names: List[str]) -> List[Any]:
+                                        names: List[str]) -> List[ShmHandlerWrapper]:
         """
-        Create CUDA shared memory handles for inputs or outputs
+        Create shared memory handles for inputs or outputs
 
         Args:
             shapes (List[List[int]]): Shapes of cuda shared memory regions
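`_create_shm_handle` above dispatches between `tritonclient.utils.cuda_shared_memory` and `tritonclient.utils.shared_memory`, whose `create_shared_memory_region` signatures differ (device id vs. shared-memory key). The standalone sketch below shows the system shared-memory round trip those utilities provide, independent of imb; the region name is a placeholder and a Linux host with `tritonclient[all]` installed is assumed:

```
import numpy as np
import tritonclient.utils.shared_memory as shm

data = np.arange(6, dtype=np.float32).reshape(2, 3)

# create_shared_memory_region(triton_shm_name, shm_key, byte_size)
handle = shm.create_shared_memory_region('demo_region_0', 'demo_region_0', data.nbytes)

# Write the array into the region, then read it back.
shm.set_shared_memory_region(handle, [data])
roundtrip = shm.get_contents_as_numpy(handle, np.float32, [2, 3])
assert np.array_equal(roundtrip, data)

# Release the region when done.
shm.destroy_shared_memory_region(handle)
```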
@@ -276,24 +297,24 @@
             names (List[str]): Input/output names
 
         Returns:
-            List[Any]: CUDA shared memory handles
+            List[ShmHandlerWrapper]: shared memory handles
         """
-        return [self._create_cuda_shm_handle(shape, dtype, name)
+        return [self._create_shm_handle(shape, dtype, name)
                 for shape, dtype, name in zip(shapes, dtypes, names)]
 
     def _create_input_output_shm_handles(self) -> None:
         """
-        Create CUDA shared memory handles for inputs and outputs
+        Create shared memory handles for inputs and outputs
         """
-        self.input_shm_handles = self._create_cuda_shm_handles_for_io(
+        self.input_shm_handlers = self._create_shm_handles_for_io(
             self.inputs_shapes, self.np_inputs_dtypes, self.inputs_names
         )
-        self.output_shm_handles = self._create_cuda_shm_handles_for_io(
+        self.output_shm_handlers = self._create_shm_handles_for_io(
             self.outputs_shapes, self.np_outputs_dtypes, self.outputs_names
         )
 
     def _create_triton_input(self, input_data: np.ndarray, input_name: str,
-                             config_input_format: str, shm_handle = None) -> Any:
+                             config_input_format: str, shm_handler: Optional[ShmHandlerWrapper] = None) -> Any:
         """
         Create triton InferInput
 
@@ -301,27 +322,28 @@
             input_data (np.ndarray): data for send to model
             input_name (str): name of input
             config_input_format (str): triton input format
-            shm_handle (_type_, optional): CUDA shared memory handle. Defaults to None.
+            shm_handler (ShmHandlerWrapper, optional): shared memory handler. Defaults to None.
 
         Returns:
             Any: triton InferInput for sending request
         """
         infer_input = self.client_module.InferInput(input_name, input_data.shape, config_input_format)
-        if self.cuda_shm:
-            cudashm.set_shared_memory_region(shm_handle, [input_data])
-            infer_input.set_shared_memory(shm_handle._triton_shm_name, shm_handle._byte_size)
+        if self.use_cuda_shm or self.use_system_shm:
+            shm_utils = cudashm if self.use_cuda_shm else shm
+            shm_utils.set_shared_memory_region(shm_handler.handler, [input_data])
+            infer_input.set_shared_memory(shm_handler.name, shm_handler.size)
         else:
             infer_input.set_data_from_numpy(input_data)
         return infer_input
 
-    def _create_triton_output(self, output_name: str, binary: bool = True, shm_handle = None) -> Any:
+    def _create_triton_output(self, output_name: str, binary: bool = True, shm_handler: Optional[ShmHandlerWrapper] = None) -> Any:
         """
         Create triton InferRequestedOutput
 
         Args:
             output_name (str): output name
             binary (bool, optional): Whether the output is binary. Defaults to True.
-            shm_handle (_type_, optional): CUDA shared memory handle. Defaults to None.
+            shm_handler (ShmHandlerWrapper, optional): shared memory handler. Defaults to None.
 
         Returns:
             Any: triton InferRequestedOutput for receiving response
@@ -330,10 +352,39 @@
             infer_output = self.client_module.InferRequestedOutput(output_name)
         else:
             infer_output = self.client_module.InferRequestedOutput(output_name, binary_data=binary)
-        if self.cuda_shm:
-            infer_output.set_shared_memory(shm_handle._triton_shm_name, shm_handle._byte_size)
+        if self.use_cuda_shm or self.use_system_shm:
+            infer_output.set_shared_memory(shm_handler.name, shm_handler.size)
         return infer_output
 
+    def _register_shm_regions(self):
+        """
+        Register shared memory regions in Triton
+        """
+        get_shared_memory_status = self.triton_client.get_cuda_shared_memory_status \
+            if self.use_cuda_shm else self.triton_client.get_system_shared_memory_status
+
+        unregister_shared_memory = self.triton_client.unregister_cuda_shared_memory \
+            if self.use_cuda_shm else self.triton_client.unregister_system_shared_memory
+
+        if self.scheme == "grpc":
+            regions_statuses = get_shared_memory_status(as_json=True)['regions']
+        else:
+            regions_statuses = get_shared_memory_status()
+
+        for shm_handler in self.input_shm_handlers + self.output_shm_handlers:
+            old_regions_names = self._get_old_regions_names(regions_statuses, shm_handler.name)
+            for old_region_name in old_regions_names:
+                unregister_shared_memory(old_region_name)
+
+            if self.use_cuda_shm:
+                self.triton_client.register_cuda_shared_memory(
+                    shm_handler.name, cudashm.get_raw_handle(shm_handler.handler), 0, shm_handler.size
+                )
+            else:
+                self.triton_client.register_system_shared_memory(
+                    shm_handler.name, shm_handler.name, shm_handler.size
+                )
+
     def _postprocess_triton_result(self, triton_response: Any, padding_size: int) -> Dict[str, np.ndarray]:
         """
         Postprocess triton response.
@@ -346,15 +397,17 @@
             Dict[str, np.ndarray]: dict of output name and output data
         """
         result = dict()
-        for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handles):
-            if self.cuda_shm:
+        for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handlers):
+            if self.use_cuda_shm or self.use_system_shm:
                 if self.scheme == "grpc":
                     # output = triton_response.get_output(output_name, as_json=True) # WARN: bug in tritonclient library, return None
                     output = json.loads(MessageToJson(triton_response.get_output(output_name)))
                 else:
                     output = triton_response.get_output(output_name)
-                result[output_name] = cudashm.get_contents_as_numpy(
-                    shm_op_handle,
+
+                shm_utils = shm if self.use_system_shm else cudashm
+                result[output_name] = shm_utils.get_contents_as_numpy(
+                    shm_op_handle.handler,
                     utils.triton_to_np_dtype(output["datatype"]),
                     output["shape"],
                 )
@@ -375,17 +428,17 @@
 
         for i_batch in range(count_batches):
             triton_inputs = []
-            for input_name, config_input_format, shm_ip_handle in \
-                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handles):
+            for input_name, config_input_format, shm_ip_handler in \
+                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handlers):
                 triton_input = self._create_triton_input(
-                    inputs_batches[input_name][i_batch], input_name, config_input_format, shm_ip_handle
+                    inputs_batches[input_name][i_batch], input_name, config_input_format, shm_ip_handler
                 )
                 triton_inputs.append(triton_input)
 
             triton_outputs = []
-            for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handles):
+            for output_name, shm_op_handlers in zip(self.outputs_names, self.output_shm_handlers):
                 triton_output = self._create_triton_output(
-                    output_name, binary=True, shm_handle=shm_op_handle
+                    output_name, binary=True, shm_handler=shm_op_handlers
                 )
                 triton_outputs.append(triton_output)
 
@@ -413,14 +466,14 @@
         for i_batch in range(count_batches):
             triton_inputs = []
             for input_name, config_input_format, shm_ip_handle in \
-                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handles):
+                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handlers):
                 triton_input = self._create_triton_input(
                     inputs_batches[input_name][i_batch], input_name, config_input_format, shm_ip_handle
                 )
                 triton_inputs.append(triton_input)
 
             triton_outputs = []
-            for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handles):
+            for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handlers):
                 triton_output = self._create_triton_output(
                     output_name, binary=True, shm_handle=shm_op_handle
                 )
@@ -19,7 +19,7 @@ extras_require["all"] = list(chain(extras_require.values()))
 
 setup(
     name='imb',
-    version='1.0.1',
+    version='1.0.3',
     author='p-constant',
     author_email='nikshorop@gmail.com',
     description='Python library for run inference of deep learning models in different backends',
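The hunk context above shows that setup.py builds an `all` extra by chaining the backend-specific extras. A hedged reconstruction of that pattern follows (not the package's exact setup.py): the version pins are the ones visible in the 1.0.1 metadata and may not match what 1.0.3 declares, and `chain.from_iterable` is used here so the sketch yields a flat list of requirement strings:

```
from itertools import chain

extras_require = {
    'triton': ['tritonclient[all]>=2.38.0'],
    'onnxcpu': ['onnxruntime>=1.16.0'],
    'onnxgpu': ['onnxruntime-gpu>=1.16.0'],
}
# Aggregate every backend-specific extra into a single "all" extra.
extras_require['all'] = list(chain.from_iterable(extras_require.values()))

print(extras_require['all'])
# ['tritonclient[all]>=2.38.0', 'onnxruntime>=1.16.0', 'onnxruntime-gpu>=1.16.0']
```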