imb: 1.0.0-py3-none-any.whl → 1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
imb/__init__.py CHANGED
@@ -1 +0,0 @@
-from .inference_clients import OnnxClient, TritonClient
imb/inference_clients/triton.py → imb/triton.py CHANGED
@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple
 import tritonclient.http as httpclient
 import tritonclient.grpc as grpcclient
 import tritonclient.utils.cuda_shared_memory as cudashm
+import tritonclient.utils.shared_memory as shm
 from google.protobuf.json_format import MessageToJson
 from tritonclient import utils
 from .base import BaseClient
@@ -11,6 +12,14 @@ import json
 import time
 
 
+class ShmHandlerWrapper:
+    def __init__(self, handler: Any, name: str, size: int):
+        self.handler = handler
+        self.name = name
+        self.size = size
+
+
+
 class TritonClient(BaseClient):
     def __init__(self, url: str,
                        model_name: str,
@@ -20,19 +29,40 @@ class TritonClient(BaseClient):
                        resend_count: int = 10,
                        fixed_batch: bool = True,
                        is_async: bool = False,
-                       cuda_shm: bool = False,
-                       max_shm_regions: int = 2,
+                       use_cuda_shm: bool = False,
+                       use_system_shm: bool = False,
+                       max_shm_regions: int = 0,
                        scheme: Literal["http", "grpc"] = "http",
                        return_dict: bool = True,
                        warmup: bool = False
                        ):
+        """Initialize the Triton inference client.
+
+        Args:
+            url (str): url of the triton server
+            model_name (str): name of the model endpoint
+            max_batch_size (int, optional): max batch size. Defaults to 0 (get value from triton config).
+            sample_inputs (Optional[List[np.ndarray]], optional): samples for warmup. Defaults to None (zeros array).
+            timeout (int, optional): triton client timeout. Defaults to 10.
+            resend_count (int, optional): triton client resend count. Defaults to 10.
+            fixed_batch (bool, optional): use fixed batch size, using padding for smaller batch. Defaults to True.
+            is_async (bool, optional): async inference. Defaults to False.
+            use_cuda_shm (bool, optional): use cuda shared memory. Defaults to False.
+            use_system_shm (bool, optional): use system shared memory. Defaults to False.
+            max_shm_regions (int, optional): max clients for shared memory. Will unregister old regions. Defaults to 0.
+            scheme (Literal["http", "grpc"], optional): scheme for triton client. Defaults to "http".
+            return_dict (bool, optional): return dict or list of values. Defaults to True.
+            warmup (bool, optional): warmup model. Defaults to False.
+        """
         super().__init__()
+        assert not (use_cuda_shm and use_system_shm), 'use_cuda_shm and use_system_shm are mutually exclusive'
         self.model_name = model_name
         self.scheme = scheme
         self.client_module = httpclient if scheme == "http" else grpcclient
         self.url = url
         self.is_async = is_async
-        self.cuda_shm = cuda_shm
+        self.use_cuda_shm = use_cuda_shm
+        self.use_system_shm = use_system_shm
         self.triton_timeout = timeout
         self.resend_count = resend_count
         self.max_shm_regions = max_shm_regions
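For orientation, a minimal construction sketch using the renamed flags (the URL and model name are placeholders and a reachable Triton server is assumed; this is not taken from the package itself):

```
from imb.triton import TritonClient

# Hypothetical endpoint; shared memory requires fixed_batch=True and is_async=False.
client = TritonClient(
    url='localhost:8000',
    model_name='my_model',
    fixed_batch=True,
    is_async=False,
    use_system_shm=True,   # or use_cuda_shm=True, never both (see the assert above)
    max_shm_regions=2,     # older regions with the same name prefix get unregistered
    scheme='http',
)
outputs = client(*client.sample_inputs)
```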
@@ -57,14 +87,16 @@ class TritonClient(BaseClient):
         if warmup:
             self.warmup_model()
 
-        self.input_shm_handles = [None for _ in range(len(self.inputs_names))]
-        self.output_shm_handles = [None for _ in range(len(self.outputs_names))]
+        self.input_shm_handlers: List[Optional[ShmHandlerWrapper]] = \
+            [None for _ in range(len(self.inputs_names))]
+        self.output_shm_handlers: List[Optional[ShmHandlerWrapper]] = \
+            [None for _ in range(len(self.outputs_names))]
 
-        if self.cuda_shm:
+        if self.use_cuda_shm or self.use_system_shm:
             assert is_async == False and fixed_batch == True
             self._fill_output_dynamic_axis()
             self._create_input_output_shm_handles()
-            self._register_cuda_shm_regions()
+            self._register_shm_regions()
 
     def io_summary(self):
         return {
@@ -84,7 +116,8 @@
 
             "fixed_batch": self.fixed_batch,
             "async": self.is_async,
-            "cuda_shm": self.cuda_shm,
+            "cuda_shm": self.use_cuda_shm,
+            "shm": self.use_system_shm,
             "max_shm_regions": self.max_shm_regions,
         }
 
@@ -150,13 +183,16 @@ class TritonClient(BaseClient):
             -1 in output_shape for output_shape in self.outputs_shapes
         )
         if has_dynamic_shapes:
-            start_cuda_shm_flag = self.cuda_shm
-            self.cuda_shm = False
+            start_cuda_shm_flag = self.use_cuda_shm
+            start_system_shm_flag = self.use_system_shm
+            self.use_cuda_shm = False
+            self.use_system_shm = False
             outputs = self.forward(*self.sample_inputs)
             self.outputs_shapes = [
                 list(outputs[output_name].shape) for output_name in self.outputs_names
             ]
-            self.cuda_shm = start_cuda_shm_flag
+            self.use_cuda_shm = start_cuda_shm_flag
+            self.use_system_shm = start_system_shm_flag
 
     @staticmethod
     def _parse_io_params(io_params: List[Dict]) -> Tuple[List[str], List[np.dtype], List[List[int]], List[str]]:
@@ -212,12 +248,14 @@
         Get old regions names for unregister
 
         Args:
-            regions_statuses (list): responce of get_cuda_shared_memory_status from triton
+            regions_statuses (list): response of get_shared_memory_status from triton
             new_triton_shm_name (str): name of new region
 
         Returns:
             List[str]: old regions names for unregister
         """
+        if self.max_shm_regions < 1:
+            return []
         i_sep = len(new_triton_shm_name) - new_triton_shm_name[::-1].index('_') - 1
         region_name = new_triton_shm_name[:i_sep]
         registrated_regions = [
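The slicing above derives the region prefix by cutting at the last underscore, so regions created by repeated client launches share a prefix and can be cleaned up. A tiny illustration (the region name is invented, since `_generate_shm_name` is not shown in this diff):

```
# Invented example of the form "<io name>_<unique suffix>".
new_triton_shm_name = 'input0_1714000000'
i_sep = len(new_triton_shm_name) - new_triton_shm_name[::-1].index('_') - 1
region_name = new_triton_shm_name[:i_sep]
print(region_name)  # -> 'input0', the prefix compared against already registered regions
```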
@@ -231,44 +269,35 @@ class TritonClient(BaseClient):
         old_regions = [name for name, _ in registrated_regions[:count_old_regions]]
         return old_regions
 
-    def _register_cuda_shm_regions(self):
-        """
-        Register CUDA shared memory regions in Triton
+    def _create_shm_handle(self, shape: List[int], dtype: np.dtype, name: str) -> ShmHandlerWrapper:
         """
-        if self.scheme == "grpc":
-            regions_statuses = self.triton_client.get_cuda_shared_memory_status(as_json=True)['regions']
-        else:
-            regions_statuses = self.triton_client.get_cuda_shared_memory_status()
-
-        for shm_handle in self.input_shm_handles + self.output_shm_handles:
-            old_regions_names = self._get_old_regions_names(regions_statuses, shm_handle._triton_shm_name)
-            for old_region_name in old_regions_names:
-                self.triton_client.unregister_cuda_shared_memory(old_region_name)
-            self.triton_client.register_cuda_shared_memory(
-                shm_handle._triton_shm_name, cudashm.get_raw_handle(shm_handle), 0, shm_handle._byte_size
-            )
-
-    def _create_cuda_shm_handle(self, shape: List[int], dtype: np.dtype, name: str) -> Any:
-        """
-        Create CUDA shared memory handle
+        Create shared memory handle
 
         Args:
-            shape (List[int]): Shape of cuda shared memory region
+            shape (List[int]): Shape of shared memory region
             dtype (np.dtype): Data type of input/output data
             name (str): Input/output name
 
         Returns:
-            Any: CUDA shared memory handle
+            ShmHandlerWrapper: shared memory handle wrapper
         """
         byte_size = int(np.prod(shape) * np.dtype(dtype).itemsize)
         shm_name = self._generate_shm_name(name)
-        return cudashm.create_shared_memory_region(shm_name, byte_size, 0)
+        if self.use_cuda_shm:
+            shm_handle = cudashm.create_shared_memory_region(
+                shm_name, byte_size, 0
+            )
+        else:
+            shm_handle = shm.create_shared_memory_region(
+                shm_name, shm_name, byte_size
+            )
+        return ShmHandlerWrapper(shm_handle, shm_name, byte_size)
 
-    def _create_cuda_shm_handles_for_io(self, shapes: List[List[int]],
+    def _create_shm_handles_for_io(self, shapes: List[List[int]],
                                         dtypes: List[np.dtype],
-                                        names: List[str]) -> List[Any]:
+                                        names: List[str]) -> List[ShmHandlerWrapper]:
         """
-        Create CUDA shared memory handles for inputs or outputs
+        Create shared memory handles for inputs or outputs
 
         Args:
             shapes (List[List[int]]): Shapes of cuda shared memory regions
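As a side note, a hedged sketch of the byte-size arithmetic and the two creation calls used above, outside the class (region names are invented; the system-shared-memory call needs a Linux host, the CUDA call a visible GPU):

```
import numpy as np
import tritonclient.utils.shared_memory as shm
import tritonclient.utils.cuda_shared_memory as cudashm

shape, dtype = [16, 3, 224, 224], np.float32
byte_size = int(np.prod(shape) * np.dtype(dtype).itemsize)  # 16*3*224*224 * 4 bytes = 9,633,792

# System shared memory: (triton region name, shm key, size); both names are the same here,
# mirroring the call in _create_shm_handle.
sys_handle = shm.create_shared_memory_region('input0_demo', 'input0_demo', byte_size)
shm.destroy_shared_memory_region(sys_handle)

# CUDA shared memory: (triton region name, size, device id).
cuda_handle = cudashm.create_shared_memory_region('input0_demo_cuda', byte_size, 0)
cudashm.destroy_shared_memory_region(cuda_handle)
```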
@@ -276,24 +305,24 @@
             names (List[str]): Input/output names
 
         Returns:
-            List[Any]: CUDA shared memory handles
+            List[ShmHandlerWrapper]: shared memory handles
         """
-        return [self._create_cuda_shm_handle(shape, dtype, name)
+        return [self._create_shm_handle(shape, dtype, name)
                 for shape, dtype, name in zip(shapes, dtypes, names)]
 
     def _create_input_output_shm_handles(self) -> None:
         """
-        Create CUDA shared memory handles for inputs and outputs
+        Create shared memory handles for inputs and outputs
         """
-        self.input_shm_handles = self._create_cuda_shm_handles_for_io(
+        self.input_shm_handlers = self._create_shm_handles_for_io(
             self.inputs_shapes, self.np_inputs_dtypes, self.inputs_names
         )
-        self.output_shm_handles = self._create_cuda_shm_handles_for_io(
+        self.output_shm_handlers = self._create_shm_handles_for_io(
             self.outputs_shapes, self.np_outputs_dtypes, self.outputs_names
         )
 
     def _create_triton_input(self, input_data: np.ndarray, input_name: str,
-                             config_input_format: str, shm_handle = None) -> Any:
+                             config_input_format: str, shm_handler: Optional[ShmHandlerWrapper] = None) -> Any:
         """
         Create triton InferInput
 
@@ -301,27 +330,28 @@
             input_data (np.ndarray): data for send to model
             input_name (str): name of input
             config_input_format (str): triton input format
-            shm_handle (_type_, optional): CUDA shared memory handle. Defaults to None.
+            shm_handler (ShmHandlerWrapper, optional): shared memory handler. Defaults to None.
 
         Returns:
             Any: triton InferInput for sending request
         """
         infer_input = self.client_module.InferInput(input_name, input_data.shape, config_input_format)
-        if self.cuda_shm:
-            cudashm.set_shared_memory_region(shm_handle, [input_data])
-            infer_input.set_shared_memory(shm_handle._triton_shm_name, shm_handle._byte_size)
+        if self.use_cuda_shm or self.use_system_shm:
+            shm_utils = cudashm if self.use_cuda_shm else shm
+            shm_utils.set_shared_memory_region(shm_handler.handler, [input_data])
+            infer_input.set_shared_memory(shm_handler.name, shm_handler.size)
         else:
             infer_input.set_data_from_numpy(input_data)
         return infer_input
 
-    def _create_triton_output(self, output_name: str, binary: bool = True, shm_handle = None) -> Any:
+    def _create_triton_output(self, output_name: str, binary: bool = True, shm_handler: Optional[ShmHandlerWrapper] = None) -> Any:
         """
         Create triton InferRequestedOutput
 
         Args:
             output_name (str): output name
             binary (bool, optional): Whether the output is binary. Defaults to True.
-            shm_handle (_type_, optional): CUDA shared memory handle. Defaults to None.
+            shm_handler (ShmHandlerWrapper, optional): shared memory handler. Defaults to None.
 
         Returns:
             Any: triton InferRequestedOutput for receiving response
@@ -330,10 +360,39 @@
             infer_output = self.client_module.InferRequestedOutput(output_name)
         else:
             infer_output = self.client_module.InferRequestedOutput(output_name, binary_data=binary)
-        if self.cuda_shm:
-            infer_output.set_shared_memory(shm_handle._triton_shm_name, shm_handle._byte_size)
+        if self.use_cuda_shm or self.use_system_shm:
+            infer_output.set_shared_memory(shm_handler.name, shm_handler.size)
         return infer_output
 
+    def _register_shm_regions(self):
+        """
+        Register shared memory regions in Triton
+        """
+        get_shared_memory_status = self.triton_client.get_cuda_shared_memory_status \
+            if self.use_cuda_shm else self.triton_client.get_system_shared_memory_status
+
+        unregister_shared_memory = self.triton_client.unregister_cuda_shared_memory \
+            if self.use_cuda_shm else self.triton_client.unregister_system_shared_memory
+
+        if self.scheme == "grpc":
+            regions_statuses = get_shared_memory_status(as_json=True)['regions']
+        else:
+            regions_statuses = get_shared_memory_status()
+
+        for shm_handler in self.input_shm_handlers + self.output_shm_handlers:
+            old_regions_names = self._get_old_regions_names(regions_statuses, shm_handler.name)
+            for old_region_name in old_regions_names:
+                unregister_shared_memory(old_region_name)
+
+            if self.use_cuda_shm:
+                self.triton_client.register_cuda_shared_memory(
+                    shm_handler.name, cudashm.get_raw_handle(shm_handler.handler), 0, shm_handler.size
+                )
+            else:
+                self.triton_client.register_system_shared_memory(
+                    shm_handler.name, shm_handler.name, shm_handler.size
+                )
+
     def _postprocess_triton_result(self, triton_response: Any, padding_size: int) -> Dict[str, np.ndarray]:
         """
         Postprocess triton response.
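For reference, a standalone sketch of the same register/inspect/unregister round trip against a server, using the system-shared-memory variant and the HTTP client (URL and region name are placeholders; it assumes a running Triton server and a Linux host):

```
import numpy as np
import tritonclient.http as httpclient
import tritonclient.utils.shared_memory as shm

client = httpclient.InferenceServerClient('localhost:8000')

data = np.zeros((1, 3), dtype=np.float32)
# Create a local region, copy the data in, and hand the region to Triton by name.
handle = shm.create_shared_memory_region('demo_region_0', 'demo_region_0', data.nbytes)
shm.set_shared_memory_region(handle, [data])

client.register_system_shared_memory('demo_region_0', 'demo_region_0', data.nbytes)
print(client.get_system_shared_memory_status())

# Clean up on both the server and the client side.
client.unregister_system_shared_memory('demo_region_0')
shm.destroy_shared_memory_region(handle)
```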
@@ -346,15 +405,17 @@
             Dict[str, np.ndarray]: dict of output name and output data
         """
         result = dict()
-        for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handles):
-            if self.cuda_shm:
+        for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handlers):
+            if self.use_cuda_shm or self.use_system_shm:
                 if self.scheme == "grpc":
                     # output = triton_response.get_output(output_name, as_json=True) # WARN: bug in tritonclient library, return None
                     output = json.loads(MessageToJson(triton_response.get_output(output_name)))
                 else:
                     output = triton_response.get_output(output_name)
-                result[output_name] = cudashm.get_contents_as_numpy(
-                    shm_op_handle,
+
+                shm_utils = shm if self.use_system_shm else cudashm
+                result[output_name] = shm_utils.get_contents_as_numpy(
+                    shm_op_handle.handler,
                     utils.triton_to_np_dtype(output["datatype"]),
                     output["shape"],
                 )
@@ -375,17 +436,17 @@
 
         for i_batch in range(count_batches):
             triton_inputs = []
-            for input_name, config_input_format, shm_ip_handle in \
-                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handles):
+            for input_name, config_input_format, shm_ip_handler in \
+                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handlers):
                 triton_input = self._create_triton_input(
-                    inputs_batches[input_name][i_batch], input_name, config_input_format, shm_ip_handle
+                    inputs_batches[input_name][i_batch], input_name, config_input_format, shm_ip_handler
                 )
                 triton_inputs.append(triton_input)
 
             triton_outputs = []
-            for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handles):
+            for output_name, shm_op_handlers in zip(self.outputs_names, self.output_shm_handlers):
                 triton_output = self._create_triton_output(
-                    output_name, binary=True, shm_handle=shm_op_handle
+                    output_name, binary=True, shm_handler=shm_op_handlers
                 )
                 triton_outputs.append(triton_output)
 
@@ -413,14 +474,14 @@
         for i_batch in range(count_batches):
             triton_inputs = []
             for input_name, config_input_format, shm_ip_handle in \
-                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handles):
+                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handlers):
                 triton_input = self._create_triton_input(
                     inputs_batches[input_name][i_batch], input_name, config_input_format, shm_ip_handle
                 )
                 triton_inputs.append(triton_input)
 
             triton_outputs = []
-            for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handles):
+            for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handlers):
                 triton_output = self._create_triton_output(
                     output_name, binary=True, shm_handle=shm_op_handle
                 )
imb-1.0.2.dist-info/METADATA ADDED
@@ -0,0 +1,113 @@
+Metadata-Version: 2.2
+Name: imb
+Version: 1.0.2
+Summary: Python library for run inference of deep learning models in different backends
+Home-page: https://github.com/TheConstant3/InferenceMultiBackend
+Author: p-constant
+Author-email: nikshorop@gmail.com
+Classifier: Programming Language :: Python :: 3.8
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy
+Provides-Extra: triton
+Requires-Dist: tritonclient[all]>=2.38.0; extra == "triton"
+Provides-Extra: onnxcpu
+Requires-Dist: onnxruntime>=1.16.0; extra == "onnxcpu"
+Provides-Extra: onnxgpu
+Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "onnxgpu"
+Provides-Extra: all
+Requires-Dist: tritonclient[all]>=2.38.0; extra == "all"
+Requires-Dist: onnxruntime>=1.16.0; extra == "all"
+Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "all"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+ # InferenceMultiBackend
37
+
38
+ Python library for run inference of deep learning models in different backends
39
+
40
+ ## Installation
41
+
42
+ For use triton inference client:
43
+ ```pip install imb[triton]```
44
+
45
+ For use onnxruntime-gpu client:
46
+ ```pip install imb[onnxgpu]```
47
+
48
+ For use onnxruntime client:
49
+ ```pip install imb[onnxcpu]```
50
+
51
+ For support all implemented clients:
52
+ ```pip install imb[all]```
53
+
54
+ ## Usage
55
+
56
+ OnnxClient usage example
57
+ ```
58
+ from imb.onnx import OnnxClient
59
+
60
+ onnx_client = OnnxClient(
61
+ model_path='model.onnx',
62
+ model_name='any name',
63
+ providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
64
+ max_batch_size=16,
65
+ return_dict=True,
66
+ fixed_batch=True,
67
+ warmup=True
68
+ )
69
+
70
+ # if model has fixed input size (except batch size) then sample_inputs will be created
71
+ sample_inputs = onnx_client.sample_inputs
72
+ print('inputs shapes', [o.shape for o in sample_inputs])
73
+
74
+ outputs = onnx_client(*sample_inputs)
75
+ print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
76
+ ```
77
+
78
+ TritonClient usage example
79
+ ```
80
+ from imb.triton import TritonClient
81
+
82
+ triton_client = TritonClient(
83
+ url='localhost:8000',
84
+ model_name='arcface',
85
+ max_batch_size=16,
86
+ timeout=10,
87
+ resend_count=10,
88
+ fixed_batch=True,
89
+ is_async=False,
90
+ cuda_shm=False,
91
+ max_shm_regions=2,
92
+ scheme='http',
93
+ return_dict=True,
94
+ warmup=False
95
+ )
96
+
97
+ # if model has fixed input size (except batch size) then sample_inputs will be created
98
+ sample_inputs = triton_client.sample_inputs
99
+ print('inputs shapes', [o.shape for o in sample_inputs])
100
+
101
+ outputs = triton_client(*sample_inputs)
102
+ print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
103
+ ```
104
+
105
+ ## Notes
106
+
107
+ max_batch_size - maximum batch size for inference. If input data larger that max_batch_size, then input data will be splitted to several batches.
108
+
109
+ fixed_batch - if fixed batch is True, then each batch will have fixed size (padding the smallest batch to max_batch_size).
110
+
111
+ warmup - if True, model will run several calls on sample_inputs while initialization.
112
+
113
+ return_dict - if True, __call__ return dict {'output_name1': output_value1, ...}, else [output_value1, ...]
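To make the max_batch_size / fixed_batch note above concrete, a small arithmetic sketch (illustration only, not the library's internal code):

```
import numpy as np

# 35 samples with max_batch_size=16: the input is split into 3 batches; with
# fixed_batch=True the last batch is padded with 13 dummy samples.
max_batch_size, n_samples = 16, 35
n_batches = int(np.ceil(n_samples / max_batch_size))
padding = n_batches * max_batch_size - n_samples
print(n_batches, padding)  # 3 13
```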
imb-1.0.2.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+imb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+imb/base.py,sha256=oBmiTu4rHgzED5kCxKPvS9e3PhI229Pj5lxuPm7ep6M,5189
+imb/onnx.py,sha256=g3vQBJPeln0YUOQ1X9RjZce8AAi-7SXntLyevOZZdG8,4100
+imb/triton.py,sha256=92d3tvCniWGSnC1UyjkQ5OcXgSbsBnX6T2hoewLal0k,21796
+imb-1.0.2.dist-info/LICENSE,sha256=pAZXnNE2dxxwXFIduGyn1gpvPefJtUYOYZOi3yeGG94,1068
+imb-1.0.2.dist-info/METADATA,sha256=lEzhVDdcdNHZeECQPisnQcZDjueOFP8zuhVTDh4Vi3s,3314
+imb-1.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+imb-1.0.2.dist-info/top_level.txt,sha256=kY8Fp1i_MzTZhuoVhVexG762D8HBd-THfX_lfw4EZmY,4
+imb-1.0.2.dist-info/RECORD,,
imb/inference_clients/__init__.py DELETED
@@ -1,2 +0,0 @@
-from .onnx import OnnxClient
-from .triton import TritonClient
imb-1.0.0.dist-info/METADATA DELETED
@@ -1,30 +0,0 @@
-Metadata-Version: 2.2
-Name: imb
-Version: 1.0.0
-Summary: Python library for run inference of deep learning models in different backends
-Home-page: https://github.com/TheConstant3/InferenceMultiBackend
-Author: p-constant
-Author-email: nikshorop@gmail.com
-Classifier: Programming Language :: Python :: 3.8
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.8
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: onnxruntime-gpu>=1.16.0
-Requires-Dist: tritonclient[all]>=2.38.0
-Requires-Dist: numpy>=1.19.4
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
-
-# InferenceMultiBackend
-
-Python library for run inference of deep learning models in different backends
-
imb-1.0.0.dist-info/RECORD DELETED
@@ -1,10 +0,0 @@
-imb/__init__.py,sha256=8XoaonMp09UWmynubLMIu2bln41iKgIdWj-wxgsQjnk,55
-imb/inference_clients/__init__.py,sha256=Glv4yD0QdtZmCOiYFbILSl90VhxdwvPoH9gFczHlVFk,61
-imb/inference_clients/base.py,sha256=oBmiTu4rHgzED5kCxKPvS9e3PhI229Pj5lxuPm7ep6M,5189
-imb/inference_clients/onnx.py,sha256=g3vQBJPeln0YUOQ1X9RjZce8AAi-7SXntLyevOZZdG8,4100
-imb/inference_clients/triton.py,sha256=hdnCtDjoRAl_Ss49_ayvW3-VhsYcY2MbNqh3ax6y-18,18629
-imb-1.0.0.dist-info/LICENSE,sha256=pAZXnNE2dxxwXFIduGyn1gpvPefJtUYOYZOi3yeGG94,1068
-imb-1.0.0.dist-info/METADATA,sha256=NZcJPx91mzPg4Zo9FZxlMQE4c6zB2s_yPVhhRVxPBzM,898
-imb-1.0.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-imb-1.0.0.dist-info/top_level.txt,sha256=kY8Fp1i_MzTZhuoVhVexG762D8HBd-THfX_lfw4EZmY,4
-imb-1.0.0.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes