imb 1.0.1__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {imb-1.0.1 → imb-1.0.3}/PKG-INFO +14 -20
- {imb-1.0.1 → imb-1.0.3}/README.md +8 -0
- {imb-1.0.1 → imb-1.0.3}/imb/triton.py +122 -69
- {imb-1.0.1 → imb-1.0.3}/imb.egg-info/PKG-INFO +14 -20
- {imb-1.0.1 → imb-1.0.3}/setup.py +1 -1
- {imb-1.0.1 → imb-1.0.3}/LICENSE +0 -0
- {imb-1.0.1 → imb-1.0.3}/imb/__init__.py +0 -0
- {imb-1.0.1 → imb-1.0.3}/imb/base.py +0 -0
- {imb-1.0.1 → imb-1.0.3}/imb/onnx.py +0 -0
- {imb-1.0.1 → imb-1.0.3}/imb.egg-info/SOURCES.txt +0 -0
- {imb-1.0.1 → imb-1.0.3}/imb.egg-info/dependency_links.txt +0 -0
- {imb-1.0.1 → imb-1.0.3}/imb.egg-info/requires.txt +0 -0
- {imb-1.0.1 → imb-1.0.3}/imb.egg-info/top_level.txt +0 -0
- {imb-1.0.1 → imb-1.0.3}/setup.cfg +0 -0
{imb-1.0.1 → imb-1.0.3}/PKG-INFO
RENAMED
@@ -1,37 +1,22 @@
-Metadata-Version: 2.
+Metadata-Version: 2.1
 Name: imb
-Version: 1.0.1
+Version: 1.0.3
 Summary: Python library for run inference of deep learning models in different backends
 Home-page: https://github.com/TheConstant3/InferenceMultiBackend
 Author: p-constant
 Author-email: nikshorop@gmail.com
+License: UNKNOWN
+Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3.8
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: numpy
 Provides-Extra: triton
-Requires-Dist: tritonclient[all]>=2.38.0; extra == "triton"
 Provides-Extra: onnxcpu
-Requires-Dist: onnxruntime>=1.16.0; extra == "onnxcpu"
 Provides-Extra: onnxgpu
-Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "onnxgpu"
 Provides-Extra: all
-
-Requires-Dist: onnxruntime>=1.16.0; extra == "all"
-Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "all"
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: provides-extra
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
+License-File: LICENSE
 
 # InferenceMultiBackend
 
@@ -55,6 +40,8 @@ For support all implemented clients:
 
 OnnxClient usage example
 ```
+from imb.onnx import OnnxClient
+
 onnx_client = OnnxClient(
     model_path='model.onnx',
     model_name='any name',
@@ -64,15 +51,19 @@ onnx_client = OnnxClient(
     fixed_batch=True,
     warmup=True
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = onnx_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = onnx_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
 
 TritonClient usage example
 ```
+from imb.triton import TritonClient
+
 triton_client = TritonClient(
     url='localhost:8000',
     model_name='arcface',
@@ -87,9 +78,11 @@ triton_client = TritonClient(
     return_dict=True,
     warmup=False
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = triton_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = triton_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
@@ -103,3 +96,4 @@ fixed_batch - if fixed batch is True, then each batch will have fixed size (padd
 warmup - if True, model will run several calls on sample_inputs while initialization.
 
 return_dict - if True, __call__ return dict {'output_name1': output_value1, ...}, else [output_value1, ...]
+
{imb-1.0.1 → imb-1.0.3}/README.md
RENAMED
@@ -20,6 +20,8 @@ For support all implemented clients:
 
 OnnxClient usage example
 ```
+from imb.onnx import OnnxClient
+
 onnx_client = OnnxClient(
     model_path='model.onnx',
     model_name='any name',
@@ -29,15 +31,19 @@ onnx_client = OnnxClient(
     fixed_batch=True,
     warmup=True
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = onnx_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = onnx_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
 
 TritonClient usage example
 ```
+from imb.triton import TritonClient
+
 triton_client = TritonClient(
     url='localhost:8000',
     model_name='arcface',
@@ -52,9 +58,11 @@ triton_client = TritonClient(
     return_dict=True,
     warmup=False
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = triton_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = triton_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
{imb-1.0.1 → imb-1.0.3}/imb/triton.py
RENAMED
@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple
 import tritonclient.http as httpclient
 import tritonclient.grpc as grpcclient
 import tritonclient.utils.cuda_shared_memory as cudashm
+import tritonclient.utils.shared_memory as shm
 from google.protobuf.json_format import MessageToJson
 from tritonclient import utils
 from .base import BaseClient
@@ -11,30 +12,53 @@ import json
 import time
 
 
+class ShmHandlerWrapper:
+    def __init__(self, handler: Any, name: str, size: int):
+        self.handler = handler
+        self.name = name
+        self.size = size
+
+
+
 class TritonClient(BaseClient):
     def __init__(self, url: str,
                  model_name: str,
                  max_batch_size: int = 0,
                  sample_inputs: Optional[List[np.ndarray]] = None,
-                 timeout: int = 10,
-                 resend_count: int = 10,
                  fixed_batch: bool = True,
                  is_async: bool = False,
-
-
+                 use_cuda_shm: bool = False,
+                 use_system_shm: bool = False,
+                 max_shm_regions: int = 0,
                  scheme: Literal["http", "grpc"] = "http",
                  return_dict: bool = True,
                  warmup: bool = False
                  ):
+        """_summary_
+
+        Args:
+            url (str): url of the triton server
+            model_name (str): name of the model endpoint
+            max_batch_size (int, optional): max batch size. Defaults to 0 (get value from triton config).
+            sample_inputs (Optional[List[np.ndarray]], optional): samples for warmup. Defaults to None (zeros array).
+            fixed_batch (bool, optional): use fixed batch size, using padding for smaller batch. Defaults to True.
+            is_async (bool, optional): async inference. Defaults to False.
+            use_cuda_shm (bool, optional): use cuda shared memory. Defaults to False.
+            use_system_shm (bool, optional): use system shared memory. Defaults to False.
+            max_shm_regions (int, optional): max clients for shared memory. Will unregister old regions. Defaults to 0.
+            scheme (Literal["http", "grpc"], optional): scheme for triton client. Defaults to "http".
+            return_dict (bool, optional): return dict or list of values. Defaults to True.
+            warmup (bool, optional): warmup model. Defaults to False.
+        """
         super().__init__()
+        assert not (use_cuda_shm and use_system_shm), 'shm and cuda_shm are mutually exclusive'
         self.model_name = model_name
         self.scheme = scheme
         self.client_module = httpclient if scheme == "http" else grpcclient
         self.url = url
         self.is_async = is_async
-        self.
-        self.
-        self.resend_count = resend_count
+        self.use_cuda_shm = use_cuda_shm
+        self.use_system_shm = use_system_shm
         self.max_shm_regions = max_shm_regions
         self.return_dict = return_dict
 
@@ -57,14 +81,16 @@ class TritonClient(BaseClient):
         if warmup:
             self.warmup_model()
 
-        self.
-
+        self.input_shm_handlers: List[Optional[ShmHandlerWrapper]] = \
+            [None for _ in range(len(self.inputs_names))]
+        self.output_shm_handlers: List[Optional[ShmHandlerWrapper]] = \
+            [None for _ in range(len(self.outputs_names))]
 
-        if self.
+        if self.use_cuda_shm or self.use_system_shm:
             assert is_async == False and fixed_batch == True
             self._fill_output_dynamic_axis()
             self._create_input_output_shm_handles()
-            self.
+            self._register_shm_regions()
 
     def io_summary(self):
         return {
@@ -84,7 +110,8 @@ class TritonClient(BaseClient):
 
             "fixed_batch": self.fixed_batch,
             "async": self.is_async,
-            "cuda_shm": self.
+            "cuda_shm": self.use_cuda_shm,
+            "shm": self.use_system_shm,
             "max_shm_regions": self.max_shm_regions,
         }
 
@@ -97,9 +124,7 @@ class TritonClient(BaseClient):
         self.triton_client = self.client_module.InferenceServerClient(
             url=self.url,
             verbose=False,
-            ssl=False,
-            network_timeout=self.triton_timeout,
-            connection_timeout=self.triton_timeout
+            ssl=False
         )
 
     def _load_model_params(self, user_max_batch_size: int) -> None:
@@ -150,13 +175,16 @@ class TritonClient(BaseClient):
             -1 in output_shape for output_shape in self.outputs_shapes
         )
         if has_dynamic_shapes:
-            start_cuda_shm_flag = self.
-
+            start_cuda_shm_flag = self.use_cuda_shm
+            start_system_shm_flag = self.use_system_shm
+            self.use_cuda_shm = False
+            self.use_system_shm = False
             outputs = self.forward(*self.sample_inputs)
             self.outputs_shapes = [
                 list(outputs[output_name].shape) for output_name in self.outputs_names
             ]
-            self.
+            self.use_cuda_shm = start_cuda_shm_flag
+            self.use_system_shm = start_system_shm_flag
 
     @staticmethod
     def _parse_io_params(io_params: List[Dict]) -> Tuple[List[str], List[np.dtype], List[List[int]], List[str]]:
@@ -212,12 +240,14 @@ class TritonClient(BaseClient):
         Get old regions names for unregister
 
        Args:
-            regions_statuses (list): responce of
+            regions_statuses (list): responce of get_shared_memory_status from triton
            new_triton_shm_name (str): name of new region
 
        Returns:
            List[str]: old regions names for unregister
        """
+        if self.max_shm_regions < 1:
+            return []
         i_sep = len(new_triton_shm_name) - new_triton_shm_name[::-1].index('_') - 1
         region_name = new_triton_shm_name[:i_sep]
         registrated_regions = [
@@ -231,44 +261,35 @@ class TritonClient(BaseClient):
         old_regions = [name for name, _ in registrated_regions[:count_old_regions]]
         return old_regions
 
-    def
-        """
-        Register CUDA shared memory regions in Triton
+    def _create_shm_handle(self, shape: List[int], dtype: np.dtype, name: str) -> ShmHandlerWrapper:
         """
-
-            regions_statuses = self.triton_client.get_cuda_shared_memory_status(as_json=True)['regions']
-        else:
-            regions_statuses = self.triton_client.get_cuda_shared_memory_status()
-
-        for shm_handle in self.input_shm_handles + self.output_shm_handles:
-            old_regions_names = self._get_old_regions_names(regions_statuses, shm_handle._triton_shm_name)
-            for old_region_name in old_regions_names:
-                self.triton_client.unregister_cuda_shared_memory(old_region_name)
-            self.triton_client.register_cuda_shared_memory(
-                shm_handle._triton_shm_name, cudashm.get_raw_handle(shm_handle), 0, shm_handle._byte_size
-            )
-
-    def _create_cuda_shm_handle(self, shape: List[int], dtype: np.dtype, name: str) -> Any:
-        """
-        Create CUDA shared memory handle
+        Create shared memory handle
 
        Args:
-            shape (List[int]): Shape of
+            shape (List[int]): Shape of shared memory region
            dtype (np.dtype): Data type of input/output data
            name (str): Input/output name
 
        Returns:
-            Any:
+            Any: shared memory handle
        """
         byte_size = int(np.prod(shape) * np.dtype(dtype).itemsize)
         shm_name = self._generate_shm_name(name)
-
+        if self.use_cuda_shm:
+            shm_handle = cudashm.create_shared_memory_region(
+                shm_name, byte_size, 0
+            )
+        else:
+            shm_handle = shm.create_shared_memory_region(
+                shm_name, shm_name, byte_size
+            )
+        return ShmHandlerWrapper(shm_handle, shm_name, byte_size)
 
-    def
+    def _create_shm_handles_for_io(self, shapes: List[List[int]],
                                    dtypes: List[np.dtype],
-                                   names: List[str]) -> List[
+                                   names: List[str]) -> List[ShmHandlerWrapper]:
        """
-        Create
+        Create shared memory handles for inputs or outputs
 
        Args:
            shapes (List[List[int]]): Shapes of cuda shared memory regions
@@ -276,24 +297,24 @@ class TritonClient(BaseClient):
            names (List[str]): Input/output names
 
        Returns:
-            List[
+            List[ShmHandlerWrapper]: shared memory handles
        """
-        return [self.
+        return [self._create_shm_handle(shape, dtype, name)
                 for shape, dtype, name in zip(shapes, dtypes, names)]
 
     def _create_input_output_shm_handles(self) -> None:
        """
-        Create
+        Create shared memory handles for inputs and outputs
        """
-        self.
+        self.input_shm_handlers = self._create_shm_handles_for_io(
            self.inputs_shapes, self.np_inputs_dtypes, self.inputs_names
        )
-        self.
+        self.output_shm_handlers = self._create_shm_handles_for_io(
            self.outputs_shapes, self.np_outputs_dtypes, self.outputs_names
        )
 
     def _create_triton_input(self, input_data: np.ndarray, input_name: str,
-                             config_input_format: str,
+                             config_input_format: str, shm_handler: Optional[ShmHandlerWrapper] = None) -> Any:
        """
        Create triton InferInput
 
@@ -301,27 +322,28 @@ class TritonClient(BaseClient):
            input_data (np.ndarray): data for send to model
            input_name (str): name of input
            config_input_format (str): triton input format
-
+            shm_handler (ShmHandlerWrapper, optional): shared memory handler. Defaults to None.
 
        Returns:
            Any: triton InferInput for sending request
        """
         infer_input = self.client_module.InferInput(input_name, input_data.shape, config_input_format)
-        if self.
-            cudashm.
-
+        if self.use_cuda_shm or self.use_system_shm:
+            shm_utils = cudashm if self.use_cuda_shm else shm
+            shm_utils.set_shared_memory_region(shm_handler.handler, [input_data])
+            infer_input.set_shared_memory(shm_handler.name, shm_handler.size)
         else:
             infer_input.set_data_from_numpy(input_data)
         return infer_input
 
-    def _create_triton_output(self, output_name: str, binary: bool = True,
+    def _create_triton_output(self, output_name: str, binary: bool = True, shm_handler: Optional[ShmHandlerWrapper] = None) -> Any:
        """
        Create triton InferRequestedOutput
 
        Args:
            output_name (str): output name
            binary (bool, optional): Whether the output is binary. Defaults to True.
-
+            shm_handler (ShmHandlerWrapper, optional): shared memory handler. Defaults to None.
 
        Returns:
            Any: triton InferRequestedOutput for receiving response
@@ -330,10 +352,39 @@ class TritonClient(BaseClient):
             infer_output = self.client_module.InferRequestedOutput(output_name)
         else:
             infer_output = self.client_module.InferRequestedOutput(output_name, binary_data=binary)
-        if self.
-            infer_output.set_shared_memory(
+        if self.use_cuda_shm or self.use_system_shm:
+            infer_output.set_shared_memory(shm_handler.name, shm_handler.size)
         return infer_output
 
+    def _register_shm_regions(self):
+        """
+        Register shared memory regions in Triton
+        """
+        get_shared_memory_status = self.triton_client.get_cuda_shared_memory_status \
+            if self.use_cuda_shm else self.triton_client.get_system_shared_memory_status
+
+        unregister_shared_memory = self.triton_client.unregister_cuda_shared_memory \
+            if self.use_cuda_shm else self.triton_client.unregister_system_shared_memory
+
+        if self.scheme == "grpc":
+            regions_statuses = get_shared_memory_status(as_json=True)['regions']
+        else:
+            regions_statuses = get_shared_memory_status()
+
+        for shm_handler in self.input_shm_handlers + self.output_shm_handlers:
+            old_regions_names = self._get_old_regions_names(regions_statuses, shm_handler.name)
+            for old_region_name in old_regions_names:
+                unregister_shared_memory(old_region_name)
+
+            if self.use_cuda_shm:
+                self.triton_client.register_cuda_shared_memory(
+                    shm_handler.name, cudashm.get_raw_handle(shm_handler.handler), 0, shm_handler.size
+                )
+            else:
+                self.triton_client.register_system_shared_memory(
+                    shm_handler.name, shm_handler.name, shm_handler.size
+                )
+
     def _postprocess_triton_result(self, triton_response: Any, padding_size: int) -> Dict[str, np.ndarray]:
         """
         Postprocess triton response.
@@ -346,15 +397,17 @@ class TritonClient(BaseClient):
            Dict[str, np.ndarray]: dict of output name and output data
        """
         result = dict()
-        for output_name, shm_op_handle in zip(self.outputs_names, self.
-            if self.
+        for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handlers):
+            if self.use_cuda_shm or self.use_system_shm:
                 if self.scheme == "grpc":
                     # output = triton_response.get_output(output_name, as_json=True) # WARN: bug in tritonclient library, return None
                     output = json.loads(MessageToJson(triton_response.get_output(output_name)))
                 else:
                     output = triton_response.get_output(output_name)
-
-
+
+                shm_utils = shm if self.use_system_shm else cudashm
+                result[output_name] = shm_utils.get_contents_as_numpy(
+                    shm_op_handle.handler,
                     utils.triton_to_np_dtype(output["datatype"]),
                     output["shape"],
                 )
@@ -375,17 +428,17 @@ class TritonClient(BaseClient):
 
         for i_batch in range(count_batches):
             triton_inputs = []
-            for input_name, config_input_format,
-                    zip(self.inputs_names, self.triton_inputs_dtypes, self.
+            for input_name, config_input_format, shm_ip_handler in \
+                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handlers):
                 triton_input = self._create_triton_input(
-                    inputs_batches[input_name][i_batch], input_name, config_input_format,
+                    inputs_batches[input_name][i_batch], input_name, config_input_format, shm_ip_handler
                 )
                 triton_inputs.append(triton_input)
 
             triton_outputs = []
-            for output_name,
+            for output_name, shm_op_handlers in zip(self.outputs_names, self.output_shm_handlers):
                 triton_output = self._create_triton_output(
-                    output_name, binary=True,
+                    output_name, binary=True, shm_handler=shm_op_handlers
                 )
                 triton_outputs.append(triton_output)
 
@@ -413,14 +466,14 @@ class TritonClient(BaseClient):
         for i_batch in range(count_batches):
             triton_inputs = []
             for input_name, config_input_format, shm_ip_handle in \
-                    zip(self.inputs_names, self.triton_inputs_dtypes, self.
+                    zip(self.inputs_names, self.triton_inputs_dtypes, self.input_shm_handlers):
                 triton_input = self._create_triton_input(
                     inputs_batches[input_name][i_batch], input_name, config_input_format, shm_ip_handle
                 )
                 triton_inputs.append(triton_input)
 
             triton_outputs = []
-            for output_name, shm_op_handle in zip(self.outputs_names, self.
+            for output_name, shm_op_handle in zip(self.outputs_names, self.output_shm_handlers):
                 triton_output = self._create_triton_output(
                     output_name, binary=True, shm_handle=shm_op_handle
                 )
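The main functional change in this release is the optional system/CUDA shared-memory path shown above. The snippet below is a rough usage sketch based only on the constructor signature and assertions visible in this diff: use_cuda_shm and use_system_shm are mutually exclusive, shared memory requires fixed_batch=True and is_async=False, and max_shm_regions controls pruning of stale regions. The url, model_name, and max_shm_regions values are illustrative, not taken from the package's documentation.
```
# Sketch only: exercises the new shared-memory options added in imb 1.0.3.
from imb.triton import TritonClient

triton_client = TritonClient(
    url='localhost:8000',      # illustrative, reused from the README example
    model_name='arcface',      # illustrative model endpoint
    use_system_shm=True,       # new in 1.0.3: system shared memory
    use_cuda_shm=False,        # mutually exclusive with use_system_shm
    max_shm_regions=2,         # illustrative: unregister older shm regions
    fixed_batch=True,          # required when shared memory is enabled
    is_async=False,            # required when shared memory is enabled
    scheme='http',
    return_dict=True,
    warmup=False
)

# sample_inputs is created when the model has fixed input sizes
outputs = triton_client(*triton_client.sample_inputs)
print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
```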
{imb-1.0.1 → imb-1.0.3}/imb.egg-info/PKG-INFO
RENAMED
@@ -1,37 +1,22 @@
-Metadata-Version: 2.
+Metadata-Version: 2.1
 Name: imb
-Version: 1.0.1
+Version: 1.0.3
 Summary: Python library for run inference of deep learning models in different backends
 Home-page: https://github.com/TheConstant3/InferenceMultiBackend
 Author: p-constant
 Author-email: nikshorop@gmail.com
+License: UNKNOWN
+Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3.8
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: numpy
 Provides-Extra: triton
-Requires-Dist: tritonclient[all]>=2.38.0; extra == "triton"
 Provides-Extra: onnxcpu
-Requires-Dist: onnxruntime>=1.16.0; extra == "onnxcpu"
 Provides-Extra: onnxgpu
-Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "onnxgpu"
 Provides-Extra: all
-
-Requires-Dist: onnxruntime>=1.16.0; extra == "all"
-Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "all"
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: provides-extra
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
+License-File: LICENSE
 
 # InferenceMultiBackend
 
@@ -55,6 +40,8 @@ For support all implemented clients:
 
 OnnxClient usage example
 ```
+from imb.onnx import OnnxClient
+
 onnx_client = OnnxClient(
     model_path='model.onnx',
     model_name='any name',
@@ -64,15 +51,19 @@ onnx_client = OnnxClient(
     fixed_batch=True,
     warmup=True
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = onnx_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = onnx_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
 
 TritonClient usage example
 ```
+from imb.triton import TritonClient
+
 triton_client = TritonClient(
     url='localhost:8000',
     model_name='arcface',
@@ -87,9 +78,11 @@ triton_client = TritonClient(
     return_dict=True,
     warmup=False
 )
+
 # if model has fixed input size (except batch size) then sample_inputs will be created
 sample_inputs = triton_client.sample_inputs
 print('inputs shapes', [o.shape for o in sample_inputs])
+
 outputs = triton_client(*sample_inputs)
 print('outputs shapes', [(o_name, o_value.shape) for o_name, o_value in outputs.items()])
 ```
@@ -103,3 +96,4 @@ fixed_batch - if fixed batch is True, then each batch will have fixed size (padd
 warmup - if True, model will run several calls on sample_inputs while initialization.
 
 return_dict - if True, __call__ return dict {'output_name1': output_value1, ...}, else [output_value1, ...]
+
{imb-1.0.1 → imb-1.0.3}/setup.py
RENAMED
@@ -19,7 +19,7 @@ extras_require["all"] = list(chain(extras_require.values()))
 
 setup(
     name='imb',
-    version='1.0.1',
+    version='1.0.3',
     author='p-constant',
     author_email='nikshorop@gmail.com',
     description='Python library for run inference of deep learning models in different backends',
{imb-1.0.1 → imb-1.0.3}/LICENSE
RENAMED
File without changes
{imb-1.0.1 → imb-1.0.3}/imb/__init__.py
RENAMED
File without changes
{imb-1.0.1 → imb-1.0.3}/imb/base.py
RENAMED
File without changes
{imb-1.0.1 → imb-1.0.3}/imb/onnx.py
RENAMED
File without changes
{imb-1.0.1 → imb-1.0.3}/imb.egg-info/SOURCES.txt
RENAMED
File without changes
{imb-1.0.1 → imb-1.0.3}/imb.egg-info/dependency_links.txt
RENAMED
File without changes
{imb-1.0.1 → imb-1.0.3}/imb.egg-info/requires.txt
RENAMED
File without changes
{imb-1.0.1 → imb-1.0.3}/imb.egg-info/top_level.txt
RENAMED
File without changes
{imb-1.0.1 → imb-1.0.3}/setup.cfg
RENAMED
File without changes