inference_models-0.18.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inference_models/__init__.py +36 -0
- inference_models/configuration.py +72 -0
- inference_models/constants.py +2 -0
- inference_models/entities.py +5 -0
- inference_models/errors.py +137 -0
- inference_models/logger.py +52 -0
- inference_models/model_pipelines/__init__.py +0 -0
- inference_models/model_pipelines/auto_loaders/__init__.py +0 -0
- inference_models/model_pipelines/auto_loaders/core.py +120 -0
- inference_models/model_pipelines/auto_loaders/pipelines_registry.py +36 -0
- inference_models/model_pipelines/face_and_gaze_detection/__init__.py +0 -0
- inference_models/model_pipelines/face_and_gaze_detection/mediapipe_l2cs.py +200 -0
- inference_models/models/__init__.py +0 -0
- inference_models/models/auto_loaders/__init__.py +0 -0
- inference_models/models/auto_loaders/access_manager.py +168 -0
- inference_models/models/auto_loaders/auto_negotiation.py +1329 -0
- inference_models/models/auto_loaders/auto_resolution_cache.py +129 -0
- inference_models/models/auto_loaders/constants.py +7 -0
- inference_models/models/auto_loaders/core.py +1341 -0
- inference_models/models/auto_loaders/dependency_models.py +52 -0
- inference_models/models/auto_loaders/entities.py +57 -0
- inference_models/models/auto_loaders/models_registry.py +497 -0
- inference_models/models/auto_loaders/presentation_utils.py +333 -0
- inference_models/models/auto_loaders/ranking.py +413 -0
- inference_models/models/auto_loaders/utils.py +31 -0
- inference_models/models/base/__init__.py +0 -0
- inference_models/models/base/classification.py +123 -0
- inference_models/models/base/depth_estimation.py +62 -0
- inference_models/models/base/documents_parsing.py +111 -0
- inference_models/models/base/embeddings.py +66 -0
- inference_models/models/base/instance_segmentation.py +87 -0
- inference_models/models/base/keypoints_detection.py +93 -0
- inference_models/models/base/object_detection.py +143 -0
- inference_models/models/base/semantic_segmentation.py +74 -0
- inference_models/models/base/types.py +5 -0
- inference_models/models/clip/__init__.py +0 -0
- inference_models/models/clip/clip_onnx.py +148 -0
- inference_models/models/clip/clip_pytorch.py +104 -0
- inference_models/models/clip/preprocessing.py +162 -0
- inference_models/models/common/__init__.py +0 -0
- inference_models/models/common/cuda.py +30 -0
- inference_models/models/common/model_packages.py +25 -0
- inference_models/models/common/onnx.py +379 -0
- inference_models/models/common/roboflow/__init__.py +0 -0
- inference_models/models/common/roboflow/model_packages.py +361 -0
- inference_models/models/common/roboflow/post_processing.py +436 -0
- inference_models/models/common/roboflow/pre_processing.py +1332 -0
- inference_models/models/common/torch.py +20 -0
- inference_models/models/common/trt.py +266 -0
- inference_models/models/deep_lab_v3_plus/__init__.py +0 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py +282 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_torch.py +264 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_trt.py +313 -0
- inference_models/models/depth_anything_v2/__init__.py +0 -0
- inference_models/models/depth_anything_v2/depth_anything_v2_hf.py +77 -0
- inference_models/models/dinov3/__init__.py +0 -0
- inference_models/models/dinov3/dinov3_classification_onnx.py +348 -0
- inference_models/models/dinov3/dinov3_classification_torch.py +323 -0
- inference_models/models/doctr/__init__.py +0 -0
- inference_models/models/doctr/doctr_torch.py +304 -0
- inference_models/models/easy_ocr/__init__.py +0 -0
- inference_models/models/easy_ocr/easy_ocr_torch.py +222 -0
- inference_models/models/florence2/__init__.py +0 -0
- inference_models/models/florence2/florence2_hf.py +897 -0
- inference_models/models/grounding_dino/__init__.py +0 -0
- inference_models/models/grounding_dino/grounding_dino_torch.py +227 -0
- inference_models/models/l2cs/__init__.py +0 -0
- inference_models/models/l2cs/l2cs_onnx.py +216 -0
- inference_models/models/mediapipe_face_detection/__init__.py +0 -0
- inference_models/models/mediapipe_face_detection/face_detection.py +203 -0
- inference_models/models/moondream2/__init__.py +0 -0
- inference_models/models/moondream2/moondream2_hf.py +281 -0
- inference_models/models/owlv2/__init__.py +0 -0
- inference_models/models/owlv2/cache.py +182 -0
- inference_models/models/owlv2/entities.py +112 -0
- inference_models/models/owlv2/owlv2_hf.py +695 -0
- inference_models/models/owlv2/reference_dataset.py +291 -0
- inference_models/models/paligemma/__init__.py +0 -0
- inference_models/models/paligemma/paligemma_hf.py +209 -0
- inference_models/models/perception_encoder/__init__.py +0 -0
- inference_models/models/perception_encoder/perception_encoder_pytorch.py +197 -0
- inference_models/models/perception_encoder/vision_encoder/__init__.py +0 -0
- inference_models/models/perception_encoder/vision_encoder/config.py +160 -0
- inference_models/models/perception_encoder/vision_encoder/pe.py +742 -0
- inference_models/models/perception_encoder/vision_encoder/rope.py +344 -0
- inference_models/models/perception_encoder/vision_encoder/tokenizer.py +342 -0
- inference_models/models/perception_encoder/vision_encoder/transforms.py +33 -0
- inference_models/models/qwen25vl/__init__.py +1 -0
- inference_models/models/qwen25vl/qwen25vl_hf.py +285 -0
- inference_models/models/resnet/__init__.py +0 -0
- inference_models/models/resnet/resnet_classification_onnx.py +330 -0
- inference_models/models/resnet/resnet_classification_torch.py +305 -0
- inference_models/models/resnet/resnet_classification_trt.py +369 -0
- inference_models/models/rfdetr/__init__.py +0 -0
- inference_models/models/rfdetr/backbone_builder.py +101 -0
- inference_models/models/rfdetr/class_remapping.py +41 -0
- inference_models/models/rfdetr/common.py +115 -0
- inference_models/models/rfdetr/default_labels.py +108 -0
- inference_models/models/rfdetr/dinov2_with_windowed_attn.py +1330 -0
- inference_models/models/rfdetr/misc.py +26 -0
- inference_models/models/rfdetr/ms_deform_attn.py +180 -0
- inference_models/models/rfdetr/ms_deform_attn_func.py +60 -0
- inference_models/models/rfdetr/position_encoding.py +166 -0
- inference_models/models/rfdetr/post_processor.py +83 -0
- inference_models/models/rfdetr/projector.py +373 -0
- inference_models/models/rfdetr/rfdetr_backbone_pytorch.py +394 -0
- inference_models/models/rfdetr/rfdetr_base_pytorch.py +807 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_onnx.py +206 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_pytorch.py +373 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py +227 -0
- inference_models/models/rfdetr/rfdetr_object_detection_onnx.py +244 -0
- inference_models/models/rfdetr/rfdetr_object_detection_pytorch.py +470 -0
- inference_models/models/rfdetr/rfdetr_object_detection_trt.py +270 -0
- inference_models/models/rfdetr/segmentation_head.py +273 -0
- inference_models/models/rfdetr/transformer.py +767 -0
- inference_models/models/roboflow_instant/__init__.py +0 -0
- inference_models/models/roboflow_instant/roboflow_instant_hf.py +141 -0
- inference_models/models/sam/__init__.py +0 -0
- inference_models/models/sam/cache.py +147 -0
- inference_models/models/sam/entities.py +25 -0
- inference_models/models/sam/sam_torch.py +675 -0
- inference_models/models/sam2/__init__.py +0 -0
- inference_models/models/sam2/cache.py +162 -0
- inference_models/models/sam2/entities.py +43 -0
- inference_models/models/sam2/sam2_torch.py +905 -0
- inference_models/models/sam2_rt/__init__.py +0 -0
- inference_models/models/sam2_rt/sam2_pytorch.py +119 -0
- inference_models/models/smolvlm/__init__.py +0 -0
- inference_models/models/smolvlm/smolvlm_hf.py +245 -0
- inference_models/models/trocr/__init__.py +0 -0
- inference_models/models/trocr/trocr_hf.py +53 -0
- inference_models/models/vit/__init__.py +0 -0
- inference_models/models/vit/vit_classification_huggingface.py +319 -0
- inference_models/models/vit/vit_classification_onnx.py +326 -0
- inference_models/models/vit/vit_classification_trt.py +365 -0
- inference_models/models/yolact/__init__.py +1 -0
- inference_models/models/yolact/yolact_instance_segmentation_onnx.py +336 -0
- inference_models/models/yolact/yolact_instance_segmentation_trt.py +361 -0
- inference_models/models/yolo_world/__init__.py +1 -0
- inference_models/models/yolonas/__init__.py +0 -0
- inference_models/models/yolonas/nms.py +44 -0
- inference_models/models/yolonas/yolonas_object_detection_onnx.py +204 -0
- inference_models/models/yolonas/yolonas_object_detection_trt.py +230 -0
- inference_models/models/yolov10/__init__.py +0 -0
- inference_models/models/yolov10/yolov10_object_detection_onnx.py +187 -0
- inference_models/models/yolov10/yolov10_object_detection_trt.py +215 -0
- inference_models/models/yolov11/__init__.py +0 -0
- inference_models/models/yolov11/yolov11_onnx.py +28 -0
- inference_models/models/yolov11/yolov11_torch_script.py +25 -0
- inference_models/models/yolov11/yolov11_trt.py +21 -0
- inference_models/models/yolov12/__init__.py +0 -0
- inference_models/models/yolov12/yolov12_onnx.py +7 -0
- inference_models/models/yolov12/yolov12_torch_script.py +7 -0
- inference_models/models/yolov12/yolov12_trt.py +7 -0
- inference_models/models/yolov5/__init__.py +0 -0
- inference_models/models/yolov5/nms.py +99 -0
- inference_models/models/yolov5/yolov5_instance_segmentation_onnx.py +225 -0
- inference_models/models/yolov5/yolov5_instance_segmentation_trt.py +255 -0
- inference_models/models/yolov5/yolov5_object_detection_onnx.py +192 -0
- inference_models/models/yolov5/yolov5_object_detection_trt.py +218 -0
- inference_models/models/yolov7/__init__.py +0 -0
- inference_models/models/yolov7/yolov7_instance_segmentation_onnx.py +226 -0
- inference_models/models/yolov7/yolov7_instance_segmentation_trt.py +253 -0
- inference_models/models/yolov8/__init__.py +0 -0
- inference_models/models/yolov8/yolov8_classification_onnx.py +181 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_onnx.py +239 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_torch_script.py +201 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_trt.py +268 -0
- inference_models/models/yolov8/yolov8_key_points_detection_onnx.py +263 -0
- inference_models/models/yolov8/yolov8_key_points_detection_torch_script.py +218 -0
- inference_models/models/yolov8/yolov8_key_points_detection_trt.py +287 -0
- inference_models/models/yolov8/yolov8_object_detection_onnx.py +213 -0
- inference_models/models/yolov8/yolov8_object_detection_torch_script.py +166 -0
- inference_models/models/yolov8/yolov8_object_detection_trt.py +231 -0
- inference_models/models/yolov9/__init__.py +0 -0
- inference_models/models/yolov9/yolov9_onnx.py +7 -0
- inference_models/models/yolov9/yolov9_torch_script.py +7 -0
- inference_models/models/yolov9/yolov9_trt.py +7 -0
- inference_models/runtime_introspection/__init__.py +0 -0
- inference_models/runtime_introspection/core.py +410 -0
- inference_models/utils/__init__.py +0 -0
- inference_models/utils/download.py +608 -0
- inference_models/utils/environment.py +28 -0
- inference_models/utils/file_system.py +51 -0
- inference_models/utils/hashing.py +7 -0
- inference_models/utils/imports.py +48 -0
- inference_models/utils/onnx_introspection.py +17 -0
- inference_models/weights_providers/__init__.py +0 -0
- inference_models/weights_providers/core.py +20 -0
- inference_models/weights_providers/entities.py +159 -0
- inference_models/weights_providers/roboflow.py +601 -0
- inference_models-0.18.3.dist-info/METADATA +466 -0
- inference_models-0.18.3.dist-info/RECORD +195 -0
- inference_models-0.18.3.dist-info/WHEEL +5 -0
- inference_models-0.18.3.dist-info/top_level.txt +1 -0

@@ -0,0 +1,361 @@
+from threading import Lock
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+import torchvision
+
+from inference_models import InstanceDetections, InstanceSegmentationModel
+from inference_models.configuration import DEFAULT_DEVICE
+from inference_models.entities import ColorFormat
+from inference_models.errors import (
+    CorruptedModelPackageError,
+    MissingDependencyError,
+    ModelRuntimeError,
+)
+from inference_models.models.common.cuda import (
+    use_cuda_context,
+    use_primary_cuda_context,
+)
+from inference_models.models.common.model_packages import get_model_package_contents
+from inference_models.models.common.roboflow.model_packages import (
+    InferenceConfig,
+    PreProcessingMetadata,
+    ResizeMode,
+    TRTConfig,
+    parse_class_names_file,
+    parse_inference_config,
+    parse_trt_config,
+)
+from inference_models.models.common.roboflow.post_processing import (
+    align_instance_segmentation_results,
+    crop_masks_to_boxes,
+)
+from inference_models.models.common.roboflow.pre_processing import (
+    pre_process_network_input,
+)
+from inference_models.models.common.trt import (
+    get_engine_inputs_and_outputs,
+    infer_from_trt_engine,
+    load_model,
+)
+
+try:
+    import tensorrt as trt
+except ImportError as import_error:
+    raise MissingDependencyError(
+        message=f"Could not import YOLOv8 model with TRT backend - this error means that some additional dependencies "
+        f"are not installed in the environment. If you run the `inference-models` library directly in your Python "
+        f"program, make sure the following extras of the package are installed: `trt10` - installation can only "
+        f"succeed for Linux and Windows machines with Cuda 12 installed. Jetson devices, should have TRT 10.x "
+        f"installed for all builds with Jetpack 6. "
+        f"If you see this error using Roboflow infrastructure, make sure the service you use does support the model. "
+        f"You can also contact Roboflow to get support.",
+        help_url="https://todo",
+    ) from import_error
+
+try:
+    import pycuda.driver as cuda
+except ImportError as import_error:
+    raise MissingDependencyError(
+        message="TODO", help_url="https://todo"
+    ) from import_error
+
+
+class YOLOACTForInstanceSegmentationTRT(
+    InstanceSegmentationModel[
+        torch.Tensor,
+        PreProcessingMetadata,
+        Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
+    ]
+):
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        model_name_or_path: str,
+        device: torch.device = DEFAULT_DEVICE,
+        engine_host_code_allowed: bool = False,
+        **kwargs,
+    ) -> "YOLOACTForInstanceSegmentationTRT":
+        if device.type != "cuda":
+            raise ModelRuntimeError(
+                message=f"TRT engine only runs on CUDA device - {device} device detected.",
+                help_url="https://todo",
+            )
+        model_package_content = get_model_package_contents(
+            model_package_dir=model_name_or_path,
+            elements=[
+                "class_names.txt",
+                "inference_config.json",
+                "trt_config.json",
+                "engine.plan",
+            ],
+        )
+        class_names = parse_class_names_file(
+            class_names_path=model_package_content["class_names.txt"]
+        )
+        inference_config = parse_inference_config(
+            config_path=model_package_content["inference_config.json"],
+            allowed_resize_modes={
+                ResizeMode.STRETCH_TO,
+                ResizeMode.LETTERBOX,
+                ResizeMode.CENTER_CROP,
+                ResizeMode.LETTERBOX_REFLECT_EDGES,
+            },
+        )
+        trt_config = parse_trt_config(
+            config_path=model_package_content["trt_config.json"]
+        )
+        cuda.init()
+        cuda_device = cuda.Device(device.index or 0)
+        with use_primary_cuda_context(cuda_device=cuda_device) as cuda_context:
+            engine = load_model(
+                model_path=model_package_content["engine.plan"],
+                engine_host_code_allowed=engine_host_code_allowed,
+            )
+            execution_context = engine.create_execution_context()
+        inputs, outputs = get_engine_inputs_and_outputs(engine=engine)
+        if len(inputs) != 1:
+            raise CorruptedModelPackageError(
+                message=f"Implementation assume single model input, found: {len(inputs)}.",
+                help_url="https://todo",
+            )
+        if len(outputs) != 5:
+            raise CorruptedModelPackageError(
+                message=f"Implementation assume 5 model outputs, found: {len(outputs)}.",
+                help_url="https://todo",
+            )
+        return cls(
+            engine=engine,
+            input_name=inputs[0],
+            output_name=outputs[0],
+            class_names=class_names,
+            inference_config=inference_config,
+            trt_config=trt_config,
+            device=device,
+            cuda_context=cuda_context,
+            execution_context=execution_context,
+        )
+
+    def __init__(
+        self,
+        engine: trt.ICudaEngine,
+        input_name: str,
+        output_name: str,
+        class_names: List[str],
+        inference_config: InferenceConfig,
+        trt_config: TRTConfig,
+        device: torch.device,
+        cuda_context: cuda.Context,
+        execution_context: trt.IExecutionContext,
+    ):
+        self._engine = engine
+        self._input_name = input_name
+        self._output_names = [output_name]
+        self._class_names = class_names
+        self._inference_config = inference_config
+        self._trt_config = trt_config
+        self._device = device
+        self._cuda_context = cuda_context
+        self._execution_context = execution_context
+        self._lock = Lock()
+
+    @property
+    def class_names(self) -> List[str]:
+        return self._class_names
+
+    def pre_process(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        input_color_format: Optional[ColorFormat] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, List[PreProcessingMetadata]]:
+        return pre_process_network_input(
+            images=images,
+            image_pre_processing=self._inference_config.image_pre_processing,
+            network_input=self._inference_config.network_input,
+            target_device=self._device,
+            input_color_format=input_color_format,
+        )
+
+    def forward(
+        self, pre_processed_images: torch.Tensor, **kwargs
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        with self._lock:
+            with use_cuda_context(context=self._cuda_context):
+                (
+                    all_loc_data,
+                    all_conf_data,
+                    all_mask_data,
+                    all_prior_data,
+                    all_proto_data,
+                ) = ([], [], [], [], [])
+                for image in pre_processed_images:
+                    loc_data, conf_data, mask_data, prior_data, proto_data = (
+                        infer_from_trt_engine(
+                            pre_processed_images=image.unsqueeze(0).contiguous(),
+                            trt_config=self._trt_config,
+                            engine=self._engine,
+                            context=self._execution_context,
+                            device=self._device,
+                            input_name=self._input_name,
+                            outputs=self._output_names,
+                        )
+                    )
+                    all_loc_data.append(loc_data)
+                    all_conf_data.append(conf_data)
+                    all_mask_data.append(mask_data)
+                    all_prior_data.append(prior_data)
+                    all_proto_data.append(proto_data)
+                return (
+                    torch.cat(all_loc_data, dim=0),
+                    torch.cat(all_conf_data, dim=0),
+                    torch.cat(all_mask_data, dim=0),
+                    torch.stack(all_prior_data, dim=0),
+                    torch.cat(all_proto_data, dim=0),
+                )
+
+    def post_process(
+        self,
+        model_results: Tuple[
+            torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor
+        ],
+        pre_processing_meta: List[PreProcessingMetadata],
+        conf_thresh: float = 0.25,
+        iou_thresh: float = 0.45,
+        max_detections: int = 100,
+        class_agnostic: bool = False,
+        **kwargs,
+    ) -> List[InstanceDetections]:
+        all_loc_data, all_conf_data, all_mask_data, all_prior_data, all_proto_data = (
+            model_results
+        )
+        batch_size = all_loc_data.shape[0]
+        num_priors = all_loc_data.shape[1]
+        boxes = torch.zeros((batch_size, num_priors, 4), device=self._device)
+        for batch_element_id, (
+            batch_element_loc_data,
+            batch_element_priors,
+            image_prep_meta,
+        ) in enumerate(zip(all_loc_data, all_prior_data, pre_processing_meta)):
+            image_boxes = decode_predicted_bboxes(
+                loc_data=batch_element_loc_data,
+                priors=batch_element_priors,
+            )
+            inference_height, inference_width = (
+                image_prep_meta.inference_size.height,
+                image_prep_meta.inference_size.width,
+            )
+            scale = torch.tensor(
+                [inference_width, inference_height, inference_width, inference_height],
+                device=self._device,
+            )
+            image_boxes = image_boxes.mul_(scale)
+            boxes[batch_element_id, :, :] = image_boxes
+        all_conf_data = all_conf_data[:, :, 1:]  # remove background class
+        instances = torch.cat([boxes, all_conf_data, all_mask_data], dim=2)
+        nms_results = run_nms_for_instance_segmentation(
+            output=instances,
+            conf_thresh=conf_thresh,
+            iou_thresh=iou_thresh,
+            max_detections=max_detections,
+            class_agnostic=class_agnostic,
+        )
+        final_results = []
+        for image_bboxes, image_protos, image_meta in zip(
+            nms_results, all_proto_data, pre_processing_meta
+        ):
+            pre_processed_masks = image_protos @ image_bboxes[:, 6:].T
+            pre_processed_masks = 1 / (1 + torch.exp(-pre_processed_masks))
+            pre_processed_masks = torch.permute(pre_processed_masks, (2, 0, 1))
+            cropped_masks = crop_masks_to_boxes(
+                image_bboxes[:, :4], pre_processed_masks
+            )
+            padding = (
+                image_meta.pad_left,
+                image_meta.pad_top,
+                image_meta.pad_right,
+                image_meta.pad_bottom,
+            )
+            aligned_boxes, aligned_masks = align_instance_segmentation_results(
+                image_bboxes=image_bboxes,
+                masks=cropped_masks,
+                padding=padding,
+                scale_height=image_meta.scale_height,
+                scale_width=image_meta.scale_width,
+                original_size=image_meta.original_size,
+                size_after_pre_processing=image_meta.size_after_pre_processing,
+                inference_size=image_meta.inference_size,
+                static_crop_offset=image_meta.static_crop_offset,
+                binarization_threshold=0.5,
+            )
+            final_results.append(
+                InstanceDetections(
+                    xyxy=aligned_boxes[:, :4].round().int(),
+                    class_id=aligned_boxes[:, 5].int(),
+                    confidence=aligned_boxes[:, 4],
+                    mask=aligned_masks,
+                )
+            )
+        return final_results
+
+
+def decode_predicted_bboxes(
+    loc_data: torch.Tensor, priors: torch.Tensor
+) -> torch.Tensor:
+    variances = torch.tensor([0.1, 0.2], device=loc_data.device)
+    boxes = torch.cat(
+        [
+            priors[:, :2] + loc_data[:, :2] * variances[0] * priors[:, 2:],
+            priors[:, 2:] * torch.exp(loc_data[:, 2:] * variances[1]),
+        ],
+        dim=1,
+    )
+    boxes[:, :2] -= boxes[:, 2:] / 2
+    boxes[:, 2:] += boxes[:, :2]
+    return boxes
+
+
+def run_nms_for_instance_segmentation(
+    output: torch.Tensor,
+    conf_thresh: float = 0.25,
+    iou_thresh: float = 0.45,
+    max_detections: int = 100,
+    class_agnostic: bool = False,
+) -> List[torch.Tensor]:
+    bs = output.shape[0]
+    boxes = output[:, :, :4]  # (N, 19248, 4)
+    scores = output[:, :, 4:-32]  # (N, 19248, num_classes)
+    masks = output[:, :, -32:]
+    results = []
+    for b in range(bs):
+        bboxes = boxes[b]  # (19248, 4)
+        class_scores = scores[b]  # (19248, 80)
+        box_masks = masks[b]
+        class_conf, class_ids = class_scores.max(1)  # (8400,), (8400,)
+        mask = class_conf > conf_thresh
+        if mask.sum() == 0:
+            results.append(torch.zeros((0, 38), device=output.device))
+            continue
+        bboxes = bboxes[mask]
+        class_conf = class_conf[mask]
+        class_ids = class_ids[mask]
+        box_masks = box_masks[mask]
+        # Class-agnostic NMS -> use dummy class ids
+        nms_class_ids = torch.zeros_like(class_ids) if class_agnostic else class_ids
+        keep = torchvision.ops.batched_nms(
+            bboxes, class_conf, nms_class_ids, iou_thresh
+        )
+        keep = keep[:max_detections]
+        detections = torch.cat(
+            [
+                bboxes[keep],
+                class_conf[keep].unsqueeze(1),
+                class_ids[keep].unsqueeze(1).float(),
+                box_masks[keep],
+            ],
+            dim=1,
+        )  # [x1, y1, x2, y2, conf, cls]
+        results.append(detections)
+    return results
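
Going by the file list, the hunk above is inference_models/models/yolact/yolact_instance_segmentation_trt.py. The class follows the package's three-stage contract: pre_process builds the network input tensor plus per-image metadata, forward runs the TRT engine image by image under a lock and the stored CUDA context, and post_process decodes SSD-style prior boxes, applies NMS, and assembles masks from the prototype tensor. A minimal usage sketch follows; the local model package directory, its contents, and the input image are assumptions for illustration, not files shipped with the wheel.

# Hedged sketch: driving the class above via pre_process -> forward -> post_process.
# "./yolact_package" and "example.jpg" are hypothetical local paths.
import cv2
import torch

from inference_models.models.yolact.yolact_instance_segmentation_trt import (
    YOLOACTForInstanceSegmentationTRT,
)

model = YOLOACTForInstanceSegmentationTRT.from_pretrained(
    "./yolact_package",  # assumed to hold class_names.txt, inference_config.json, trt_config.json, engine.plan
    device=torch.device("cuda:0"),
)
image = cv2.imread("example.jpg")  # pre_process also accepts np.ndarray inputs
tensor, metadata = model.pre_process(image)
raw_outputs = model.forward(tensor)
detections = model.post_process(raw_outputs, metadata, conf_thresh=0.3)
print(model.class_names[:5], detections[0].xyxy.shape)
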
@@ -0,0 +1 @@
+# TODO: implement in the future - clarify caching in new inference first

File without changes

@@ -0,0 +1,44 @@
+from typing import List
+
+import torch
+import torchvision
+
+
+def run_yolonas_nms_for_object_detection(
+    output: torch.Tensor,
+    conf_thresh: float = 0.25,
+    iou_thresh: float = 0.45,
+    max_detections: int = 100,
+    class_agnostic: bool = False,
+) -> List[torch.Tensor]:
+    bs = output.shape[0]
+    boxes = output[:, :, :4]
+    scores = output[:, :, 4:]
+    results = []
+    for b in range(bs):
+        # Combine transpose & max for efficiency
+        class_scores = scores[b]  # (8400, cls_num)
+        class_conf, class_ids = torch.max(class_scores, dim=-1)
+        mask = class_conf > conf_thresh
+        if not torch.any(mask):
+            results.append(torch.zeros((0, 6), device=output.device))
+            continue
+        bboxes = boxes[b][mask]
+        class_conf = class_conf[mask]
+        class_ids = class_ids[mask]
+        nms_class_ids = torch.zeros_like(class_ids) if class_agnostic else class_ids
+        keep = torchvision.ops.batched_nms(
+            bboxes, class_conf, nms_class_ids, iou_thresh
+        )
+        if keep.numel() > max_detections:
+            keep = keep[:max_detections]
+        detections = torch.cat(
+            (
+                bboxes[keep],
+                class_conf[keep, None],  # unsqueeze(1) is replaced with None
+                class_ids[keep, None].float(),
+            ),
+            1,
+        )  # [x1, y1, x2, y2, conf, cls]
+        results.append(detections)
+    return results
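
The helper above (inference_models/models/yolonas/nms.py per the file list) expects a raw (batch, anchors, 4 + num_classes) tensor with xyxy boxes followed by per-class scores, and returns one (k, 6) tensor of [x1, y1, x2, y2, conf, cls] rows per image. A small synthetic-tensor sketch, with anchor and class counts chosen only for illustration:

# Hedged sketch of the expected input layout for run_yolonas_nms_for_object_detection.
import torch

from inference_models.models.yolonas.nms import run_yolonas_nms_for_object_detection

batch, anchors, num_classes = 2, 100, 3
top_left = torch.rand(batch, anchors, 2) * 600
wh = torch.rand(batch, anchors, 2) * 40 + 1.0
boxes = torch.cat([top_left, top_left + wh], dim=-1)  # valid xyxy boxes
scores = torch.rand(batch, anchors, num_classes)  # per-class confidences
output = torch.cat([boxes, scores], dim=-1)  # (batch, anchors, 4 + num_classes)

per_image = run_yolonas_nms_for_object_detection(
    output, conf_thresh=0.5, iou_thresh=0.45, max_detections=10
)
for detections in per_image:
    # each row is [x1, y1, x2, y2, conf, cls]; images with no detections yield (0, 6)
    print(detections.shape)
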
@@ -0,0 +1,204 @@
+from threading import Lock
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+from inference_models import Detections, ObjectDetectionModel
+from inference_models.configuration import DEFAULT_DEVICE
+from inference_models.entities import ColorFormat
+from inference_models.errors import (
+    CorruptedModelPackageError,
+    EnvironmentConfigurationError,
+    MissingDependencyError,
+)
+from inference_models.models.common.model_packages import get_model_package_contents
+from inference_models.models.common.onnx import (
+    run_session_with_batch_size_limit,
+    set_execution_provider_defaults,
+)
+from inference_models.models.common.roboflow.model_packages import (
+    InferenceConfig,
+    PreProcessingMetadata,
+    ResizeMode,
+    parse_class_names_file,
+    parse_inference_config,
+)
+from inference_models.models.common.roboflow.post_processing import rescale_detections
+from inference_models.models.common.roboflow.pre_processing import (
+    pre_process_network_input,
+)
+from inference_models.models.yolonas.nms import run_yolonas_nms_for_object_detection
+from inference_models.utils.onnx_introspection import (
+    get_selected_onnx_execution_providers,
+)
+
+try:
+    import onnxruntime
+except ImportError as import_error:
+    raise MissingDependencyError(
+        message=f"Could not import Yolo NAS model with ONNX backend - this error means that some additional dependencies "
+        f"are not installed in the environment. If you run the `inference-models` library directly in your Python "
+        f"program, make sure the following extras of the package are installed: \n"
+        f"\t* `onnx-cpu` - when you wish to use library with CPU support only\n"
+        f"\t* `onnx-cu12` - for running on GPU with Cuda 12 installed\n"
+        f"\t* `onnx-cu118` - for running on GPU with Cuda 11.8 installed\n"
+        f"\t* `onnx-jp6-cu126` - for running on Jetson with Jetpack 6\n"
+        f"If you see this error using Roboflow infrastructure, make sure the service you use does support the model. "
+        f"You can also contact Roboflow to get support.",
+        help_url="https://todo",
+    ) from import_error
+
+
+class YOLONasForObjectDetectionOnnx(
+    ObjectDetectionModel[torch.Tensor, PreProcessingMetadata, torch.Tensor]
+):
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        model_name_or_path: str,
+        onnx_execution_providers: Optional[List[Union[str, tuple]]] = None,
+        default_onnx_trt_options: bool = True,
+        device: torch.device = DEFAULT_DEVICE,
+        **kwargs,
+    ) -> "YOLONasForObjectDetectionOnnx":
+        if onnx_execution_providers is None:
+            onnx_execution_providers = get_selected_onnx_execution_providers()
+        if not onnx_execution_providers:
+            raise EnvironmentConfigurationError(
+                message=f"Could not initialize model - selected backend is ONNX which requires execution provider to "
+                f"be specified - explicitly in `from_pretrained(...)` method or via env variable "
+                f"`ONNXRUNTIME_EXECUTION_PROVIDERS`. If you run model locally - adjust your setup, otherwise "
+                f"contact the platform support.",
+                help_url="https://todo",
+            )
+        onnx_execution_providers = set_execution_provider_defaults(
+            providers=onnx_execution_providers,
+            model_package_path=model_name_or_path,
+            device=device,
+            default_onnx_trt_options=default_onnx_trt_options,
+        )
+        model_package_content = get_model_package_contents(
+            model_package_dir=model_name_or_path,
+            elements=[
+                "class_names.txt",
+                "inference_config.json",
+                "weights.onnx",
+            ],
+        )
+        class_names = parse_class_names_file(
+            class_names_path=model_package_content["class_names.txt"]
+        )
+        inference_config = parse_inference_config(
+            config_path=model_package_content["inference_config.json"],
+            allowed_resize_modes={
+                ResizeMode.STRETCH_TO,
+                ResizeMode.LETTERBOX,
+                ResizeMode.CENTER_CROP,
+                ResizeMode.LETTERBOX_REFLECT_EDGES,
+            },
+        )
+        if inference_config.post_processing.type != "nms":
+            raise CorruptedModelPackageError(
+                message="Expected NMS to be the post-processing",
+                help_url="https://todo",
+            )
+        if inference_config.post_processing.fused is True:
+            raise CorruptedModelPackageError(
+                message="Model implementation does not support fused NMS",
+                help_url="https://todo",
+            )
+        session = onnxruntime.InferenceSession(
+            path_or_bytes=model_package_content["weights.onnx"],
+            providers=onnx_execution_providers,
+        )
+        input_batch_size = session.get_inputs()[0].shape[0]
+        if isinstance(input_batch_size, str):
+            input_batch_size = None
+        input_name = session.get_inputs()[0].name
+        return cls(
+            session=session,
+            input_name=input_name,
+            class_names=class_names,
+            inference_config=inference_config,
+            device=device,
+            input_batch_size=input_batch_size,
+        )
+
+    def __init__(
+        self,
+        session: onnxruntime.InferenceSession,
+        input_name: str,
+        inference_config: InferenceConfig,
+        class_names: List[str],
+        device: torch.device,
+        input_batch_size: Optional[int],
+    ):
+        self._session = session
+        self._input_name = input_name
+        self._inference_config = inference_config
+        self._class_names = class_names
+        self._device = device
+        self._input_batch_size = input_batch_size
+        self._session_thread_lock = Lock()
+
+    @property
+    def class_names(self) -> List[str]:
+        return self._class_names
+
+    def pre_process(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        input_color_format: Optional[ColorFormat] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, List[PreProcessingMetadata]]:
+        return pre_process_network_input(
+            images=images,
+            image_pre_processing=self._inference_config.image_pre_processing,
+            network_input=self._inference_config.network_input,
+            target_device=self._device,
+            input_color_format=input_color_format,
+        )
+
+    def forward(self, pre_processed_images: torch.Tensor, **kwargs) -> torch.Tensor:
+        with self._session_thread_lock:
+            boxes, class_confs = run_session_with_batch_size_limit(
+                session=self._session,
+                inputs={self._input_name: pre_processed_images},
+                min_batch_size=self._input_batch_size,
+                max_batch_size=self._input_batch_size,
+            )
+            return torch.cat([boxes, class_confs], dim=-1)
+
+    def post_process(
+        self,
+        model_results: torch.Tensor,
+        pre_processing_meta: List[PreProcessingMetadata],
+        conf_thresh: float = 0.25,
+        iou_thresh: float = 0.45,
+        max_detections: int = 100,
+        class_agnostic: bool = False,
+        **kwargs,
+    ) -> List[Detections]:
+        nms_results = run_yolonas_nms_for_object_detection(
+            output=model_results,
+            conf_thresh=conf_thresh,
+            iou_thresh=iou_thresh,
+            max_detections=max_detections,
+            class_agnostic=class_agnostic,
+        )
+        rescaled_results = rescale_detections(
+            detections=nms_results,
+            images_metadata=pre_processing_meta,
+        )
+        results = []
+        for result in rescaled_results:
+            results.append(
+                Detections(
+                    xyxy=result[:, :4].round().int(),
+                    class_id=result[:, 5].int(),
+                    confidence=result[:, 4],
+                )
+            )
+        return results
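
The final hunk matches inference_models/models/yolonas/yolonas_object_detection_onnx.py in the file list. Below is a minimal sketch of loading it with an explicitly chosen ONNX Runtime execution provider instead of relying on the ONNXRUNTIME_EXECUTION_PROVIDERS environment variable; the package directory and image path are assumptions for illustration.

# Hedged sketch: CPU-only run of the YOLO-NAS ONNX model shown above.
# "./yolonas_package" and "example.jpg" are hypothetical local paths.
import cv2
import torch

from inference_models.models.yolonas.yolonas_object_detection_onnx import (
    YOLONasForObjectDetectionOnnx,
)

model = YOLONasForObjectDetectionOnnx.from_pretrained(
    "./yolonas_package",  # assumed to hold class_names.txt, inference_config.json, weights.onnx
    onnx_execution_providers=["CPUExecutionProvider"],
    device=torch.device("cpu"),
)
tensor, metadata = model.pre_process(cv2.imread("example.jpg"))
detections = model.post_process(model.forward(tensor), metadata, conf_thresh=0.4)
for image_detections in detections:
    print(image_detections.xyxy, image_detections.class_id, image_detections.confidence)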