PyPI - inference-models - Versions diffs - 0.18.3__py3-none-any.whl - Mend

inference-models 0.18.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (195) hide show

inference_models/__init__.py +36 -0
inference_models/configuration.py +72 -0
inference_models/constants.py +2 -0
inference_models/entities.py +5 -0
inference_models/errors.py +137 -0
inference_models/logger.py +52 -0
inference_models/model_pipelines/__init__.py +0 -0
inference_models/model_pipelines/auto_loaders/__init__.py +0 -0
inference_models/model_pipelines/auto_loaders/core.py +120 -0
inference_models/model_pipelines/auto_loaders/pipelines_registry.py +36 -0
inference_models/model_pipelines/face_and_gaze_detection/__init__.py +0 -0
inference_models/model_pipelines/face_and_gaze_detection/mediapipe_l2cs.py +200 -0
inference_models/models/__init__.py +0 -0
inference_models/models/auto_loaders/__init__.py +0 -0
inference_models/models/auto_loaders/access_manager.py +168 -0
inference_models/models/auto_loaders/auto_negotiation.py +1329 -0
inference_models/models/auto_loaders/auto_resolution_cache.py +129 -0
inference_models/models/auto_loaders/constants.py +7 -0
inference_models/models/auto_loaders/core.py +1341 -0
inference_models/models/auto_loaders/dependency_models.py +52 -0
inference_models/models/auto_loaders/entities.py +57 -0
inference_models/models/auto_loaders/models_registry.py +497 -0
inference_models/models/auto_loaders/presentation_utils.py +333 -0
inference_models/models/auto_loaders/ranking.py +413 -0
inference_models/models/auto_loaders/utils.py +31 -0
inference_models/models/base/__init__.py +0 -0
inference_models/models/base/classification.py +123 -0
inference_models/models/base/depth_estimation.py +62 -0
inference_models/models/base/documents_parsing.py +111 -0
inference_models/models/base/embeddings.py +66 -0
inference_models/models/base/instance_segmentation.py +87 -0
inference_models/models/base/keypoints_detection.py +93 -0
inference_models/models/base/object_detection.py +143 -0
inference_models/models/base/semantic_segmentation.py +74 -0
inference_models/models/base/types.py +5 -0
inference_models/models/clip/__init__.py +0 -0
inference_models/models/clip/clip_onnx.py +148 -0
inference_models/models/clip/clip_pytorch.py +104 -0
inference_models/models/clip/preprocessing.py +162 -0
inference_models/models/common/__init__.py +0 -0
inference_models/models/common/cuda.py +30 -0
inference_models/models/common/model_packages.py +25 -0
inference_models/models/common/onnx.py +379 -0
inference_models/models/common/roboflow/__init__.py +0 -0
inference_models/models/common/roboflow/model_packages.py +361 -0
inference_models/models/common/roboflow/post_processing.py +436 -0
inference_models/models/common/roboflow/pre_processing.py +1332 -0
inference_models/models/common/torch.py +20 -0
inference_models/models/common/trt.py +266 -0
inference_models/models/deep_lab_v3_plus/__init__.py +0 -0
inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py +282 -0
inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_torch.py +264 -0
inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_trt.py +313 -0
inference_models/models/depth_anything_v2/__init__.py +0 -0
inference_models/models/depth_anything_v2/depth_anything_v2_hf.py +77 -0
inference_models/models/dinov3/__init__.py +0 -0
inference_models/models/dinov3/dinov3_classification_onnx.py +348 -0
inference_models/models/dinov3/dinov3_classification_torch.py +323 -0
inference_models/models/doctr/__init__.py +0 -0
inference_models/models/doctr/doctr_torch.py +304 -0
inference_models/models/easy_ocr/__init__.py +0 -0
inference_models/models/easy_ocr/easy_ocr_torch.py +222 -0
inference_models/models/florence2/__init__.py +0 -0
inference_models/models/florence2/florence2_hf.py +897 -0
inference_models/models/grounding_dino/__init__.py +0 -0
inference_models/models/grounding_dino/grounding_dino_torch.py +227 -0
inference_models/models/l2cs/__init__.py +0 -0
inference_models/models/l2cs/l2cs_onnx.py +216 -0
inference_models/models/mediapipe_face_detection/__init__.py +0 -0
inference_models/models/mediapipe_face_detection/face_detection.py +203 -0
inference_models/models/moondream2/__init__.py +0 -0
inference_models/models/moondream2/moondream2_hf.py +281 -0
inference_models/models/owlv2/__init__.py +0 -0
inference_models/models/owlv2/cache.py +182 -0
inference_models/models/owlv2/entities.py +112 -0
inference_models/models/owlv2/owlv2_hf.py +695 -0
inference_models/models/owlv2/reference_dataset.py +291 -0
inference_models/models/paligemma/__init__.py +0 -0
inference_models/models/paligemma/paligemma_hf.py +209 -0
inference_models/models/perception_encoder/__init__.py +0 -0
inference_models/models/perception_encoder/perception_encoder_pytorch.py +197 -0
inference_models/models/perception_encoder/vision_encoder/__init__.py +0 -0
inference_models/models/perception_encoder/vision_encoder/config.py +160 -0
inference_models/models/perception_encoder/vision_encoder/pe.py +742 -0
inference_models/models/perception_encoder/vision_encoder/rope.py +344 -0
inference_models/models/perception_encoder/vision_encoder/tokenizer.py +342 -0
inference_models/models/perception_encoder/vision_encoder/transforms.py +33 -0
inference_models/models/qwen25vl/__init__.py +1 -0
inference_models/models/qwen25vl/qwen25vl_hf.py +285 -0
inference_models/models/resnet/__init__.py +0 -0
inference_models/models/resnet/resnet_classification_onnx.py +330 -0
inference_models/models/resnet/resnet_classification_torch.py +305 -0
inference_models/models/resnet/resnet_classification_trt.py +369 -0
inference_models/models/rfdetr/__init__.py +0 -0
inference_models/models/rfdetr/backbone_builder.py +101 -0
inference_models/models/rfdetr/class_remapping.py +41 -0
inference_models/models/rfdetr/common.py +115 -0
inference_models/models/rfdetr/default_labels.py +108 -0
inference_models/models/rfdetr/dinov2_with_windowed_attn.py +1330 -0
inference_models/models/rfdetr/misc.py +26 -0
inference_models/models/rfdetr/ms_deform_attn.py +180 -0
inference_models/models/rfdetr/ms_deform_attn_func.py +60 -0
inference_models/models/rfdetr/position_encoding.py +166 -0
inference_models/models/rfdetr/post_processor.py +83 -0
inference_models/models/rfdetr/projector.py +373 -0
inference_models/models/rfdetr/rfdetr_backbone_pytorch.py +394 -0
inference_models/models/rfdetr/rfdetr_base_pytorch.py +807 -0
inference_models/models/rfdetr/rfdetr_instance_segmentation_onnx.py +206 -0
inference_models/models/rfdetr/rfdetr_instance_segmentation_pytorch.py +373 -0
inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py +227 -0
inference_models/models/rfdetr/rfdetr_object_detection_onnx.py +244 -0
inference_models/models/rfdetr/rfdetr_object_detection_pytorch.py +470 -0
inference_models/models/rfdetr/rfdetr_object_detection_trt.py +270 -0
inference_models/models/rfdetr/segmentation_head.py +273 -0
inference_models/models/rfdetr/transformer.py +767 -0
inference_models/models/roboflow_instant/__init__.py +0 -0
inference_models/models/roboflow_instant/roboflow_instant_hf.py +141 -0
inference_models/models/sam/__init__.py +0 -0
inference_models/models/sam/cache.py +147 -0
inference_models/models/sam/entities.py +25 -0
inference_models/models/sam/sam_torch.py +675 -0
inference_models/models/sam2/__init__.py +0 -0
inference_models/models/sam2/cache.py +162 -0
inference_models/models/sam2/entities.py +43 -0
inference_models/models/sam2/sam2_torch.py +905 -0
inference_models/models/sam2_rt/__init__.py +0 -0
inference_models/models/sam2_rt/sam2_pytorch.py +119 -0
inference_models/models/smolvlm/__init__.py +0 -0
inference_models/models/smolvlm/smolvlm_hf.py +245 -0
inference_models/models/trocr/__init__.py +0 -0
inference_models/models/trocr/trocr_hf.py +53 -0
inference_models/models/vit/__init__.py +0 -0
inference_models/models/vit/vit_classification_huggingface.py +319 -0
inference_models/models/vit/vit_classification_onnx.py +326 -0
inference_models/models/vit/vit_classification_trt.py +365 -0
inference_models/models/yolact/__init__.py +1 -0
inference_models/models/yolact/yolact_instance_segmentation_onnx.py +336 -0
inference_models/models/yolact/yolact_instance_segmentation_trt.py +361 -0
inference_models/models/yolo_world/__init__.py +1 -0
inference_models/models/yolonas/__init__.py +0 -0
inference_models/models/yolonas/nms.py +44 -0
inference_models/models/yolonas/yolonas_object_detection_onnx.py +204 -0
inference_models/models/yolonas/yolonas_object_detection_trt.py +230 -0
inference_models/models/yolov10/__init__.py +0 -0
inference_models/models/yolov10/yolov10_object_detection_onnx.py +187 -0
inference_models/models/yolov10/yolov10_object_detection_trt.py +215 -0
inference_models/models/yolov11/__init__.py +0 -0
inference_models/models/yolov11/yolov11_onnx.py +28 -0
inference_models/models/yolov11/yolov11_torch_script.py +25 -0
inference_models/models/yolov11/yolov11_trt.py +21 -0
inference_models/models/yolov12/__init__.py +0 -0
inference_models/models/yolov12/yolov12_onnx.py +7 -0
inference_models/models/yolov12/yolov12_torch_script.py +7 -0
inference_models/models/yolov12/yolov12_trt.py +7 -0
inference_models/models/yolov5/__init__.py +0 -0
inference_models/models/yolov5/nms.py +99 -0
inference_models/models/yolov5/yolov5_instance_segmentation_onnx.py +225 -0
inference_models/models/yolov5/yolov5_instance_segmentation_trt.py +255 -0
inference_models/models/yolov5/yolov5_object_detection_onnx.py +192 -0
inference_models/models/yolov5/yolov5_object_detection_trt.py +218 -0
inference_models/models/yolov7/__init__.py +0 -0
inference_models/models/yolov7/yolov7_instance_segmentation_onnx.py +226 -0
inference_models/models/yolov7/yolov7_instance_segmentation_trt.py +253 -0
inference_models/models/yolov8/__init__.py +0 -0
inference_models/models/yolov8/yolov8_classification_onnx.py +181 -0
inference_models/models/yolov8/yolov8_instance_segmentation_onnx.py +239 -0
inference_models/models/yolov8/yolov8_instance_segmentation_torch_script.py +201 -0
inference_models/models/yolov8/yolov8_instance_segmentation_trt.py +268 -0
inference_models/models/yolov8/yolov8_key_points_detection_onnx.py +263 -0
inference_models/models/yolov8/yolov8_key_points_detection_torch_script.py +218 -0
inference_models/models/yolov8/yolov8_key_points_detection_trt.py +287 -0
inference_models/models/yolov8/yolov8_object_detection_onnx.py +213 -0
inference_models/models/yolov8/yolov8_object_detection_torch_script.py +166 -0
inference_models/models/yolov8/yolov8_object_detection_trt.py +231 -0
inference_models/models/yolov9/__init__.py +0 -0
inference_models/models/yolov9/yolov9_onnx.py +7 -0
inference_models/models/yolov9/yolov9_torch_script.py +7 -0
inference_models/models/yolov9/yolov9_trt.py +7 -0
inference_models/runtime_introspection/__init__.py +0 -0
inference_models/runtime_introspection/core.py +410 -0
inference_models/utils/__init__.py +0 -0
inference_models/utils/download.py +608 -0
inference_models/utils/environment.py +28 -0
inference_models/utils/file_system.py +51 -0
inference_models/utils/hashing.py +7 -0
inference_models/utils/imports.py +48 -0
inference_models/utils/onnx_introspection.py +17 -0
inference_models/weights_providers/__init__.py +0 -0
inference_models/weights_providers/core.py +20 -0
inference_models/weights_providers/entities.py +159 -0
inference_models/weights_providers/roboflow.py +601 -0
inference_models-0.18.3.dist-info/METADATA +466 -0
inference_models-0.18.3.dist-info/RECORD +195 -0
inference_models-0.18.3.dist-info/WHEEL +5 -0
inference_models-0.18.3.dist-info/top_level.txt +1 -0

inference_models/models/common/roboflow/post_processing.py ADDED Viewed

@@ -0,0 +1,436 @@
+from typing import List, Literal, Tuple
+import torch
+import torchvision
+from torchvision.transforms import functional
+from inference_models.entities import ImageDimensions
+from inference_models.models.common.roboflow.model_packages import (
+    PreProcessingMetadata,
+    StaticCropOffset,
+)
+def run_nms_for_object_detection(
+    output: torch.Tensor,
+    conf_thresh: float = 0.25,
+    iou_thresh: float = 0.45,
+    max_detections: int = 100,
+    class_agnostic: bool = False,
+    box_format: Literal["xywh", "xyxy"] = "xywh",
+) -> List[torch.Tensor]:
+    """Select confident candidates and run NMS separately for each batch element.
+    Args:
+        output: Raw predictions indexed as (batch, channel, candidate). The first
+            four channels are box coordinates, every further channel is a class
+            score.
+        conf_thresh: A candidate is kept only if its best class score is strictly
+            greater than this value.
+        iou_thresh: IoU threshold handed to `torchvision.ops.batched_nms`.
+        max_detections: Upper bound on the number of rows returned per image.
+        class_agnostic: When True, all candidates share one NMS group instead of
+            being grouped by class.
+        box_format: "xywh" converts (center x, center y, width, height) boxes to
+            corner form; any other value leaves the coordinates untouched.
+    Returns:
+        One tensor per batch element with rows [x1, y1, x2, y2, conf, cls]. An image
+        with no candidate above the threshold contributes a (0, 6) tensor.
+    """
+    all_boxes = output[:, :4, :]
+    all_scores = output[:, 4:, :]
+    per_image_detections = []
+    for image_boxes, image_scores in zip(all_boxes, all_scores):
+        # Best class (and its score) for every candidate of this image.
+        best_scores, best_classes = image_scores.max(dim=0)
+        confident = best_scores > conf_thresh
+        if not confident.any():
+            per_image_detections.append(torch.zeros((0, 6), device=output.device))
+            continue
+        candidates = image_boxes[:, confident].T  # (kept, 4)
+        best_scores = best_scores[confident]
+        best_classes = best_classes[confident]
+        if box_format == "xywh":
+            centers = candidates[:, :2]
+            half_sizes = candidates[:, 2:] / 2
+            corners = torch.cat((centers - half_sizes, centers + half_sizes), dim=1)
+        else:
+            corners = candidates
+        if class_agnostic:
+            # One shared group id lets boxes of different classes suppress each other.
+            group_ids = torch.zeros_like(best_classes)
+        else:
+            group_ids = best_classes
+        kept = torchvision.ops.batched_nms(corners, best_scores, group_ids, iou_thresh)
+        # Slicing is a no-op when fewer than max_detections boxes survived.
+        kept = kept[:max_detections]
+        per_image_detections.append(
+            torch.cat(
+                (
+                    corners[kept],
+                    best_scores[kept].unsqueeze(1),
+                    best_classes[kept].unsqueeze(1).float(),
+                ),
+                dim=1,
+            )
+        )
+    return per_image_detections
+def post_process_nms_fused_model_output(
+    output: torch.Tensor,
+    conf_thresh: float = 0.25,
+) -> List[torch.Tensor]:
+    """Drop low-confidence rows from the output of a model with built-in NMS.
+    Args:
+        output: Tensor indexed as (batch, detection, field); field 4 of every
+            detection is compared against the threshold.
+        conf_thresh: Rows whose field 4 is greater than or equal to this value
+            are kept.
+    Returns:
+        One tensor per batch element holding only the surviving rows.
+    """
+    return [
+        image_rows[image_rows[:, 4] >= conf_thresh]
+        for image_rows in (output[index] for index in range(output.shape[0]))
+    ]
+def run_nms_for_instance_segmentation(
+    output: torch.Tensor,
+    conf_thresh: float = 0.25,
+    iou_thresh: float = 0.45,
+    max_detections: int = 100,
+    class_agnostic: bool = False,
+    box_format: Literal["xywh", "xyxy"] = "xywh",
+    num_mask_coefficients: int = 32,
+) -> List[torch.Tensor]:
+    """Filter instance-segmentation predictions by confidence and apply NMS.
+    Args:
+        output: Raw predictions indexed as (batch, channel, candidate). Channels
+            0-3 are box coordinates, the trailing `num_mask_coefficients` channels
+            are mask coefficients and everything in between is class scores.
+        conf_thresh: A candidate is kept only if its best class score is strictly
+            greater than this value.
+        iou_thresh: IoU threshold handed to `torchvision.ops.batched_nms`.
+        max_detections: Upper bound on the number of rows returned per image.
+        class_agnostic: When True, all candidates share one NMS group instead of
+            being grouped by class.
+        box_format: "xywh" converts (center x, center y, width, height) boxes to
+            corner form; any other value leaves the coordinates untouched.
+        num_mask_coefficients: Number of trailing channels holding mask
+            coefficients. Defaults to 32, the value that used to be hard-coded.
+    Returns:
+        One tensor per batch element with rows
+        [x1, y1, x2, y2, conf, cls, mask coefficients...]. An image with no
+        candidate above the threshold contributes a
+        (0, 6 + num_mask_coefficients) tensor.
+    Raises:
+        ValueError: If `num_mask_coefficients` is smaller than 1.
+    """
+    if num_mask_coefficients < 1:
+        # A zero or negative count would make the negative slices below select
+        # the wrong channels instead of failing loudly.
+        raise ValueError(
+            f"num_mask_coefficients must be a positive integer, got {num_mask_coefficients}"
+        )
+    bs = output.shape[0]
+    boxes = output[:, :4, :]
+    scores = output[:, 4:-num_mask_coefficients, :]
+    masks = output[:, -num_mask_coefficients:, :]
+    results = []
+    for b in range(bs):
+        bboxes = boxes[b].T  # (candidates, 4)
+        class_scores = scores[b].T  # (candidates, classes)
+        box_masks = masks[b].T  # (candidates, num_mask_coefficients)
+        class_conf, class_ids = class_scores.max(1)
+        mask = class_conf > conf_thresh
+        if not torch.any(mask):
+            results.append(
+                torch.zeros((0, 6 + num_mask_coefficients), device=output.device)
+            )
+            continue
+        bboxes = bboxes[mask]
+        class_conf = class_conf[mask]
+        class_ids = class_ids[mask]
+        box_masks = box_masks[mask]
+        if box_format == "xywh":
+            # Vectorized [x, y, w, h] -> [x1, y1, x2, y2]
+            xy = bboxes[:, :2]
+            wh = bboxes[:, 2:]
+            half_wh = wh / 2
+            xyxy = torch.cat((xy - half_wh, xy + half_wh), 1)
+        else:
+            xyxy = bboxes
+        # Class-agnostic NMS -> use dummy class ids
+        nms_class_ids = torch.zeros_like(class_ids) if class_agnostic else class_ids
+        keep = torchvision.ops.batched_nms(xyxy, class_conf, nms_class_ids, iou_thresh)
+        keep = keep[:max_detections]
+        detections = torch.cat(
+            [
+                xyxy[keep],
+                class_conf[keep].unsqueeze(1),
+                class_ids[keep].unsqueeze(1).float(),
+                box_masks[keep],
+            ],
+            dim=1,
+        )  # [x1, y1, x2, y2, conf, cls, mask coefficients...]
+        results.append(detections)
+    return results
+def run_nms_for_key_points_detection(
+    output: torch.Tensor,
+    num_classes: int,
+    key_points_slots_in_prediction: int,
+    conf_thresh: float = 0.25,
+    iou_thresh: float = 0.45,
+    max_detections: int = 100,
+    class_agnostic: bool = False,
+) -> List[torch.Tensor]:
+    """Filter key-points predictions by confidence and apply NMS per image.
+    Args:
+        output: Raw predictions indexed as (batch, channel, candidate). Channels
+            0-3 are (center x, center y, width, height), the next `num_classes`
+            channels are class scores and all remaining channels are key points.
+        num_classes: Number of class-score channels following the box channels.
+        key_points_slots_in_prediction: Number of key-point slots; only used to
+            size the empty result, which reserves three columns per slot.
+        conf_thresh: A candidate is kept only if its best class score is strictly
+            greater than this value.
+        iou_thresh: IoU threshold handed to `torchvision.ops.batched_nms`.
+        max_detections: Upper bound on the number of rows returned per image.
+        class_agnostic: When True, all candidates share one NMS group instead of
+            being grouped by class.
+    Returns:
+        One tensor per batch element with rows
+        [x1, y1, x2, y2, conf, cls, key points...]. An image with no candidate
+        above the threshold contributes a
+        (0, 6 + 3 * key_points_slots_in_prediction) tensor.
+    """
+    first_key_point_channel = 4 + num_classes
+    all_boxes = output[:, :4, :]
+    all_scores = output[:, 4:first_key_point_channel, :]
+    all_key_points = output[:, first_key_point_channel:, :]
+    empty_width = 6 + key_points_slots_in_prediction * 3
+    per_image_detections = []
+    for image_boxes, image_scores, image_key_points in zip(
+        all_boxes, all_scores, all_key_points
+    ):
+        best_scores, best_classes = image_scores.max(dim=0)
+        confident = best_scores > conf_thresh
+        if not confident.any():
+            per_image_detections.append(
+                torch.zeros((0, empty_width), device=output.device)
+            )
+            continue
+        candidates = image_boxes[:, confident].T
+        candidate_key_points = image_key_points[:, confident].T
+        best_scores = best_scores[confident]
+        best_classes = best_classes[confident]
+        # Center/size boxes -> corner boxes.
+        centers = candidates[:, :2]
+        half_sizes = candidates[:, 2:] / 2
+        corners = torch.cat((centers - half_sizes, centers + half_sizes), dim=1)
+        if class_agnostic:
+            # One shared group id lets boxes of different classes suppress each other.
+            group_ids = torch.zeros_like(best_classes)
+        else:
+            group_ids = best_classes
+        kept = torchvision.ops.batched_nms(corners, best_scores, group_ids, iou_thresh)
+        # Slicing is a no-op when fewer than max_detections boxes survived.
+        kept = kept[:max_detections]
+        per_image_detections.append(
+            torch.cat(
+                (
+                    corners[kept],
+                    best_scores[kept].unsqueeze(1),
+                    best_classes[kept].unsqueeze(1).float(),
+                    candidate_key_points[kept],
+                ),
+                dim=1,
+            )
+        )
+    return per_image_detections
+def rescale_detections(
+    detections: List[torch.Tensor], images_metadata: List[PreProcessingMetadata]
+) -> List[torch.Tensor]:
+    """Rescale every image's detections using its own pre-processing metadata.
+    Detections and metadata are paired positionally. The tensors are updated in
+    place by `rescale_image_detections`, and the same `detections` list is returned.
+    """
+    pairs = zip(detections, images_metadata)
+    for image_detections, metadata in pairs:
+        # The helper mutates the tensor it is given, so its return value is not needed.
+        rescale_image_detections(
+            image_detections=image_detections, image_metadata=metadata
+        )
+    return detections
+def rescale_image_detections(
+    image_detections: torch.Tensor,
+    image_metadata: PreProcessingMetadata,
+) -> torch.Tensor:
+    """Undo padding, scaling and static crop on the box columns of one image.
+    The first four columns of `image_detections` are rewritten in place as
+    (value - padding) / scale, followed by adding the static-crop offset when
+    either of its components is non-zero. Other columns are left untouched.
+    Args:
+        image_detections: Detections whose first four columns are x1, y1, x2, y2.
+        image_metadata: Provides `pad_left`, `pad_top`, `scale_width`,
+            `scale_height` and `static_crop_offset`.
+    Returns:
+        The same tensor object that was passed in.
+    """
+    def _as_xyxy(x_value, y_value) -> torch.Tensor:
+        # Repeat the (x, y) pair so it lines up with the [x1, y1, x2, y2] columns.
+        return torch.as_tensor(
+            [x_value, y_value, x_value, y_value],
+            dtype=image_detections.dtype,
+            device=image_detections.device,
+        )
+    boxes = image_detections[:, :4]  # view: in-place ops write through
+    boxes.sub_(_as_xyxy(image_metadata.pad_left, image_metadata.pad_top))
+    boxes.div_(_as_xyxy(image_metadata.scale_width, image_metadata.scale_height))
+    crop_offset = image_metadata.static_crop_offset
+    if crop_offset.offset_x != 0 or crop_offset.offset_y != 0:
+        boxes.add_(_as_xyxy(crop_offset.offset_x, crop_offset.offset_y))
+    return image_detections
+def rescale_key_points_detections(
+    detections: List[torch.Tensor],
+    images_metadata: List[PreProcessingMetadata],
+    num_classes: int,
+    key_points_slots_in_prediction: int,
+) -> List[torch.Tensor]:
+    """Undo padding, scaling and static crop on boxes and key points, in place.
+    For every image, columns 0-3 (box corners) and columns 6 onwards (key points
+    stored as repeating triples) are rewritten as (value - padding) / scale, then
+    shifted by the static-crop offset when either of its components is non-zero.
+    The third element of each key-point triple is left unchanged (offset 0,
+    scale 1).
+    Args:
+        detections: One tensor per image; updated in place.
+        images_metadata: Pre-processing metadata, paired positionally with
+            `detections`.
+        num_classes: Not used by this function.
+        key_points_slots_in_prediction: Number of key-point triples stored after
+            column 6.
+    Returns:
+        The same `detections` list that was passed in.
+    """
+    for image_detections, metadata in zip(detections, images_metadata):
+        tensor_kwargs = {
+            "dtype": image_detections.dtype,
+            "device": image_detections.device,
+        }
+        # Both slices are views, so the in-place ops below write into the input.
+        boxes = image_detections[:, :4]
+        key_points = image_detections[:, 6:]
+        pad_left, pad_top = metadata.pad_left, metadata.pad_top
+        scale_width, scale_height = metadata.scale_width, metadata.scale_height
+        boxes.sub_(
+            torch.as_tensor([pad_left, pad_top, pad_left, pad_top], **tensor_kwargs)
+        )
+        boxes.div_(
+            torch.as_tensor(
+                [scale_width, scale_height, scale_width, scale_height],
+                **tensor_kwargs,
+            )
+        )
+        key_points.sub_(
+            torch.as_tensor([pad_left, pad_top, 0], **tensor_kwargs).repeat(
+                key_points_slots_in_prediction
+            )
+        )
+        key_points.div_(
+            torch.as_tensor([scale_width, scale_height, 1.0], **tensor_kwargs).repeat(
+                key_points_slots_in_prediction
+            )
+        )
+        crop_offset = metadata.static_crop_offset
+        if crop_offset.offset_x == 0 and crop_offset.offset_y == 0:
+            continue
+        # Here the number of triples is derived from the tensor width itself.
+        triples_in_tensor = (image_detections.shape[1] - 6) // 3
+        key_points.add_(
+            torch.as_tensor(
+                [crop_offset.offset_x, crop_offset.offset_y, 0] * triples_in_tensor,
+                **tensor_kwargs,
+            )
+        )
+        boxes.add_(
+            torch.as_tensor(
+                [
+                    crop_offset.offset_x,
+                    crop_offset.offset_y,
+                    crop_offset.offset_x,
+                    crop_offset.offset_y,
+                ],
+                **tensor_kwargs,
+            )
+        )
+    return detections
+def preprocess_segmentation_masks(
+    protos: torch.Tensor,
+    masks_in: torch.Tensor,
+) -> torch.Tensor:
+    """Combine prototype masks with per-instance coefficients.
+    Args:
+        protos: Prototype masks indexed as (channel, height, width).
+        masks_in: Coefficients indexed as (instance, channel).
+    Returns:
+        Tensor indexed as (instance, height, width): for every instance, the sum
+        of the prototypes weighted by that instance's coefficients.
+    """
+    # c = prototype channel, h/w = spatial position, n = instance.
+    combination_rule = "chw,nc->nhw"
+    instance_masks = torch.einsum(combination_rule, protos, masks_in)
+    return instance_masks
+def crop_masks_to_boxes(
+    boxes: torch.Tensor,
+    masks: torch.Tensor,
+    scaling: float = 0.25,
+) -> torch.Tensor:
+    """Zero out every mask value that lies outside its (scaled) bounding box.
+    Args:
+        boxes: One row per mask; columns 0-3 are x1, y1, x2, y2.
+        masks: Tensor indexed as (instance, height, width).
+        scaling: Factor applied to the box coordinates before they are compared
+            with mask pixel indices.
+    Returns:
+        `masks` multiplied by a boolean window that is True where
+        x1 <= x < x2 and y1 <= y < y2 for the scaled box of that instance.
+    """
+    _, mask_height, mask_width = masks.shape
+    boxes_in_mask_space = boxes * scaling
+    # Each bound gets shape (instance, 1, 1) so it broadcasts over the mask grid.
+    left, top, right, bottom = (
+        boxes_in_mask_space[:, column, None, None] for column in range(4)
+    )
+    x_positions = torch.arange(mask_width, device=masks.device).view(1, 1, mask_width)
+    y_positions = torch.arange(mask_height, device=masks.device).view(
+        1, mask_height, 1
+    )
+    inside_horizontally = (x_positions >= left) & (x_positions < right)
+    inside_vertically = (y_positions >= top) & (y_positions < bottom)
+    return masks * (inside_horizontally & inside_vertically)
+def align_instance_segmentation_results(
+    image_bboxes: torch.Tensor,
+    masks: torch.Tensor,
+    padding: Tuple[int, int, int, int],
+    scale_width: float,
+    scale_height: float,
+    original_size: ImageDimensions,
+    size_after_pre_processing: ImageDimensions,
+    inference_size: ImageDimensions,
+    static_crop_offset: StaticCropOffset,
+    binarization_threshold: float = 0.0,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """Map boxes and masks of one image from network-input space to image space.
+    Boxes have padding subtracted and are divided by the scale factors. Masks have
+    the (proportionally scaled) padding removed, are resized to
+    `size_after_pre_processing` and binarized. When a static crop offset is set,
+    boxes are shifted by it and masks are pasted onto an `original_size` canvas.
+    Args:
+        image_bboxes: Detections whose first four columns are x1, y1, x2, y2.
+            Modified in place.
+        masks: Mask tensor indexed as (instance, height, width).
+        padding: (pad_left, pad_top, pad_right, pad_bottom) in network-input
+            pixels; negative values are handled by zero-padding the masks.
+        scale_width: Horizontal scale the box coordinates are divided by.
+        scale_height: Vertical scale the box coordinates are divided by.
+        original_size: Size of the canvas used when a static crop offset is set.
+        size_after_pre_processing: Size the masks are resized to.
+        inference_size: Network input size, used to express padding in mask pixels.
+        static_crop_offset: Offset added to boxes and used to place masks on the
+            canvas when either component is greater than zero.
+        binarization_threshold: Resized mask values strictly greater than this
+            become True.
+    Returns:
+        `(image_bboxes, masks)` where masks is a boolean tensor. With no detections
+        the masks are empty but have the same spatial size a non-empty result
+        would have.
+    """
+    static_crop_applied = (
+        static_crop_offset.offset_x > 0 or static_crop_offset.offset_y > 0
+    )
+    if image_bboxes.shape[0] == 0:
+        # Mirror the non-empty path: masks end up on the original-size canvas only
+        # when a static crop offset is applied.
+        empty_size = original_size if static_crop_applied else size_after_pre_processing
+        empty_masks = torch.empty(
+            size=(0, empty_size.height, empty_size.width),
+            dtype=torch.bool,
+            device=image_bboxes.device,
+        )
+        return image_bboxes, empty_masks
+    pad_left, pad_top, pad_right, pad_bottom = padding
+    offsets = torch.as_tensor(
+        [pad_left, pad_top, pad_left, pad_top],
+        dtype=image_bboxes.dtype,
+        device=image_bboxes.device,
+    )
+    image_bboxes[:, :4].sub_(offsets)
+    scale = torch.as_tensor(
+        [scale_width, scale_height, scale_width, scale_height],
+        dtype=image_bboxes.dtype,
+        device=image_bboxes.device,
+    )
+    image_bboxes[:, :4].div_(scale)
+    n, mh, mw = masks.shape
+    # Express the padding in mask pixels (masks may be smaller than the input).
+    mask_h_scale = mh / inference_size.height
+    mask_w_scale = mw / inference_size.width
+    mask_pad_top, mask_pad_bottom, mask_pad_left, mask_pad_right = (
+        round(mask_h_scale * pad_top),
+        round(mask_h_scale * pad_bottom),
+        round(mask_w_scale * pad_left),
+        round(mask_w_scale * pad_right),
+    )
+    if (
+        mask_pad_top < 0
+        or mask_pad_bottom < 0
+        or mask_pad_left < 0
+        or mask_pad_right < 0
+    ):
+        # Negative padding: grow the mask with zeros on those sides ...
+        masks = torch.nn.functional.pad(
+            masks,
+            (
+                abs(min(mask_pad_left, 0)),
+                abs(min(mask_pad_right, 0)),
+                abs(min(mask_pad_top, 0)),
+                abs(min(mask_pad_bottom, 0)),
+            ),
+            "constant",
+            0,
+        )
+        # ... and still cut away whatever padding is positive.
+        padded_mask_offset_top = max(mask_pad_top, 0)
+        padded_mask_offset_bottom = max(mask_pad_bottom, 0)
+        padded_mask_offset_left = max(mask_pad_left, 0)
+        padded_mask_offset_right = max(mask_pad_right, 0)
+        masks = masks[
+            :,
+            padded_mask_offset_top : masks.shape[1] - padded_mask_offset_bottom,
+            padded_mask_offset_left : masks.shape[2] - padded_mask_offset_right,
+        ]
+    else:
+        masks = masks[
+            :, mask_pad_top : mh - mask_pad_bottom, mask_pad_left : mw - mask_pad_right
+        ]
+    masks = (
+        functional.resize(
+            masks,
+            [size_after_pre_processing.height, size_after_pre_processing.width],
+            interpolation=functional.InterpolationMode.BILINEAR,
+        )
+        .gt_(binarization_threshold)
+        .to(dtype=torch.bool)
+    )
+    if static_crop_applied:
+        mask_canvas = torch.zeros(
+            (
+                masks.shape[0],
+                original_size.height,
+                original_size.width,
+            ),
+            dtype=torch.bool,
+            device=masks.device,
+        )
+        mask_canvas[
+            :,
+            static_crop_offset.offset_y : static_crop_offset.offset_y + masks.shape[1],
+            static_crop_offset.offset_x : static_crop_offset.offset_x + masks.shape[2],
+        ] = masks
+        static_crop_offsets = torch.as_tensor(
+            [
+                static_crop_offset.offset_x,
+                static_crop_offset.offset_y,
+                static_crop_offset.offset_x,
+                static_crop_offset.offset_y,
+            ],
+            dtype=image_bboxes.dtype,
+            device=image_bboxes.device,
+        )
+        image_bboxes[:, :4].add_(static_crop_offsets)
+        masks = mask_canvas
+    return image_bboxes, masks