inference-models 0.18.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inference_models/__init__.py +36 -0
- inference_models/configuration.py +72 -0
- inference_models/constants.py +2 -0
- inference_models/entities.py +5 -0
- inference_models/errors.py +137 -0
- inference_models/logger.py +52 -0
- inference_models/model_pipelines/__init__.py +0 -0
- inference_models/model_pipelines/auto_loaders/__init__.py +0 -0
- inference_models/model_pipelines/auto_loaders/core.py +120 -0
- inference_models/model_pipelines/auto_loaders/pipelines_registry.py +36 -0
- inference_models/model_pipelines/face_and_gaze_detection/__init__.py +0 -0
- inference_models/model_pipelines/face_and_gaze_detection/mediapipe_l2cs.py +200 -0
- inference_models/models/__init__.py +0 -0
- inference_models/models/auto_loaders/__init__.py +0 -0
- inference_models/models/auto_loaders/access_manager.py +168 -0
- inference_models/models/auto_loaders/auto_negotiation.py +1329 -0
- inference_models/models/auto_loaders/auto_resolution_cache.py +129 -0
- inference_models/models/auto_loaders/constants.py +7 -0
- inference_models/models/auto_loaders/core.py +1341 -0
- inference_models/models/auto_loaders/dependency_models.py +52 -0
- inference_models/models/auto_loaders/entities.py +57 -0
- inference_models/models/auto_loaders/models_registry.py +497 -0
- inference_models/models/auto_loaders/presentation_utils.py +333 -0
- inference_models/models/auto_loaders/ranking.py +413 -0
- inference_models/models/auto_loaders/utils.py +31 -0
- inference_models/models/base/__init__.py +0 -0
- inference_models/models/base/classification.py +123 -0
- inference_models/models/base/depth_estimation.py +62 -0
- inference_models/models/base/documents_parsing.py +111 -0
- inference_models/models/base/embeddings.py +66 -0
- inference_models/models/base/instance_segmentation.py +87 -0
- inference_models/models/base/keypoints_detection.py +93 -0
- inference_models/models/base/object_detection.py +143 -0
- inference_models/models/base/semantic_segmentation.py +74 -0
- inference_models/models/base/types.py +5 -0
- inference_models/models/clip/__init__.py +0 -0
- inference_models/models/clip/clip_onnx.py +148 -0
- inference_models/models/clip/clip_pytorch.py +104 -0
- inference_models/models/clip/preprocessing.py +162 -0
- inference_models/models/common/__init__.py +0 -0
- inference_models/models/common/cuda.py +30 -0
- inference_models/models/common/model_packages.py +25 -0
- inference_models/models/common/onnx.py +379 -0
- inference_models/models/common/roboflow/__init__.py +0 -0
- inference_models/models/common/roboflow/model_packages.py +361 -0
- inference_models/models/common/roboflow/post_processing.py +436 -0
- inference_models/models/common/roboflow/pre_processing.py +1332 -0
- inference_models/models/common/torch.py +20 -0
- inference_models/models/common/trt.py +266 -0
- inference_models/models/deep_lab_v3_plus/__init__.py +0 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py +282 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_torch.py +264 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_trt.py +313 -0
- inference_models/models/depth_anything_v2/__init__.py +0 -0
- inference_models/models/depth_anything_v2/depth_anything_v2_hf.py +77 -0
- inference_models/models/dinov3/__init__.py +0 -0
- inference_models/models/dinov3/dinov3_classification_onnx.py +348 -0
- inference_models/models/dinov3/dinov3_classification_torch.py +323 -0
- inference_models/models/doctr/__init__.py +0 -0
- inference_models/models/doctr/doctr_torch.py +304 -0
- inference_models/models/easy_ocr/__init__.py +0 -0
- inference_models/models/easy_ocr/easy_ocr_torch.py +222 -0
- inference_models/models/florence2/__init__.py +0 -0
- inference_models/models/florence2/florence2_hf.py +897 -0
- inference_models/models/grounding_dino/__init__.py +0 -0
- inference_models/models/grounding_dino/grounding_dino_torch.py +227 -0
- inference_models/models/l2cs/__init__.py +0 -0
- inference_models/models/l2cs/l2cs_onnx.py +216 -0
- inference_models/models/mediapipe_face_detection/__init__.py +0 -0
- inference_models/models/mediapipe_face_detection/face_detection.py +203 -0
- inference_models/models/moondream2/__init__.py +0 -0
- inference_models/models/moondream2/moondream2_hf.py +281 -0
- inference_models/models/owlv2/__init__.py +0 -0
- inference_models/models/owlv2/cache.py +182 -0
- inference_models/models/owlv2/entities.py +112 -0
- inference_models/models/owlv2/owlv2_hf.py +695 -0
- inference_models/models/owlv2/reference_dataset.py +291 -0
- inference_models/models/paligemma/__init__.py +0 -0
- inference_models/models/paligemma/paligemma_hf.py +209 -0
- inference_models/models/perception_encoder/__init__.py +0 -0
- inference_models/models/perception_encoder/perception_encoder_pytorch.py +197 -0
- inference_models/models/perception_encoder/vision_encoder/__init__.py +0 -0
- inference_models/models/perception_encoder/vision_encoder/config.py +160 -0
- inference_models/models/perception_encoder/vision_encoder/pe.py +742 -0
- inference_models/models/perception_encoder/vision_encoder/rope.py +344 -0
- inference_models/models/perception_encoder/vision_encoder/tokenizer.py +342 -0
- inference_models/models/perception_encoder/vision_encoder/transforms.py +33 -0
- inference_models/models/qwen25vl/__init__.py +1 -0
- inference_models/models/qwen25vl/qwen25vl_hf.py +285 -0
- inference_models/models/resnet/__init__.py +0 -0
- inference_models/models/resnet/resnet_classification_onnx.py +330 -0
- inference_models/models/resnet/resnet_classification_torch.py +305 -0
- inference_models/models/resnet/resnet_classification_trt.py +369 -0
- inference_models/models/rfdetr/__init__.py +0 -0
- inference_models/models/rfdetr/backbone_builder.py +101 -0
- inference_models/models/rfdetr/class_remapping.py +41 -0
- inference_models/models/rfdetr/common.py +115 -0
- inference_models/models/rfdetr/default_labels.py +108 -0
- inference_models/models/rfdetr/dinov2_with_windowed_attn.py +1330 -0
- inference_models/models/rfdetr/misc.py +26 -0
- inference_models/models/rfdetr/ms_deform_attn.py +180 -0
- inference_models/models/rfdetr/ms_deform_attn_func.py +60 -0
- inference_models/models/rfdetr/position_encoding.py +166 -0
- inference_models/models/rfdetr/post_processor.py +83 -0
- inference_models/models/rfdetr/projector.py +373 -0
- inference_models/models/rfdetr/rfdetr_backbone_pytorch.py +394 -0
- inference_models/models/rfdetr/rfdetr_base_pytorch.py +807 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_onnx.py +206 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_pytorch.py +373 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py +227 -0
- inference_models/models/rfdetr/rfdetr_object_detection_onnx.py +244 -0
- inference_models/models/rfdetr/rfdetr_object_detection_pytorch.py +470 -0
- inference_models/models/rfdetr/rfdetr_object_detection_trt.py +270 -0
- inference_models/models/rfdetr/segmentation_head.py +273 -0
- inference_models/models/rfdetr/transformer.py +767 -0
- inference_models/models/roboflow_instant/__init__.py +0 -0
- inference_models/models/roboflow_instant/roboflow_instant_hf.py +141 -0
- inference_models/models/sam/__init__.py +0 -0
- inference_models/models/sam/cache.py +147 -0
- inference_models/models/sam/entities.py +25 -0
- inference_models/models/sam/sam_torch.py +675 -0
- inference_models/models/sam2/__init__.py +0 -0
- inference_models/models/sam2/cache.py +162 -0
- inference_models/models/sam2/entities.py +43 -0
- inference_models/models/sam2/sam2_torch.py +905 -0
- inference_models/models/sam2_rt/__init__.py +0 -0
- inference_models/models/sam2_rt/sam2_pytorch.py +119 -0
- inference_models/models/smolvlm/__init__.py +0 -0
- inference_models/models/smolvlm/smolvlm_hf.py +245 -0
- inference_models/models/trocr/__init__.py +0 -0
- inference_models/models/trocr/trocr_hf.py +53 -0
- inference_models/models/vit/__init__.py +0 -0
- inference_models/models/vit/vit_classification_huggingface.py +319 -0
- inference_models/models/vit/vit_classification_onnx.py +326 -0
- inference_models/models/vit/vit_classification_trt.py +365 -0
- inference_models/models/yolact/__init__.py +1 -0
- inference_models/models/yolact/yolact_instance_segmentation_onnx.py +336 -0
- inference_models/models/yolact/yolact_instance_segmentation_trt.py +361 -0
- inference_models/models/yolo_world/__init__.py +1 -0
- inference_models/models/yolonas/__init__.py +0 -0
- inference_models/models/yolonas/nms.py +44 -0
- inference_models/models/yolonas/yolonas_object_detection_onnx.py +204 -0
- inference_models/models/yolonas/yolonas_object_detection_trt.py +230 -0
- inference_models/models/yolov10/__init__.py +0 -0
- inference_models/models/yolov10/yolov10_object_detection_onnx.py +187 -0
- inference_models/models/yolov10/yolov10_object_detection_trt.py +215 -0
- inference_models/models/yolov11/__init__.py +0 -0
- inference_models/models/yolov11/yolov11_onnx.py +28 -0
- inference_models/models/yolov11/yolov11_torch_script.py +25 -0
- inference_models/models/yolov11/yolov11_trt.py +21 -0
- inference_models/models/yolov12/__init__.py +0 -0
- inference_models/models/yolov12/yolov12_onnx.py +7 -0
- inference_models/models/yolov12/yolov12_torch_script.py +7 -0
- inference_models/models/yolov12/yolov12_trt.py +7 -0
- inference_models/models/yolov5/__init__.py +0 -0
- inference_models/models/yolov5/nms.py +99 -0
- inference_models/models/yolov5/yolov5_instance_segmentation_onnx.py +225 -0
- inference_models/models/yolov5/yolov5_instance_segmentation_trt.py +255 -0
- inference_models/models/yolov5/yolov5_object_detection_onnx.py +192 -0
- inference_models/models/yolov5/yolov5_object_detection_trt.py +218 -0
- inference_models/models/yolov7/__init__.py +0 -0
- inference_models/models/yolov7/yolov7_instance_segmentation_onnx.py +226 -0
- inference_models/models/yolov7/yolov7_instance_segmentation_trt.py +253 -0
- inference_models/models/yolov8/__init__.py +0 -0
- inference_models/models/yolov8/yolov8_classification_onnx.py +181 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_onnx.py +239 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_torch_script.py +201 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_trt.py +268 -0
- inference_models/models/yolov8/yolov8_key_points_detection_onnx.py +263 -0
- inference_models/models/yolov8/yolov8_key_points_detection_torch_script.py +218 -0
- inference_models/models/yolov8/yolov8_key_points_detection_trt.py +287 -0
- inference_models/models/yolov8/yolov8_object_detection_onnx.py +213 -0
- inference_models/models/yolov8/yolov8_object_detection_torch_script.py +166 -0
- inference_models/models/yolov8/yolov8_object_detection_trt.py +231 -0
- inference_models/models/yolov9/__init__.py +0 -0
- inference_models/models/yolov9/yolov9_onnx.py +7 -0
- inference_models/models/yolov9/yolov9_torch_script.py +7 -0
- inference_models/models/yolov9/yolov9_trt.py +7 -0
- inference_models/runtime_introspection/__init__.py +0 -0
- inference_models/runtime_introspection/core.py +410 -0
- inference_models/utils/__init__.py +0 -0
- inference_models/utils/download.py +608 -0
- inference_models/utils/environment.py +28 -0
- inference_models/utils/file_system.py +51 -0
- inference_models/utils/hashing.py +7 -0
- inference_models/utils/imports.py +48 -0
- inference_models/utils/onnx_introspection.py +17 -0
- inference_models/weights_providers/__init__.py +0 -0
- inference_models/weights_providers/core.py +20 -0
- inference_models/weights_providers/entities.py +159 -0
- inference_models/weights_providers/roboflow.py +601 -0
- inference_models-0.18.3.dist-info/METADATA +466 -0
- inference_models-0.18.3.dist-info/RECORD +195 -0
- inference_models-0.18.3.dist-info/WHEEL +5 -0
- inference_models-0.18.3.dist-info/top_level.txt +1 -0

@@ -0,0 +1,20 @@
+from typing import Generator, Tuple
+
+import torch
+
+
+def generate_batch_chunks(
+    input_batch: torch.Tensor,
+    chunk_size: int,
+) -> Generator[Tuple[torch.Tensor, int], None, None]:
+    n = input_batch.shape[0]
+    for i in range(0, n, chunk_size):
+        chunk = input_batch[i : i + chunk_size]
+        padding_size = chunk_size - chunk.shape[0]
+        if padding_size > 0:
+            padding_shape = (padding_size,) + chunk.shape[1:]
+            padding = torch.zeros(
+                padding_shape, device=input_batch.device, dtype=input_batch.dtype
+            )
+            chunk = torch.cat([chunk, padding], dim=0)
+        yield chunk, padding_size
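
For orientation, a minimal usage sketch of the helper above, assuming this hunk belongs to inference_models/models/common/torch.py (the import path is inferred from the file listing; the tensor shapes and the stand-in forward pass are illustrative, not taken from the package):

import torch

from inference_models.models.common.torch import generate_batch_chunks

batch = torch.randn(10, 3, 640, 640)  # 10 images, processed in fixed-size chunks of 4
for chunk, padding_size in generate_batch_chunks(batch, chunk_size=4):
    # every yielded chunk has exactly chunk_size rows; the last one is zero-padded
    outputs = chunk.sum(dim=(1, 2, 3))  # stand-in for a model forward pass
    if padding_size > 0:
        outputs = outputs[:-padding_size]  # drop results produced for padding rows
    print(outputs.shape)
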
@@ -0,0 +1,266 @@
+from typing import List, Tuple
+
+import torch
+
+from inference_models.errors import (
+    CorruptedModelPackageError,
+    MissingDependencyError,
+    ModelRuntimeError,
+)
+from inference_models.logger import LOGGER
+from inference_models.models.common.roboflow.model_packages import TRTConfig
+
+try:
+    import tensorrt as trt
+except ImportError as import_error:
+    raise MissingDependencyError(
+        message=f"Could not import TRT tools required to run models with TRT backend - this error means that some additional "
+        f"dependencies are not installed in the environment. If you run the `inference-models` library directly in your "
+        f"Python program, make sure the following extras of the package are installed: `trt10` - installation can only "
+        f"succeed for Linux and Windows machines with Cuda 12 installed. Jetson devices should have TRT 10.x "
+        f"installed for all builds with Jetpack 6. "
+        f"If you see this error using Roboflow infrastructure, make sure the service you use does support the model. "
+        f"You can also contact Roboflow to get support.",
+        help_url="https://todo",
+    ) from import_error
+
+try:
+    import pycuda.driver as cuda
+except ImportError as import_error:
+    raise MissingDependencyError(
+        message="TODO",
+        help_url="https://todo",
+    ) from import_error
+
+
+class InferenceTRTLogger(trt.ILogger):
+
+    def __init__(self, with_memory: bool = False):
+        super().__init__()
+        self._memory: List[Tuple[trt.ILogger.Severity, str]] = []
+        self._with_memory = with_memory
+
+    def log(self, severity: trt.ILogger.Severity, msg: str) -> None:
+        if self._with_memory:
+            self._memory.append((severity, msg))
+        severity_str = str(severity)
+        if severity_str == str(trt.Logger.VERBOSE):
+            log_function = LOGGER.debug
+        elif severity_str == str(trt.Logger.INFO):
+            log_function = LOGGER.info
+        elif severity_str == str(trt.Logger.WARNING):
+            log_function = LOGGER.warning
+        else:
+            log_function = LOGGER.error
+        log_function(msg)
+
+    def get_memory(self) -> List[Tuple[trt.ILogger.Severity, str]]:
+        return self._memory
+
+
+TRT_LOGGER = InferenceTRTLogger()
+
+
+def get_engine_inputs_and_outputs(
+    engine: trt.ICudaEngine,
+) -> Tuple[List[str], List[str]]:
+    num_inputs = engine.num_io_tensors
+    inputs = []
+    outputs = []
+    for i in range(num_inputs):
+        name = engine.get_tensor_name(i)
+        io_mode = engine.get_tensor_mode(name)
+        if io_mode == trt.TensorIOMode.INPUT:
+            inputs.append(name)
+        elif io_mode == trt.TensorIOMode.OUTPUT:
+            outputs.append(name)
+    return inputs, outputs
+
+
+def infer_from_trt_engine(
+    pre_processed_images: torch.Tensor,
+    trt_config: TRTConfig,
+    engine: trt.ICudaEngine,
+    context: trt.IExecutionContext,
+    device: torch.device,
+    input_name: str,
+    outputs: List[str],
+) -> List[torch.Tensor]:
+    if trt_config.static_batch_size is not None:
+        return infer_from_trt_engine_with_batch_size_boundaries(
+            pre_processed_images=pre_processed_images,
+            engine=engine,
+            context=context,
+            device=device,
+            input_name=input_name,
+            outputs=outputs,
+            min_batch_size=trt_config.static_batch_size,
+            max_batch_size=trt_config.static_batch_size,
+        )
+    return infer_from_trt_engine_with_batch_size_boundaries(
+        pre_processed_images=pre_processed_images,
+        engine=engine,
+        context=context,
+        device=device,
+        input_name=input_name,
+        outputs=outputs,
+        min_batch_size=trt_config.dynamic_batch_size_min,
+        max_batch_size=trt_config.dynamic_batch_size_max,
+    )
+
+
+def infer_from_trt_engine_with_batch_size_boundaries(
+    pre_processed_images: torch.Tensor,
+    engine: trt.ICudaEngine,
+    context: trt.IExecutionContext,
+    device: torch.device,
+    input_name: str,
+    outputs: List[str],
+    min_batch_size: int,
+    max_batch_size: int,
+) -> List[torch.Tensor]:
+    if pre_processed_images.shape[0] <= max_batch_size:
+        reminder = min_batch_size - pre_processed_images.shape[0]
+        if reminder > 0:
+            pre_processed_images = torch.cat(
+                (
+                    pre_processed_images,
+                    torch.zeros(
+                        (reminder,) + pre_processed_images.shape[1:],
+                        dtype=pre_processed_images.dtype,
+                        device=pre_processed_images.device,
+                    ),
+                ),
+                dim=0,
+            )
+        results = execute_trt_engine(
+            pre_processed_images=pre_processed_images,
+            engine=engine,
+            context=context,
+            device=device,
+            input_name=input_name,
+            outputs=outputs,
+        )
+        if reminder > 0:
+            results = [r[:-reminder] for r in results]
+        return results
+    all_results = []
+    for _ in outputs:
+        all_results.append([])
+    for i in range(0, pre_processed_images.shape[0], max_batch_size):
+        batch = pre_processed_images[i : i + max_batch_size].contiguous()
+        reminder = min_batch_size - batch.shape[0]
+        if reminder > 0:
+            batch = torch.cat(
+                (
+                    batch,
+                    torch.zeros(
+                        (reminder,) + batch.shape[1:],
+                        dtype=pre_processed_images.dtype,
+                        device=pre_processed_images.device,
+                    ),
+                ),
+                dim=0,
+            )
+        results = execute_trt_engine(
+            pre_processed_images=batch,
+            engine=engine,
+            context=context,
+            device=device,
+            input_name=input_name,
+            outputs=outputs,
+        )
+        if reminder > 0:
+            results = [r[:-reminder] for r in results]
+        for partial_result, all_result_element in zip(results, all_results):
+            all_result_element.append(partial_result)
+    return [torch.cat(e, dim=0).contiguous() for e in all_results]
+
+
+def execute_trt_engine(
+    pre_processed_images: torch.Tensor,
+    engine: trt.ICudaEngine,
+    context: trt.IExecutionContext,
+    device: torch.device,
+    input_name: str,
+    outputs: List[str],
+) -> List[torch.Tensor]:
+    batch_size = pre_processed_images.shape[0]
+    results = []
+    for output in outputs:
+        output_tensor_shape = engine.get_tensor_shape(output)
+        output_tensor_type = trt_dtype_to_torch(engine.get_tensor_dtype(output))
+        result = torch.empty(
+            (batch_size,) + output_tensor_shape[1:],
+            dtype=output_tensor_type,
+            device=device,
+        )
+        context.set_tensor_address(output, result.data_ptr())
+        results.append(result)
+    status = context.set_input_shape(input_name, tuple(pre_processed_images.shape))
+    if not status:
+        raise ModelRuntimeError(
+            message="Failed to set TRT model input shape during forward pass from the model.",
+            help_url="https://todo",
+        )
+    status = context.set_tensor_address(input_name, pre_processed_images.data_ptr())
+    if not status:
+        raise ModelRuntimeError(
+            message="Failed to set input tensor data pointer during forward pass from the model.",
+            help_url="https://todo",
+        )
+    stream = torch.cuda.Stream(device=device)
+    status = context.execute_async_v3(stream_handle=stream.cuda_stream)
+    if not status:
+        raise ModelRuntimeError(
+            message="Failed to complete inference from TRT model",
+            help_url="https://todo",
+        )
+    stream.synchronize()
+    return results
+
+
+def trt_dtype_to_torch(trt_dtype):
+    return {
+        trt.DataType.FLOAT: torch.float32,
+        trt.DataType.HALF: torch.float16,
+        trt.DataType.INT32: torch.int32,
+        trt.DataType.INT8: torch.int8,
+        trt.DataType.BOOL: torch.bool,
+    }[trt_dtype]
+
+
+def load_model(
+    model_path: str,
+    engine_host_code_allowed: bool = False,
+) -> trt.ICudaEngine:
+    try:
+        local_logger = InferenceTRTLogger(with_memory=True)
+        with open(model_path, "rb") as f, trt.Runtime(local_logger) as runtime:
+            runtime.engine_host_code_allowed = engine_host_code_allowed
+            engine = runtime.deserialize_cuda_engine(f.read())
+            if engine is None:
+                logger_traces = local_logger.get_memory()
+                logger_traces_str = "\n".join(
+                    f"[{severity}] {msg}" for severity, msg in logger_traces
+                )
+                raise CorruptedModelPackageError(
+                    message="Could not load TRT engine due to runtime error. This error is usually caused "
+                    "by model package incompatibility with runtime environment. If you selected model with "
+                    "specific model package to be run - verify that your environment is compatible with your "
+                    "package. If the package was selected automatically by the library - this error indicates a bug. "
+                    "You can help us solve this problem by describing the issue: "
+                    "https://github.com/roboflow/inference/issues\nBelow you can find debug information provided "
+                    f"by TRT runtime, which may be helpful:\n{logger_traces_str}",
+                    help_url="https://todo",
+                )
+            return engine
+    except OSError as error:
+        raise CorruptedModelPackageError(
+            message="Could not load TRT engine - file not found. This error may be caused by "
+            "corrupted model package or invalid model path that was provided. If you "
+            "initialized the model manually, running the code locally - make sure that provided "
+            "path is correct. Otherwise, contact Roboflow to solve the problem: "
+            "https://github.com/roboflow/inference/issues",
+            help_url="https://todo",
+        ) from error
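
A rough sketch of how the helpers in this file (inference_models/models/common/trt.py, per the listing) appear to fit together. This is an assumption-laden example: the engine path is a placeholder, create_execution_context() is the standard TensorRT Python API rather than something defined in this file, and the TRTConfig construction only guesses at a constructor matching the fields referenced by infer_from_trt_engine:

import torch

from inference_models.models.common.roboflow.model_packages import TRTConfig
from inference_models.models.common.trt import (
    get_engine_inputs_and_outputs,
    infer_from_trt_engine,
    load_model,
)

device = torch.device("cuda:0")
engine = load_model(model_path="weights.engine")           # placeholder path
context = engine.create_execution_context()                # standard TensorRT API
input_names, output_names = get_engine_inputs_and_outputs(engine)

# hypothetical config for an engine built with a dynamic batch range of [1, 8]
trt_config = TRTConfig(
    static_batch_size=None,
    dynamic_batch_size_min=1,
    dynamic_batch_size_max=8,
)
images = torch.randn(3, 3, 640, 640, device=device)        # already pre-processed input
results = infer_from_trt_engine(
    pre_processed_images=images,
    trt_config=trt_config,
    engine=engine,
    context=context,
    device=device,
    input_name=input_names[0],
    outputs=output_names,
)
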
File without changes

@@ -0,0 +1,282 @@
+from threading import Lock
+from typing import List, Optional, Tuple, Union
+
+import torch
+from torchvision.transforms import functional
+
+from inference_models import ColorFormat, SemanticSegmentationModel
+from inference_models.configuration import DEFAULT_DEVICE
+from inference_models.errors import (
+    EnvironmentConfigurationError,
+    MissingDependencyError,
+)
+from inference_models.models.base.semantic_segmentation import (
+    SemanticSegmentationResult,
+)
+from inference_models.models.base.types import (
+    PreprocessedInputs,
+    PreprocessingMetadata,
+    RawPrediction,
+)
+from inference_models.models.common.model_packages import get_model_package_contents
+from inference_models.models.common.onnx import run_session_with_batch_size_limit
+from inference_models.models.common.roboflow.model_packages import (
+    InferenceConfig,
+    PreProcessingMetadata,
+    ResizeMode,
+    parse_class_names_file,
+    parse_inference_config,
+)
+from inference_models.models.common.roboflow.pre_processing import (
+    pre_process_network_input,
+)
+from inference_models.utils.onnx_introspection import (
+    get_selected_onnx_execution_providers,
+)
+
+try:
+    import onnxruntime
+except ImportError as import_error:
+    raise MissingDependencyError(
+        message=f"Could not import DeepLabV3Plus model with ONNX backend - this error means that some additional dependencies "
+        f"are not installed in the environment. If you run the `inference-models` library directly in your Python "
+        f"program, make sure the following extras of the package are installed: \n"
+        f"\t* `onnx-cpu` - when you wish to use library with CPU support only\n"
+        f"\t* `onnx-cu12` - for running on GPU with Cuda 12 installed\n"
+        f"\t* `onnx-cu118` - for running on GPU with Cuda 11.8 installed\n"
+        f"\t* `onnx-jp6-cu126` - for running on Jetson with Jetpack 6\n"
+        f"If you see this error using Roboflow infrastructure, make sure the service you use does support the model. "
+        f"You can also contact Roboflow to get support.",
+        help_url="https://todo",
+    ) from import_error
+
+
+class DeepLabV3PlusForSemanticSegmentationOnnx(
+    SemanticSegmentationModel[torch.Tensor, PreProcessingMetadata, torch.Tensor]
+):
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        model_name_or_path: str,
+        onnx_execution_providers: Optional[List[Union[str, tuple]]] = None,
+        default_onnx_trt_options: bool = True,
+        device: torch.device = DEFAULT_DEVICE,
+        **kwargs,
+    ) -> "DeepLabV3PlusForSemanticSegmentationOnnx":
+        if onnx_execution_providers is None:
+            onnx_execution_providers = get_selected_onnx_execution_providers()
+        if not onnx_execution_providers:
+            raise EnvironmentConfigurationError(
+                message=f"Could not initialize model - selected backend is ONNX which requires execution provider to "
+                f"be specified - explicitly in `from_pretrained(...)` method or via env variable "
+                f"`ONNXRUNTIME_EXECUTION_PROVIDERS`. If you run model locally - adjust your setup, otherwise "
+                f"contact the platform support.",
+                help_url="https://todo",
+            )
+        model_package_content = get_model_package_contents(
+            model_package_dir=model_name_or_path,
+            elements=[
+                "class_names.txt",
+                "inference_config.json",
+                "weights.onnx",
+            ],
+        )
+        class_names = parse_class_names_file(
+            class_names_path=model_package_content["class_names.txt"]
+        )
+        try:
+            background_class_id = [c.lower() for c in class_names].index("background")
+        except ValueError:
+            background_class_id = -1
+        inference_config = parse_inference_config(
+            config_path=model_package_content["inference_config.json"],
+            allowed_resize_modes={
+                ResizeMode.STRETCH_TO,
+                ResizeMode.LETTERBOX,
+                ResizeMode.CENTER_CROP,
+                ResizeMode.LETTERBOX_REFLECT_EDGES,
+            },
+        )
+        session = onnxruntime.InferenceSession(
+            path_or_bytes=model_package_content["weights.onnx"],
+            providers=onnx_execution_providers,
+        )
+        input_batch_size = session.get_inputs()[0].shape[0]
+        if isinstance(input_batch_size, str):
+            input_batch_size = None
+        input_name = session.get_inputs()[0].name
+        return cls(
+            session=session,
+            input_name=input_name,
+            class_names=class_names,
+            inference_config=inference_config,
+            background_class_id=background_class_id,
+            device=device,
+            input_batch_size=input_batch_size,
+        )
+
+    def __init__(
+        self,
+        session: onnxruntime.InferenceSession,
+        input_name: str,
+        inference_config: InferenceConfig,
+        class_names: List[str],
+        background_class_id: int,
+        device: torch.device,
+        input_batch_size: Optional[int],
+    ):
+        self._session = session
+        self._input_name = input_name
+        self._inference_config = inference_config
+        self._class_names = class_names
+        self._background_class_id = background_class_id
+        self._device = device
+        self._input_batch_size = input_batch_size
+        self._session_thread_lock = Lock()
+
+    @property
+    def class_names(self) -> List[str]:
+        return self._class_names
+
+    def pre_process(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor]],
+        input_color_format: Optional[ColorFormat] = None,
+        **kwargs,
+    ) -> Tuple[PreprocessedInputs, PreprocessingMetadata]:
+        return pre_process_network_input(
+            images=images,
+            image_pre_processing=self._inference_config.image_pre_processing,
+            network_input=self._inference_config.network_input,
+            target_device=self._device,
+            input_color_format=input_color_format,
+        )
+
+    def forward(
+        self, pre_processed_images: PreprocessedInputs, **kwargs
+    ) -> RawPrediction:
+        with self._session_thread_lock:
+            return run_session_with_batch_size_limit(
+                session=self._session,
+                inputs={self._input_name: pre_processed_images},
+                min_batch_size=self._input_batch_size,
+                max_batch_size=self._input_batch_size,
+            )[0]
+
+    def post_process(
+        self,
+        model_results: RawPrediction,
+        pre_processing_meta: PreprocessedInputs,
+        confidence_threshold: float = 0.5,
+        **kwargs,
+    ) -> List[SemanticSegmentationResult]:
+        results = []
+        for image_results, image_metadata in zip(model_results, pre_processing_meta):
+            inference_size = image_metadata.inference_size
+            mask_h_scale = model_results.shape[2] / inference_size.height
+            mask_w_scale = model_results.shape[3] / inference_size.width
+            mask_pad_top, mask_pad_bottom, mask_pad_left, mask_pad_right = (
+                round(mask_h_scale * image_metadata.pad_top),
+                round(mask_h_scale * image_metadata.pad_bottom),
+                round(mask_w_scale * image_metadata.pad_left),
+                round(mask_w_scale * image_metadata.pad_right),
+            )
+            _, mh, mw = image_results.shape
+            if (
+                mask_pad_top < 0
+                or mask_pad_bottom < 0
+                or mask_pad_left < 0
+                or mask_pad_right < 0
+            ):
+                image_results = torch.nn.functional.pad(
+                    image_results,
+                    (
+                        abs(min(mask_pad_left, 0)),
+                        abs(min(mask_pad_right, 0)),
+                        abs(min(mask_pad_top, 0)),
+                        abs(min(mask_pad_bottom, 0)),
+                    ),
+                    "constant",
+                    self._background_class_id,
+                )
+                padded_mask_offset_top = max(mask_pad_top, 0)
+                padded_mask_offset_bottom = max(mask_pad_bottom, 0)
+                padded_mask_offset_left = max(mask_pad_left, 0)
+                padded_mask_offset_right = max(mask_pad_right, 0)
+                image_results = image_results[
+                    :,
+                    padded_mask_offset_top : image_results.shape[1]
+                    - padded_mask_offset_bottom,
+                    padded_mask_offset_left : image_results.shape[2]
+                    - padded_mask_offset_right,
+                ]
+            else:
+                image_results = image_results[
+                    :,
+                    mask_pad_top : mh - mask_pad_bottom,
+                    mask_pad_left : mw - mask_pad_right,
+                ]
+            if (
+                image_results.shape[1]
+                != image_metadata.size_after_pre_processing.height
+                or image_results.shape[2]
+                != image_metadata.size_after_pre_processing.width
+            ):
+                image_results = functional.resize(
+                    image_results,
+                    [
+                        image_metadata.size_after_pre_processing.height,
+                        image_metadata.size_after_pre_processing.width,
+                    ],
+                    interpolation=functional.InterpolationMode.BILINEAR,
+                )
+            image_results = torch.nn.functional.softmax(image_results, dim=0)
+            image_confidence, image_class_ids = torch.max(image_results, dim=0)
+            below_threshold = image_confidence < confidence_threshold
+            image_confidence[below_threshold] = 0.0
+            image_class_ids[below_threshold] = self._background_class_id
+            if (
+                image_metadata.static_crop_offset.offset_x > 0
+                or image_metadata.static_crop_offset.offset_y > 0
+            ):
+                original_size_confidence_canvas = torch.zeros(
+                    (
+                        image_metadata.original_size.height,
+                        image_metadata.original_size.width,
+                    ),
+                    device=self._device,
+                    dtype=image_confidence.dtype,
+                )
+                original_size_confidence_canvas[
+                    image_metadata.static_crop_offset.offset_y : image_metadata.static_crop_offset.offset_y
+                    + image_confidence.shape[0],
+                    image_metadata.static_crop_offset.offset_x : image_metadata.static_crop_offset.offset_x
+                    + image_confidence.shape[1],
+                ] = image_confidence
+                original_size_confidence_class_id_canvas = (
+                    torch.ones(
+                        (
+                            image_metadata.original_size.height,
+                            image_metadata.original_size.width,
+                        ),
+                        device=self._device,
+                        dtype=image_class_ids.dtype,
+                    )
+                    * self._background_class_id
+                )
+                original_size_confidence_class_id_canvas[
+                    image_metadata.static_crop_offset.offset_y : image_metadata.static_crop_offset.offset_y
+                    + image_class_ids.shape[0],
+                    image_metadata.static_crop_offset.offset_x : image_metadata.static_crop_offset.offset_x
+                    + image_class_ids.shape[1],
+                ] = image_class_ids
+                image_class_ids = original_size_confidence_class_id_canvas
+                image_confidence = original_size_confidence_canvas
+            results.append(
+                SemanticSegmentationResult(
+                    segmentation_map=image_class_ids,
+                    confidence=image_confidence,
+                )
+            )
+        return results
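
To close the loop, a hedged end-to-end sketch of the class above (inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py, per the listing). The model package directory, execution provider, and image tensor are placeholders, and the exact input layout expected by pre_process is defined by pre_process_network_input, which is not part of this hunk:

import torch

from inference_models.models.deep_lab_v3_plus.deep_lab_v3_plus_segmentation_onnx import (
    DeepLabV3PlusForSemanticSegmentationOnnx,
)

# hypothetical local package dir containing class_names.txt, inference_config.json, weights.onnx
model = DeepLabV3PlusForSemanticSegmentationOnnx.from_pretrained(
    model_name_or_path="/path/to/model_package",
    onnx_execution_providers=["CPUExecutionProvider"],
)

image = torch.randint(0, 255, (480, 640, 3), dtype=torch.uint8)  # placeholder image tensor
pre_processed, metadata = model.pre_process(image)
raw_predictions = model.forward(pre_processed)
results = model.post_process(raw_predictions, metadata, confidence_threshold=0.5)
for result in results:
    print(result.segmentation_map.shape, result.confidence.shape)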