dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.3.248__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +13 -14
- dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
- tests/__init__.py +5 -7
- tests/conftest.py +8 -15
- tests/test_cli.py +1 -1
- tests/test_cuda.py +5 -8
- tests/test_engine.py +1 -1
- tests/test_exports.py +57 -12
- tests/test_integrations.py +4 -4
- tests/test_python.py +84 -53
- tests/test_solutions.py +160 -151
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +56 -62
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +21 -0
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
- ultralytics/cfg/datasets/dog-pose.yaml +28 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +5 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/default.yaml +1 -1
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
- ultralytics/cfg/models/v6/yolov6.yaml +1 -1
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +3 -4
- ultralytics/data/augment.py +285 -475
- ultralytics/data/base.py +18 -26
- ultralytics/data/build.py +147 -25
- ultralytics/data/converter.py +36 -46
- ultralytics/data/dataset.py +46 -74
- ultralytics/data/loaders.py +42 -49
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +34 -43
- ultralytics/engine/exporter.py +319 -237
- ultralytics/engine/model.py +148 -188
- ultralytics/engine/predictor.py +29 -38
- ultralytics/engine/results.py +177 -311
- ultralytics/engine/trainer.py +83 -59
- ultralytics/engine/tuner.py +23 -34
- ultralytics/engine/validator.py +39 -22
- ultralytics/hub/__init__.py +16 -19
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +5 -8
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +8 -10
- ultralytics/models/fastsam/predict.py +17 -29
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +5 -7
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +5 -8
- ultralytics/models/rtdetr/predict.py +15 -19
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +21 -23
- ultralytics/models/sam/__init__.py +15 -2
- ultralytics/models/sam/amg.py +14 -20
- ultralytics/models/sam/build.py +26 -19
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +29 -32
- ultralytics/models/sam/modules/blocks.py +83 -144
- ultralytics/models/sam/modules/decoders.py +19 -37
- ultralytics/models/sam/modules/encoders.py +44 -101
- ultralytics/models/sam/modules/memory_attention.py +16 -30
- ultralytics/models/sam/modules/sam.py +200 -73
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +18 -28
- ultralytics/models/sam/modules/utils.py +174 -50
- ultralytics/models/sam/predict.py +2248 -350
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +9 -12
- ultralytics/models/yolo/classify/train.py +11 -32
- ultralytics/models/yolo/classify/val.py +29 -28
- ultralytics/models/yolo/detect/predict.py +7 -10
- ultralytics/models/yolo/detect/train.py +11 -20
- ultralytics/models/yolo/detect/val.py +70 -58
- ultralytics/models/yolo/model.py +36 -53
- ultralytics/models/yolo/obb/predict.py +5 -14
- ultralytics/models/yolo/obb/train.py +11 -14
- ultralytics/models/yolo/obb/val.py +39 -36
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +6 -21
- ultralytics/models/yolo/pose/train.py +10 -15
- ultralytics/models/yolo/pose/val.py +38 -57
- ultralytics/models/yolo/segment/predict.py +14 -18
- ultralytics/models/yolo/segment/train.py +3 -6
- ultralytics/models/yolo/segment/val.py +93 -45
- ultralytics/models/yolo/world/train.py +8 -14
- ultralytics/models/yolo/world/train_world.py +11 -34
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +30 -43
- ultralytics/models/yolo/yoloe/train_seg.py +5 -10
- ultralytics/models/yolo/yoloe/val.py +15 -20
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +145 -77
- ultralytics/nn/modules/__init__.py +60 -60
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +132 -216
- ultralytics/nn/modules/conv.py +52 -97
- ultralytics/nn/modules/head.py +50 -103
- ultralytics/nn/modules/transformer.py +76 -88
- ultralytics/nn/modules/utils.py +16 -21
- ultralytics/nn/tasks.py +94 -154
- ultralytics/nn/text_model.py +40 -67
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +11 -17
- ultralytics/solutions/analytics.py +15 -16
- ultralytics/solutions/config.py +5 -6
- ultralytics/solutions/distance_calculation.py +10 -13
- ultralytics/solutions/heatmap.py +7 -13
- ultralytics/solutions/instance_segmentation.py +5 -8
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +12 -19
- ultralytics/solutions/object_cropper.py +8 -14
- ultralytics/solutions/parking_management.py +33 -31
- ultralytics/solutions/queue_management.py +10 -12
- ultralytics/solutions/region_counter.py +9 -12
- ultralytics/solutions/security_alarm.py +15 -20
- ultralytics/solutions/similarity_search.py +10 -15
- ultralytics/solutions/solutions.py +75 -74
- ultralytics/solutions/speed_estimation.py +7 -10
- ultralytics/solutions/streamlit_inference.py +2 -4
- ultralytics/solutions/templates/similarity-search.html +7 -18
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +3 -5
- ultralytics/trackers/bot_sort.py +10 -27
- ultralytics/trackers/byte_tracker.py +14 -30
- ultralytics/trackers/track.py +3 -6
- ultralytics/trackers/utils/gmc.py +11 -22
- ultralytics/trackers/utils/kalman_filter.py +37 -48
- ultralytics/trackers/utils/matching.py +12 -15
- ultralytics/utils/__init__.py +116 -116
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +17 -18
- ultralytics/utils/benchmarks.py +32 -46
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +5 -13
- ultralytics/utils/callbacks/comet.py +32 -46
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +7 -15
- ultralytics/utils/callbacks/platform.py +314 -38
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +23 -31
- ultralytics/utils/callbacks/wb.py +10 -13
- ultralytics/utils/checks.py +99 -76
- ultralytics/utils/cpu.py +3 -8
- ultralytics/utils/dist.py +8 -12
- ultralytics/utils/downloads.py +20 -30
- ultralytics/utils/errors.py +6 -14
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +4 -236
- ultralytics/utils/export/engine.py +237 -0
- ultralytics/utils/export/imx.py +91 -55
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +24 -28
- ultralytics/utils/git.py +9 -11
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +212 -114
- ultralytics/utils/loss.py +14 -22
- ultralytics/utils/metrics.py +126 -155
- ultralytics/utils/nms.py +13 -16
- ultralytics/utils/ops.py +107 -165
- ultralytics/utils/patches.py +33 -21
- ultralytics/utils/plotting.py +72 -80
- ultralytics/utils/tal.py +25 -39
- ultralytics/utils/torch_utils.py +52 -78
- ultralytics/utils/tqdm.py +20 -20
- ultralytics/utils/triton.py +13 -19
- ultralytics/utils/tuner.py +17 -5
- dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
ultralytics/engine/results.py
CHANGED
|
@@ -21,12 +21,11 @@ from ultralytics.utils.plotting import Annotator, colors, save_one_box
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class BaseTensor(SimpleClass):
|
|
24
|
-
"""
|
|
25
|
-
Base tensor class with additional methods for easy manipulation and device handling.
|
|
24
|
+
"""Base tensor class with additional methods for easy manipulation and device handling.
|
|
26
25
|
|
|
27
|
-
This class provides a foundation for tensor-like objects with device management capabilities,
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
This class provides a foundation for tensor-like objects with device management capabilities, supporting both
|
|
27
|
+
PyTorch tensors and NumPy arrays. It includes methods for moving data between devices and converting between tensor
|
|
28
|
+
types.
|
|
30
29
|
|
|
31
30
|
Attributes:
|
|
32
31
|
data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
|
|
@@ -49,18 +48,11 @@ class BaseTensor(SimpleClass):
|
|
|
49
48
|
"""
|
|
50
49
|
|
|
51
50
|
def __init__(self, data: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
52
|
-
"""
|
|
53
|
-
Initialize BaseTensor with prediction data and the original shape of the image.
|
|
51
|
+
"""Initialize BaseTensor with prediction data and the original shape of the image.
|
|
54
52
|
|
|
55
53
|
Args:
|
|
56
54
|
data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
|
|
57
55
|
orig_shape (tuple[int, int]): Original shape of the image in (height, width) format.
|
|
58
|
-
|
|
59
|
-
Examples:
|
|
60
|
-
>>> import torch
|
|
61
|
-
>>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
|
|
62
|
-
>>> orig_shape = (720, 1280)
|
|
63
|
-
>>> base_tensor = BaseTensor(data, orig_shape)
|
|
64
56
|
"""
|
|
65
57
|
assert isinstance(data, (torch.Tensor, np.ndarray)), "data must be torch.Tensor or np.ndarray"
|
|
66
58
|
self.data = data
|
|
@@ -68,8 +60,7 @@ class BaseTensor(SimpleClass):
|
|
|
68
60
|
|
|
69
61
|
@property
|
|
70
62
|
def shape(self) -> tuple[int, ...]:
|
|
71
|
-
"""
|
|
72
|
-
Return the shape of the underlying data tensor.
|
|
63
|
+
"""Return the shape of the underlying data tensor.
|
|
73
64
|
|
|
74
65
|
Returns:
|
|
75
66
|
(tuple[int, ...]): The shape of the data tensor.
|
|
@@ -83,8 +74,7 @@ class BaseTensor(SimpleClass):
|
|
|
83
74
|
return self.data.shape
|
|
84
75
|
|
|
85
76
|
def cpu(self):
|
|
86
|
-
"""
|
|
87
|
-
Return a copy of the tensor stored in CPU memory.
|
|
77
|
+
"""Return a copy of the tensor stored in CPU memory.
|
|
88
78
|
|
|
89
79
|
Returns:
|
|
90
80
|
(BaseTensor): A new BaseTensor object with the data tensor moved to CPU memory.
|
|
@@ -101,29 +91,26 @@ class BaseTensor(SimpleClass):
|
|
|
101
91
|
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
|
|
102
92
|
|
|
103
93
|
def numpy(self):
|
|
104
|
-
"""
|
|
105
|
-
Return a copy of the tensor as a numpy array.
|
|
94
|
+
"""Return a copy of this object with its data converted to a NumPy array.
|
|
106
95
|
|
|
107
96
|
Returns:
|
|
108
|
-
(
|
|
97
|
+
(BaseTensor): A new instance with `data` as a NumPy array.
|
|
109
98
|
|
|
110
99
|
Examples:
|
|
111
100
|
>>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
|
|
112
101
|
>>> orig_shape = (720, 1280)
|
|
113
102
|
>>> base_tensor = BaseTensor(data, orig_shape)
|
|
114
|
-
>>>
|
|
115
|
-
>>> print(type(
|
|
103
|
+
>>> numpy_tensor = base_tensor.numpy()
|
|
104
|
+
>>> print(type(numpy_tensor.data))
|
|
116
105
|
<class 'numpy.ndarray'>
|
|
117
106
|
"""
|
|
118
107
|
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
|
|
119
108
|
|
|
120
109
|
def cuda(self):
|
|
121
|
-
"""
|
|
122
|
-
Move the tensor to GPU memory.
|
|
110
|
+
"""Move the tensor to GPU memory.
|
|
123
111
|
|
|
124
112
|
Returns:
|
|
125
|
-
(BaseTensor): A new BaseTensor instance with the data moved to GPU memory
|
|
126
|
-
numpy array, otherwise returns self.
|
|
113
|
+
(BaseTensor): A new BaseTensor instance with the data moved to GPU memory.
|
|
127
114
|
|
|
128
115
|
Examples:
|
|
129
116
|
>>> import torch
|
|
@@ -137,8 +124,7 @@ class BaseTensor(SimpleClass):
|
|
|
137
124
|
return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)
|
|
138
125
|
|
|
139
126
|
def to(self, *args, **kwargs):
|
|
140
|
-
"""
|
|
141
|
-
Return a copy of the tensor with the specified device and dtype.
|
|
127
|
+
"""Return a copy of the tensor with the specified device and dtype.
|
|
142
128
|
|
|
143
129
|
Args:
|
|
144
130
|
*args (Any): Variable length argument list to be passed to torch.Tensor.to().
|
|
@@ -155,8 +141,7 @@ class BaseTensor(SimpleClass):
|
|
|
155
141
|
return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
|
|
156
142
|
|
|
157
143
|
def __len__(self) -> int:
|
|
158
|
-
"""
|
|
159
|
-
Return the length of the underlying data tensor.
|
|
144
|
+
"""Return the length of the underlying data tensor.
|
|
160
145
|
|
|
161
146
|
Returns:
|
|
162
147
|
(int): The number of elements in the first dimension of the data tensor.
|
|
@@ -170,8 +155,7 @@ class BaseTensor(SimpleClass):
|
|
|
170
155
|
return len(self.data)
|
|
171
156
|
|
|
172
157
|
def __getitem__(self, idx):
|
|
173
|
-
"""
|
|
174
|
-
Return a new BaseTensor instance containing the specified indexed elements of the data tensor.
|
|
158
|
+
"""Return a new BaseTensor instance containing the specified indexed elements of the data tensor.
|
|
175
159
|
|
|
176
160
|
Args:
|
|
177
161
|
idx (int | list[int] | torch.Tensor): Index or indices to select from the data tensor.
|
|
@@ -190,12 +174,11 @@ class BaseTensor(SimpleClass):
|
|
|
190
174
|
|
|
191
175
|
|
|
192
176
|
class Results(SimpleClass, DataExportMixin):
|
|
193
|
-
"""
|
|
194
|
-
A class for storing and manipulating inference results.
|
|
177
|
+
"""A class for storing and manipulating inference results.
|
|
195
178
|
|
|
196
|
-
This class provides comprehensive functionality for handling inference results from various
|
|
197
|
-
|
|
198
|
-
|
|
179
|
+
This class provides comprehensive functionality for handling inference results from various Ultralytics models,
|
|
180
|
+
including detection, segmentation, classification, and pose estimation. It supports visualization, data export, and
|
|
181
|
+
various coordinate transformations.
|
|
199
182
|
|
|
200
183
|
Attributes:
|
|
201
184
|
orig_img (np.ndarray): The original image as a numpy array.
|
|
@@ -217,14 +200,14 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
217
200
|
cuda: Move all tensors in the Results object to GPU memory.
|
|
218
201
|
to: Move all tensors to the specified device and dtype.
|
|
219
202
|
new: Create a new Results object with the same image, path, names, and speed attributes.
|
|
220
|
-
plot: Plot detection results on an input
|
|
203
|
+
plot: Plot detection results on an input BGR image.
|
|
221
204
|
show: Display the image with annotated inference results.
|
|
222
205
|
save: Save annotated inference results image to file.
|
|
223
206
|
verbose: Return a log string for each task in the results.
|
|
224
207
|
save_txt: Save detection results to a text file.
|
|
225
208
|
save_crop: Save cropped detection images to specified directory.
|
|
226
209
|
summary: Convert inference results to a summarized dictionary.
|
|
227
|
-
to_df: Convert detection results to a Polars
|
|
210
|
+
to_df: Convert detection results to a Polars DataFrame.
|
|
228
211
|
to_json: Convert detection results to JSON format.
|
|
229
212
|
to_csv: Convert detection results to a CSV format.
|
|
230
213
|
|
|
@@ -249,8 +232,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
249
232
|
obb: torch.Tensor | None = None,
|
|
250
233
|
speed: dict[str, float] | None = None,
|
|
251
234
|
) -> None:
|
|
252
|
-
"""
|
|
253
|
-
Initialize the Results class for storing and manipulating inference results.
|
|
235
|
+
"""Initialize the Results class for storing and manipulating inference results.
|
|
254
236
|
|
|
255
237
|
Args:
|
|
256
238
|
orig_img (np.ndarray): The original image as a numpy array.
|
|
@@ -263,12 +245,6 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
263
245
|
obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection.
|
|
264
246
|
speed (dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
|
|
265
247
|
|
|
266
|
-
Examples:
|
|
267
|
-
>>> results = model("path/to/image.jpg")
|
|
268
|
-
>>> result = results[0] # Get the first result
|
|
269
|
-
>>> boxes = result.boxes # Get the boxes for the first result
|
|
270
|
-
>>> masks = result.masks # Get the masks for the first result
|
|
271
|
-
|
|
272
248
|
Notes:
|
|
273
249
|
For the default pose model, keypoint indices for human body pose estimation are:
|
|
274
250
|
0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear
|
|
@@ -290,8 +266,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
290
266
|
self._keys = "boxes", "masks", "probs", "keypoints", "obb"
|
|
291
267
|
|
|
292
268
|
def __getitem__(self, idx):
|
|
293
|
-
"""
|
|
294
|
-
Return a Results object for a specific index of inference results.
|
|
269
|
+
"""Return a Results object for a specific index of inference results.
|
|
295
270
|
|
|
296
271
|
Args:
|
|
297
272
|
idx (int | slice): Index or slice to retrieve from the Results object.
|
|
@@ -307,12 +282,11 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
307
282
|
return self._apply("__getitem__", idx)
|
|
308
283
|
|
|
309
284
|
def __len__(self) -> int:
|
|
310
|
-
"""
|
|
311
|
-
Return the number of detections in the Results object.
|
|
285
|
+
"""Return the number of detections in the Results object.
|
|
312
286
|
|
|
313
287
|
Returns:
|
|
314
|
-
(int): The number of detections, determined by the length of the first non-empty
|
|
315
|
-
|
|
288
|
+
(int): The number of detections, determined by the length of the first non-empty attribute in (masks, probs,
|
|
289
|
+
keypoints, or obb).
|
|
316
290
|
|
|
317
291
|
Examples:
|
|
318
292
|
>>> results = Results(orig_img, path, names, boxes=torch.rand(5, 4))
|
|
@@ -332,15 +306,14 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
332
306
|
obb: torch.Tensor | None = None,
|
|
333
307
|
keypoints: torch.Tensor | None = None,
|
|
334
308
|
):
|
|
335
|
-
"""
|
|
336
|
-
Update the Results object with new detection data.
|
|
309
|
+
"""Update the Results object with new detection data.
|
|
337
310
|
|
|
338
|
-
This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the
|
|
339
|
-
|
|
311
|
+
This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the Results
|
|
312
|
+
object. It ensures that boxes are clipped to the original image shape.
|
|
340
313
|
|
|
341
314
|
Args:
|
|
342
|
-
boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and
|
|
343
|
-
|
|
315
|
+
boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and confidence
|
|
316
|
+
scores. The format is (x1, y1, x2, y2, conf, class).
|
|
344
317
|
masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks.
|
|
345
318
|
probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities.
|
|
346
319
|
obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates.
|
|
@@ -363,8 +336,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
363
336
|
self.keypoints = Keypoints(keypoints, self.orig_shape)
|
|
364
337
|
|
|
365
338
|
def _apply(self, fn: str, *args, **kwargs):
|
|
366
|
-
"""
|
|
367
|
-
Apply a function to all non-empty attributes and return a new Results object with modified attributes.
|
|
339
|
+
"""Apply a function to all non-empty attributes and return a new Results object with modified attributes.
|
|
368
340
|
|
|
369
341
|
This method is internally called by methods like .to(), .cuda(), .cpu(), etc.
|
|
370
342
|
|
|
@@ -390,8 +362,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
390
362
|
return r
|
|
391
363
|
|
|
392
364
|
def cpu(self):
|
|
393
|
-
"""
|
|
394
|
-
Return a copy of the Results object with all its tensors moved to CPU memory.
|
|
365
|
+
"""Return a copy of the Results object with all its tensors moved to CPU memory.
|
|
395
366
|
|
|
396
367
|
This method creates a new Results object with all tensor attributes (boxes, masks, probs, keypoints, obb)
|
|
397
368
|
transferred to CPU memory. It's useful for moving data from GPU to CPU for further processing or saving.
|
|
@@ -407,8 +378,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
407
378
|
return self._apply("cpu")
|
|
408
379
|
|
|
409
380
|
def numpy(self):
|
|
410
|
-
"""
|
|
411
|
-
Convert all tensors in the Results object to numpy arrays.
|
|
381
|
+
"""Convert all tensors in the Results object to numpy arrays.
|
|
412
382
|
|
|
413
383
|
Returns:
|
|
414
384
|
(Results): A new Results object with all tensors converted to numpy arrays.
|
|
@@ -426,8 +396,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
426
396
|
return self._apply("numpy")
|
|
427
397
|
|
|
428
398
|
def cuda(self):
|
|
429
|
-
"""
|
|
430
|
-
Move all tensors in the Results object to GPU memory.
|
|
399
|
+
"""Move all tensors in the Results object to GPU memory.
|
|
431
400
|
|
|
432
401
|
Returns:
|
|
433
402
|
(Results): A new Results object with all tensors moved to CUDA device.
|
|
@@ -441,8 +410,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
441
410
|
return self._apply("cuda")
|
|
442
411
|
|
|
443
412
|
def to(self, *args, **kwargs):
|
|
444
|
-
"""
|
|
445
|
-
Move all tensors in the Results object to the specified device and dtype.
|
|
413
|
+
"""Move all tensors in the Results object to the specified device and dtype.
|
|
446
414
|
|
|
447
415
|
Args:
|
|
448
416
|
*args (Any): Variable length argument list to be passed to torch.Tensor.to().
|
|
@@ -460,8 +428,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
460
428
|
return self._apply("to", *args, **kwargs)
|
|
461
429
|
|
|
462
430
|
def new(self):
|
|
463
|
-
"""
|
|
464
|
-
Create a new Results object with the same image, path, names, and speed attributes.
|
|
431
|
+
"""Create a new Results object with the same image, path, names, and speed attributes.
|
|
465
432
|
|
|
466
433
|
Returns:
|
|
467
434
|
(Results): A new Results object with copied attributes from the original instance.
|
|
@@ -493,8 +460,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
493
460
|
color_mode: str = "class",
|
|
494
461
|
txt_color: tuple[int, int, int] = (255, 255, 255),
|
|
495
462
|
) -> np.ndarray:
|
|
496
|
-
"""
|
|
497
|
-
Plot detection results on an input RGB image.
|
|
463
|
+
"""Plot detection results on an input BGR image.
|
|
498
464
|
|
|
499
465
|
Args:
|
|
500
466
|
conf (bool): Whether to plot detection confidence scores.
|
|
@@ -514,10 +480,10 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
514
480
|
save (bool): Whether to save the annotated image.
|
|
515
481
|
filename (str | None): Filename to save image if save is True.
|
|
516
482
|
color_mode (str): Specify the color mode, e.g., 'instance' or 'class'.
|
|
517
|
-
txt_color (tuple[int, int, int]):
|
|
483
|
+
txt_color (tuple[int, int, int]): Text color in BGR format for classification output.
|
|
518
484
|
|
|
519
485
|
Returns:
|
|
520
|
-
(np.ndarray): Annotated image as a
|
|
486
|
+
(np.ndarray | PIL.Image.Image): Annotated image as a NumPy array (BGR) or PIL image (RGB) if `pil=True`.
|
|
521
487
|
|
|
522
488
|
Examples:
|
|
523
489
|
>>> results = model("image.jpg")
|
|
@@ -527,7 +493,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
527
493
|
"""
|
|
528
494
|
assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
|
|
529
495
|
if img is None and isinstance(self.orig_img, torch.Tensor):
|
|
530
|
-
img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).
|
|
496
|
+
img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).byte().cpu().numpy()
|
|
531
497
|
|
|
532
498
|
names = self.names
|
|
533
499
|
is_obb = self.obb is not None
|
|
@@ -610,11 +576,10 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
610
576
|
if save:
|
|
611
577
|
annotator.save(filename or f"results_{Path(self.path).name}")
|
|
612
578
|
|
|
613
|
-
return annotator.
|
|
579
|
+
return annotator.result(pil)
|
|
614
580
|
|
|
615
581
|
def show(self, *args, **kwargs):
|
|
616
|
-
"""
|
|
617
|
-
Display the image with annotated inference results.
|
|
582
|
+
"""Display the image with annotated inference results.
|
|
618
583
|
|
|
619
584
|
This method plots the detection results on the original image and displays it. It's a convenient way to
|
|
620
585
|
visualize the model's predictions directly.
|
|
@@ -632,15 +597,14 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
632
597
|
self.plot(show=True, *args, **kwargs)
|
|
633
598
|
|
|
634
599
|
def save(self, filename: str | None = None, *args, **kwargs) -> str:
|
|
635
|
-
"""
|
|
636
|
-
Save annotated inference results image to file.
|
|
600
|
+
"""Save annotated inference results image to file.
|
|
637
601
|
|
|
638
602
|
This method plots the detection results on the original image and saves the annotated image to a file. It
|
|
639
603
|
utilizes the `plot` method to generate the annotated image and then saves it to the specified filename.
|
|
640
604
|
|
|
641
605
|
Args:
|
|
642
|
-
filename (str | Path | None): The filename to save the annotated image. If None, a default filename
|
|
643
|
-
|
|
606
|
+
filename (str | Path | None): The filename to save the annotated image. If None, a default filename is
|
|
607
|
+
generated based on the original image path.
|
|
644
608
|
*args (Any): Variable length argument list to be passed to the `plot` method.
|
|
645
609
|
**kwargs (Any): Arbitrary keyword arguments to be passed to the `plot` method.
|
|
646
610
|
|
|
@@ -661,15 +625,14 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
661
625
|
return filename
|
|
662
626
|
|
|
663
627
|
def verbose(self) -> str:
|
|
664
|
-
"""
|
|
665
|
-
Return a log string for each task in the results, detailing detection and classification outcomes.
|
|
628
|
+
"""Return a log string for each task in the results, detailing detection and classification outcomes.
|
|
666
629
|
|
|
667
630
|
This method generates a human-readable string summarizing the detection and classification results. It includes
|
|
668
631
|
the number of detections for each class and the top probabilities for classification tasks.
|
|
669
632
|
|
|
670
633
|
Returns:
|
|
671
|
-
(str): A formatted string containing a summary of the results. For detection tasks, it includes the
|
|
672
|
-
|
|
634
|
+
(str): A formatted string containing a summary of the results. For detection tasks, it includes the number
|
|
635
|
+
of detections per class. For classification tasks, it includes the top 5 class probabilities.
|
|
673
636
|
|
|
674
637
|
Examples:
|
|
675
638
|
>>> results = model("path/to/image.jpg")
|
|
@@ -693,8 +656,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
693
656
|
return "".join(f"{n} {self.names[i]}{'s' * (n > 1)}, " for i, n in enumerate(counts) if n > 0)
|
|
694
657
|
|
|
695
658
|
def save_txt(self, txt_file: str | Path, save_conf: bool = False) -> str:
|
|
696
|
-
"""
|
|
697
|
-
Save detection results to a text file.
|
|
659
|
+
"""Save detection results to a text file.
|
|
698
660
|
|
|
699
661
|
Args:
|
|
700
662
|
txt_file (str | Path): Path to the output text file.
|
|
@@ -750,8 +712,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
750
712
|
return str(txt_file)
|
|
751
713
|
|
|
752
714
|
def save_crop(self, save_dir: str | Path, file_name: str | Path = Path("im.jpg")):
|
|
753
|
-
"""
|
|
754
|
-
Save cropped detection images to specified directory.
|
|
715
|
+
"""Save cropped detection images to specified directory.
|
|
755
716
|
|
|
756
717
|
This method saves cropped images of detected objects to a specified directory. Each crop is saved in a
|
|
757
718
|
subdirectory named after the object's class, with the filename based on the input file_name.
|
|
@@ -760,22 +721,22 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
760
721
|
save_dir (str | Path): Directory path where cropped images will be saved.
|
|
761
722
|
file_name (str | Path): Base filename for the saved cropped images.
|
|
762
723
|
|
|
724
|
+
Examples:
|
|
725
|
+
>>> results = model("path/to/image.jpg")
|
|
726
|
+
>>> for result in results:
|
|
727
|
+
>>> result.save_crop(save_dir="path/to/crops", file_name="detection")
|
|
728
|
+
|
|
763
729
|
Notes:
|
|
764
730
|
- This method does not support Classify or Oriented Bounding Box (OBB) tasks.
|
|
765
731
|
- Crops are saved as 'save_dir/class_name/file_name.jpg'.
|
|
766
732
|
- The method will create necessary subdirectories if they don't exist.
|
|
767
733
|
- Original image is copied before cropping to avoid modifying the original.
|
|
768
|
-
|
|
769
|
-
Examples:
|
|
770
|
-
>>> results = model("path/to/image.jpg")
|
|
771
|
-
>>> for result in results:
|
|
772
|
-
>>> result.save_crop(save_dir="path/to/crops", file_name="detection")
|
|
773
734
|
"""
|
|
774
735
|
if self.probs is not None:
|
|
775
|
-
LOGGER.warning("Classify task
|
|
736
|
+
LOGGER.warning("Classify task does not support `save_crop`.")
|
|
776
737
|
return
|
|
777
738
|
if self.obb is not None:
|
|
778
|
-
LOGGER.warning("OBB task
|
|
739
|
+
LOGGER.warning("OBB task does not support `save_crop`.")
|
|
779
740
|
return
|
|
780
741
|
for d in self.boxes:
|
|
781
742
|
save_one_box(
|
|
@@ -786,11 +747,10 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
786
747
|
)
|
|
787
748
|
|
|
788
749
|
def summary(self, normalize: bool = False, decimals: int = 5) -> list[dict[str, Any]]:
|
|
789
|
-
"""
|
|
790
|
-
Convert inference results to a summarized dictionary with optional normalization for box coordinates.
|
|
750
|
+
"""Convert inference results to a summarized dictionary with optional normalization for box coordinates.
|
|
791
751
|
|
|
792
|
-
This method creates a list of detection dictionaries, each containing information about a single
|
|
793
|
-
|
|
752
|
+
This method creates a list of detection dictionaries, each containing information about a single detection or
|
|
753
|
+
classification result. For classification tasks, it returns the top class and its
|
|
794
754
|
confidence. For detection tasks, it includes class information, bounding box coordinates, and
|
|
795
755
|
optionally mask segments and keypoints.
|
|
796
756
|
|
|
@@ -799,8 +759,8 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
799
759
|
decimals (int): Number of decimal places to round the output values to.
|
|
800
760
|
|
|
801
761
|
Returns:
|
|
802
|
-
(list[dict[str, Any]]): A list of dictionaries, each containing summarized information for a single
|
|
803
|
-
or classification result. The structure of each dictionary varies based on the task type
|
|
762
|
+
(list[dict[str, Any]]): A list of dictionaries, each containing summarized information for a single
|
|
763
|
+
detection or classification result. The structure of each dictionary varies based on the task type
|
|
804
764
|
(classification or detection) and available information (boxes, masks, keypoints).
|
|
805
765
|
|
|
806
766
|
Examples:
|
|
@@ -853,12 +813,11 @@ class Results(SimpleClass, DataExportMixin):
|
|
|
853
813
|
|
|
854
814
|
|
|
855
815
|
class Boxes(BaseTensor):
|
|
856
|
-
"""
|
|
857
|
-
A class for managing and manipulating detection boxes.
|
|
816
|
+
"""A class for managing and manipulating detection boxes.
|
|
858
817
|
|
|
859
818
|
This class provides comprehensive functionality for handling detection boxes, including their coordinates,
|
|
860
|
-
confidence scores, class labels, and optional tracking IDs. It supports various box formats and offers
|
|
861
|
-
|
|
819
|
+
confidence scores, class labels, and optional tracking IDs. It supports various box formats and offers methods for
|
|
820
|
+
easy manipulation and conversion between different coordinate systems.
|
|
862
821
|
|
|
863
822
|
Attributes:
|
|
864
823
|
data (torch.Tensor | np.ndarray): The raw tensor containing detection boxes and associated data.
|
|
@@ -890,31 +849,16 @@ class Boxes(BaseTensor):
|
|
|
890
849
|
"""
|
|
891
850
|
|
|
892
851
|
def __init__(self, boxes: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
893
|
-
"""
|
|
894
|
-
Initialize the Boxes class with detection box data and the original image shape.
|
|
852
|
+
"""Initialize the Boxes class with detection box data and the original image shape.
|
|
895
853
|
|
|
896
|
-
This class manages detection boxes, providing easy access and manipulation of box coordinates,
|
|
897
|
-
|
|
898
|
-
|
|
854
|
+
This class manages detection boxes, providing easy access and manipulation of box coordinates, confidence
|
|
855
|
+
scores, class identifiers, and optional tracking IDs. It supports multiple formats for box coordinates,
|
|
856
|
+
including both absolute and normalized forms.
|
|
899
857
|
|
|
900
858
|
Args:
|
|
901
|
-
boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape
|
|
902
|
-
(num_boxes,
|
|
903
|
-
[x1, y1, x2, y2, (optional) track_id, confidence, class].
|
|
859
|
+
boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape (num_boxes, 6) or
|
|
860
|
+
(num_boxes, 7). Columns should contain [x1, y1, x2, y2, (optional) track_id, confidence, class].
|
|
904
861
|
orig_shape (tuple[int, int]): The original image shape as (height, width). Used for normalization.
|
|
905
|
-
|
|
906
|
-
Attributes:
|
|
907
|
-
data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
|
|
908
|
-
orig_shape (tuple[int, int]): The original image size, used for normalization.
|
|
909
|
-
is_track (bool): Indicates whether tracking IDs are included in the box data.
|
|
910
|
-
|
|
911
|
-
Examples:
|
|
912
|
-
>>> import torch
|
|
913
|
-
>>> boxes = torch.tensor([[100, 50, 150, 100, 0.9, 0]])
|
|
914
|
-
>>> orig_shape = (480, 640)
|
|
915
|
-
>>> detection_boxes = Boxes(boxes, orig_shape)
|
|
916
|
-
>>> print(detection_boxes.xyxy)
|
|
917
|
-
tensor([[100., 50., 150., 100.]])
|
|
918
862
|
"""
|
|
919
863
|
if boxes.ndim == 1:
|
|
920
864
|
boxes = boxes[None, :]
|
|
@@ -926,12 +870,11 @@ class Boxes(BaseTensor):
|
|
|
926
870
|
|
|
927
871
|
@property
|
|
928
872
|
def xyxy(self) -> torch.Tensor | np.ndarray:
|
|
929
|
-
"""
|
|
930
|
-
Return bounding boxes in [x1, y1, x2, y2] format.
|
|
873
|
+
"""Return bounding boxes in [x1, y1, x2, y2] format.
|
|
931
874
|
|
|
932
875
|
Returns:
|
|
933
|
-
(torch.Tensor | np.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box
|
|
934
|
-
|
|
876
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box coordinates in
|
|
877
|
+
[x1, y1, x2, y2] format, where n is the number of boxes.
|
|
935
878
|
|
|
936
879
|
Examples:
|
|
937
880
|
>>> results = model("image.jpg")
|
|
@@ -943,12 +886,11 @@ class Boxes(BaseTensor):
|
|
|
943
886
|
|
|
944
887
|
@property
|
|
945
888
|
def conf(self) -> torch.Tensor | np.ndarray:
|
|
946
|
-
"""
|
|
947
|
-
Return the confidence scores for each detection box.
|
|
889
|
+
"""Return the confidence scores for each detection box.
|
|
948
890
|
|
|
949
891
|
Returns:
|
|
950
|
-
(torch.Tensor | np.ndarray): A 1D tensor or array containing confidence scores for each detection,
|
|
951
|
-
|
|
892
|
+
(torch.Tensor | np.ndarray): A 1D tensor or array containing confidence scores for each detection, with
|
|
893
|
+
shape (N,) where N is the number of detections.
|
|
952
894
|
|
|
953
895
|
Examples:
|
|
954
896
|
>>> boxes = Boxes(torch.tensor([[10, 20, 30, 40, 0.9, 0]]), orig_shape=(100, 100))
|
|
@@ -960,12 +902,11 @@ class Boxes(BaseTensor):
|
|
|
960
902
|
|
|
961
903
|
@property
|
|
962
904
|
def cls(self) -> torch.Tensor | np.ndarray:
|
|
963
|
-
"""
|
|
964
|
-
Return the class ID tensor representing category predictions for each bounding box.
|
|
905
|
+
"""Return the class ID tensor representing category predictions for each bounding box.
|
|
965
906
|
|
|
966
907
|
Returns:
|
|
967
|
-
(torch.Tensor | np.ndarray): A tensor or numpy array containing the class IDs for each detection box.
|
|
968
|
-
|
|
908
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array containing the class IDs for each detection box. The
|
|
909
|
+
shape is (N,), where N is the number of boxes.
|
|
969
910
|
|
|
970
911
|
Examples:
|
|
971
912
|
>>> results = model("image.jpg")
|
|
@@ -977,12 +918,11 @@ class Boxes(BaseTensor):
|
|
|
977
918
|
|
|
978
919
|
@property
|
|
979
920
|
def id(self) -> torch.Tensor | np.ndarray | None:
|
|
980
|
-
"""
|
|
981
|
-
Return the tracking IDs for each detection box if available.
|
|
921
|
+
"""Return the tracking IDs for each detection box if available.
|
|
982
922
|
|
|
983
923
|
Returns:
|
|
984
|
-
(torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled,
|
|
985
|
-
|
|
924
|
+
(torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled, otherwise None.
|
|
925
|
+
Shape is (N,) where N is the number of boxes.
|
|
986
926
|
|
|
987
927
|
Examples:
|
|
988
928
|
>>> results = model.track("path/to/video.mp4")
|
|
@@ -1003,36 +943,33 @@ class Boxes(BaseTensor):
|
|
|
1003
943
|
@property
|
|
1004
944
|
@lru_cache(maxsize=2)
|
|
1005
945
|
def xywh(self) -> torch.Tensor | np.ndarray:
|
|
1006
|
-
"""
|
|
1007
|
-
Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
|
|
946
|
+
"""Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
|
|
1008
947
|
|
|
1009
948
|
Returns:
|
|
1010
|
-
(torch.Tensor | np.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center,
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
number of boxes.
|
|
949
|
+
(torch.Tensor | np.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center, y_center
|
|
950
|
+
are the coordinates of the center point of the bounding box, width, height are the dimensions of the
|
|
951
|
+
bounding box and the shape of the returned tensor is (N, 4), where N is the number of boxes.
|
|
1014
952
|
|
|
1015
953
|
Examples:
|
|
1016
954
|
>>> boxes = Boxes(torch.tensor([[100, 50, 150, 100], [200, 150, 300, 250]]), orig_shape=(480, 640))
|
|
1017
955
|
>>> xywh = boxes.xywh
|
|
1018
956
|
>>> print(xywh)
|
|
1019
|
-
tensor([[
|
|
1020
|
-
[
|
|
957
|
+
tensor([[125.0000, 75.0000, 50.0000, 50.0000],
|
|
958
|
+
[250.0000, 200.0000, 100.0000, 100.0000]])
|
|
1021
959
|
"""
|
|
1022
960
|
return ops.xyxy2xywh(self.xyxy)
|
|
1023
961
|
|
|
1024
962
|
@property
|
|
1025
963
|
@lru_cache(maxsize=2)
|
|
1026
964
|
def xyxyn(self) -> torch.Tensor | np.ndarray:
|
|
1027
|
-
"""
|
|
1028
|
-
Return normalized bounding box coordinates relative to the original image size.
|
|
965
|
+
"""Return normalized bounding box coordinates relative to the original image size.
|
|
1029
966
|
|
|
1030
|
-
This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format,
|
|
1031
|
-
|
|
967
|
+
This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format, normalized to the
|
|
968
|
+
range [0, 1] based on the original image dimensions.
|
|
1032
969
|
|
|
1033
970
|
Returns:
|
|
1034
|
-
(torch.Tensor | np.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is
|
|
1035
|
-
|
|
971
|
+
(torch.Tensor | np.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is the number of
|
|
972
|
+
boxes. Each row contains [x1, y1, x2, y2] values normalized to [0, 1].
|
|
1036
973
|
|
|
1037
974
|
Examples:
|
|
1038
975
|
>>> boxes = Boxes(torch.tensor([[100, 50, 300, 400, 0.9, 0]]), orig_shape=(480, 640))
|
|
@@ -1048,16 +985,15 @@ class Boxes(BaseTensor):
|
|
|
1048
985
|
@property
|
|
1049
986
|
@lru_cache(maxsize=2)
|
|
1050
987
|
def xywhn(self) -> torch.Tensor | np.ndarray:
|
|
1051
|
-
"""
|
|
1052
|
-
Return normalized bounding boxes in [x, y, width, height] format.
|
|
988
|
+
"""Return normalized bounding boxes in [x, y, width, height] format.
|
|
1053
989
|
|
|
1054
|
-
This property calculates and returns the normalized bounding box coordinates in the format
|
|
1055
|
-
|
|
990
|
+
This property calculates and returns the normalized bounding box coordinates in the format [x_center, y_center,
|
|
991
|
+
width, height], where all values are relative to the original image dimensions.
|
|
1056
992
|
|
|
1057
993
|
Returns:
|
|
1058
|
-
(torch.Tensor | np.ndarray): Normalized bounding boxes with shape (N, 4), where N is the
|
|
1059
|
-
|
|
1060
|
-
|
|
994
|
+
(torch.Tensor | np.ndarray): Normalized bounding boxes with shape (N, 4), where N is the number of boxes.
|
|
995
|
+
Each row contains [x_center, y_center, width, height] values normalized to [0, 1] based on the original
|
|
996
|
+
image dimensions.
|
|
1061
997
|
|
|
1062
998
|
Examples:
|
|
1063
999
|
>>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0]]), orig_shape=(480, 640))
|
|
@@ -1072,11 +1008,10 @@ class Boxes(BaseTensor):
|
|
|
1072
1008
|
|
|
1073
1009
|
|
|
1074
1010
|
class Masks(BaseTensor):
|
|
1075
|
-
"""
|
|
1076
|
-
A class for storing and manipulating detection masks.
|
|
1011
|
+
"""A class for storing and manipulating detection masks.
|
|
1077
1012
|
|
|
1078
|
-
This class extends BaseTensor and provides functionality for handling segmentation masks,
|
|
1079
|
-
|
|
1013
|
+
This class extends BaseTensor and provides functionality for handling segmentation masks, including methods for
|
|
1014
|
+
converting between pixel and normalized coordinates.
|
|
1080
1015
|
|
|
1081
1016
|
Attributes:
|
|
1082
1017
|
data (torch.Tensor | np.ndarray): The raw tensor or array containing mask data.
|
|
@@ -1099,19 +1034,11 @@ class Masks(BaseTensor):
|
|
|
1099
1034
|
"""
|
|
1100
1035
|
|
|
1101
1036
|
def __init__(self, masks: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
1102
|
-
"""
|
|
1103
|
-
Initialize the Masks class with detection mask data and the original image shape.
|
|
1037
|
+
"""Initialize the Masks class with detection mask data and the original image shape.
|
|
1104
1038
|
|
|
1105
1039
|
Args:
|
|
1106
1040
|
masks (torch.Tensor | np.ndarray): Detection masks with shape (num_masks, height, width).
|
|
1107
1041
|
orig_shape (tuple): The original image shape as (height, width). Used for normalization.
|
|
1108
|
-
|
|
1109
|
-
Examples:
|
|
1110
|
-
>>> import torch
|
|
1111
|
-
>>> from ultralytics.engine.results import Masks
|
|
1112
|
-
>>> masks = torch.rand(10, 160, 160) # 10 masks of 160x160 resolution
|
|
1113
|
-
>>> orig_shape = (720, 1280) # Original image shape
|
|
1114
|
-
>>> mask_obj = Masks(masks, orig_shape)
|
|
1115
1042
|
"""
|
|
1116
1043
|
if masks.ndim == 2:
|
|
1117
1044
|
masks = masks[None, :]
|
|
@@ -1120,15 +1047,14 @@ class Masks(BaseTensor):
|
|
|
1120
1047
|
@property
|
|
1121
1048
|
@lru_cache(maxsize=1)
|
|
1122
1049
|
def xyn(self) -> list[np.ndarray]:
|
|
1123
|
-
"""
|
|
1124
|
-
Return normalized xy-coordinates of the segmentation masks.
|
|
1050
|
+
"""Return normalized xy-coordinates of the segmentation masks.
|
|
1125
1051
|
|
|
1126
|
-
This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates
|
|
1127
|
-
|
|
1052
|
+
This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates are
|
|
1053
|
+
normalized relative to the original image shape.
|
|
1128
1054
|
|
|
1129
1055
|
Returns:
|
|
1130
|
-
(list[np.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates
|
|
1131
|
-
|
|
1056
|
+
(list[np.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates of a
|
|
1057
|
+
single segmentation mask. Each array has shape (N, 2), where N is the number of points in the
|
|
1132
1058
|
mask contour.
|
|
1133
1059
|
|
|
1134
1060
|
Examples:
|
|
@@ -1145,16 +1071,14 @@ class Masks(BaseTensor):
|
|
|
1145
1071
|
@property
|
|
1146
1072
|
@lru_cache(maxsize=1)
|
|
1147
1073
|
def xy(self) -> list[np.ndarray]:
|
|
1148
|
-
"""
|
|
1149
|
-
Return the [x, y] pixel coordinates for each segment in the mask tensor.
|
|
1074
|
+
"""Return the [x, y] pixel coordinates for each segment in the mask tensor.
|
|
1150
1075
|
|
|
1151
|
-
This property calculates and returns a list of pixel coordinates for each segmentation mask in the
|
|
1152
|
-
|
|
1076
|
+
This property calculates and returns a list of pixel coordinates for each segmentation mask in the Masks object.
|
|
1077
|
+
The coordinates are scaled to match the original image dimensions.
|
|
1153
1078
|
|
|
1154
1079
|
Returns:
|
|
1155
|
-
(list[np.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel
|
|
1156
|
-
|
|
1157
|
-
number of points in the segment.
|
|
1080
|
+
(list[np.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel coordinates for a
|
|
1081
|
+
single segmentation mask. Each array has shape (N, 2), where N is the number of points in the segment.
|
|
1158
1082
|
|
|
1159
1083
|
Examples:
|
|
1160
1084
|
>>> results = model("image.jpg")
|
|
@@ -1170,12 +1094,10 @@ class Masks(BaseTensor):
|
|
|
1170
1094
|
|
|
1171
1095
|
|
|
1172
1096
|
class Keypoints(BaseTensor):
|
|
1173
|
-
"""
|
|
1174
|
-
A class for storing and manipulating detection keypoints.
|
|
1097
|
+
"""A class for storing and manipulating detection keypoints.
|
|
1175
1098
|
|
|
1176
|
-
This class encapsulates functionality for handling keypoint data, including coordinate manipulation,
|
|
1177
|
-
|
|
1178
|
-
information.
|
|
1099
|
+
This class encapsulates functionality for handling keypoint data, including coordinate manipulation, normalization,
|
|
1100
|
+
and confidence values. It supports keypoint detection results with optional visibility information.
|
|
1179
1101
|
|
|
1180
1102
|
Attributes:
|
|
1181
1103
|
data (torch.Tensor): The raw tensor containing keypoint data.
|
|
@@ -1203,22 +1125,16 @@ class Keypoints(BaseTensor):
|
|
|
1203
1125
|
"""
|
|
1204
1126
|
|
|
1205
1127
|
def __init__(self, keypoints: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
1206
|
-
"""
|
|
1207
|
-
Initialize the Keypoints object with detection keypoints and original image dimensions.
|
|
1128
|
+
"""Initialize the Keypoints object with detection keypoints and original image dimensions.
|
|
1208
1129
|
|
|
1209
|
-
This method processes the input keypoints tensor, handling both 2D and 3D formats. For 3D tensors
|
|
1210
|
-
|
|
1130
|
+
This method processes the input keypoints tensor, handling both 2D and 3D formats. For 3D tensors (x, y,
|
|
1131
|
+
confidence), it masks out low-confidence keypoints by setting their coordinates to zero.
|
|
1211
1132
|
|
|
1212
1133
|
Args:
|
|
1213
1134
|
keypoints (torch.Tensor): A tensor containing keypoint data. Shape can be either:
|
|
1214
1135
|
- (num_objects, num_keypoints, 2) for x, y coordinates only
|
|
1215
1136
|
- (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores
|
|
1216
1137
|
orig_shape (tuple[int, int]): The original image dimensions (height, width).
|
|
1217
|
-
|
|
1218
|
-
Examples:
|
|
1219
|
-
>>> kpts = torch.rand(1, 17, 3) # 1 object, 17 keypoints (COCO format), x,y,conf
|
|
1220
|
-
>>> orig_shape = (720, 1280) # Original image height, width
|
|
1221
|
-
>>> keypoints = Keypoints(kpts, orig_shape)
|
|
1222
1138
|
"""
|
|
1223
1139
|
if keypoints.ndim == 2:
|
|
1224
1140
|
keypoints = keypoints[None, :]
|
|
@@ -1228,12 +1144,11 @@ class Keypoints(BaseTensor):
|
|
|
1228
1144
|
@property
|
|
1229
1145
|
@lru_cache(maxsize=1)
|
|
1230
1146
|
def xy(self) -> torch.Tensor | np.ndarray:
|
|
1231
|
-
"""
|
|
1232
|
-
Return x, y coordinates of keypoints.
|
|
1147
|
+
"""Return x, y coordinates of keypoints.
|
|
1233
1148
|
|
|
1234
1149
|
Returns:
|
|
1235
|
-
(torch.Tensor): A tensor containing the x, y coordinates of keypoints with shape (N, K, 2), where N is
|
|
1236
|
-
|
|
1150
|
+
(torch.Tensor): A tensor containing the x, y coordinates of keypoints with shape (N, K, 2), where N is the
|
|
1151
|
+
number of detections and K is the number of keypoints per detection.
|
|
1237
1152
|
|
|
1238
1153
|
Examples:
|
|
1239
1154
|
>>> results = model("image.jpg")
|
|
@@ -1252,13 +1167,12 @@ class Keypoints(BaseTensor):
|
|
|
1252
1167
|
@property
|
|
1253
1168
|
@lru_cache(maxsize=1)
|
|
1254
1169
|
def xyn(self) -> torch.Tensor | np.ndarray:
|
|
1255
|
-
"""
|
|
1256
|
-
Return normalized coordinates (x, y) of keypoints relative to the original image size.
|
|
1170
|
+
"""Return normalized coordinates (x, y) of keypoints relative to the original image size.
|
|
1257
1171
|
|
|
1258
1172
|
Returns:
|
|
1259
1173
|
(torch.Tensor | np.ndarray): A tensor or array of shape (N, K, 2) containing normalized keypoint
|
|
1260
|
-
coordinates, where N is the number of instances, K is the number of keypoints, and the last
|
|
1261
|
-
|
|
1174
|
+
coordinates, where N is the number of instances, K is the number of keypoints, and the last dimension
|
|
1175
|
+
contains [x, y] values in the range [0, 1].
|
|
1262
1176
|
|
|
1263
1177
|
Examples:
|
|
1264
1178
|
>>> keypoints = Keypoints(torch.rand(1, 17, 2), orig_shape=(480, 640))
|
|
@@ -1274,13 +1188,11 @@ class Keypoints(BaseTensor):
|
|
|
1274
1188
|
@property
|
|
1275
1189
|
@lru_cache(maxsize=1)
|
|
1276
1190
|
def conf(self) -> torch.Tensor | np.ndarray | None:
|
|
1277
|
-
"""
|
|
1278
|
-
Return confidence values for each keypoint.
|
|
1191
|
+
"""Return confidence values for each keypoint.
|
|
1279
1192
|
|
|
1280
1193
|
Returns:
|
|
1281
|
-
(torch.Tensor | None): A tensor containing confidence scores for each keypoint if available,
|
|
1282
|
-
|
|
1283
|
-
for single detection.
|
|
1194
|
+
(torch.Tensor | None): A tensor containing confidence scores for each keypoint if available, otherwise None.
|
|
1195
|
+
Shape is (num_detections, num_keypoints) for batched data or (num_keypoints,) for single detection.
|
|
1284
1196
|
|
|
1285
1197
|
Examples:
|
|
1286
1198
|
>>> keypoints = Keypoints(torch.rand(1, 17, 3), orig_shape=(640, 640)) # 1 detection, 17 keypoints
|
|
@@ -1291,11 +1203,10 @@ class Keypoints(BaseTensor):
|
|
|
1291
1203
|
|
|
1292
1204
|
|
|
1293
1205
|
class Probs(BaseTensor):
|
|
1294
|
-
"""
|
|
1295
|
-
A class for storing and manipulating classification probabilities.
|
|
1206
|
+
"""A class for storing and manipulating classification probabilities.
|
|
1296
1207
|
|
|
1297
|
-
This class extends BaseTensor and provides methods for accessing and manipulating
|
|
1298
|
-
|
|
1208
|
+
This class extends BaseTensor and provides methods for accessing and manipulating classification probabilities,
|
|
1209
|
+
including top-1 and top-5 predictions.
|
|
1299
1210
|
|
|
1300
1211
|
Attributes:
|
|
1301
1212
|
data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
|
|
@@ -1325,42 +1236,22 @@ class Probs(BaseTensor):
|
|
|
1325
1236
|
"""
|
|
1326
1237
|
|
|
1327
1238
|
def __init__(self, probs: torch.Tensor | np.ndarray, orig_shape: tuple[int, int] | None = None) -> None:
|
|
1328
|
-
"""
|
|
1329
|
-
Initialize the Probs class with classification probabilities.
|
|
1239
|
+
"""Initialize the Probs class with classification probabilities.
|
|
1330
1240
|
|
|
1331
1241
|
This class stores and manages classification probabilities, providing easy access to top predictions and their
|
|
1332
1242
|
confidences.
|
|
1333
1243
|
|
|
1334
1244
|
Args:
|
|
1335
1245
|
probs (torch.Tensor | np.ndarray): A 1D tensor or array of classification probabilities.
|
|
1336
|
-
orig_shape (tuple | None): The original image shape as (height, width). Not used in this class but kept
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
Attributes:
|
|
1340
|
-
data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
|
|
1341
|
-
top1 (int): Index of the top 1 class.
|
|
1342
|
-
top5 (list[int]): Indices of the top 5 classes.
|
|
1343
|
-
top1conf (torch.Tensor | np.ndarray): Confidence of the top 1 class.
|
|
1344
|
-
top5conf (torch.Tensor | np.ndarray): Confidences of the top 5 classes.
|
|
1345
|
-
|
|
1346
|
-
Examples:
|
|
1347
|
-
>>> import torch
|
|
1348
|
-
>>> probs = torch.tensor([0.1, 0.3, 0.2, 0.4])
|
|
1349
|
-
>>> p = Probs(probs)
|
|
1350
|
-
>>> print(p.top1)
|
|
1351
|
-
3
|
|
1352
|
-
>>> print(p.top1conf)
|
|
1353
|
-
tensor(0.4000)
|
|
1354
|
-
>>> print(p.top5)
|
|
1355
|
-
[3, 1, 2, 0]
|
|
1246
|
+
orig_shape (tuple | None): The original image shape as (height, width). Not used in this class but kept for
|
|
1247
|
+
consistency with other result classes.
|
|
1356
1248
|
"""
|
|
1357
1249
|
super().__init__(probs, orig_shape)
|
|
1358
1250
|
|
|
1359
1251
|
@property
|
|
1360
1252
|
@lru_cache(maxsize=1)
|
|
1361
1253
|
def top1(self) -> int:
|
|
1362
|
-
"""
|
|
1363
|
-
Return the index of the class with the highest probability.
|
|
1254
|
+
"""Return the index of the class with the highest probability.
|
|
1364
1255
|
|
|
1365
1256
|
Returns:
|
|
1366
1257
|
(int): Index of the class with the highest probability.
|
|
@@ -1375,8 +1266,7 @@ class Probs(BaseTensor):
|
|
|
1375
1266
|
@property
|
|
1376
1267
|
@lru_cache(maxsize=1)
|
|
1377
1268
|
def top5(self) -> list[int]:
|
|
1378
|
-
"""
|
|
1379
|
-
Return the indices of the top 5 class probabilities.
|
|
1269
|
+
"""Return the indices of the top 5 class probabilities.
|
|
1380
1270
|
|
|
1381
1271
|
Returns:
|
|
1382
1272
|
(list[int]): A list containing the indices of the top 5 class probabilities, sorted in descending order.
|
|
@@ -1391,8 +1281,7 @@ class Probs(BaseTensor):
|
|
|
1391
1281
|
@property
|
|
1392
1282
|
@lru_cache(maxsize=1)
|
|
1393
1283
|
def top1conf(self) -> torch.Tensor | np.ndarray:
|
|
1394
|
-
"""
|
|
1395
|
-
Return the confidence score of the highest probability class.
|
|
1284
|
+
"""Return the confidence score of the highest probability class.
|
|
1396
1285
|
|
|
1397
1286
|
This property retrieves the confidence score (probability) of the class with the highest predicted probability
|
|
1398
1287
|
from the classification results.
|
|
@@ -1411,16 +1300,15 @@ class Probs(BaseTensor):
|
|
|
1411
1300
|
@property
|
|
1412
1301
|
@lru_cache(maxsize=1)
|
|
1413
1302
|
def top5conf(self) -> torch.Tensor | np.ndarray:
|
|
1414
|
-
"""
|
|
1415
|
-
Return confidence scores for the top 5 classification predictions.
|
|
1303
|
+
"""Return confidence scores for the top 5 classification predictions.
|
|
1416
1304
|
|
|
1417
|
-
This property retrieves the confidence scores corresponding to the top 5 class probabilities
|
|
1418
|
-
|
|
1419
|
-
|
|
1305
|
+
This property retrieves the confidence scores corresponding to the top 5 class probabilities predicted by the
|
|
1306
|
+
model. It provides a quick way to access the most likely class predictions along with their associated
|
|
1307
|
+
confidence levels.
|
|
1420
1308
|
|
|
1421
1309
|
Returns:
|
|
1422
|
-
(torch.Tensor | np.ndarray): A tensor or array containing the confidence scores for the
|
|
1423
|
-
|
|
1310
|
+
(torch.Tensor | np.ndarray): A tensor or array containing the confidence scores for the top 5 predicted
|
|
1311
|
+
classes, sorted in descending order of probability.
|
|
1424
1312
|
|
|
1425
1313
|
Examples:
|
|
1426
1314
|
>>> results = model("image.jpg")
|
|
@@ -1432,12 +1320,10 @@ class Probs(BaseTensor):
|
|
|
1432
1320
|
|
|
1433
1321
|
|
|
1434
1322
|
class OBB(BaseTensor):
|
|
1435
|
-
"""
|
|
1436
|
-
A class for storing and manipulating Oriented Bounding Boxes (OBB).
|
|
1323
|
+
"""A class for storing and manipulating Oriented Bounding Boxes (OBB).
|
|
1437
1324
|
|
|
1438
|
-
This class provides functionality to handle oriented bounding boxes, including conversion between
|
|
1439
|
-
|
|
1440
|
-
both tracking and non-tracking scenarios.
|
|
1325
|
+
This class provides functionality to handle oriented bounding boxes, including conversion between different formats,
|
|
1326
|
+
normalization, and access to various properties of the boxes. It supports both tracking and non-tracking scenarios.
|
|
1441
1327
|
|
|
1442
1328
|
Attributes:
|
|
1443
1329
|
data (torch.Tensor): The raw OBB tensor containing box coordinates and associated data.
|
|
@@ -1466,32 +1352,19 @@ class OBB(BaseTensor):
|
|
|
1466
1352
|
"""
|
|
1467
1353
|
|
|
1468
1354
|
def __init__(self, boxes: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
1469
|
-
"""
|
|
1470
|
-
Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape.
|
|
1355
|
+
"""Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape.
|
|
1471
1356
|
|
|
1472
|
-
This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides
|
|
1473
|
-
|
|
1357
|
+
This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides various
|
|
1358
|
+
properties and methods to access and transform the OBB data.
|
|
1474
1359
|
|
|
1475
1360
|
Args:
|
|
1476
|
-
boxes (torch.Tensor | np.ndarray): A tensor or numpy array containing the detection boxes,
|
|
1477
|
-
|
|
1478
|
-
|
|
1361
|
+
boxes (torch.Tensor | np.ndarray): A tensor or numpy array containing the detection boxes, with shape
|
|
1362
|
+
(num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values. If present,
|
|
1363
|
+
the third last column contains track IDs, and the fifth column contains rotation.
|
|
1479
1364
|
orig_shape (tuple[int, int]): Original image size, in the format (height, width).
|
|
1480
1365
|
|
|
1481
|
-
Attributes:
|
|
1482
|
-
data (torch.Tensor | np.ndarray): The raw OBB tensor.
|
|
1483
|
-
orig_shape (tuple[int, int]): The original image shape.
|
|
1484
|
-
is_track (bool): Whether the boxes include tracking IDs.
|
|
1485
|
-
|
|
1486
1366
|
Raises:
|
|
1487
1367
|
AssertionError: If the number of values per box is not 7 or 8.
|
|
1488
|
-
|
|
1489
|
-
Examples:
|
|
1490
|
-
>>> import torch
|
|
1491
|
-
>>> boxes = torch.rand(3, 7) # 3 boxes with 7 values each
|
|
1492
|
-
>>> orig_shape = (640, 480)
|
|
1493
|
-
>>> obb = OBB(boxes, orig_shape)
|
|
1494
|
-
>>> print(obb.xywhr) # Access the boxes in xywhr format
|
|
1495
1368
|
"""
|
|
1496
1369
|
if boxes.ndim == 1:
|
|
1497
1370
|
boxes = boxes[None, :]
|
|
@@ -1503,8 +1376,7 @@ class OBB(BaseTensor):
|
|
|
1503
1376
|
|
|
1504
1377
|
@property
|
|
1505
1378
|
def xywhr(self) -> torch.Tensor | np.ndarray:
|
|
1506
|
-
"""
|
|
1507
|
-
Return boxes in [x_center, y_center, width, height, rotation] format.
|
|
1379
|
+
"""Return boxes in [x_center, y_center, width, height, rotation] format.
|
|
1508
1380
|
|
|
1509
1381
|
Returns:
|
|
1510
1382
|
(torch.Tensor | np.ndarray): A tensor or numpy array containing the oriented bounding boxes with format
|
|
@@ -1521,15 +1393,14 @@ class OBB(BaseTensor):
|
|
|
1521
1393
|
|
|
1522
1394
|
@property
|
|
1523
1395
|
def conf(self) -> torch.Tensor | np.ndarray:
|
|
1524
|
-
"""
|
|
1525
|
-
Return the confidence scores for Oriented Bounding Boxes (OBBs).
|
|
1396
|
+
"""Return the confidence scores for Oriented Bounding Boxes (OBBs).
|
|
1526
1397
|
|
|
1527
1398
|
This property retrieves the confidence values associated with each OBB detection. The confidence score
|
|
1528
1399
|
represents the model's certainty in the detection.
|
|
1529
1400
|
|
|
1530
1401
|
Returns:
|
|
1531
|
-
(torch.Tensor | np.ndarray): A tensor or numpy array of shape (N,) containing confidence scores
|
|
1532
|
-
|
|
1402
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array of shape (N,) containing confidence scores for N
|
|
1403
|
+
detections, where each score is in the range [0, 1].
|
|
1533
1404
|
|
|
1534
1405
|
Examples:
|
|
1535
1406
|
>>> results = model("image.jpg")
|
|
@@ -1541,12 +1412,11 @@ class OBB(BaseTensor):
|
|
|
1541
1412
|
|
|
1542
1413
|
@property
|
|
1543
1414
|
def cls(self) -> torch.Tensor | np.ndarray:
|
|
1544
|
-
"""
|
|
1545
|
-
Return the class values of the oriented bounding boxes.
|
|
1415
|
+
"""Return the class values of the oriented bounding boxes.
|
|
1546
1416
|
|
|
1547
1417
|
Returns:
|
|
1548
|
-
(torch.Tensor | np.ndarray): A tensor or numpy array containing the class values for each oriented
|
|
1549
|
-
|
|
1418
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array containing the class values for each oriented bounding
|
|
1419
|
+
box. The shape is (N,), where N is the number of boxes.
|
|
1550
1420
|
|
|
1551
1421
|
Examples:
|
|
1552
1422
|
>>> results = model("image.jpg")
|
|
@@ -1559,12 +1429,11 @@ class OBB(BaseTensor):
|
|
|
1559
1429
|
|
|
1560
1430
|
@property
|
|
1561
1431
|
def id(self) -> torch.Tensor | np.ndarray | None:
|
|
1562
|
-
"""
|
|
1563
|
-
Return the tracking IDs of the oriented bounding boxes (if available).
|
|
1432
|
+
"""Return the tracking IDs of the oriented bounding boxes (if available).
|
|
1564
1433
|
|
|
1565
1434
|
Returns:
|
|
1566
|
-
(torch.Tensor | np.ndarray | None): A tensor or numpy array containing the tracking IDs for each
|
|
1567
|
-
|
|
1435
|
+
(torch.Tensor | np.ndarray | None): A tensor or numpy array containing the tracking IDs for each oriented
|
|
1436
|
+
bounding box. Returns None if tracking IDs are not available.
|
|
1568
1437
|
|
|
1569
1438
|
Examples:
|
|
1570
1439
|
>>> results = model("image.jpg", tracker=True) # Run inference with tracking
|
|
@@ -1579,12 +1448,11 @@ class OBB(BaseTensor):
|
|
|
1579
1448
|
@property
|
|
1580
1449
|
@lru_cache(maxsize=2)
|
|
1581
1450
|
def xyxyxyxy(self) -> torch.Tensor | np.ndarray:
|
|
1582
|
-
"""
|
|
1583
|
-
Convert OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
|
|
1451
|
+
"""Convert OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
|
|
1584
1452
|
|
|
1585
1453
|
Returns:
|
|
1586
|
-
(torch.Tensor | np.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is
|
|
1587
|
-
|
|
1454
|
+
(torch.Tensor | np.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is the
|
|
1455
|
+
number of boxes. Each box is represented by 4 points (x, y), starting from the top-left corner and
|
|
1588
1456
|
moving clockwise.
|
|
1589
1457
|
|
|
1590
1458
|
Examples:
|
|
@@ -1598,13 +1466,12 @@ class OBB(BaseTensor):
|
|
|
1598
1466
|
@property
|
|
1599
1467
|
@lru_cache(maxsize=2)
|
|
1600
1468
|
def xyxyxyxyn(self) -> torch.Tensor | np.ndarray:
|
|
1601
|
-
"""
|
|
1602
|
-
Convert rotated bounding boxes to normalized xyxyxyxy format.
|
|
1469
|
+
"""Convert rotated bounding boxes to normalized xyxyxyxy format.
|
|
1603
1470
|
|
|
1604
1471
|
Returns:
|
|
1605
1472
|
(torch.Tensor | np.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2),
|
|
1606
|
-
where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to
|
|
1607
|
-
|
|
1473
|
+
where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to the
|
|
1474
|
+
original image dimensions.
|
|
1608
1475
|
|
|
1609
1476
|
Examples:
|
|
1610
1477
|
>>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480)) # 10 random OBBs
|
|
@@ -1620,16 +1487,15 @@ class OBB(BaseTensor):
|
|
|
1620
1487
|
@property
|
|
1621
1488
|
@lru_cache(maxsize=2)
|
|
1622
1489
|
def xyxy(self) -> torch.Tensor | np.ndarray:
|
|
1623
|
-
"""
|
|
1624
|
-
Convert oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format.
|
|
1490
|
+
"""Convert oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format.
|
|
1625
1491
|
|
|
1626
|
-
This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in
|
|
1627
|
-
|
|
1628
|
-
|
|
1492
|
+
This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in xyxy
|
|
1493
|
+
format (x1, y1, x2, y2). This is useful for operations that require axis-aligned bounding boxes, such as IoU
|
|
1494
|
+
calculation with non-rotated boxes.
|
|
1629
1495
|
|
|
1630
1496
|
Returns:
|
|
1631
|
-
(torch.Tensor | np.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N
|
|
1632
|
-
|
|
1497
|
+
(torch.Tensor | np.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N is the
|
|
1498
|
+
number of boxes. Each row contains [x1, y1, x2, y2] coordinates.
|
|
1633
1499
|
|
|
1634
1500
|
Examples:
|
|
1635
1501
|
>>> import torch
|