dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
- dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
- tests/__init__.py +7 -6
- tests/conftest.py +15 -39
- tests/test_cli.py +17 -17
- tests/test_cuda.py +17 -8
- tests/test_engine.py +36 -10
- tests/test_exports.py +98 -37
- tests/test_integrations.py +12 -15
- tests/test_python.py +126 -82
- tests/test_solutions.py +319 -135
- ultralytics/__init__.py +27 -9
- ultralytics/cfg/__init__.py +83 -87
- ultralytics/cfg/datasets/Argoverse.yaml +4 -4
- ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
- ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
- ultralytics/cfg/datasets/ImageNet.yaml +3 -3
- ultralytics/cfg/datasets/Objects365.yaml +24 -20
- ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
- ultralytics/cfg/datasets/VOC.yaml +10 -13
- ultralytics/cfg/datasets/VisDrone.yaml +43 -33
- ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
- ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
- ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
- ultralytics/cfg/datasets/coco-pose.yaml +26 -4
- ultralytics/cfg/datasets/coco.yaml +4 -4
- ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
- ultralytics/cfg/datasets/coco128.yaml +2 -2
- ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
- ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
- ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
- ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
- ultralytics/cfg/datasets/coco8.yaml +2 -2
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/datasets/crack-seg.yaml +5 -5
- ultralytics/cfg/datasets/dog-pose.yaml +32 -4
- ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
- ultralytics/cfg/datasets/lvis.yaml +9 -9
- ultralytics/cfg/datasets/medical-pills.yaml +4 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
- ultralytics/cfg/datasets/package-seg.yaml +5 -5
- ultralytics/cfg/datasets/signature.yaml +4 -4
- ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
- ultralytics/cfg/datasets/xView.yaml +5 -5
- ultralytics/cfg/default.yaml +96 -93
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +12 -12
- ultralytics/data/augment.py +531 -564
- ultralytics/data/base.py +76 -81
- ultralytics/data/build.py +206 -42
- ultralytics/data/converter.py +179 -78
- ultralytics/data/dataset.py +121 -121
- ultralytics/data/loaders.py +114 -91
- ultralytics/data/split.py +28 -15
- ultralytics/data/split_dota.py +67 -48
- ultralytics/data/utils.py +110 -89
- ultralytics/engine/exporter.py +422 -460
- ultralytics/engine/model.py +224 -252
- ultralytics/engine/predictor.py +94 -89
- ultralytics/engine/results.py +345 -595
- ultralytics/engine/trainer.py +231 -134
- ultralytics/engine/tuner.py +279 -73
- ultralytics/engine/validator.py +53 -46
- ultralytics/hub/__init__.py +26 -28
- ultralytics/hub/auth.py +30 -16
- ultralytics/hub/google/__init__.py +34 -36
- ultralytics/hub/session.py +53 -77
- ultralytics/hub/utils.py +23 -109
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +36 -18
- ultralytics/models/fastsam/predict.py +33 -44
- ultralytics/models/fastsam/utils.py +4 -5
- ultralytics/models/fastsam/val.py +12 -14
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +16 -20
- ultralytics/models/nas/predict.py +12 -14
- ultralytics/models/nas/val.py +4 -5
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +9 -9
- ultralytics/models/rtdetr/predict.py +22 -17
- ultralytics/models/rtdetr/train.py +20 -16
- ultralytics/models/rtdetr/val.py +79 -59
- ultralytics/models/sam/__init__.py +8 -2
- ultralytics/models/sam/amg.py +53 -38
- ultralytics/models/sam/build.py +29 -31
- ultralytics/models/sam/model.py +33 -38
- ultralytics/models/sam/modules/blocks.py +159 -182
- ultralytics/models/sam/modules/decoders.py +38 -47
- ultralytics/models/sam/modules/encoders.py +114 -133
- ultralytics/models/sam/modules/memory_attention.py +38 -31
- ultralytics/models/sam/modules/sam.py +114 -93
- ultralytics/models/sam/modules/tiny_encoder.py +268 -291
- ultralytics/models/sam/modules/transformer.py +59 -66
- ultralytics/models/sam/modules/utils.py +55 -72
- ultralytics/models/sam/predict.py +745 -341
- ultralytics/models/utils/loss.py +118 -107
- ultralytics/models/utils/ops.py +118 -71
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +28 -26
- ultralytics/models/yolo/classify/train.py +50 -81
- ultralytics/models/yolo/classify/val.py +68 -61
- ultralytics/models/yolo/detect/predict.py +12 -15
- ultralytics/models/yolo/detect/train.py +56 -46
- ultralytics/models/yolo/detect/val.py +279 -223
- ultralytics/models/yolo/model.py +167 -86
- ultralytics/models/yolo/obb/predict.py +7 -11
- ultralytics/models/yolo/obb/train.py +23 -25
- ultralytics/models/yolo/obb/val.py +107 -99
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +12 -14
- ultralytics/models/yolo/pose/train.py +31 -69
- ultralytics/models/yolo/pose/val.py +119 -254
- ultralytics/models/yolo/segment/predict.py +21 -25
- ultralytics/models/yolo/segment/train.py +12 -66
- ultralytics/models/yolo/segment/val.py +126 -305
- ultralytics/models/yolo/world/train.py +53 -45
- ultralytics/models/yolo/world/train_world.py +51 -32
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +30 -37
- ultralytics/models/yolo/yoloe/train.py +89 -71
- ultralytics/models/yolo/yoloe/train_seg.py +15 -17
- ultralytics/models/yolo/yoloe/val.py +56 -41
- ultralytics/nn/__init__.py +9 -11
- ultralytics/nn/autobackend.py +179 -107
- ultralytics/nn/modules/__init__.py +67 -67
- ultralytics/nn/modules/activation.py +8 -7
- ultralytics/nn/modules/block.py +302 -323
- ultralytics/nn/modules/conv.py +61 -104
- ultralytics/nn/modules/head.py +488 -186
- ultralytics/nn/modules/transformer.py +183 -123
- ultralytics/nn/modules/utils.py +15 -20
- ultralytics/nn/tasks.py +327 -203
- ultralytics/nn/text_model.py +81 -65
- ultralytics/py.typed +1 -0
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +19 -27
- ultralytics/solutions/analytics.py +36 -26
- ultralytics/solutions/config.py +29 -28
- ultralytics/solutions/distance_calculation.py +23 -24
- ultralytics/solutions/heatmap.py +17 -19
- ultralytics/solutions/instance_segmentation.py +21 -19
- ultralytics/solutions/object_blurrer.py +16 -17
- ultralytics/solutions/object_counter.py +48 -53
- ultralytics/solutions/object_cropper.py +22 -16
- ultralytics/solutions/parking_management.py +61 -58
- ultralytics/solutions/queue_management.py +19 -19
- ultralytics/solutions/region_counter.py +63 -50
- ultralytics/solutions/security_alarm.py +22 -25
- ultralytics/solutions/similarity_search.py +107 -60
- ultralytics/solutions/solutions.py +343 -262
- ultralytics/solutions/speed_estimation.py +35 -31
- ultralytics/solutions/streamlit_inference.py +104 -40
- ultralytics/solutions/templates/similarity-search.html +31 -24
- ultralytics/solutions/trackzone.py +24 -24
- ultralytics/solutions/vision_eye.py +11 -12
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +18 -27
- ultralytics/trackers/bot_sort.py +48 -39
- ultralytics/trackers/byte_tracker.py +94 -94
- ultralytics/trackers/track.py +7 -16
- ultralytics/trackers/utils/gmc.py +37 -69
- ultralytics/trackers/utils/kalman_filter.py +68 -76
- ultralytics/trackers/utils/matching.py +13 -17
- ultralytics/utils/__init__.py +251 -275
- ultralytics/utils/autobatch.py +19 -7
- ultralytics/utils/autodevice.py +68 -38
- ultralytics/utils/benchmarks.py +169 -130
- ultralytics/utils/callbacks/base.py +12 -13
- ultralytics/utils/callbacks/clearml.py +14 -15
- ultralytics/utils/callbacks/comet.py +139 -66
- ultralytics/utils/callbacks/dvc.py +19 -27
- ultralytics/utils/callbacks/hub.py +8 -6
- ultralytics/utils/callbacks/mlflow.py +6 -10
- ultralytics/utils/callbacks/neptune.py +11 -19
- ultralytics/utils/callbacks/platform.py +73 -0
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +9 -12
- ultralytics/utils/callbacks/wb.py +33 -30
- ultralytics/utils/checks.py +163 -114
- ultralytics/utils/cpu.py +89 -0
- ultralytics/utils/dist.py +24 -20
- ultralytics/utils/downloads.py +176 -146
- ultralytics/utils/errors.py +11 -13
- ultralytics/utils/events.py +113 -0
- ultralytics/utils/export/__init__.py +7 -0
- ultralytics/utils/{export.py → export/engine.py} +81 -63
- ultralytics/utils/export/imx.py +294 -0
- ultralytics/utils/export/tensorflow.py +217 -0
- ultralytics/utils/files.py +33 -36
- ultralytics/utils/git.py +137 -0
- ultralytics/utils/instance.py +105 -120
- ultralytics/utils/logger.py +404 -0
- ultralytics/utils/loss.py +99 -61
- ultralytics/utils/metrics.py +649 -478
- ultralytics/utils/nms.py +337 -0
- ultralytics/utils/ops.py +263 -451
- ultralytics/utils/patches.py +70 -31
- ultralytics/utils/plotting.py +253 -223
- ultralytics/utils/tal.py +48 -61
- ultralytics/utils/torch_utils.py +244 -251
- ultralytics/utils/tqdm.py +438 -0
- ultralytics/utils/triton.py +22 -23
- ultralytics/utils/tuner.py +11 -10
- dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
ultralytics/engine/results.py
CHANGED
|
@@ -5,32 +5,36 @@ Ultralytics Results, Boxes and Masks classes for handling inference results.
|
|
|
5
5
|
Usage: See https://docs.ultralytics.com/modes/predict/
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
8
10
|
from copy import deepcopy
|
|
9
11
|
from functools import lru_cache
|
|
10
12
|
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
11
14
|
|
|
12
15
|
import numpy as np
|
|
13
16
|
import torch
|
|
14
17
|
|
|
15
18
|
from ultralytics.data.augment import LetterBox
|
|
16
|
-
from ultralytics.utils import LOGGER, SimpleClass, ops
|
|
17
|
-
from ultralytics.utils.checks import check_requirements
|
|
19
|
+
from ultralytics.utils import LOGGER, DataExportMixin, SimpleClass, ops
|
|
18
20
|
from ultralytics.utils.plotting import Annotator, colors, save_one_box
|
|
19
|
-
from ultralytics.utils.torch_utils import smart_inference_mode
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class BaseTensor(SimpleClass):
|
|
23
|
-
"""
|
|
24
|
-
|
|
24
|
+
"""Base tensor class with additional methods for easy manipulation and device handling.
|
|
25
|
+
|
|
26
|
+
This class provides a foundation for tensor-like objects with device management capabilities, supporting both
|
|
27
|
+
PyTorch tensors and NumPy arrays. It includes methods for moving data between devices and converting between tensor
|
|
28
|
+
types.
|
|
25
29
|
|
|
26
30
|
Attributes:
|
|
27
31
|
data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
|
|
28
|
-
orig_shape (
|
|
32
|
+
orig_shape (tuple[int, int]): Original shape of the image, typically in the format (height, width).
|
|
29
33
|
|
|
30
34
|
Methods:
|
|
31
35
|
cpu: Return a copy of the tensor stored in CPU memory.
|
|
32
|
-
numpy:
|
|
33
|
-
cuda:
|
|
36
|
+
numpy: Return a copy of the tensor as a numpy array.
|
|
37
|
+
cuda: Move the tensor to GPU memory, returning a new instance if necessary.
|
|
34
38
|
to: Return a copy of the tensor with the specified device and dtype.
|
|
35
39
|
|
|
36
40
|
Examples:
|
|
@@ -43,13 +47,12 @@ class BaseTensor(SimpleClass):
|
|
|
43
47
|
>>> gpu_tensor = base_tensor.cuda()
|
|
44
48
|
"""
|
|
45
49
|
|
|
46
|
-
def __init__(self, data, orig_shape) -> None:
|
|
47
|
-
"""
|
|
48
|
-
Initialize BaseTensor with prediction data and the original shape of the image.
|
|
50
|
+
def __init__(self, data: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
51
|
+
"""Initialize BaseTensor with prediction data and the original shape of the image.
|
|
49
52
|
|
|
50
53
|
Args:
|
|
51
54
|
data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
|
|
52
|
-
orig_shape (
|
|
55
|
+
orig_shape (tuple[int, int]): Original shape of the image in (height, width) format.
|
|
53
56
|
|
|
54
57
|
Examples:
|
|
55
58
|
>>> import torch
|
|
@@ -62,12 +65,11 @@ class BaseTensor(SimpleClass):
|
|
|
62
65
|
self.orig_shape = orig_shape
|
|
63
66
|
|
|
64
67
|
@property
|
|
65
|
-
def shape(self):
|
|
66
|
-
"""
|
|
67
|
-
Returns the shape of the underlying data tensor.
|
|
68
|
+
def shape(self) -> tuple[int, ...]:
|
|
69
|
+
"""Return the shape of the underlying data tensor.
|
|
68
70
|
|
|
69
71
|
Returns:
|
|
70
|
-
(
|
|
72
|
+
(tuple[int, ...]): The shape of the data tensor.
|
|
71
73
|
|
|
72
74
|
Examples:
|
|
73
75
|
>>> data = torch.rand(100, 4)
|
|
@@ -78,8 +80,7 @@ class BaseTensor(SimpleClass):
|
|
|
78
80
|
return self.data.shape
|
|
79
81
|
|
|
80
82
|
def cpu(self):
|
|
81
|
-
"""
|
|
82
|
-
Returns a copy of the tensor stored in CPU memory.
|
|
83
|
+
"""Return a copy of the tensor stored in CPU memory.
|
|
83
84
|
|
|
84
85
|
Returns:
|
|
85
86
|
(BaseTensor): A new BaseTensor object with the data tensor moved to CPU memory.
|
|
@@ -96,8 +97,7 @@ class BaseTensor(SimpleClass):
|
|
|
96
97
|
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
|
|
97
98
|
|
|
98
99
|
def numpy(self):
|
|
99
|
-
"""
|
|
100
|
-
Returns a copy of the tensor as a numpy array.
|
|
100
|
+
"""Return a copy of the tensor as a numpy array.
|
|
101
101
|
|
|
102
102
|
Returns:
|
|
103
103
|
(np.ndarray): A numpy array containing the same data as the original tensor.
|
|
@@ -113,12 +113,11 @@ class BaseTensor(SimpleClass):
|
|
|
113
113
|
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
|
|
114
114
|
|
|
115
115
|
def cuda(self):
|
|
116
|
-
"""
|
|
117
|
-
Moves the tensor to GPU memory.
|
|
116
|
+
"""Move the tensor to GPU memory.
|
|
118
117
|
|
|
119
118
|
Returns:
|
|
120
|
-
(BaseTensor): A new BaseTensor instance with the data moved to GPU memory if it's not already a
|
|
121
|
-
|
|
119
|
+
(BaseTensor): A new BaseTensor instance with the data moved to GPU memory if it's not already a numpy array,
|
|
120
|
+
otherwise returns self.
|
|
122
121
|
|
|
123
122
|
Examples:
|
|
124
123
|
>>> import torch
|
|
@@ -132,8 +131,7 @@ class BaseTensor(SimpleClass):
|
|
|
132
131
|
return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)
|
|
133
132
|
|
|
134
133
|
def to(self, *args, **kwargs):
|
|
135
|
-
"""
|
|
136
|
-
Return a copy of the tensor with the specified device and dtype.
|
|
134
|
+
"""Return a copy of the tensor with the specified device and dtype.
|
|
137
135
|
|
|
138
136
|
Args:
|
|
139
137
|
*args (Any): Variable length argument list to be passed to torch.Tensor.to().
|
|
@@ -149,9 +147,8 @@ class BaseTensor(SimpleClass):
|
|
|
149
147
|
"""
|
|
150
148
|
return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
|
|
151
149
|
|
|
152
|
-
def __len__(self)
|
|
153
|
-
"""
|
|
154
|
-
Returns the length of the underlying data tensor.
|
|
150
|
+
def __len__(self) -> int:
|
|
151
|
+
"""Return the length of the underlying data tensor.
|
|
155
152
|
|
|
156
153
|
Returns:
|
|
157
154
|
(int): The number of elements in the first dimension of the data tensor.
|
|
@@ -165,11 +162,10 @@ class BaseTensor(SimpleClass):
|
|
|
165
162
|
return len(self.data)
|
|
166
163
|
|
|
167
164
|
def __getitem__(self, idx):
|
|
168
|
-
"""
|
|
169
|
-
Returns a new BaseTensor instance containing the specified indexed elements of the data tensor.
|
|
165
|
+
"""Return a new BaseTensor instance containing the specified indexed elements of the data tensor.
|
|
170
166
|
|
|
171
167
|
Args:
|
|
172
|
-
idx (int |
|
|
168
|
+
idx (int | list[int] | torch.Tensor): Index or indices to select from the data tensor.
|
|
173
169
|
|
|
174
170
|
Returns:
|
|
175
171
|
(BaseTensor): A new BaseTensor instance containing the indexed data.
|
|
@@ -184,16 +180,16 @@ class BaseTensor(SimpleClass):
|
|
|
184
180
|
return self.__class__(self.data[idx], self.orig_shape)
|
|
185
181
|
|
|
186
182
|
|
|
187
|
-
class Results(SimpleClass):
|
|
188
|
-
"""
|
|
189
|
-
A class for storing and manipulating inference results.
|
|
183
|
+
class Results(SimpleClass, DataExportMixin):
|
|
184
|
+
"""A class for storing and manipulating inference results.
|
|
190
185
|
|
|
191
|
-
This class provides
|
|
192
|
-
|
|
186
|
+
This class provides comprehensive functionality for handling inference results from various Ultralytics models,
|
|
187
|
+
including detection, segmentation, classification, and pose estimation. It supports visualization, data export, and
|
|
188
|
+
various coordinate transformations.
|
|
193
189
|
|
|
194
190
|
Attributes:
|
|
195
|
-
orig_img (
|
|
196
|
-
orig_shape (
|
|
191
|
+
orig_img (np.ndarray): The original image as a numpy array.
|
|
192
|
+
orig_shape (tuple[int, int]): Original image shape in (height, width) format.
|
|
197
193
|
boxes (Boxes | None): Detected bounding boxes.
|
|
198
194
|
masks (Masks | None): Segmentation masks.
|
|
199
195
|
probs (Probs | None): Classification probabilities.
|
|
@@ -205,25 +201,22 @@ class Results(SimpleClass):
|
|
|
205
201
|
save_dir (str | None): Directory to save results.
|
|
206
202
|
|
|
207
203
|
Methods:
|
|
208
|
-
update:
|
|
209
|
-
cpu:
|
|
210
|
-
numpy:
|
|
211
|
-
cuda:
|
|
212
|
-
to:
|
|
213
|
-
new:
|
|
214
|
-
plot:
|
|
215
|
-
show:
|
|
216
|
-
save:
|
|
217
|
-
verbose:
|
|
218
|
-
save_txt:
|
|
219
|
-
save_crop:
|
|
220
|
-
summary:
|
|
221
|
-
to_df:
|
|
222
|
-
to_json:
|
|
223
|
-
to_csv:
|
|
224
|
-
to_xml: Converts detection results to XML format.
|
|
225
|
-
to_html: Converts detection results to HTML format.
|
|
226
|
-
to_sql: Converts detection results to an SQL-compatible format.
|
|
204
|
+
update: Update the Results object with new detection data.
|
|
205
|
+
cpu: Return a copy of the Results object with all tensors moved to CPU memory.
|
|
206
|
+
numpy: Convert all tensors in the Results object to numpy arrays.
|
|
207
|
+
cuda: Move all tensors in the Results object to GPU memory.
|
|
208
|
+
to: Move all tensors to the specified device and dtype.
|
|
209
|
+
new: Create a new Results object with the same image, path, names, and speed attributes.
|
|
210
|
+
plot: Plot detection results on an input RGB image.
|
|
211
|
+
show: Display the image with annotated inference results.
|
|
212
|
+
save: Save annotated inference results image to file.
|
|
213
|
+
verbose: Return a log string for each task in the results.
|
|
214
|
+
save_txt: Save detection results to a text file.
|
|
215
|
+
save_crop: Save cropped detection images to specified directory.
|
|
216
|
+
summary: Convert inference results to a summarized dictionary.
|
|
217
|
+
to_df: Convert detection results to a Polars Dataframe.
|
|
218
|
+
to_json: Convert detection results to JSON format.
|
|
219
|
+
to_csv: Convert detection results to a CSV format.
|
|
227
220
|
|
|
228
221
|
Examples:
|
|
229
222
|
>>> results = model("path/to/image.jpg")
|
|
@@ -235,13 +228,21 @@ class Results(SimpleClass):
|
|
|
235
228
|
"""
|
|
236
229
|
|
|
237
230
|
def __init__(
|
|
238
|
-
self,
|
|
231
|
+
self,
|
|
232
|
+
orig_img: np.ndarray,
|
|
233
|
+
path: str,
|
|
234
|
+
names: dict[int, str],
|
|
235
|
+
boxes: torch.Tensor | None = None,
|
|
236
|
+
masks: torch.Tensor | None = None,
|
|
237
|
+
probs: torch.Tensor | None = None,
|
|
238
|
+
keypoints: torch.Tensor | None = None,
|
|
239
|
+
obb: torch.Tensor | None = None,
|
|
240
|
+
speed: dict[str, float] | None = None,
|
|
239
241
|
) -> None:
|
|
240
|
-
"""
|
|
241
|
-
Initialize the Results class for storing and manipulating inference results.
|
|
242
|
+
"""Initialize the Results class for storing and manipulating inference results.
|
|
242
243
|
|
|
243
244
|
Args:
|
|
244
|
-
orig_img (
|
|
245
|
+
orig_img (np.ndarray): The original image as a numpy array.
|
|
245
246
|
path (str): The path to the image file.
|
|
246
247
|
names (dict): A dictionary of class names.
|
|
247
248
|
boxes (torch.Tensor | None): A 2D tensor of bounding box coordinates for each detection.
|
|
@@ -249,7 +250,7 @@ class Results(SimpleClass):
|
|
|
249
250
|
probs (torch.Tensor | None): A 1D tensor of probabilities of each class for classification task.
|
|
250
251
|
keypoints (torch.Tensor | None): A 2D tensor of keypoint coordinates for each detection.
|
|
251
252
|
obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection.
|
|
252
|
-
speed (
|
|
253
|
+
speed (dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
|
|
253
254
|
|
|
254
255
|
Examples:
|
|
255
256
|
>>> results = model("path/to/image.jpg")
|
|
@@ -278,8 +279,7 @@ class Results(SimpleClass):
|
|
|
278
279
|
self._keys = "boxes", "masks", "probs", "keypoints", "obb"
|
|
279
280
|
|
|
280
281
|
def __getitem__(self, idx):
|
|
281
|
-
"""
|
|
282
|
-
Return a Results object for a specific index of inference results.
|
|
282
|
+
"""Return a Results object for a specific index of inference results.
|
|
283
283
|
|
|
284
284
|
Args:
|
|
285
285
|
idx (int | slice): Index or slice to retrieve from the Results object.
|
|
@@ -294,13 +294,12 @@ class Results(SimpleClass):
|
|
|
294
294
|
"""
|
|
295
295
|
return self._apply("__getitem__", idx)
|
|
296
296
|
|
|
297
|
-
def __len__(self):
|
|
298
|
-
"""
|
|
299
|
-
Return the number of detections in the Results object.
|
|
297
|
+
def __len__(self) -> int:
|
|
298
|
+
"""Return the number of detections in the Results object.
|
|
300
299
|
|
|
301
300
|
Returns:
|
|
302
|
-
(int): The number of detections, determined by the length of the first non-empty
|
|
303
|
-
|
|
301
|
+
(int): The number of detections, determined by the length of the first non-empty attribute in (masks, probs,
|
|
302
|
+
keypoints, or obb).
|
|
304
303
|
|
|
305
304
|
Examples:
|
|
306
305
|
>>> results = Results(orig_img, path, names, boxes=torch.rand(5, 4))
|
|
@@ -312,16 +311,22 @@ class Results(SimpleClass):
|
|
|
312
311
|
if v is not None:
|
|
313
312
|
return len(v)
|
|
314
313
|
|
|
315
|
-
def update(
|
|
316
|
-
|
|
317
|
-
|
|
314
|
+
def update(
|
|
315
|
+
self,
|
|
316
|
+
boxes: torch.Tensor | None = None,
|
|
317
|
+
masks: torch.Tensor | None = None,
|
|
318
|
+
probs: torch.Tensor | None = None,
|
|
319
|
+
obb: torch.Tensor | None = None,
|
|
320
|
+
keypoints: torch.Tensor | None = None,
|
|
321
|
+
):
|
|
322
|
+
"""Update the Results object with new detection data.
|
|
318
323
|
|
|
319
|
-
This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the
|
|
320
|
-
|
|
324
|
+
This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the Results
|
|
325
|
+
object. It ensures that boxes are clipped to the original image shape.
|
|
321
326
|
|
|
322
327
|
Args:
|
|
323
|
-
boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and
|
|
324
|
-
|
|
328
|
+
boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and confidence
|
|
329
|
+
scores. The format is (x1, y1, x2, y2, conf, class).
|
|
325
330
|
masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks.
|
|
326
331
|
probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities.
|
|
327
332
|
obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates.
|
|
@@ -343,9 +348,8 @@ class Results(SimpleClass):
|
|
|
343
348
|
if keypoints is not None:
|
|
344
349
|
self.keypoints = Keypoints(keypoints, self.orig_shape)
|
|
345
350
|
|
|
346
|
-
def _apply(self, fn, *args, **kwargs):
|
|
347
|
-
"""
|
|
348
|
-
Applies a function to all non-empty attributes and returns a new Results object with modified attributes.
|
|
351
|
+
def _apply(self, fn: str, *args, **kwargs):
|
|
352
|
+
"""Apply a function to all non-empty attributes and return a new Results object with modified attributes.
|
|
349
353
|
|
|
350
354
|
This method is internally called by methods like .to(), .cuda(), .cpu(), etc.
|
|
351
355
|
|
|
@@ -371,8 +375,7 @@ class Results(SimpleClass):
|
|
|
371
375
|
return r
|
|
372
376
|
|
|
373
377
|
def cpu(self):
|
|
374
|
-
"""
|
|
375
|
-
Returns a copy of the Results object with all its tensors moved to CPU memory.
|
|
378
|
+
"""Return a copy of the Results object with all its tensors moved to CPU memory.
|
|
376
379
|
|
|
377
380
|
This method creates a new Results object with all tensor attributes (boxes, masks, probs, keypoints, obb)
|
|
378
381
|
transferred to CPU memory. It's useful for moving data from GPU to CPU for further processing or saving.
|
|
@@ -388,8 +391,7 @@ class Results(SimpleClass):
|
|
|
388
391
|
return self._apply("cpu")
|
|
389
392
|
|
|
390
393
|
def numpy(self):
|
|
391
|
-
"""
|
|
392
|
-
Converts all tensors in the Results object to numpy arrays.
|
|
394
|
+
"""Convert all tensors in the Results object to numpy arrays.
|
|
393
395
|
|
|
394
396
|
Returns:
|
|
395
397
|
(Results): A new Results object with all tensors converted to numpy arrays.
|
|
@@ -407,8 +409,7 @@ class Results(SimpleClass):
|
|
|
407
409
|
return self._apply("numpy")
|
|
408
410
|
|
|
409
411
|
def cuda(self):
|
|
410
|
-
"""
|
|
411
|
-
Moves all tensors in the Results object to GPU memory.
|
|
412
|
+
"""Move all tensors in the Results object to GPU memory.
|
|
412
413
|
|
|
413
414
|
Returns:
|
|
414
415
|
(Results): A new Results object with all tensors moved to CUDA device.
|
|
@@ -422,8 +423,7 @@ class Results(SimpleClass):
|
|
|
422
423
|
return self._apply("cuda")
|
|
423
424
|
|
|
424
425
|
def to(self, *args, **kwargs):
|
|
425
|
-
"""
|
|
426
|
-
Moves all tensors in the Results object to the specified device and dtype.
|
|
426
|
+
"""Move all tensors in the Results object to the specified device and dtype.
|
|
427
427
|
|
|
428
428
|
Args:
|
|
429
429
|
*args (Any): Variable length argument list to be passed to torch.Tensor.to().
|
|
@@ -441,8 +441,7 @@ class Results(SimpleClass):
|
|
|
441
441
|
return self._apply("to", *args, **kwargs)
|
|
442
442
|
|
|
443
443
|
def new(self):
|
|
444
|
-
"""
|
|
445
|
-
Creates a new Results object with the same image, path, names, and speed attributes.
|
|
444
|
+
"""Create a new Results object with the same image, path, names, and speed attributes.
|
|
446
445
|
|
|
447
446
|
Returns:
|
|
448
447
|
(Results): A new Results object with copied attributes from the original instance.
|
|
@@ -455,27 +454,26 @@ class Results(SimpleClass):
|
|
|
455
454
|
|
|
456
455
|
def plot(
|
|
457
456
|
self,
|
|
458
|
-
conf=True,
|
|
459
|
-
line_width=None,
|
|
460
|
-
font_size=None,
|
|
461
|
-
font="Arial.ttf",
|
|
462
|
-
pil=False,
|
|
463
|
-
img=None,
|
|
464
|
-
im_gpu=None,
|
|
465
|
-
kpt_radius=5,
|
|
466
|
-
kpt_line=True,
|
|
467
|
-
labels=True,
|
|
468
|
-
boxes=True,
|
|
469
|
-
masks=True,
|
|
470
|
-
probs=True,
|
|
471
|
-
show=False,
|
|
472
|
-
save=False,
|
|
473
|
-
filename=None,
|
|
474
|
-
color_mode="class",
|
|
475
|
-
txt_color=(255, 255, 255),
|
|
476
|
-
):
|
|
477
|
-
"""
|
|
478
|
-
Plots detection results on an input RGB image.
|
|
457
|
+
conf: bool = True,
|
|
458
|
+
line_width: float | None = None,
|
|
459
|
+
font_size: float | None = None,
|
|
460
|
+
font: str = "Arial.ttf",
|
|
461
|
+
pil: bool = False,
|
|
462
|
+
img: np.ndarray | None = None,
|
|
463
|
+
im_gpu: torch.Tensor | None = None,
|
|
464
|
+
kpt_radius: int = 5,
|
|
465
|
+
kpt_line: bool = True,
|
|
466
|
+
labels: bool = True,
|
|
467
|
+
boxes: bool = True,
|
|
468
|
+
masks: bool = True,
|
|
469
|
+
probs: bool = True,
|
|
470
|
+
show: bool = False,
|
|
471
|
+
save: bool = False,
|
|
472
|
+
filename: str | None = None,
|
|
473
|
+
color_mode: str = "class",
|
|
474
|
+
txt_color: tuple[int, int, int] = (255, 255, 255),
|
|
475
|
+
) -> np.ndarray:
|
|
476
|
+
"""Plot detection results on an input RGB image.
|
|
479
477
|
|
|
480
478
|
Args:
|
|
481
479
|
conf (bool): Whether to plot detection confidence scores.
|
|
@@ -494,8 +492,8 @@ class Results(SimpleClass):
|
|
|
494
492
|
show (bool): Whether to display the annotated image.
|
|
495
493
|
save (bool): Whether to save the annotated image.
|
|
496
494
|
filename (str | None): Filename to save image if save is True.
|
|
497
|
-
color_mode (
|
|
498
|
-
txt_color (tuple[int, int, int]): Specify the RGB text color for classification task
|
|
495
|
+
color_mode (str): Specify the color mode, e.g., 'instance' or 'class'.
|
|
496
|
+
txt_color (tuple[int, int, int]): Specify the RGB text color for classification task.
|
|
499
497
|
|
|
500
498
|
Returns:
|
|
501
499
|
(np.ndarray): Annotated image as a numpy array.
|
|
@@ -508,7 +506,7 @@ class Results(SimpleClass):
|
|
|
508
506
|
"""
|
|
509
507
|
assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
|
|
510
508
|
if img is None and isinstance(self.orig_img, torch.Tensor):
|
|
511
|
-
img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).
|
|
509
|
+
img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).byte().cpu().numpy()
|
|
512
510
|
|
|
513
511
|
names = self.names
|
|
514
512
|
is_obb = self.obb is not None
|
|
@@ -537,7 +535,7 @@ class Results(SimpleClass):
|
|
|
537
535
|
)
|
|
538
536
|
idx = (
|
|
539
537
|
pred_boxes.id
|
|
540
|
-
if pred_boxes.
|
|
538
|
+
if pred_boxes.is_track and color_mode == "instance"
|
|
541
539
|
else pred_boxes.cls
|
|
542
540
|
if pred_boxes and color_mode == "class"
|
|
543
541
|
else reversed(range(len(pred_masks)))
|
|
@@ -547,10 +545,10 @@ class Results(SimpleClass):
|
|
|
547
545
|
# Plot Detect results
|
|
548
546
|
if pred_boxes is not None and show_boxes:
|
|
549
547
|
for i, d in enumerate(reversed(pred_boxes)):
|
|
550
|
-
c, d_conf, id = int(d.cls), float(d.conf) if conf else None,
|
|
548
|
+
c, d_conf, id = int(d.cls), float(d.conf) if conf else None, int(d.id.item()) if d.is_track else None
|
|
551
549
|
name = ("" if id is None else f"id:{id} ") + names[c]
|
|
552
550
|
label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None
|
|
553
|
-
box = d.xyxyxyxy.
|
|
551
|
+
box = d.xyxyxyxy.squeeze() if is_obb else d.xyxy.squeeze()
|
|
554
552
|
annotator.box_label(
|
|
555
553
|
box,
|
|
556
554
|
label,
|
|
@@ -564,7 +562,6 @@ class Results(SimpleClass):
|
|
|
564
562
|
else None,
|
|
565
563
|
True,
|
|
566
564
|
),
|
|
567
|
-
rotated=is_obb,
|
|
568
565
|
)
|
|
569
566
|
|
|
570
567
|
# Plot Classify results
|
|
@@ -595,8 +592,7 @@ class Results(SimpleClass):
|
|
|
595
592
|
return annotator.im if pil else annotator.result()
|
|
596
593
|
|
|
597
594
|
def show(self, *args, **kwargs):
|
|
598
|
-
"""
|
|
599
|
-
Display the image with annotated inference results.
|
|
595
|
+
"""Display the image with annotated inference results.
|
|
600
596
|
|
|
601
597
|
This method plots the detection results on the original image and displays it. It's a convenient way to
|
|
602
598
|
visualize the model's predictions directly.
|
|
@@ -613,19 +609,21 @@ class Results(SimpleClass):
|
|
|
613
609
|
"""
|
|
614
610
|
self.plot(show=True, *args, **kwargs)
|
|
615
611
|
|
|
616
|
-
def save(self, filename=None, *args, **kwargs):
|
|
617
|
-
"""
|
|
618
|
-
Saves annotated inference results image to file.
|
|
612
|
+
def save(self, filename: str | None = None, *args, **kwargs) -> str:
|
|
613
|
+
"""Save annotated inference results image to file.
|
|
619
614
|
|
|
620
615
|
This method plots the detection results on the original image and saves the annotated image to a file. It
|
|
621
616
|
utilizes the `plot` method to generate the annotated image and then saves it to the specified filename.
|
|
622
617
|
|
|
623
618
|
Args:
|
|
624
|
-
filename (str | Path | None): The filename to save the annotated image. If None, a default filename
|
|
625
|
-
|
|
619
|
+
filename (str | Path | None): The filename to save the annotated image. If None, a default filename is
|
|
620
|
+
generated based on the original image path.
|
|
626
621
|
*args (Any): Variable length argument list to be passed to the `plot` method.
|
|
627
622
|
**kwargs (Any): Arbitrary keyword arguments to be passed to the `plot` method.
|
|
628
623
|
|
|
624
|
+
Returns:
|
|
625
|
+
(str): The filename where the image was saved.
|
|
626
|
+
|
|
629
627
|
Examples:
|
|
630
628
|
>>> results = model("path/to/image.jpg")
|
|
631
629
|
>>> for result in results:
|
|
@@ -639,16 +637,15 @@ class Results(SimpleClass):
|
|
|
639
637
|
self.plot(save=True, filename=filename, *args, **kwargs)
|
|
640
638
|
return filename
|
|
641
639
|
|
|
642
|
-
def verbose(self):
|
|
643
|
-
"""
|
|
644
|
-
Returns a log string for each task in the results, detailing detection and classification outcomes.
|
|
640
|
+
def verbose(self) -> str:
|
|
641
|
+
"""Return a log string for each task in the results, detailing detection and classification outcomes.
|
|
645
642
|
|
|
646
643
|
This method generates a human-readable string summarizing the detection and classification results. It includes
|
|
647
644
|
the number of detections for each class and the top probabilities for classification tasks.
|
|
648
645
|
|
|
649
646
|
Returns:
|
|
650
|
-
(str): A formatted string containing a summary of the results. For detection tasks, it includes the
|
|
651
|
-
|
|
647
|
+
(str): A formatted string containing a summary of the results. For detection tasks, it includes the number
|
|
648
|
+
of detections per class. For classification tasks, it includes the top 5 class probabilities.
|
|
652
649
|
|
|
653
650
|
Examples:
|
|
654
651
|
>>> results = model("path/to/image.jpg")
|
|
@@ -662,18 +659,17 @@ class Results(SimpleClass):
|
|
|
662
659
|
- For classification tasks, it returns the top 5 class probabilities and their corresponding class names.
|
|
663
660
|
- The returned string is comma-separated and ends with a comma and a space.
|
|
664
661
|
"""
|
|
665
|
-
|
|
662
|
+
boxes = self.obb if self.obb is not None else self.boxes
|
|
666
663
|
if len(self) == 0:
|
|
667
|
-
return "" if probs is not None else "(no detections), "
|
|
668
|
-
if probs is not None:
|
|
669
|
-
return f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, "
|
|
670
|
-
if boxes
|
|
664
|
+
return "" if self.probs is not None else "(no detections), "
|
|
665
|
+
if self.probs is not None:
|
|
666
|
+
return f"{', '.join(f'{self.names[j]} {self.probs.data[j]:.2f}' for j in self.probs.top5)}, "
|
|
667
|
+
if boxes:
|
|
671
668
|
counts = boxes.cls.int().bincount()
|
|
672
669
|
return "".join(f"{n} {self.names[i]}{'s' * (n > 1)}, " for i, n in enumerate(counts) if n > 0)
|
|
673
670
|
|
|
674
|
-
def save_txt(self, txt_file, save_conf=False):
|
|
675
|
-
"""
|
|
676
|
-
Save detection results to a text file.
|
|
671
|
+
def save_txt(self, txt_file: str | Path, save_conf: bool = False) -> str:
|
|
672
|
+
"""Save detection results to a text file.
|
|
677
673
|
|
|
678
674
|
Args:
|
|
679
675
|
txt_file (str | Path): Path to the output text file.
|
|
@@ -710,7 +706,7 @@ class Results(SimpleClass):
|
|
|
710
706
|
elif boxes:
|
|
711
707
|
# Detect/segment/pose
|
|
712
708
|
for j, d in enumerate(boxes):
|
|
713
|
-
c, conf, id = int(d.cls), float(d.conf),
|
|
709
|
+
c, conf, id = int(d.cls), float(d.conf), int(d.id.item()) if d.is_track else None
|
|
714
710
|
line = (c, *(d.xyxyxyxyn.view(-1) if is_obb else d.xywhn.view(-1)))
|
|
715
711
|
if masks:
|
|
716
712
|
seg = masks[j].xyn[0].copy().reshape(-1) # reversed mask.xyn, (n,2) to (n*2)
|
|
@@ -726,27 +722,28 @@ class Results(SimpleClass):
|
|
|
726
722
|
with open(txt_file, "a", encoding="utf-8") as f:
|
|
727
723
|
f.writelines(text + "\n" for text in texts)
|
|
728
724
|
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
725
|
+
return str(txt_file)
|
|
726
|
+
|
|
727
|
+
def save_crop(self, save_dir: str | Path, file_name: str | Path = Path("im.jpg")):
|
|
728
|
+
"""Save cropped detection images to specified directory.
|
|
732
729
|
|
|
733
730
|
This method saves cropped images of detected objects to a specified directory. Each crop is saved in a
|
|
734
731
|
subdirectory named after the object's class, with the filename based on the input file_name.
|
|
735
732
|
|
|
736
733
|
Args:
|
|
737
734
|
save_dir (str | Path): Directory path where cropped images will be saved.
|
|
738
|
-
file_name (str | Path): Base filename for the saved cropped images.
|
|
735
|
+
file_name (str | Path): Base filename for the saved cropped images.
|
|
736
|
+
|
|
737
|
+
Examples:
|
|
738
|
+
>>> results = model("path/to/image.jpg")
|
|
739
|
+
>>> for result in results:
|
|
740
|
+
>>> result.save_crop(save_dir="path/to/crops", file_name="detection")
|
|
739
741
|
|
|
740
742
|
Notes:
|
|
741
743
|
- This method does not support Classify or Oriented Bounding Box (OBB) tasks.
|
|
742
744
|
- Crops are saved as 'save_dir/class_name/file_name.jpg'.
|
|
743
745
|
- The method will create necessary subdirectories if they don't exist.
|
|
744
746
|
- Original image is copied before cropping to avoid modifying the original.
|
|
745
|
-
|
|
746
|
-
Examples:
|
|
747
|
-
>>> results = model("path/to/image.jpg")
|
|
748
|
-
>>> for result in results:
|
|
749
|
-
>>> result.save_crop(save_dir="path/to/crops", file_name="detection")
|
|
750
747
|
"""
|
|
751
748
|
if self.probs is not None:
|
|
752
749
|
LOGGER.warning("Classify task do not support `save_crop`.")
|
|
@@ -762,12 +759,11 @@ class Results(SimpleClass):
|
|
|
762
759
|
BGR=True,
|
|
763
760
|
)
|
|
764
761
|
|
|
765
|
-
def summary(self, normalize=False, decimals=5):
|
|
766
|
-
"""
|
|
767
|
-
Converts inference results to a summarized dictionary with optional normalization for box coordinates.
|
|
762
|
+
def summary(self, normalize: bool = False, decimals: int = 5) -> list[dict[str, Any]]:
|
|
763
|
+
"""Convert inference results to a summarized dictionary with optional normalization for box coordinates.
|
|
768
764
|
|
|
769
|
-
This method creates a list of detection dictionaries, each containing information about a single
|
|
770
|
-
|
|
765
|
+
This method creates a list of detection dictionaries, each containing information about a single detection or
|
|
766
|
+
classification result. For classification tasks, it returns the top class and its
|
|
771
767
|
confidence. For detection tasks, it includes class information, bounding box coordinates, and
|
|
772
768
|
optionally mask segments and keypoints.
|
|
773
769
|
|
|
@@ -776,8 +772,8 @@ class Results(SimpleClass):
|
|
|
776
772
|
decimals (int): Number of decimal places to round the output values to.
|
|
777
773
|
|
|
778
774
|
Returns:
|
|
779
|
-
(
|
|
780
|
-
or classification result. The structure of each dictionary varies based on the task type
|
|
775
|
+
(list[dict[str, Any]]): A list of dictionaries, each containing summarized information for a single
|
|
776
|
+
detection or classification result. The structure of each dictionary varies based on the task type
|
|
781
777
|
(classification or detection) and available information (boxes, masks, keypoints).
|
|
782
778
|
|
|
783
779
|
Examples:
|
|
@@ -828,238 +824,31 @@ class Results(SimpleClass):
|
|
|
828
824
|
|
|
829
825
|
return results
|
|
830
826
|
|
|
831
|
-
def to_df(self, normalize=False, decimals=5):
|
|
832
|
-
"""
|
|
833
|
-
Converts detection results to a Pandas Dataframe.
|
|
834
|
-
|
|
835
|
-
This method converts the detection results into Pandas Dataframe format. It includes information
|
|
836
|
-
about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
837
|
-
segmentation masks and keypoints.
|
|
838
|
-
|
|
839
|
-
Args:
|
|
840
|
-
normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
841
|
-
If True, coordinates will be returned as float values between 0 and 1.
|
|
842
|
-
decimals (int): Number of decimal places to round the output values to.
|
|
843
|
-
|
|
844
|
-
Returns:
|
|
845
|
-
(DataFrame): A Pandas Dataframe containing all the information in results in an organized way.
|
|
846
|
-
|
|
847
|
-
Examples:
|
|
848
|
-
>>> results = model("path/to/image.jpg")
|
|
849
|
-
>>> for result in results:
|
|
850
|
-
>>> df_result = result.to_df()
|
|
851
|
-
>>> print(df_result)
|
|
852
|
-
"""
|
|
853
|
-
import pandas as pd # scope for faster 'import ultralytics'
|
|
854
|
-
|
|
855
|
-
return pd.DataFrame(self.summary(normalize=normalize, decimals=decimals))
|
|
856
|
-
|
|
857
|
-
def to_csv(self, normalize=False, decimals=5, *args, **kwargs):
|
|
858
|
-
"""
|
|
859
|
-
Converts detection results to a CSV format.
|
|
860
|
-
|
|
861
|
-
This method serializes the detection results into a CSV format. It includes information
|
|
862
|
-
about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
863
|
-
segmentation masks and keypoints.
|
|
864
|
-
|
|
865
|
-
Args:
|
|
866
|
-
normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
867
|
-
If True, coordinates will be returned as float values between 0 and 1.
|
|
868
|
-
decimals (int): Number of decimal places to round the output values to.
|
|
869
|
-
*args (Any): Variable length argument list to be passed to pandas.DataFrame.to_csv().
|
|
870
|
-
**kwargs (Any): Arbitrary keyword arguments to be passed to pandas.DataFrame.to_csv().
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
Returns:
|
|
874
|
-
(str): CSV containing all the information in results in an organized way.
|
|
875
|
-
|
|
876
|
-
Examples:
|
|
877
|
-
>>> results = model("path/to/image.jpg")
|
|
878
|
-
>>> for result in results:
|
|
879
|
-
>>> csv_result = result.to_csv()
|
|
880
|
-
>>> print(csv_result)
|
|
881
|
-
"""
|
|
882
|
-
return self.to_df(normalize=normalize, decimals=decimals).to_csv(*args, **kwargs)
|
|
883
|
-
|
|
884
|
-
def to_xml(self, normalize=False, decimals=5, *args, **kwargs):
|
|
885
|
-
"""
|
|
886
|
-
Converts detection results to XML format.
|
|
887
|
-
|
|
888
|
-
This method serializes the detection results into an XML format. It includes information
|
|
889
|
-
about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
890
|
-
segmentation masks and keypoints.
|
|
891
|
-
|
|
892
|
-
Args:
|
|
893
|
-
normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
894
|
-
If True, coordinates will be returned as float values between 0 and 1.
|
|
895
|
-
decimals (int): Number of decimal places to round the output values to.
|
|
896
|
-
*args (Any): Variable length argument list to be passed to pandas.DataFrame.to_xml().
|
|
897
|
-
**kwargs (Any): Arbitrary keyword arguments to be passed to pandas.DataFrame.to_xml().
|
|
898
|
-
|
|
899
|
-
Returns:
|
|
900
|
-
(str): An XML string containing all the information in results in an organized way.
|
|
901
|
-
|
|
902
|
-
Examples:
|
|
903
|
-
>>> results = model("path/to/image.jpg")
|
|
904
|
-
>>> for result in results:
|
|
905
|
-
>>> xml_result = result.to_xml()
|
|
906
|
-
>>> print(xml_result)
|
|
907
|
-
"""
|
|
908
|
-
check_requirements("lxml")
|
|
909
|
-
df = self.to_df(normalize=normalize, decimals=decimals)
|
|
910
|
-
return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml(*args, **kwargs)
|
|
911
|
-
|
|
912
|
-
def to_html(self, normalize=False, decimals=5, index=False, *args, **kwargs):
|
|
913
|
-
"""
|
|
914
|
-
Converts detection results to HTML format.
|
|
915
|
-
|
|
916
|
-
This method serializes the detection results into an HTML format. It includes information
|
|
917
|
-
about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
918
|
-
segmentation masks and keypoints.
|
|
919
|
-
|
|
920
|
-
Args:
|
|
921
|
-
normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
922
|
-
If True, coordinates will be returned as float values between 0 and 1.
|
|
923
|
-
decimals (int): Number of decimal places to round the output values to.
|
|
924
|
-
index (bool): Whether to include the DataFrame index in the HTML output.
|
|
925
|
-
*args (Any): Variable length argument list to be passed to pandas.DataFrame.to_html().
|
|
926
|
-
**kwargs (Any): Arbitrary keyword arguments to be passed to pandas.DataFrame.to_html().
|
|
927
|
-
|
|
928
|
-
Returns:
|
|
929
|
-
(str): An HTML string containing all the information in results in an organized way.
|
|
930
|
-
|
|
931
|
-
Examples:
|
|
932
|
-
>>> results = model("path/to/image.jpg")
|
|
933
|
-
>>> for result in results:
|
|
934
|
-
>>> html_result = result.to_html()
|
|
935
|
-
>>> print(html_result)
|
|
936
|
-
"""
|
|
937
|
-
df = self.to_df(normalize=normalize, decimals=decimals)
|
|
938
|
-
return "<table></table>" if df.empty else df.to_html(index=index, *args, **kwargs)
|
|
939
|
-
|
|
940
|
-
def tojson(self, normalize=False, decimals=5):
|
|
941
|
-
"""Deprecated version of to_json()."""
|
|
942
|
-
LOGGER.warning("'result.tojson()' is deprecated, replace with 'result.to_json()'.")
|
|
943
|
-
return self.to_json(normalize, decimals)
|
|
944
|
-
|
|
945
|
-
def to_json(self, normalize=False, decimals=5):
|
|
946
|
-
"""
|
|
947
|
-
Converts detection results to JSON format.
|
|
948
|
-
|
|
949
|
-
This method serializes the detection results into a JSON-compatible format. It includes information
|
|
950
|
-
about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
951
|
-
segmentation masks and keypoints.
|
|
952
|
-
|
|
953
|
-
Args:
|
|
954
|
-
normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
955
|
-
If True, coordinates will be returned as float values between 0 and 1.
|
|
956
|
-
decimals (int): Number of decimal places to round the output values to.
|
|
957
|
-
|
|
958
|
-
Returns:
|
|
959
|
-
(str): A JSON string containing the serialized detection results.
|
|
960
|
-
|
|
961
|
-
Examples:
|
|
962
|
-
>>> results = model("path/to/image.jpg")
|
|
963
|
-
>>> for result in results:
|
|
964
|
-
>>> json_result = result.to_json()
|
|
965
|
-
>>> print(json_result)
|
|
966
|
-
|
|
967
|
-
Notes:
|
|
968
|
-
- For classification tasks, the JSON will contain class probabilities instead of bounding boxes.
|
|
969
|
-
- For object detection tasks, the JSON will include bounding box coordinates, class names, and
|
|
970
|
-
confidence scores.
|
|
971
|
-
- If available, segmentation masks and keypoints will also be included in the JSON output.
|
|
972
|
-
- The method uses the `summary` method internally to generate the data structure before
|
|
973
|
-
converting it to JSON.
|
|
974
|
-
"""
|
|
975
|
-
import json
|
|
976
|
-
|
|
977
|
-
return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2)
|
|
978
|
-
|
|
979
|
-
def to_sql(self, table_name="results", normalize=False, decimals=5, db_path="results.db"):
|
|
980
|
-
"""
|
|
981
|
-
Converts detection results to an SQL-compatible format.
|
|
982
|
-
|
|
983
|
-
This method serializes the detection results into a format compatible with SQL databases.
|
|
984
|
-
It includes information about detected objects such as bounding boxes, class names, confidence scores,
|
|
985
|
-
and optionally segmentation masks, keypoints or oriented bounding boxes.
|
|
986
|
-
|
|
987
|
-
Args:
|
|
988
|
-
table_name (str): Name of the SQL table where the data will be inserted.
|
|
989
|
-
normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
990
|
-
If True, coordinates will be returned as float values between 0 and 1.
|
|
991
|
-
decimals (int): Number of decimal places to round the bounding boxes values to.
|
|
992
|
-
db_path (str): Path to the SQLite database file.
|
|
993
|
-
|
|
994
|
-
Examples:
|
|
995
|
-
>>> results = model("path/to/image.jpg")
|
|
996
|
-
>>> for result in results:
|
|
997
|
-
>>> result.to_sql()
|
|
998
|
-
"""
|
|
999
|
-
import json
|
|
1000
|
-
import sqlite3
|
|
1001
|
-
|
|
1002
|
-
# Convert results to a list of dictionaries
|
|
1003
|
-
data = self.summary(normalize=normalize, decimals=decimals)
|
|
1004
|
-
if len(data) == 0:
|
|
1005
|
-
LOGGER.warning("No results to save to SQL. Results dict is empty.")
|
|
1006
|
-
return
|
|
1007
|
-
|
|
1008
|
-
# Connect to the SQLite database
|
|
1009
|
-
conn = sqlite3.connect(db_path)
|
|
1010
|
-
cursor = conn.cursor()
|
|
1011
|
-
|
|
1012
|
-
# Create table if it doesn't exist
|
|
1013
|
-
columns = (
|
|
1014
|
-
"id INTEGER PRIMARY KEY AUTOINCREMENT, class_name TEXT, confidence REAL, box TEXT, masks TEXT, kpts TEXT"
|
|
1015
|
-
)
|
|
1016
|
-
cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})")
|
|
1017
|
-
|
|
1018
|
-
# Insert data into the table
|
|
1019
|
-
for item in data:
|
|
1020
|
-
cursor.execute(
|
|
1021
|
-
f"INSERT INTO {table_name} (class_name, confidence, box, masks, kpts) VALUES (?, ?, ?, ?, ?)",
|
|
1022
|
-
(
|
|
1023
|
-
item.get("name"),
|
|
1024
|
-
item.get("confidence"),
|
|
1025
|
-
json.dumps(item.get("box", {})),
|
|
1026
|
-
json.dumps(item.get("segments", {})),
|
|
1027
|
-
json.dumps(item.get("keypoints", {})),
|
|
1028
|
-
),
|
|
1029
|
-
)
|
|
1030
|
-
|
|
1031
|
-
# Commit and close the connection
|
|
1032
|
-
conn.commit()
|
|
1033
|
-
conn.close()
|
|
1034
|
-
|
|
1035
|
-
LOGGER.info(f"Detection results successfully written to SQL table '{table_name}' in database '{db_path}'.")
|
|
1036
|
-
|
|
1037
827
|
|
|
1038
828
|
class Boxes(BaseTensor):
|
|
1039
|
-
"""
|
|
1040
|
-
A class for managing and manipulating detection boxes.
|
|
829
|
+
"""A class for managing and manipulating detection boxes.
|
|
1041
830
|
|
|
1042
|
-
This class provides functionality for handling detection boxes, including their coordinates,
|
|
1043
|
-
class labels, and optional tracking IDs. It supports various box formats and offers methods for
|
|
1044
|
-
and conversion between different coordinate systems.
|
|
831
|
+
This class provides comprehensive functionality for handling detection boxes, including their coordinates,
|
|
832
|
+
confidence scores, class labels, and optional tracking IDs. It supports various box formats and offers methods for
|
|
833
|
+
easy manipulation and conversion between different coordinate systems.
|
|
1045
834
|
|
|
1046
835
|
Attributes:
|
|
1047
|
-
data (torch.Tensor |
|
|
1048
|
-
orig_shape (
|
|
836
|
+
data (torch.Tensor | np.ndarray): The raw tensor containing detection boxes and associated data.
|
|
837
|
+
orig_shape (tuple[int, int]): The original image dimensions (height, width).
|
|
1049
838
|
is_track (bool): Indicates whether tracking IDs are included in the box data.
|
|
1050
|
-
xyxy (torch.Tensor |
|
|
1051
|
-
conf (torch.Tensor |
|
|
1052
|
-
cls (torch.Tensor |
|
|
839
|
+
xyxy (torch.Tensor | np.ndarray): Boxes in [x1, y1, x2, y2] format.
|
|
840
|
+
conf (torch.Tensor | np.ndarray): Confidence scores for each box.
|
|
841
|
+
cls (torch.Tensor | np.ndarray): Class labels for each box.
|
|
1053
842
|
id (torch.Tensor | None): Tracking IDs for each box (if available).
|
|
1054
|
-
xywh (torch.Tensor |
|
|
1055
|
-
xyxyn (torch.Tensor |
|
|
1056
|
-
xywhn (torch.Tensor |
|
|
843
|
+
xywh (torch.Tensor | np.ndarray): Boxes in [x, y, width, height] format.
|
|
844
|
+
xyxyn (torch.Tensor | np.ndarray): Normalized [x1, y1, x2, y2] boxes relative to orig_shape.
|
|
845
|
+
xywhn (torch.Tensor | np.ndarray): Normalized [x, y, width, height] boxes relative to orig_shape.
|
|
1057
846
|
|
|
1058
847
|
Methods:
|
|
1059
|
-
cpu
|
|
1060
|
-
numpy
|
|
1061
|
-
cuda
|
|
1062
|
-
to
|
|
848
|
+
cpu: Return a copy of the object with all tensors on CPU memory.
|
|
849
|
+
numpy: Return a copy of the object with all tensors as numpy arrays.
|
|
850
|
+
cuda: Return a copy of the object with all tensors on GPU memory.
|
|
851
|
+
to: Return a copy of the object with tensors on specified device and dtype.
|
|
1063
852
|
|
|
1064
853
|
Examples:
|
|
1065
854
|
>>> import torch
|
|
@@ -1072,23 +861,21 @@ class Boxes(BaseTensor):
|
|
|
1072
861
|
>>> print(boxes.xywhn)
|
|
1073
862
|
"""
|
|
1074
863
|
|
|
1075
|
-
def __init__(self, boxes, orig_shape) -> None:
|
|
1076
|
-
"""
|
|
1077
|
-
Initialize the Boxes class with detection box data and the original image shape.
|
|
864
|
+
def __init__(self, boxes: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
865
|
+
"""Initialize the Boxes class with detection box data and the original image shape.
|
|
1078
866
|
|
|
1079
|
-
This class manages detection boxes, providing easy access and manipulation of box coordinates,
|
|
1080
|
-
|
|
1081
|
-
|
|
867
|
+
This class manages detection boxes, providing easy access and manipulation of box coordinates, confidence
|
|
868
|
+
scores, class identifiers, and optional tracking IDs. It supports multiple formats for box coordinates,
|
|
869
|
+
including both absolute and normalized forms.
|
|
1082
870
|
|
|
1083
871
|
Args:
|
|
1084
|
-
boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape
|
|
1085
|
-
(num_boxes,
|
|
1086
|
-
|
|
1087
|
-
orig_shape (Tuple[int, int]): The original image shape as (height, width). Used for normalization.
|
|
872
|
+
boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape (num_boxes, 6) or
|
|
873
|
+
(num_boxes, 7). Columns should contain [x1, y1, x2, y2, (optional) track_id, confidence, class].
|
|
874
|
+
orig_shape (tuple[int, int]): The original image shape as (height, width). Used for normalization.
|
|
1088
875
|
|
|
1089
876
|
Attributes:
|
|
1090
877
|
data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
|
|
1091
|
-
orig_shape (
|
|
878
|
+
orig_shape (tuple[int, int]): The original image size, used for normalization.
|
|
1092
879
|
is_track (bool): Indicates whether tracking IDs are included in the box data.
|
|
1093
880
|
|
|
1094
881
|
Examples:
|
|
@@ -1108,13 +895,12 @@ class Boxes(BaseTensor):
|
|
|
1108
895
|
self.orig_shape = orig_shape
|
|
1109
896
|
|
|
1110
897
|
@property
|
|
1111
|
-
def xyxy(self):
|
|
1112
|
-
"""
|
|
1113
|
-
Returns bounding boxes in [x1, y1, x2, y2] format.
|
|
898
|
+
def xyxy(self) -> torch.Tensor | np.ndarray:
|
|
899
|
+
"""Return bounding boxes in [x1, y1, x2, y2] format.
|
|
1114
900
|
|
|
1115
901
|
Returns:
|
|
1116
|
-
(torch.Tensor |
|
|
1117
|
-
|
|
902
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box coordinates in
|
|
903
|
+
[x1, y1, x2, y2] format, where n is the number of boxes.
|
|
1118
904
|
|
|
1119
905
|
Examples:
|
|
1120
906
|
>>> results = model("image.jpg")
|
|
@@ -1125,13 +911,12 @@ class Boxes(BaseTensor):
|
|
|
1125
911
|
return self.data[:, :4]
|
|
1126
912
|
|
|
1127
913
|
@property
|
|
1128
|
-
def conf(self):
|
|
1129
|
-
"""
|
|
1130
|
-
Returns the confidence scores for each detection box.
|
|
914
|
+
def conf(self) -> torch.Tensor | np.ndarray:
|
|
915
|
+
"""Return the confidence scores for each detection box.
|
|
1131
916
|
|
|
1132
917
|
Returns:
|
|
1133
|
-
(torch.Tensor |
|
|
1134
|
-
|
|
918
|
+
(torch.Tensor | np.ndarray): A 1D tensor or array containing confidence scores for each detection, with
|
|
919
|
+
shape (N,) where N is the number of detections.
|
|
1135
920
|
|
|
1136
921
|
Examples:
|
|
1137
922
|
>>> boxes = Boxes(torch.tensor([[10, 20, 30, 40, 0.9, 0]]), orig_shape=(100, 100))
|
|
@@ -1142,13 +927,12 @@ class Boxes(BaseTensor):
|
|
|
1142
927
|
return self.data[:, -2]
|
|
1143
928
|
|
|
1144
929
|
@property
|
|
1145
|
-
def cls(self):
|
|
1146
|
-
"""
|
|
1147
|
-
Returns the class ID tensor representing category predictions for each bounding box.
|
|
930
|
+
def cls(self) -> torch.Tensor | np.ndarray:
|
|
931
|
+
"""Return the class ID tensor representing category predictions for each bounding box.
|
|
1148
932
|
|
|
1149
933
|
Returns:
|
|
1150
|
-
(torch.Tensor |
|
|
1151
|
-
|
|
934
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array containing the class IDs for each detection box. The
|
|
935
|
+
shape is (N,), where N is the number of boxes.
|
|
1152
936
|
|
|
1153
937
|
Examples:
|
|
1154
938
|
>>> results = model("image.jpg")
|
|
@@ -1159,13 +943,12 @@ class Boxes(BaseTensor):
|
|
|
1159
943
|
return self.data[:, -1]
|
|
1160
944
|
|
|
1161
945
|
@property
|
|
1162
|
-
def id(self):
|
|
1163
|
-
"""
|
|
1164
|
-
Returns the tracking IDs for each detection box if available.
|
|
946
|
+
def id(self) -> torch.Tensor | np.ndarray | None:
|
|
947
|
+
"""Return the tracking IDs for each detection box if available.
|
|
1165
948
|
|
|
1166
949
|
Returns:
|
|
1167
|
-
(torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled,
|
|
1168
|
-
|
|
950
|
+
(torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled, otherwise None.
|
|
951
|
+
Shape is (N,) where N is the number of boxes.
|
|
1169
952
|
|
|
1170
953
|
Examples:
|
|
1171
954
|
>>> results = model.track("path/to/video.mp4")
|
|
@@ -1184,15 +967,14 @@ class Boxes(BaseTensor):
|
|
|
1184
967
|
return self.data[:, -3] if self.is_track else None
|
|
1185
968
|
|
|
1186
969
|
@property
|
|
1187
|
-
@lru_cache(maxsize=2)
|
|
1188
|
-
def xywh(self):
|
|
1189
|
-
"""
|
|
1190
|
-
Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
|
|
970
|
+
@lru_cache(maxsize=2)
|
|
971
|
+
def xywh(self) -> torch.Tensor | np.ndarray:
|
|
972
|
+
"""Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
|
|
1191
973
|
|
|
1192
974
|
Returns:
|
|
1193
|
-
(torch.Tensor |
|
|
1194
|
-
the center point of the bounding box, width, height are the dimensions of the
|
|
1195
|
-
shape of the returned tensor is (N, 4), where N is the number of boxes.
|
|
975
|
+
(torch.Tensor | np.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center, y_center
|
|
976
|
+
are the coordinates of the center point of the bounding box, width, height are the dimensions of the
|
|
977
|
+
bounding box and the shape of the returned tensor is (N, 4), where N is the number of boxes.
|
|
1196
978
|
|
|
1197
979
|
Examples:
|
|
1198
980
|
>>> boxes = Boxes(torch.tensor([[100, 50, 150, 100], [200, 150, 300, 250]]), orig_shape=(480, 640))
|
|
@@ -1205,16 +987,15 @@ class Boxes(BaseTensor):
|
|
|
1205
987
|
|
|
1206
988
|
@property
|
|
1207
989
|
@lru_cache(maxsize=2)
|
|
1208
|
-
def xyxyn(self):
|
|
1209
|
-
"""
|
|
1210
|
-
Returns normalized bounding box coordinates relative to the original image size.
|
|
990
|
+
def xyxyn(self) -> torch.Tensor | np.ndarray:
|
|
991
|
+
"""Return normalized bounding box coordinates relative to the original image size.
|
|
1211
992
|
|
|
1212
|
-
This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format,
|
|
1213
|
-
|
|
993
|
+
This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format, normalized to the
|
|
994
|
+
range [0, 1] based on the original image dimensions.
|
|
1214
995
|
|
|
1215
996
|
Returns:
|
|
1216
|
-
(torch.Tensor |
|
|
1217
|
-
|
|
997
|
+
(torch.Tensor | np.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is the number of
|
|
998
|
+
boxes. Each row contains [x1, y1, x2, y2] values normalized to [0, 1].
|
|
1218
999
|
|
|
1219
1000
|
Examples:
|
|
1220
1001
|
>>> boxes = Boxes(torch.tensor([[100, 50, 300, 400, 0.9, 0]]), orig_shape=(480, 640))
|
|
@@ -1229,17 +1010,16 @@ class Boxes(BaseTensor):
|
|
|
1229
1010
|
|
|
1230
1011
|
@property
|
|
1231
1012
|
@lru_cache(maxsize=2)
|
|
1232
|
-
def xywhn(self):
|
|
1233
|
-
"""
|
|
1234
|
-
Returns normalized bounding boxes in [x, y, width, height] format.
|
|
1013
|
+
def xywhn(self) -> torch.Tensor | np.ndarray:
|
|
1014
|
+
"""Return normalized bounding boxes in [x, y, width, height] format.
|
|
1235
1015
|
|
|
1236
|
-
This property calculates and returns the normalized bounding box coordinates in the format
|
|
1237
|
-
|
|
1016
|
+
This property calculates and returns the normalized bounding box coordinates in the format [x_center, y_center,
|
|
1017
|
+
width, height], where all values are relative to the original image dimensions.
|
|
1238
1018
|
|
|
1239
1019
|
Returns:
|
|
1240
|
-
(torch.Tensor |
|
|
1241
|
-
|
|
1242
|
-
|
|
1020
|
+
(torch.Tensor | np.ndarray): Normalized bounding boxes with shape (N, 4), where N is the number of boxes.
|
|
1021
|
+
Each row contains [x_center, y_center, width, height] values normalized to [0, 1] based on the original
|
|
1022
|
+
image dimensions.
|
|
1243
1023
|
|
|
1244
1024
|
Examples:
|
|
1245
1025
|
>>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0]]), orig_shape=(480, 640))
|
|
@@ -1254,23 +1034,22 @@ class Boxes(BaseTensor):
|
|
|
1254
1034
|
|
|
1255
1035
|
|
|
1256
1036
|
class Masks(BaseTensor):
|
|
1257
|
-
"""
|
|
1258
|
-
A class for storing and manipulating detection masks.
|
|
1037
|
+
"""A class for storing and manipulating detection masks.
|
|
1259
1038
|
|
|
1260
|
-
This class extends BaseTensor and provides functionality for handling segmentation masks,
|
|
1261
|
-
|
|
1039
|
+
This class extends BaseTensor and provides functionality for handling segmentation masks, including methods for
|
|
1040
|
+
converting between pixel and normalized coordinates.
|
|
1262
1041
|
|
|
1263
1042
|
Attributes:
|
|
1264
|
-
data (torch.Tensor |
|
|
1043
|
+
data (torch.Tensor | np.ndarray): The raw tensor or array containing mask data.
|
|
1265
1044
|
orig_shape (tuple): Original image shape in (height, width) format.
|
|
1266
|
-
xy (
|
|
1267
|
-
xyn (
|
|
1045
|
+
xy (list[np.ndarray]): A list of segments in pixel coordinates.
|
|
1046
|
+
xyn (list[np.ndarray]): A list of normalized segments.
|
|
1268
1047
|
|
|
1269
1048
|
Methods:
|
|
1270
|
-
cpu
|
|
1271
|
-
numpy
|
|
1272
|
-
cuda
|
|
1273
|
-
to
|
|
1049
|
+
cpu: Return a copy of the Masks object with the mask tensor on CPU memory.
|
|
1050
|
+
numpy: Return a copy of the Masks object with the mask tensor as a numpy array.
|
|
1051
|
+
cuda: Return a copy of the Masks object with the mask tensor on GPU memory.
|
|
1052
|
+
to: Return a copy of the Masks object with the mask tensor on specified device and dtype.
|
|
1274
1053
|
|
|
1275
1054
|
Examples:
|
|
1276
1055
|
>>> masks_data = torch.rand(1, 160, 160)
|
|
@@ -1280,9 +1059,8 @@ class Masks(BaseTensor):
|
|
|
1280
1059
|
>>> normalized_coords = masks.xyn
|
|
1281
1060
|
"""
|
|
1282
1061
|
|
|
1283
|
-
def __init__(self, masks, orig_shape) -> None:
|
|
1284
|
-
"""
|
|
1285
|
-
Initialize the Masks class with detection mask data and the original image shape.
|
|
1062
|
+
def __init__(self, masks: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
1063
|
+
"""Initialize the Masks class with detection mask data and the original image shape.
|
|
1286
1064
|
|
|
1287
1065
|
Args:
|
|
1288
1066
|
masks (torch.Tensor | np.ndarray): Detection masks with shape (num_masks, height, width).
|
|
@@ -1301,16 +1079,15 @@ class Masks(BaseTensor):
|
|
|
1301
1079
|
|
|
1302
1080
|
@property
|
|
1303
1081
|
@lru_cache(maxsize=1)
|
|
1304
|
-
def xyn(self):
|
|
1305
|
-
"""
|
|
1306
|
-
Returns normalized xy-coordinates of the segmentation masks.
|
|
1082
|
+
def xyn(self) -> list[np.ndarray]:
|
|
1083
|
+
"""Return normalized xy-coordinates of the segmentation masks.
|
|
1307
1084
|
|
|
1308
|
-
This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates
|
|
1309
|
-
|
|
1085
|
+
This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates are
|
|
1086
|
+
normalized relative to the original image shape.
|
|
1310
1087
|
|
|
1311
1088
|
Returns:
|
|
1312
|
-
(
|
|
1313
|
-
|
|
1089
|
+
(list[np.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates of a
|
|
1090
|
+
single segmentation mask. Each array has shape (N, 2), where N is the number of points in the
|
|
1314
1091
|
mask contour.
|
|
1315
1092
|
|
|
1316
1093
|
Examples:
|
|
@@ -1326,17 +1103,15 @@ class Masks(BaseTensor):
|
|
|
1326
1103
|
|
|
1327
1104
|
@property
|
|
1328
1105
|
@lru_cache(maxsize=1)
|
|
1329
|
-
def xy(self):
|
|
1330
|
-
"""
|
|
1331
|
-
Returns the [x, y] pixel coordinates for each segment in the mask tensor.
|
|
1106
|
+
def xy(self) -> list[np.ndarray]:
|
|
1107
|
+
"""Return the [x, y] pixel coordinates for each segment in the mask tensor.
|
|
1332
1108
|
|
|
1333
|
-
This property calculates and returns a list of pixel coordinates for each segmentation mask in the
|
|
1334
|
-
|
|
1109
|
+
This property calculates and returns a list of pixel coordinates for each segmentation mask in the Masks object.
|
|
1110
|
+
The coordinates are scaled to match the original image dimensions.
|
|
1335
1111
|
|
|
1336
1112
|
Returns:
|
|
1337
|
-
(
|
|
1338
|
-
|
|
1339
|
-
number of points in the segment.
|
|
1113
|
+
(list[np.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel coordinates for a
|
|
1114
|
+
single segmentation mask. Each array has shape (N, 2), where N is the number of points in the segment.
|
|
1340
1115
|
|
|
1341
1116
|
Examples:
|
|
1342
1117
|
>>> results = model("image.jpg")
|
|
@@ -1352,25 +1127,24 @@ class Masks(BaseTensor):
|
|
|
1352
1127
|
|
|
1353
1128
|
|
|
1354
1129
|
class Keypoints(BaseTensor):
|
|
1355
|
-
"""
|
|
1356
|
-
A class for storing and manipulating detection keypoints.
|
|
1130
|
+
"""A class for storing and manipulating detection keypoints.
|
|
1357
1131
|
|
|
1358
|
-
This class encapsulates functionality for handling keypoint data, including coordinate manipulation,
|
|
1359
|
-
|
|
1132
|
+
This class encapsulates functionality for handling keypoint data, including coordinate manipulation, normalization,
|
|
1133
|
+
and confidence values. It supports keypoint detection results with optional visibility information.
|
|
1360
1134
|
|
|
1361
1135
|
Attributes:
|
|
1362
1136
|
data (torch.Tensor): The raw tensor containing keypoint data.
|
|
1363
|
-
orig_shape (
|
|
1137
|
+
orig_shape (tuple[int, int]): The original image dimensions (height, width).
|
|
1364
1138
|
has_visible (bool): Indicates whether visibility information is available for keypoints.
|
|
1365
1139
|
xy (torch.Tensor): Keypoint coordinates in [x, y] format.
|
|
1366
1140
|
xyn (torch.Tensor): Normalized keypoint coordinates in [x, y] format, relative to orig_shape.
|
|
1367
1141
|
conf (torch.Tensor): Confidence values for each keypoint, if available.
|
|
1368
1142
|
|
|
1369
1143
|
Methods:
|
|
1370
|
-
cpu
|
|
1371
|
-
numpy
|
|
1372
|
-
cuda
|
|
1373
|
-
to
|
|
1144
|
+
cpu: Return a copy of the keypoints tensor on CPU memory.
|
|
1145
|
+
numpy: Return a copy of the keypoints tensor as a numpy array.
|
|
1146
|
+
cuda: Return a copy of the keypoints tensor on GPU memory.
|
|
1147
|
+
to: Return a copy of the keypoints tensor with specified device and dtype.
|
|
1374
1148
|
|
|
1375
1149
|
Examples:
|
|
1376
1150
|
>>> import torch
|
|
@@ -1383,19 +1157,17 @@ class Keypoints(BaseTensor):
|
|
|
1383
1157
|
>>> keypoints_cpu = keypoints.cpu() # Move keypoints to CPU
|
|
1384
1158
|
"""
|
|
1385
1159
|
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
"""
|
|
1389
|
-
Initializes the Keypoints object with detection keypoints and original image dimensions.
|
|
1160
|
+
def __init__(self, keypoints: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
1161
|
+
"""Initialize the Keypoints object with detection keypoints and original image dimensions.
|
|
1390
1162
|
|
|
1391
|
-
This method processes the input keypoints tensor, handling both 2D and 3D formats. For 3D tensors
|
|
1392
|
-
|
|
1163
|
+
This method processes the input keypoints tensor. A 2D input (a single object) gains a leading batch dimension;
|
|
1164
|
+
the data, including any confidence column, is passed to the base class without confidence-based masking.
|
|
1393
1165
|
|
|
1394
1166
|
Args:
|
|
1395
1167
|
keypoints (torch.Tensor | np.ndarray): A tensor or array containing keypoint data. Shape can be either:
|
|
1396
1168
|
- (num_objects, num_keypoints, 2) for x, y coordinates only
|
|
1397
1169
|
- (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores
|
|
1398
|
-
orig_shape (
|
|
1170
|
+
orig_shape (tuple[int, int]): The original image dimensions (height, width).
|
|
1399
1171
|
|
|
1400
1172
|
Examples:
|
|
1401
1173
|
>>> kpts = torch.rand(1, 17, 3) # 1 object, 17 keypoints (COCO format), x,y,conf
|
|
@@ -1404,21 +1176,17 @@ class Keypoints(BaseTensor):
|
|
|
1404
1176
|
"""
|
|
1405
1177
|
if keypoints.ndim == 2:
|
|
1406
1178
|
keypoints = keypoints[None, :]
|
|
1407
|
-
if keypoints.shape[2] == 3: # x, y, conf
|
|
1408
|
-
mask = keypoints[..., 2] < 0.5 # points with conf < 0.5 (not visible)
|
|
1409
|
-
keypoints[..., :2][mask] = 0
|
|
1410
1179
|
super().__init__(keypoints, orig_shape)
|
|
1411
1180
|
self.has_visible = self.data.shape[-1] == 3
|
|
1412
1181
|
|
|
1413
1182
|
@property
|
|
1414
1183
|
@lru_cache(maxsize=1)
|
|
1415
|
-
def xy(self):
|
|
1416
|
-
"""
|
|
1417
|
-
Returns x, y coordinates of keypoints.
|
|
1184
|
+
def xy(self) -> torch.Tensor | np.ndarray:
|
|
1185
|
+
"""Return x, y coordinates of keypoints.
|
|
1418
1186
|
|
|
1419
1187
|
Returns:
|
|
1420
|
-
(torch.Tensor): A tensor containing the x, y coordinates of keypoints with shape (N, K, 2), where N is
|
|
1421
|
-
|
|
1188
|
+
(torch.Tensor | np.ndarray): The x, y coordinates of keypoints with shape (N, K, 2), where N is the
|
|
1189
|
+
number of detections and K is the number of keypoints per detection.
|
|
1422
1190
|
|
|
1423
1191
|
Examples:
|
|
1424
1192
|
>>> results = model("image.jpg")
|
|
@@ -1436,14 +1204,13 @@ class Keypoints(BaseTensor):
|
|
|
1436
1204
|
|
|
1437
1205
|
@property
|
|
1438
1206
|
@lru_cache(maxsize=1)
|
|
1439
|
-
def xyn(self):
|
|
1440
|
-
"""
|
|
1441
|
-
Returns normalized coordinates (x, y) of keypoints relative to the original image size.
|
|
1207
|
+
def xyn(self) -> torch.Tensor | np.ndarray:
|
|
1208
|
+
"""Return normalized coordinates (x, y) of keypoints relative to the original image size.
|
|
1442
1209
|
|
|
1443
1210
|
Returns:
|
|
1444
|
-
(torch.Tensor |
|
|
1445
|
-
coordinates, where N is the number of instances, K is the number of keypoints, and the last
|
|
1446
|
-
|
|
1211
|
+
(torch.Tensor | np.ndarray): A tensor or array of shape (N, K, 2) containing normalized keypoint
|
|
1212
|
+
coordinates, where N is the number of instances, K is the number of keypoints, and the last dimension
|
|
1213
|
+
contains [x, y] values in the range [0, 1].
|
|
1447
1214
|
|
|
1448
1215
|
Examples:
|
|
1449
1216
|
>>> keypoints = Keypoints(torch.rand(1, 17, 2), orig_shape=(480, 640))
|
|
@@ -1458,14 +1225,12 @@ class Keypoints(BaseTensor):
|
|
|
1458
1225
|
|
|
1459
1226
|
@property
|
|
1460
1227
|
@lru_cache(maxsize=1)
|
|
1461
|
-
def conf(self):
|
|
1462
|
-
"""
|
|
1463
|
-
Returns confidence values for each keypoint.
|
|
1228
|
+
def conf(self) -> torch.Tensor | np.ndarray | None:
|
|
1229
|
+
"""Return confidence values for each keypoint.
|
|
1464
1230
|
|
|
1465
1231
|
Returns:
|
|
1466
|
-
(torch.Tensor | None): A tensor containing confidence scores for each keypoint if available,
|
|
1467
|
-
|
|
1468
|
-
for single detection.
|
|
1232
|
+
(torch.Tensor | np.ndarray | None): Confidence scores for each keypoint if available, otherwise None.
|
|
1233
|
+
Shape is (num_detections, num_keypoints) for batched data or (num_keypoints,) for single detection.
|
|
1469
1234
|
|
|
1470
1235
|
Examples:
|
|
1471
1236
|
>>> keypoints = Keypoints(torch.rand(1, 17, 3), orig_shape=(640, 640)) # 1 detection, 17 keypoints
|
|
@@ -1476,25 +1241,24 @@ class Keypoints(BaseTensor):
|
|
|
1476
1241
|
|
|
1477
1242
|
|
|
1478
1243
|
class Probs(BaseTensor):
|
|
1479
|
-
"""
|
|
1480
|
-
A class for storing and manipulating classification probabilities.
|
|
1244
|
+
"""A class for storing and manipulating classification probabilities.
|
|
1481
1245
|
|
|
1482
|
-
This class extends BaseTensor and provides methods for accessing and manipulating
|
|
1483
|
-
|
|
1246
|
+
This class extends BaseTensor and provides methods for accessing and manipulating classification probabilities,
|
|
1247
|
+
including top-1 and top-5 predictions.
|
|
1484
1248
|
|
|
1485
1249
|
Attributes:
|
|
1486
|
-
data (torch.Tensor |
|
|
1250
|
+
data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
|
|
1487
1251
|
orig_shape (tuple | None): The original image shape as (height, width). Not used in this class.
|
|
1488
1252
|
top1 (int): Index of the class with the highest probability.
|
|
1489
|
-
top5 (
|
|
1490
|
-
top1conf (torch.Tensor |
|
|
1491
|
-
top5conf (torch.Tensor |
|
|
1253
|
+
top5 (list[int]): Indices of the top 5 classes by probability.
|
|
1254
|
+
top1conf (torch.Tensor | np.ndarray): Confidence score of the top 1 class.
|
|
1255
|
+
top5conf (torch.Tensor | np.ndarray): Confidence scores of the top 5 classes.
|
|
1492
1256
|
|
|
1493
1257
|
Methods:
|
|
1494
|
-
cpu
|
|
1495
|
-
numpy
|
|
1496
|
-
cuda
|
|
1497
|
-
to
|
|
1258
|
+
cpu: Return a copy of the probabilities tensor on CPU memory.
|
|
1259
|
+
numpy: Return a copy of the probabilities tensor as a numpy array.
|
|
1260
|
+
cuda: Return a copy of the probabilities tensor on GPU memory.
|
|
1261
|
+
to: Return a copy of the probabilities tensor with specified device and dtype.
|
|
1498
1262
|
|
|
1499
1263
|
Examples:
|
|
1500
1264
|
>>> probs = torch.tensor([0.1, 0.3, 0.6])
|
|
@@ -1509,9 +1273,8 @@ class Probs(BaseTensor):
|
|
|
1509
1273
|
tensor([0.6000, 0.3000, 0.1000])
|
|
1510
1274
|
"""
|
|
1511
1275
|
|
|
1512
|
-
def __init__(self, probs, orig_shape=None) -> None:
|
|
1513
|
-
"""
|
|
1514
|
-
Initialize the Probs class with classification probabilities.
|
|
1276
|
+
def __init__(self, probs: torch.Tensor | np.ndarray, orig_shape: tuple[int, int] | None = None) -> None:
|
|
1277
|
+
"""Initialize the Probs class with classification probabilities.
|
|
1515
1278
|
|
|
1516
1279
|
This class stores and manages classification probabilities, providing easy access to top predictions and their
|
|
1517
1280
|
confidences.
|
|
@@ -1524,7 +1287,7 @@ class Probs(BaseTensor):
|
|
|
1524
1287
|
Attributes:
|
|
1525
1288
|
data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
|
|
1526
1289
|
top1 (int): Index of the top 1 class.
|
|
1527
|
-
top5 (
|
|
1290
|
+
top5 (list[int]): Indices of the top 5 classes.
|
|
1528
1291
|
top1conf (torch.Tensor | np.ndarray): Confidence of the top 1 class.
|
|
1529
1292
|
top5conf (torch.Tensor | np.ndarray): Confidences of the top 5 classes.
|
|
1530
1293
|
|
|
@@ -1543,9 +1306,8 @@ class Probs(BaseTensor):
|
|
|
1543
1306
|
|
|
1544
1307
|
@property
|
|
1545
1308
|
@lru_cache(maxsize=1)
|
|
1546
|
-
def top1(self):
|
|
1547
|
-
"""
|
|
1548
|
-
Returns the index of the class with the highest probability.
|
|
1309
|
+
def top1(self) -> int:
|
|
1310
|
+
"""Return the index of the class with the highest probability.
|
|
1549
1311
|
|
|
1550
1312
|
Returns:
|
|
1551
1313
|
(int): Index of the class with the highest probability.
|
|
@@ -1559,12 +1321,11 @@ class Probs(BaseTensor):
|
|
|
1559
1321
|
|
|
1560
1322
|
@property
|
|
1561
1323
|
@lru_cache(maxsize=1)
|
|
1562
|
-
def top5(self):
|
|
1563
|
-
"""
|
|
1564
|
-
Returns the indices of the top 5 class probabilities.
|
|
1324
|
+
def top5(self) -> list[int]:
|
|
1325
|
+
"""Return the indices of the top 5 class probabilities.
|
|
1565
1326
|
|
|
1566
1327
|
Returns:
|
|
1567
|
-
(
|
|
1328
|
+
(list[int]): A list containing the indices of the top 5 class probabilities, sorted in descending order.
|
|
1568
1329
|
|
|
1569
1330
|
Examples:
|
|
1570
1331
|
>>> probs = Probs(torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]))
|
|
@@ -1575,15 +1336,14 @@ class Probs(BaseTensor):
|
|
|
1575
1336
|
|
|
1576
1337
|
@property
|
|
1577
1338
|
@lru_cache(maxsize=1)
|
|
1578
|
-
def top1conf(self):
|
|
1579
|
-
"""
|
|
1580
|
-
Returns the confidence score of the highest probability class.
|
|
1339
|
+
def top1conf(self) -> torch.Tensor | np.ndarray:
|
|
1340
|
+
"""Return the confidence score of the highest probability class.
|
|
1581
1341
|
|
|
1582
1342
|
This property retrieves the confidence score (probability) of the class with the highest predicted probability
|
|
1583
1343
|
from the classification results.
|
|
1584
1344
|
|
|
1585
1345
|
Returns:
|
|
1586
|
-
(torch.Tensor |
|
|
1346
|
+
(torch.Tensor | np.ndarray): A tensor containing the confidence score of the top 1 class.
|
|
1587
1347
|
|
|
1588
1348
|
Examples:
|
|
1589
1349
|
>>> results = model("image.jpg") # classify an image
|
|
@@ -1595,17 +1355,16 @@ class Probs(BaseTensor):
|
|
|
1595
1355
|
|
|
1596
1356
|
@property
|
|
1597
1357
|
@lru_cache(maxsize=1)
|
|
1598
|
-
def top5conf(self):
|
|
1599
|
-
"""
|
|
1600
|
-
Returns confidence scores for the top 5 classification predictions.
|
|
1358
|
+
def top5conf(self) -> torch.Tensor | np.ndarray:
|
|
1359
|
+
"""Return confidence scores for the top 5 classification predictions.
|
|
1601
1360
|
|
|
1602
|
-
This property retrieves the confidence scores corresponding to the top 5 class probabilities
|
|
1603
|
-
|
|
1604
|
-
|
|
1361
|
+
This property retrieves the confidence scores corresponding to the top 5 class probabilities predicted by the
|
|
1362
|
+
model. It provides a quick way to access the most likely class predictions along with their associated
|
|
1363
|
+
confidence levels.
|
|
1605
1364
|
|
|
1606
1365
|
Returns:
|
|
1607
|
-
(torch.Tensor |
|
|
1608
|
-
|
|
1366
|
+
(torch.Tensor | np.ndarray): A tensor or array containing the confidence scores for the top 5 predicted
|
|
1367
|
+
classes, sorted in descending order of probability.
|
|
1609
1368
|
|
|
1610
1369
|
Examples:
|
|
1611
1370
|
>>> results = model("image.jpg")
|
|
@@ -1617,29 +1376,28 @@ class Probs(BaseTensor):
|
|
|
1617
1376
|
|
|
1618
1377
|
|
|
1619
1378
|
class OBB(BaseTensor):
|
|
1620
|
-
"""
|
|
1621
|
-
A class for storing and manipulating Oriented Bounding Boxes (OBB).
|
|
1379
|
+
"""A class for storing and manipulating Oriented Bounding Boxes (OBB).
|
|
1622
1380
|
|
|
1623
|
-
This class provides functionality to handle oriented bounding boxes, including conversion between
|
|
1624
|
-
|
|
1381
|
+
This class provides functionality to handle oriented bounding boxes, including conversion between different formats,
|
|
1382
|
+
normalization, and access to various properties of the boxes. It supports both tracking and non-tracking scenarios.
|
|
1625
1383
|
|
|
1626
1384
|
Attributes:
|
|
1627
1385
|
data (torch.Tensor): The raw OBB tensor containing box coordinates and associated data.
|
|
1628
1386
|
orig_shape (tuple): Original image size as (height, width).
|
|
1629
1387
|
is_track (bool): Indicates whether tracking IDs are included in the box data.
|
|
1630
|
-
xywhr (torch.Tensor |
|
|
1631
|
-
conf (torch.Tensor |
|
|
1632
|
-
cls (torch.Tensor |
|
|
1633
|
-
id (torch.Tensor |
|
|
1634
|
-
xyxyxyxy (torch.Tensor |
|
|
1635
|
-
xyxyxyxyn (torch.Tensor |
|
|
1636
|
-
xyxy (torch.Tensor |
|
|
1388
|
+
xywhr (torch.Tensor | np.ndarray): Boxes in [x_center, y_center, width, height, rotation] format.
|
|
1389
|
+
conf (torch.Tensor | np.ndarray): Confidence scores for each box.
|
|
1390
|
+
cls (torch.Tensor | np.ndarray): Class labels for each box.
|
|
1391
|
+
id (torch.Tensor | np.ndarray): Tracking IDs for each box, if available.
|
|
1392
|
+
xyxyxyxy (torch.Tensor | np.ndarray): Boxes in 8-point [x1, y1, x2, y2, x3, y3, x4, y4] format.
|
|
1393
|
+
xyxyxyxyn (torch.Tensor | np.ndarray): Normalized 8-point coordinates relative to orig_shape.
|
|
1394
|
+
xyxy (torch.Tensor | np.ndarray): Axis-aligned bounding boxes in [x1, y1, x2, y2] format.
|
|
1637
1395
|
|
|
1638
1396
|
Methods:
|
|
1639
|
-
cpu
|
|
1640
|
-
numpy
|
|
1641
|
-
cuda
|
|
1642
|
-
to
|
|
1397
|
+
cpu: Return a copy of the OBB object with all tensors on CPU memory.
|
|
1398
|
+
numpy: Return a copy of the OBB object with all tensors as numpy arrays.
|
|
1399
|
+
cuda: Return a copy of the OBB object with all tensors on GPU memory.
|
|
1400
|
+
to: Return a copy of the OBB object with tensors on specified device and dtype.
|
|
1643
1401
|
|
|
1644
1402
|
Examples:
|
|
1645
1403
|
>>> boxes = torch.tensor([[100, 50, 150, 100, 30, 0.9, 0]]) # xywhr, conf, cls
|
|
@@ -1649,22 +1407,21 @@ class OBB(BaseTensor):
|
|
|
1649
1407
|
>>> print(obb.cls)
|
|
1650
1408
|
"""
|
|
1651
1409
|
|
|
1652
|
-
def __init__(self, boxes, orig_shape) -> None:
|
|
1653
|
-
"""
|
|
1654
|
-
Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape.
|
|
1410
|
+
def __init__(self, boxes: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
|
|
1411
|
+
"""Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape.
|
|
1655
1412
|
|
|
1656
|
-
This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides
|
|
1657
|
-
|
|
1413
|
+
This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides various
|
|
1414
|
+
properties and methods to access and transform the OBB data.
|
|
1658
1415
|
|
|
1659
1416
|
Args:
|
|
1660
|
-
boxes (torch.Tensor |
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
orig_shape (
|
|
1417
|
+
boxes (torch.Tensor | np.ndarray): A tensor or numpy array containing the detection boxes, with shape
|
|
1418
|
+
(num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values. If present,
|
|
1419
|
+
the third last column contains track IDs, and the fifth column contains rotation.
|
|
1420
|
+
orig_shape (tuple[int, int]): Original image size, in the format (height, width).
|
|
1664
1421
|
|
|
1665
1422
|
Attributes:
|
|
1666
|
-
data (torch.Tensor |
|
|
1667
|
-
orig_shape (
|
|
1423
|
+
data (torch.Tensor | np.ndarray): The raw OBB tensor.
|
|
1424
|
+
orig_shape (tuple[int, int]): The original image shape.
|
|
1668
1425
|
is_track (bool): Whether the boxes include tracking IDs.
|
|
1669
1426
|
|
|
1670
1427
|
Raises:
|
|
@@ -1686,12 +1443,11 @@ class OBB(BaseTensor):
|
|
|
1686
1443
|
self.orig_shape = orig_shape
|
|
1687
1444
|
|
|
1688
1445
|
@property
|
|
1689
|
-
def xywhr(self):
|
|
1690
|
-
"""
|
|
1691
|
-
Returns boxes in [x_center, y_center, width, height, rotation] format.
|
|
1446
|
+
def xywhr(self) -> torch.Tensor | np.ndarray:
|
|
1447
|
+
"""Return boxes in [x_center, y_center, width, height, rotation] format.
|
|
1692
1448
|
|
|
1693
1449
|
Returns:
|
|
1694
|
-
(torch.Tensor |
|
|
1450
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array containing the oriented bounding boxes with format
|
|
1695
1451
|
[x_center, y_center, width, height, rotation]. The shape is (N, 5) where N is the number of boxes.
|
|
1696
1452
|
|
|
1697
1453
|
Examples:
|
|
@@ -1704,16 +1460,15 @@ class OBB(BaseTensor):
|
|
|
1704
1460
|
return self.data[:, :5]
|
|
1705
1461
|
|
|
1706
1462
|
@property
|
|
1707
|
-
def conf(self):
|
|
1708
|
-
"""
|
|
1709
|
-
Returns the confidence scores for Oriented Bounding Boxes (OBBs).
|
|
1463
|
+
def conf(self) -> torch.Tensor | np.ndarray:
|
|
1464
|
+
"""Return the confidence scores for Oriented Bounding Boxes (OBBs).
|
|
1710
1465
|
|
|
1711
1466
|
This property retrieves the confidence values associated with each OBB detection. The confidence score
|
|
1712
1467
|
represents the model's certainty in the detection.
|
|
1713
1468
|
|
|
1714
1469
|
Returns:
|
|
1715
|
-
(torch.Tensor |
|
|
1716
|
-
|
|
1470
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array of shape (N,) containing confidence scores for N
|
|
1471
|
+
detections, where each score is in the range [0, 1].
|
|
1717
1472
|
|
|
1718
1473
|
Examples:
|
|
1719
1474
|
>>> results = model("image.jpg")
|
|
@@ -1724,13 +1479,12 @@ class OBB(BaseTensor):
|
|
|
1724
1479
|
return self.data[:, -2]
|
|
1725
1480
|
|
|
1726
1481
|
@property
|
|
1727
|
-
def cls(self):
|
|
1728
|
-
"""
|
|
1729
|
-
Returns the class values of the oriented bounding boxes.
|
|
1482
|
+
def cls(self) -> torch.Tensor | np.ndarray:
|
|
1483
|
+
"""Return the class values of the oriented bounding boxes.
|
|
1730
1484
|
|
|
1731
1485
|
Returns:
|
|
1732
|
-
(torch.Tensor |
|
|
1733
|
-
|
|
1486
|
+
(torch.Tensor | np.ndarray): A tensor or numpy array containing the class values for each oriented bounding
|
|
1487
|
+
box. The shape is (N,), where N is the number of boxes.
|
|
1734
1488
|
|
|
1735
1489
|
Examples:
|
|
1736
1490
|
>>> results = model("image.jpg")
|
|
@@ -1742,13 +1496,12 @@ class OBB(BaseTensor):
|
|
|
1742
1496
|
return self.data[:, -1]
|
|
1743
1497
|
|
|
1744
1498
|
@property
|
|
1745
|
-
def id(self):
|
|
1746
|
-
"""
|
|
1747
|
-
Returns the tracking IDs of the oriented bounding boxes (if available).
|
|
1499
|
+
def id(self) -> torch.Tensor | np.ndarray | None:
|
|
1500
|
+
"""Return the tracking IDs of the oriented bounding boxes (if available).
|
|
1748
1501
|
|
|
1749
1502
|
Returns:
|
|
1750
|
-
(torch.Tensor |
|
|
1751
|
-
|
|
1503
|
+
(torch.Tensor | np.ndarray | None): A tensor or numpy array containing the tracking IDs for each oriented
|
|
1504
|
+
bounding box. Returns None if tracking IDs are not available.
|
|
1752
1505
|
|
|
1753
1506
|
Examples:
|
|
1754
1507
|
>>> results = model("image.jpg", tracker=True) # Run inference with tracking
|
|
@@ -1762,13 +1515,12 @@ class OBB(BaseTensor):
|
|
|
1762
1515
|
|
|
1763
1516
|
@property
|
|
1764
1517
|
@lru_cache(maxsize=2)
|
|
1765
|
-
def xyxyxyxy(self):
|
|
1766
|
-
"""
|
|
1767
|
-
Converts OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
|
|
1518
|
+
def xyxyxyxy(self) -> torch.Tensor | np.ndarray:
|
|
1519
|
+
"""Convert OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
|
|
1768
1520
|
|
|
1769
1521
|
Returns:
|
|
1770
|
-
(torch.Tensor |
|
|
1771
|
-
|
|
1522
|
+
(torch.Tensor | np.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is the
|
|
1523
|
+
number of boxes. Each box is represented by 4 points (x, y), starting from the top-left corner and
|
|
1772
1524
|
moving clockwise.
|
|
1773
1525
|
|
|
1774
1526
|
Examples:
|
|
@@ -1781,14 +1533,13 @@ class OBB(BaseTensor):
|
|
|
1781
1533
|
|
|
1782
1534
|
@property
|
|
1783
1535
|
@lru_cache(maxsize=2)
|
|
1784
|
-
def xyxyxyxyn(self):
|
|
1785
|
-
"""
|
|
1786
|
-
Converts rotated bounding boxes to normalized xyxyxyxy format.
|
|
1536
|
+
def xyxyxyxyn(self) -> torch.Tensor | np.ndarray:
|
|
1537
|
+
"""Convert rotated bounding boxes to normalized xyxyxyxy format.
|
|
1787
1538
|
|
|
1788
1539
|
Returns:
|
|
1789
|
-
(torch.Tensor |
|
|
1790
|
-
where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to
|
|
1791
|
-
|
|
1540
|
+
(torch.Tensor | np.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2),
|
|
1541
|
+
where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to the
|
|
1542
|
+
original image dimensions.
|
|
1792
1543
|
|
|
1793
1544
|
Examples:
|
|
1794
1545
|
>>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480)) # 10 random OBBs
|
|
@@ -1803,17 +1554,16 @@ class OBB(BaseTensor):
|
|
|
1803
1554
|
|
|
1804
1555
|
@property
|
|
1805
1556
|
@lru_cache(maxsize=2)
|
|
1806
|
-
def xyxy(self):
|
|
1807
|
-
"""
|
|
1808
|
-
Converts oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format.
|
|
1557
|
+
def xyxy(self) -> torch.Tensor | np.ndarray:
|
|
1558
|
+
"""Convert oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format.
|
|
1809
1559
|
|
|
1810
|
-
This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in
|
|
1811
|
-
|
|
1812
|
-
|
|
1560
|
+
This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in xyxy
|
|
1561
|
+
format (x1, y1, x2, y2). This is useful for operations that require axis-aligned bounding boxes, such as IoU
|
|
1562
|
+
calculation with non-rotated boxes.
|
|
1813
1563
|
|
|
1814
1564
|
Returns:
|
|
1815
|
-
(torch.Tensor |
|
|
1816
|
-
|
|
1565
|
+
(torch.Tensor | np.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N is the
|
|
1566
|
+
number of boxes. Each row contains [x1, y1, x2, y2] coordinates.
|
|
1817
1567
|
|
|
1818
1568
|
Examples:
|
|
1819
1569
|
>>> import torch
|