dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.3.248__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +13 -14
  2. dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
  3. tests/__init__.py +5 -7
  4. tests/conftest.py +8 -15
  5. tests/test_cli.py +1 -1
  6. tests/test_cuda.py +5 -8
  7. tests/test_engine.py +1 -1
  8. tests/test_exports.py +57 -12
  9. tests/test_integrations.py +4 -4
  10. tests/test_python.py +84 -53
  11. tests/test_solutions.py +160 -151
  12. ultralytics/__init__.py +1 -1
  13. ultralytics/cfg/__init__.py +56 -62
  14. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  15. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  16. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  17. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  18. ultralytics/cfg/datasets/VOC.yaml +15 -16
  19. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  20. ultralytics/cfg/datasets/coco-pose.yaml +21 -0
  21. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  22. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  23. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  24. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  25. ultralytics/cfg/datasets/dota8.yaml +2 -2
  26. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  27. ultralytics/cfg/datasets/kitti.yaml +27 -0
  28. ultralytics/cfg/datasets/lvis.yaml +5 -5
  29. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  30. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  31. ultralytics/cfg/datasets/xView.yaml +16 -16
  32. ultralytics/cfg/default.yaml +1 -1
  33. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  34. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  35. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  36. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  37. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  38. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  39. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  40. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  41. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  42. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  43. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  44. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  45. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  46. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  47. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  48. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  49. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  50. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  51. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  52. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  53. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  54. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  55. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  56. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  57. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  58. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  59. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  60. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  61. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  62. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  63. ultralytics/data/__init__.py +4 -4
  64. ultralytics/data/annotator.py +3 -4
  65. ultralytics/data/augment.py +285 -475
  66. ultralytics/data/base.py +18 -26
  67. ultralytics/data/build.py +147 -25
  68. ultralytics/data/converter.py +36 -46
  69. ultralytics/data/dataset.py +46 -74
  70. ultralytics/data/loaders.py +42 -49
  71. ultralytics/data/split.py +5 -6
  72. ultralytics/data/split_dota.py +8 -15
  73. ultralytics/data/utils.py +34 -43
  74. ultralytics/engine/exporter.py +319 -237
  75. ultralytics/engine/model.py +148 -188
  76. ultralytics/engine/predictor.py +29 -38
  77. ultralytics/engine/results.py +177 -311
  78. ultralytics/engine/trainer.py +83 -59
  79. ultralytics/engine/tuner.py +23 -34
  80. ultralytics/engine/validator.py +39 -22
  81. ultralytics/hub/__init__.py +16 -19
  82. ultralytics/hub/auth.py +6 -12
  83. ultralytics/hub/google/__init__.py +7 -10
  84. ultralytics/hub/session.py +15 -25
  85. ultralytics/hub/utils.py +5 -8
  86. ultralytics/models/__init__.py +1 -1
  87. ultralytics/models/fastsam/__init__.py +1 -1
  88. ultralytics/models/fastsam/model.py +8 -10
  89. ultralytics/models/fastsam/predict.py +17 -29
  90. ultralytics/models/fastsam/utils.py +1 -2
  91. ultralytics/models/fastsam/val.py +5 -7
  92. ultralytics/models/nas/__init__.py +1 -1
  93. ultralytics/models/nas/model.py +5 -8
  94. ultralytics/models/nas/predict.py +7 -9
  95. ultralytics/models/nas/val.py +1 -2
  96. ultralytics/models/rtdetr/__init__.py +1 -1
  97. ultralytics/models/rtdetr/model.py +5 -8
  98. ultralytics/models/rtdetr/predict.py +15 -19
  99. ultralytics/models/rtdetr/train.py +10 -13
  100. ultralytics/models/rtdetr/val.py +21 -23
  101. ultralytics/models/sam/__init__.py +15 -2
  102. ultralytics/models/sam/amg.py +14 -20
  103. ultralytics/models/sam/build.py +26 -19
  104. ultralytics/models/sam/build_sam3.py +377 -0
  105. ultralytics/models/sam/model.py +29 -32
  106. ultralytics/models/sam/modules/blocks.py +83 -144
  107. ultralytics/models/sam/modules/decoders.py +19 -37
  108. ultralytics/models/sam/modules/encoders.py +44 -101
  109. ultralytics/models/sam/modules/memory_attention.py +16 -30
  110. ultralytics/models/sam/modules/sam.py +200 -73
  111. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  112. ultralytics/models/sam/modules/transformer.py +18 -28
  113. ultralytics/models/sam/modules/utils.py +174 -50
  114. ultralytics/models/sam/predict.py +2248 -350
  115. ultralytics/models/sam/sam3/__init__.py +3 -0
  116. ultralytics/models/sam/sam3/decoder.py +546 -0
  117. ultralytics/models/sam/sam3/encoder.py +529 -0
  118. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  119. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  120. ultralytics/models/sam/sam3/model_misc.py +199 -0
  121. ultralytics/models/sam/sam3/necks.py +129 -0
  122. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  123. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  124. ultralytics/models/sam/sam3/vitdet.py +547 -0
  125. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  126. ultralytics/models/utils/loss.py +14 -26
  127. ultralytics/models/utils/ops.py +13 -17
  128. ultralytics/models/yolo/__init__.py +1 -1
  129. ultralytics/models/yolo/classify/predict.py +9 -12
  130. ultralytics/models/yolo/classify/train.py +11 -32
  131. ultralytics/models/yolo/classify/val.py +29 -28
  132. ultralytics/models/yolo/detect/predict.py +7 -10
  133. ultralytics/models/yolo/detect/train.py +11 -20
  134. ultralytics/models/yolo/detect/val.py +70 -58
  135. ultralytics/models/yolo/model.py +36 -53
  136. ultralytics/models/yolo/obb/predict.py +5 -14
  137. ultralytics/models/yolo/obb/train.py +11 -14
  138. ultralytics/models/yolo/obb/val.py +39 -36
  139. ultralytics/models/yolo/pose/__init__.py +1 -1
  140. ultralytics/models/yolo/pose/predict.py +6 -21
  141. ultralytics/models/yolo/pose/train.py +10 -15
  142. ultralytics/models/yolo/pose/val.py +38 -57
  143. ultralytics/models/yolo/segment/predict.py +14 -18
  144. ultralytics/models/yolo/segment/train.py +3 -6
  145. ultralytics/models/yolo/segment/val.py +93 -45
  146. ultralytics/models/yolo/world/train.py +8 -14
  147. ultralytics/models/yolo/world/train_world.py +11 -34
  148. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  149. ultralytics/models/yolo/yoloe/predict.py +16 -23
  150. ultralytics/models/yolo/yoloe/train.py +30 -43
  151. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  152. ultralytics/models/yolo/yoloe/val.py +15 -20
  153. ultralytics/nn/__init__.py +7 -7
  154. ultralytics/nn/autobackend.py +145 -77
  155. ultralytics/nn/modules/__init__.py +60 -60
  156. ultralytics/nn/modules/activation.py +4 -6
  157. ultralytics/nn/modules/block.py +132 -216
  158. ultralytics/nn/modules/conv.py +52 -97
  159. ultralytics/nn/modules/head.py +50 -103
  160. ultralytics/nn/modules/transformer.py +76 -88
  161. ultralytics/nn/modules/utils.py +16 -21
  162. ultralytics/nn/tasks.py +94 -154
  163. ultralytics/nn/text_model.py +40 -67
  164. ultralytics/solutions/__init__.py +12 -12
  165. ultralytics/solutions/ai_gym.py +11 -17
  166. ultralytics/solutions/analytics.py +15 -16
  167. ultralytics/solutions/config.py +5 -6
  168. ultralytics/solutions/distance_calculation.py +10 -13
  169. ultralytics/solutions/heatmap.py +7 -13
  170. ultralytics/solutions/instance_segmentation.py +5 -8
  171. ultralytics/solutions/object_blurrer.py +7 -10
  172. ultralytics/solutions/object_counter.py +12 -19
  173. ultralytics/solutions/object_cropper.py +8 -14
  174. ultralytics/solutions/parking_management.py +33 -31
  175. ultralytics/solutions/queue_management.py +10 -12
  176. ultralytics/solutions/region_counter.py +9 -12
  177. ultralytics/solutions/security_alarm.py +15 -20
  178. ultralytics/solutions/similarity_search.py +10 -15
  179. ultralytics/solutions/solutions.py +75 -74
  180. ultralytics/solutions/speed_estimation.py +7 -10
  181. ultralytics/solutions/streamlit_inference.py +2 -4
  182. ultralytics/solutions/templates/similarity-search.html +7 -18
  183. ultralytics/solutions/trackzone.py +7 -10
  184. ultralytics/solutions/vision_eye.py +5 -8
  185. ultralytics/trackers/__init__.py +1 -1
  186. ultralytics/trackers/basetrack.py +3 -5
  187. ultralytics/trackers/bot_sort.py +10 -27
  188. ultralytics/trackers/byte_tracker.py +14 -30
  189. ultralytics/trackers/track.py +3 -6
  190. ultralytics/trackers/utils/gmc.py +11 -22
  191. ultralytics/trackers/utils/kalman_filter.py +37 -48
  192. ultralytics/trackers/utils/matching.py +12 -15
  193. ultralytics/utils/__init__.py +116 -116
  194. ultralytics/utils/autobatch.py +2 -4
  195. ultralytics/utils/autodevice.py +17 -18
  196. ultralytics/utils/benchmarks.py +32 -46
  197. ultralytics/utils/callbacks/base.py +8 -10
  198. ultralytics/utils/callbacks/clearml.py +5 -13
  199. ultralytics/utils/callbacks/comet.py +32 -46
  200. ultralytics/utils/callbacks/dvc.py +13 -18
  201. ultralytics/utils/callbacks/mlflow.py +4 -5
  202. ultralytics/utils/callbacks/neptune.py +7 -15
  203. ultralytics/utils/callbacks/platform.py +314 -38
  204. ultralytics/utils/callbacks/raytune.py +3 -4
  205. ultralytics/utils/callbacks/tensorboard.py +23 -31
  206. ultralytics/utils/callbacks/wb.py +10 -13
  207. ultralytics/utils/checks.py +99 -76
  208. ultralytics/utils/cpu.py +3 -8
  209. ultralytics/utils/dist.py +8 -12
  210. ultralytics/utils/downloads.py +20 -30
  211. ultralytics/utils/errors.py +6 -14
  212. ultralytics/utils/events.py +2 -4
  213. ultralytics/utils/export/__init__.py +4 -236
  214. ultralytics/utils/export/engine.py +237 -0
  215. ultralytics/utils/export/imx.py +91 -55
  216. ultralytics/utils/export/tensorflow.py +231 -0
  217. ultralytics/utils/files.py +24 -28
  218. ultralytics/utils/git.py +9 -11
  219. ultralytics/utils/instance.py +30 -51
  220. ultralytics/utils/logger.py +212 -114
  221. ultralytics/utils/loss.py +14 -22
  222. ultralytics/utils/metrics.py +126 -155
  223. ultralytics/utils/nms.py +13 -16
  224. ultralytics/utils/ops.py +107 -165
  225. ultralytics/utils/patches.py +33 -21
  226. ultralytics/utils/plotting.py +72 -80
  227. ultralytics/utils/tal.py +25 -39
  228. ultralytics/utils/torch_utils.py +52 -78
  229. ultralytics/utils/tqdm.py +20 -20
  230. ultralytics/utils/triton.py +13 -19
  231. ultralytics/utils/tuner.py +17 -5
  232. dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
  233. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
  234. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
  235. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
  236. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
@@ -21,12 +21,11 @@ from ultralytics.utils.plotting import Annotator, colors, save_one_box
21
21
 
22
22
 
23
23
  class BaseTensor(SimpleClass):
24
- """
25
- Base tensor class with additional methods for easy manipulation and device handling.
24
+ """Base tensor class with additional methods for easy manipulation and device handling.
26
25
 
27
- This class provides a foundation for tensor-like objects with device management capabilities,
28
- supporting both PyTorch tensors and NumPy arrays. It includes methods for moving data between
29
- devices and converting between tensor types.
26
+ This class provides a foundation for tensor-like objects with device management capabilities, supporting both
27
+ PyTorch tensors and NumPy arrays. It includes methods for moving data between devices and converting between tensor
28
+ types.
30
29
 
31
30
  Attributes:
32
31
  data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
@@ -49,18 +48,11 @@ class BaseTensor(SimpleClass):
49
48
  """
50
49
 
51
50
  def __init__(self, data: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
52
- """
53
- Initialize BaseTensor with prediction data and the original shape of the image.
51
+ """Initialize BaseTensor with prediction data and the original shape of the image.
54
52
 
55
53
  Args:
56
54
  data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
57
55
  orig_shape (tuple[int, int]): Original shape of the image in (height, width) format.
58
-
59
- Examples:
60
- >>> import torch
61
- >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
62
- >>> orig_shape = (720, 1280)
63
- >>> base_tensor = BaseTensor(data, orig_shape)
64
56
  """
65
57
  assert isinstance(data, (torch.Tensor, np.ndarray)), "data must be torch.Tensor or np.ndarray"
66
58
  self.data = data
@@ -68,8 +60,7 @@ class BaseTensor(SimpleClass):
68
60
 
69
61
  @property
70
62
  def shape(self) -> tuple[int, ...]:
71
- """
72
- Return the shape of the underlying data tensor.
63
+ """Return the shape of the underlying data tensor.
73
64
 
74
65
  Returns:
75
66
  (tuple[int, ...]): The shape of the data tensor.
@@ -83,8 +74,7 @@ class BaseTensor(SimpleClass):
83
74
  return self.data.shape
84
75
 
85
76
  def cpu(self):
86
- """
87
- Return a copy of the tensor stored in CPU memory.
77
+ """Return a copy of the tensor stored in CPU memory.
88
78
 
89
79
  Returns:
90
80
  (BaseTensor): A new BaseTensor object with the data tensor moved to CPU memory.
@@ -101,29 +91,26 @@ class BaseTensor(SimpleClass):
101
91
  return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
102
92
 
103
93
  def numpy(self):
104
- """
105
- Return a copy of the tensor as a numpy array.
94
+ """Return a copy of this object with its data converted to a NumPy array.
106
95
 
107
96
  Returns:
108
- (np.ndarray): A numpy array containing the same data as the original tensor.
97
+ (BaseTensor): A new instance with `data` as a NumPy array.
109
98
 
110
99
  Examples:
111
100
  >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
112
101
  >>> orig_shape = (720, 1280)
113
102
  >>> base_tensor = BaseTensor(data, orig_shape)
114
- >>> numpy_array = base_tensor.numpy()
115
- >>> print(type(numpy_array))
103
+ >>> numpy_tensor = base_tensor.numpy()
104
+ >>> print(type(numpy_tensor.data))
116
105
  <class 'numpy.ndarray'>
117
106
  """
118
107
  return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
119
108
 
120
109
  def cuda(self):
121
- """
122
- Move the tensor to GPU memory.
110
+ """Move the tensor to GPU memory.
123
111
 
124
112
  Returns:
125
- (BaseTensor): A new BaseTensor instance with the data moved to GPU memory if it's not already a
126
- numpy array, otherwise returns self.
113
+ (BaseTensor): A new BaseTensor instance with the data moved to GPU memory.
127
114
 
128
115
  Examples:
129
116
  >>> import torch
@@ -137,8 +124,7 @@ class BaseTensor(SimpleClass):
137
124
  return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)
138
125
 
139
126
  def to(self, *args, **kwargs):
140
- """
141
- Return a copy of the tensor with the specified device and dtype.
127
+ """Return a copy of the tensor with the specified device and dtype.
142
128
 
143
129
  Args:
144
130
  *args (Any): Variable length argument list to be passed to torch.Tensor.to().
@@ -155,8 +141,7 @@ class BaseTensor(SimpleClass):
155
141
  return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
156
142
 
157
143
  def __len__(self) -> int:
158
- """
159
- Return the length of the underlying data tensor.
144
+ """Return the length of the underlying data tensor.
160
145
 
161
146
  Returns:
162
147
  (int): The number of elements in the first dimension of the data tensor.
@@ -170,8 +155,7 @@ class BaseTensor(SimpleClass):
170
155
  return len(self.data)
171
156
 
172
157
  def __getitem__(self, idx):
173
- """
174
- Return a new BaseTensor instance containing the specified indexed elements of the data tensor.
158
+ """Return a new BaseTensor instance containing the specified indexed elements of the data tensor.
175
159
 
176
160
  Args:
177
161
  idx (int | list[int] | torch.Tensor): Index or indices to select from the data tensor.
@@ -190,12 +174,11 @@ class BaseTensor(SimpleClass):
190
174
 
191
175
 
192
176
  class Results(SimpleClass, DataExportMixin):
193
- """
194
- A class for storing and manipulating inference results.
177
+ """A class for storing and manipulating inference results.
195
178
 
196
- This class provides comprehensive functionality for handling inference results from various
197
- Ultralytics models, including detection, segmentation, classification, and pose estimation.
198
- It supports visualization, data export, and various coordinate transformations.
179
+ This class provides comprehensive functionality for handling inference results from various Ultralytics models,
180
+ including detection, segmentation, classification, and pose estimation. It supports visualization, data export, and
181
+ various coordinate transformations.
199
182
 
200
183
  Attributes:
201
184
  orig_img (np.ndarray): The original image as a numpy array.
@@ -217,14 +200,14 @@ class Results(SimpleClass, DataExportMixin):
217
200
  cuda: Move all tensors in the Results object to GPU memory.
218
201
  to: Move all tensors to the specified device and dtype.
219
202
  new: Create a new Results object with the same image, path, names, and speed attributes.
220
- plot: Plot detection results on an input RGB image.
203
+ plot: Plot detection results on an input BGR image.
221
204
  show: Display the image with annotated inference results.
222
205
  save: Save annotated inference results image to file.
223
206
  verbose: Return a log string for each task in the results.
224
207
  save_txt: Save detection results to a text file.
225
208
  save_crop: Save cropped detection images to specified directory.
226
209
  summary: Convert inference results to a summarized dictionary.
227
- to_df: Convert detection results to a Polars Dataframe.
210
+ to_df: Convert detection results to a Polars DataFrame.
228
211
  to_json: Convert detection results to JSON format.
229
212
  to_csv: Convert detection results to a CSV format.
230
213
 
@@ -249,8 +232,7 @@ class Results(SimpleClass, DataExportMixin):
249
232
  obb: torch.Tensor | None = None,
250
233
  speed: dict[str, float] | None = None,
251
234
  ) -> None:
252
- """
253
- Initialize the Results class for storing and manipulating inference results.
235
+ """Initialize the Results class for storing and manipulating inference results.
254
236
 
255
237
  Args:
256
238
  orig_img (np.ndarray): The original image as a numpy array.
@@ -263,12 +245,6 @@ class Results(SimpleClass, DataExportMixin):
263
245
  obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection.
264
246
  speed (dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
265
247
 
266
- Examples:
267
- >>> results = model("path/to/image.jpg")
268
- >>> result = results[0] # Get the first result
269
- >>> boxes = result.boxes # Get the boxes for the first result
270
- >>> masks = result.masks # Get the masks for the first result
271
-
272
248
  Notes:
273
249
  For the default pose model, keypoint indices for human body pose estimation are:
274
250
  0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear
@@ -290,8 +266,7 @@ class Results(SimpleClass, DataExportMixin):
290
266
  self._keys = "boxes", "masks", "probs", "keypoints", "obb"
291
267
 
292
268
  def __getitem__(self, idx):
293
- """
294
- Return a Results object for a specific index of inference results.
269
+ """Return a Results object for a specific index of inference results.
295
270
 
296
271
  Args:
297
272
  idx (int | slice): Index or slice to retrieve from the Results object.
@@ -307,12 +282,11 @@ class Results(SimpleClass, DataExportMixin):
307
282
  return self._apply("__getitem__", idx)
308
283
 
309
284
  def __len__(self) -> int:
310
- """
311
- Return the number of detections in the Results object.
285
+ """Return the number of detections in the Results object.
312
286
 
313
287
  Returns:
314
- (int): The number of detections, determined by the length of the first non-empty
315
- attribute in (masks, probs, keypoints, or obb).
288
+ (int): The number of detections, determined by the length of the first non-empty attribute in (masks, probs,
289
+ keypoints, or obb).
316
290
 
317
291
  Examples:
318
292
  >>> results = Results(orig_img, path, names, boxes=torch.rand(5, 4))
@@ -332,15 +306,14 @@ class Results(SimpleClass, DataExportMixin):
332
306
  obb: torch.Tensor | None = None,
333
307
  keypoints: torch.Tensor | None = None,
334
308
  ):
335
- """
336
- Update the Results object with new detection data.
309
+ """Update the Results object with new detection data.
337
310
 
338
- This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the
339
- Results object. It ensures that boxes are clipped to the original image shape.
311
+ This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the Results
312
+ object. It ensures that boxes are clipped to the original image shape.
340
313
 
341
314
  Args:
342
- boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and
343
- confidence scores. The format is (x1, y1, x2, y2, conf, class).
315
+ boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and confidence
316
+ scores. The format is (x1, y1, x2, y2, conf, class).
344
317
  masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks.
345
318
  probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities.
346
319
  obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates.
@@ -363,8 +336,7 @@ class Results(SimpleClass, DataExportMixin):
363
336
  self.keypoints = Keypoints(keypoints, self.orig_shape)
364
337
 
365
338
  def _apply(self, fn: str, *args, **kwargs):
366
- """
367
- Apply a function to all non-empty attributes and return a new Results object with modified attributes.
339
+ """Apply a function to all non-empty attributes and return a new Results object with modified attributes.
368
340
 
369
341
  This method is internally called by methods like .to(), .cuda(), .cpu(), etc.
370
342
 
@@ -390,8 +362,7 @@ class Results(SimpleClass, DataExportMixin):
390
362
  return r
391
363
 
392
364
  def cpu(self):
393
- """
394
- Return a copy of the Results object with all its tensors moved to CPU memory.
365
+ """Return a copy of the Results object with all its tensors moved to CPU memory.
395
366
 
396
367
  This method creates a new Results object with all tensor attributes (boxes, masks, probs, keypoints, obb)
397
368
  transferred to CPU memory. It's useful for moving data from GPU to CPU for further processing or saving.
@@ -407,8 +378,7 @@ class Results(SimpleClass, DataExportMixin):
407
378
  return self._apply("cpu")
408
379
 
409
380
  def numpy(self):
410
- """
411
- Convert all tensors in the Results object to numpy arrays.
381
+ """Convert all tensors in the Results object to numpy arrays.
412
382
 
413
383
  Returns:
414
384
  (Results): A new Results object with all tensors converted to numpy arrays.
@@ -426,8 +396,7 @@ class Results(SimpleClass, DataExportMixin):
426
396
  return self._apply("numpy")
427
397
 
428
398
  def cuda(self):
429
- """
430
- Move all tensors in the Results object to GPU memory.
399
+ """Move all tensors in the Results object to GPU memory.
431
400
 
432
401
  Returns:
433
402
  (Results): A new Results object with all tensors moved to CUDA device.
@@ -441,8 +410,7 @@ class Results(SimpleClass, DataExportMixin):
441
410
  return self._apply("cuda")
442
411
 
443
412
  def to(self, *args, **kwargs):
444
- """
445
- Move all tensors in the Results object to the specified device and dtype.
413
+ """Move all tensors in the Results object to the specified device and dtype.
446
414
 
447
415
  Args:
448
416
  *args (Any): Variable length argument list to be passed to torch.Tensor.to().
@@ -460,8 +428,7 @@ class Results(SimpleClass, DataExportMixin):
460
428
  return self._apply("to", *args, **kwargs)
461
429
 
462
430
  def new(self):
463
- """
464
- Create a new Results object with the same image, path, names, and speed attributes.
431
+ """Create a new Results object with the same image, path, names, and speed attributes.
465
432
 
466
433
  Returns:
467
434
  (Results): A new Results object with copied attributes from the original instance.
@@ -493,8 +460,7 @@ class Results(SimpleClass, DataExportMixin):
493
460
  color_mode: str = "class",
494
461
  txt_color: tuple[int, int, int] = (255, 255, 255),
495
462
  ) -> np.ndarray:
496
- """
497
- Plot detection results on an input RGB image.
463
+ """Plot detection results on an input BGR image.
498
464
 
499
465
  Args:
500
466
  conf (bool): Whether to plot detection confidence scores.
@@ -514,10 +480,10 @@ class Results(SimpleClass, DataExportMixin):
514
480
  save (bool): Whether to save the annotated image.
515
481
  filename (str | None): Filename to save image if save is True.
516
482
  color_mode (str): Specify the color mode, e.g., 'instance' or 'class'.
517
- txt_color (tuple[int, int, int]): Specify the RGB text color for classification task.
483
+ txt_color (tuple[int, int, int]): Text color in BGR format for classification output.
518
484
 
519
485
  Returns:
520
- (np.ndarray): Annotated image as a numpy array.
486
+ (np.ndarray | PIL.Image.Image): Annotated image as a NumPy array (BGR) or PIL image (RGB) if `pil=True`.
521
487
 
522
488
  Examples:
523
489
  >>> results = model("image.jpg")
@@ -527,7 +493,7 @@ class Results(SimpleClass, DataExportMixin):
527
493
  """
528
494
  assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
529
495
  if img is None and isinstance(self.orig_img, torch.Tensor):
530
- img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()
496
+ img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).byte().cpu().numpy()
531
497
 
532
498
  names = self.names
533
499
  is_obb = self.obb is not None
@@ -610,11 +576,10 @@ class Results(SimpleClass, DataExportMixin):
610
576
  if save:
611
577
  annotator.save(filename or f"results_{Path(self.path).name}")
612
578
 
613
- return annotator.im if pil else annotator.result()
579
+ return annotator.result(pil)
614
580
 
615
581
  def show(self, *args, **kwargs):
616
- """
617
- Display the image with annotated inference results.
582
+ """Display the image with annotated inference results.
618
583
 
619
584
  This method plots the detection results on the original image and displays it. It's a convenient way to
620
585
  visualize the model's predictions directly.
@@ -632,15 +597,14 @@ class Results(SimpleClass, DataExportMixin):
632
597
  self.plot(show=True, *args, **kwargs)
633
598
 
634
599
  def save(self, filename: str | None = None, *args, **kwargs) -> str:
635
- """
636
- Save annotated inference results image to file.
600
+ """Save annotated inference results image to file.
637
601
 
638
602
  This method plots the detection results on the original image and saves the annotated image to a file. It
639
603
  utilizes the `plot` method to generate the annotated image and then saves it to the specified filename.
640
604
 
641
605
  Args:
642
- filename (str | Path | None): The filename to save the annotated image. If None, a default filename
643
- is generated based on the original image path.
606
+ filename (str | Path | None): The filename to save the annotated image. If None, a default filename is
607
+ generated based on the original image path.
644
608
  *args (Any): Variable length argument list to be passed to the `plot` method.
645
609
  **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot` method.
646
610
 
@@ -661,15 +625,14 @@ class Results(SimpleClass, DataExportMixin):
661
625
  return filename
662
626
 
663
627
  def verbose(self) -> str:
664
- """
665
- Return a log string for each task in the results, detailing detection and classification outcomes.
628
+ """Return a log string for each task in the results, detailing detection and classification outcomes.
666
629
 
667
630
  This method generates a human-readable string summarizing the detection and classification results. It includes
668
631
  the number of detections for each class and the top probabilities for classification tasks.
669
632
 
670
633
  Returns:
671
- (str): A formatted string containing a summary of the results. For detection tasks, it includes the
672
- number of detections per class. For classification tasks, it includes the top 5 class probabilities.
634
+ (str): A formatted string containing a summary of the results. For detection tasks, it includes the number
635
+ of detections per class. For classification tasks, it includes the top 5 class probabilities.
673
636
 
674
637
  Examples:
675
638
  >>> results = model("path/to/image.jpg")
@@ -693,8 +656,7 @@ class Results(SimpleClass, DataExportMixin):
693
656
  return "".join(f"{n} {self.names[i]}{'s' * (n > 1)}, " for i, n in enumerate(counts) if n > 0)
694
657
 
695
658
  def save_txt(self, txt_file: str | Path, save_conf: bool = False) -> str:
696
- """
697
- Save detection results to a text file.
659
+ """Save detection results to a text file.
698
660
 
699
661
  Args:
700
662
  txt_file (str | Path): Path to the output text file.
@@ -750,8 +712,7 @@ class Results(SimpleClass, DataExportMixin):
750
712
  return str(txt_file)
751
713
 
752
714
  def save_crop(self, save_dir: str | Path, file_name: str | Path = Path("im.jpg")):
753
- """
754
- Save cropped detection images to specified directory.
715
+ """Save cropped detection images to specified directory.
755
716
 
756
717
  This method saves cropped images of detected objects to a specified directory. Each crop is saved in a
757
718
  subdirectory named after the object's class, with the filename based on the input file_name.
@@ -760,22 +721,22 @@ class Results(SimpleClass, DataExportMixin):
760
721
  save_dir (str | Path): Directory path where cropped images will be saved.
761
722
  file_name (str | Path): Base filename for the saved cropped images.
762
723
 
724
+ Examples:
725
+ >>> results = model("path/to/image.jpg")
726
+ >>> for result in results:
727
+ >>> result.save_crop(save_dir="path/to/crops", file_name="detection")
728
+
763
729
  Notes:
764
730
  - This method does not support Classify or Oriented Bounding Box (OBB) tasks.
765
731
  - Crops are saved as 'save_dir/class_name/file_name.jpg'.
766
732
  - The method will create necessary subdirectories if they don't exist.
767
733
  - Original image is copied before cropping to avoid modifying the original.
768
-
769
- Examples:
770
- >>> results = model("path/to/image.jpg")
771
- >>> for result in results:
772
- >>> result.save_crop(save_dir="path/to/crops", file_name="detection")
773
734
  """
774
735
  if self.probs is not None:
775
- LOGGER.warning("Classify task do not support `save_crop`.")
736
+ LOGGER.warning("Classify task does not support `save_crop`.")
776
737
  return
777
738
  if self.obb is not None:
778
- LOGGER.warning("OBB task do not support `save_crop`.")
739
+ LOGGER.warning("OBB task does not support `save_crop`.")
779
740
  return
780
741
  for d in self.boxes:
781
742
  save_one_box(
@@ -786,11 +747,10 @@ class Results(SimpleClass, DataExportMixin):
786
747
  )
787
748
 
788
749
  def summary(self, normalize: bool = False, decimals: int = 5) -> list[dict[str, Any]]:
789
- """
790
- Convert inference results to a summarized dictionary with optional normalization for box coordinates.
750
+ """Convert inference results to a summarized dictionary with optional normalization for box coordinates.
791
751
 
792
- This method creates a list of detection dictionaries, each containing information about a single
793
- detection or classification result. For classification tasks, it returns the top class and its
752
+ This method creates a list of detection dictionaries, each containing information about a single detection or
753
+ classification result. For classification tasks, it returns the top class and its
794
754
  confidence. For detection tasks, it includes class information, bounding box coordinates, and
795
755
  optionally mask segments and keypoints.
796
756
 
@@ -799,8 +759,8 @@ class Results(SimpleClass, DataExportMixin):
799
759
  decimals (int): Number of decimal places to round the output values to.
800
760
 
801
761
  Returns:
802
- (list[dict[str, Any]]): A list of dictionaries, each containing summarized information for a single detection
803
- or classification result. The structure of each dictionary varies based on the task type
762
+ (list[dict[str, Any]]): A list of dictionaries, each containing summarized information for a single
763
+ detection or classification result. The structure of each dictionary varies based on the task type
804
764
  (classification or detection) and available information (boxes, masks, keypoints).
805
765
 
806
766
  Examples:
@@ -853,12 +813,11 @@ class Results(SimpleClass, DataExportMixin):
853
813
 
854
814
 
855
815
  class Boxes(BaseTensor):
856
- """
857
- A class for managing and manipulating detection boxes.
816
+ """A class for managing and manipulating detection boxes.
858
817
 
859
818
  This class provides comprehensive functionality for handling detection boxes, including their coordinates,
860
- confidence scores, class labels, and optional tracking IDs. It supports various box formats and offers
861
- methods for easy manipulation and conversion between different coordinate systems.
819
+ confidence scores, class labels, and optional tracking IDs. It supports various box formats and offers methods for
820
+ easy manipulation and conversion between different coordinate systems.
862
821
 
863
822
  Attributes:
864
823
  data (torch.Tensor | np.ndarray): The raw tensor containing detection boxes and associated data.
@@ -890,31 +849,16 @@ class Boxes(BaseTensor):
890
849
  """
891
850
 
892
851
  def __init__(self, boxes: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
893
- """
894
- Initialize the Boxes class with detection box data and the original image shape.
852
+ """Initialize the Boxes class with detection box data and the original image shape.
895
853
 
896
- This class manages detection boxes, providing easy access and manipulation of box coordinates,
897
- confidence scores, class identifiers, and optional tracking IDs. It supports multiple formats
898
- for box coordinates, including both absolute and normalized forms.
854
+ This class manages detection boxes, providing easy access and manipulation of box coordinates, confidence
855
+ scores, class identifiers, and optional tracking IDs. It supports multiple formats for box coordinates,
856
+ including both absolute and normalized forms.
899
857
 
900
858
  Args:
901
- boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape
902
- (num_boxes, 6) or (num_boxes, 7). Columns should contain
903
- [x1, y1, x2, y2, (optional) track_id, confidence, class].
859
+ boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape (num_boxes, 6) or
860
+ (num_boxes, 7). Columns should contain [x1, y1, x2, y2, (optional) track_id, confidence, class].
904
861
  orig_shape (tuple[int, int]): The original image shape as (height, width). Used for normalization.
905
-
906
- Attributes:
907
- data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
908
- orig_shape (tuple[int, int]): The original image size, used for normalization.
909
- is_track (bool): Indicates whether tracking IDs are included in the box data.
910
-
911
- Examples:
912
- >>> import torch
913
- >>> boxes = torch.tensor([[100, 50, 150, 100, 0.9, 0]])
914
- >>> orig_shape = (480, 640)
915
- >>> detection_boxes = Boxes(boxes, orig_shape)
916
- >>> print(detection_boxes.xyxy)
917
- tensor([[100., 50., 150., 100.]])
918
862
  """
919
863
  if boxes.ndim == 1:
920
864
  boxes = boxes[None, :]
@@ -926,12 +870,11 @@ class Boxes(BaseTensor):
926
870
 
927
871
  @property
928
872
  def xyxy(self) -> torch.Tensor | np.ndarray:
929
- """
930
- Return bounding boxes in [x1, y1, x2, y2] format.
873
+ """Return bounding boxes in [x1, y1, x2, y2] format.
931
874
 
932
875
  Returns:
933
- (torch.Tensor | np.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box
934
- coordinates in [x1, y1, x2, y2] format, where n is the number of boxes.
876
+ (torch.Tensor | np.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box coordinates in
877
+ [x1, y1, x2, y2] format, where n is the number of boxes.
935
878
 
936
879
  Examples:
937
880
  >>> results = model("image.jpg")
@@ -943,12 +886,11 @@ class Boxes(BaseTensor):
943
886
 
944
887
  @property
945
888
  def conf(self) -> torch.Tensor | np.ndarray:
946
- """
947
- Return the confidence scores for each detection box.
889
+ """Return the confidence scores for each detection box.
948
890
 
949
891
  Returns:
950
- (torch.Tensor | np.ndarray): A 1D tensor or array containing confidence scores for each detection,
951
- with shape (N,) where N is the number of detections.
892
+ (torch.Tensor | np.ndarray): A 1D tensor or array containing confidence scores for each detection, with
893
+ shape (N,) where N is the number of detections.
952
894
 
953
895
  Examples:
954
896
  >>> boxes = Boxes(torch.tensor([[10, 20, 30, 40, 0.9, 0]]), orig_shape=(100, 100))
@@ -960,12 +902,11 @@ class Boxes(BaseTensor):
960
902
 
961
903
  @property
962
904
  def cls(self) -> torch.Tensor | np.ndarray:
963
- """
964
- Return the class ID tensor representing category predictions for each bounding box.
905
+ """Return the class ID tensor representing category predictions for each bounding box.
965
906
 
966
907
  Returns:
967
- (torch.Tensor | np.ndarray): A tensor or numpy array containing the class IDs for each detection box.
968
- The shape is (N,), where N is the number of boxes.
908
+ (torch.Tensor | np.ndarray): A tensor or numpy array containing the class IDs for each detection box. The
909
+ shape is (N,), where N is the number of boxes.
969
910
 
970
911
  Examples:
971
912
  >>> results = model("image.jpg")
@@ -977,12 +918,11 @@ class Boxes(BaseTensor):
977
918
 
978
919
  @property
979
920
  def id(self) -> torch.Tensor | np.ndarray | None:
980
- """
981
- Return the tracking IDs for each detection box if available.
921
+ """Return the tracking IDs for each detection box if available.
982
922
 
983
923
  Returns:
984
- (torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled,
985
- otherwise None. Shape is (N,) where N is the number of boxes.
924
+ (torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled, otherwise None.
925
+ Shape is (N,) where N is the number of boxes.
986
926
 
987
927
  Examples:
988
928
  >>> results = model.track("path/to/video.mp4")
@@ -1003,36 +943,33 @@ class Boxes(BaseTensor):
1003
943
  @property
1004
944
  @lru_cache(maxsize=2)
1005
945
  def xywh(self) -> torch.Tensor | np.ndarray:
1006
- """
1007
- Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
946
+ """Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
1008
947
 
1009
948
  Returns:
1010
- (torch.Tensor | np.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center,
1011
- y_center are the coordinates of the center point of the bounding box, width, height are the
1012
- dimensions of the bounding box and the shape of the returned tensor is (N, 4), where N is the
1013
- number of boxes.
949
+ (torch.Tensor | np.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center, y_center
950
+ are the coordinates of the center point of the bounding box, width, height are the dimensions of the
951
+ bounding box and the shape of the returned tensor is (N, 4), where N is the number of boxes.
1014
952
 
1015
953
  Examples:
1016
954
  >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100], [200, 150, 300, 250]]), orig_shape=(480, 640))
1017
955
  >>> xywh = boxes.xywh
1018
956
  >>> print(xywh)
1019
- tensor([[100.0000, 50.0000, 50.0000, 50.0000],
1020
- [200.0000, 150.0000, 100.0000, 100.0000]])
957
+ tensor([[125.0000, 75.0000, 50.0000, 50.0000],
958
+ [250.0000, 200.0000, 100.0000, 100.0000]])
1021
959
  """
1022
960
  return ops.xyxy2xywh(self.xyxy)
1023
961
 
1024
962
  @property
1025
963
  @lru_cache(maxsize=2)
1026
964
  def xyxyn(self) -> torch.Tensor | np.ndarray:
1027
- """
1028
- Return normalized bounding box coordinates relative to the original image size.
965
+ """Return normalized bounding box coordinates relative to the original image size.
1029
966
 
1030
- This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format,
1031
- normalized to the range [0, 1] based on the original image dimensions.
967
+ This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format, normalized to the
968
+ range [0, 1] based on the original image dimensions.
1032
969
 
1033
970
  Returns:
1034
- (torch.Tensor | np.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is
1035
- the number of boxes. Each row contains [x1, y1, x2, y2] values normalized to [0, 1].
971
+ (torch.Tensor | np.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is the number of
972
+ boxes. Each row contains [x1, y1, x2, y2] values normalized to [0, 1].
1036
973
 
1037
974
  Examples:
1038
975
  >>> boxes = Boxes(torch.tensor([[100, 50, 300, 400, 0.9, 0]]), orig_shape=(480, 640))
@@ -1048,16 +985,15 @@ class Boxes(BaseTensor):
1048
985
  @property
1049
986
  @lru_cache(maxsize=2)
1050
987
  def xywhn(self) -> torch.Tensor | np.ndarray:
1051
- """
1052
- Return normalized bounding boxes in [x, y, width, height] format.
988
+ """Return normalized bounding boxes in [x, y, width, height] format.
1053
989
 
1054
- This property calculates and returns the normalized bounding box coordinates in the format
1055
- [x_center, y_center, width, height], where all values are relative to the original image dimensions.
990
+ This property calculates and returns the normalized bounding box coordinates in the format [x_center, y_center,
991
+ width, height], where all values are relative to the original image dimensions.
1056
992
 
1057
993
  Returns:
1058
- (torch.Tensor | np.ndarray): Normalized bounding boxes with shape (N, 4), where N is the
1059
- number of boxes. Each row contains [x_center, y_center, width, height] values normalized
1060
- to [0, 1] based on the original image dimensions.
994
+ (torch.Tensor | np.ndarray): Normalized bounding boxes with shape (N, 4), where N is the number of boxes.
995
+ Each row contains [x_center, y_center, width, height] values normalized to [0, 1] based on the original
996
+ image dimensions.
1061
997
 
1062
998
  Examples:
1063
999
  >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0]]), orig_shape=(480, 640))
@@ -1072,11 +1008,10 @@ class Boxes(BaseTensor):
1072
1008
 
1073
1009
 
1074
1010
  class Masks(BaseTensor):
1075
- """
1076
- A class for storing and manipulating detection masks.
1011
+ """A class for storing and manipulating detection masks.
1077
1012
 
1078
- This class extends BaseTensor and provides functionality for handling segmentation masks,
1079
- including methods for converting between pixel and normalized coordinates.
1013
+ This class extends BaseTensor and provides functionality for handling segmentation masks, including methods for
1014
+ converting between pixel and normalized coordinates.
1080
1015
 
1081
1016
  Attributes:
1082
1017
  data (torch.Tensor | np.ndarray): The raw tensor or array containing mask data.
@@ -1099,19 +1034,11 @@ class Masks(BaseTensor):
1099
1034
  """
1100
1035
 
1101
1036
  def __init__(self, masks: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
1102
- """
1103
- Initialize the Masks class with detection mask data and the original image shape.
1037
+ """Initialize the Masks class with detection mask data and the original image shape.
1104
1038
 
1105
1039
  Args:
1106
1040
  masks (torch.Tensor | np.ndarray): Detection masks with shape (num_masks, height, width).
1107
1041
  orig_shape (tuple): The original image shape as (height, width). Used for normalization.
1108
-
1109
- Examples:
1110
- >>> import torch
1111
- >>> from ultralytics.engine.results import Masks
1112
- >>> masks = torch.rand(10, 160, 160) # 10 masks of 160x160 resolution
1113
- >>> orig_shape = (720, 1280) # Original image shape
1114
- >>> mask_obj = Masks(masks, orig_shape)
1115
1042
  """
1116
1043
  if masks.ndim == 2:
1117
1044
  masks = masks[None, :]
@@ -1120,15 +1047,14 @@ class Masks(BaseTensor):
1120
1047
  @property
1121
1048
  @lru_cache(maxsize=1)
1122
1049
  def xyn(self) -> list[np.ndarray]:
1123
- """
1124
- Return normalized xy-coordinates of the segmentation masks.
1050
+ """Return normalized xy-coordinates of the segmentation masks.
1125
1051
 
1126
- This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates
1127
- are normalized relative to the original image shape.
1052
+ This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates are
1053
+ normalized relative to the original image shape.
1128
1054
 
1129
1055
  Returns:
1130
- (list[np.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates
1131
- of a single segmentation mask. Each array has shape (N, 2), where N is the number of points in the
1056
+ (list[np.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates of a
1057
+ single segmentation mask. Each array has shape (N, 2), where N is the number of points in the
1132
1058
  mask contour.
1133
1059
 
1134
1060
  Examples:
@@ -1145,16 +1071,14 @@ class Masks(BaseTensor):
1145
1071
  @property
1146
1072
  @lru_cache(maxsize=1)
1147
1073
  def xy(self) -> list[np.ndarray]:
1148
- """
1149
- Return the [x, y] pixel coordinates for each segment in the mask tensor.
1074
+ """Return the [x, y] pixel coordinates for each segment in the mask tensor.
1150
1075
 
1151
- This property calculates and returns a list of pixel coordinates for each segmentation mask in the
1152
- Masks object. The coordinates are scaled to match the original image dimensions.
1076
+ This property calculates and returns a list of pixel coordinates for each segmentation mask in the Masks object.
1077
+ The coordinates are scaled to match the original image dimensions.
1153
1078
 
1154
1079
  Returns:
1155
- (list[np.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel
1156
- coordinates for a single segmentation mask. Each array has shape (N, 2), where N is the
1157
- number of points in the segment.
1080
+ (list[np.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel coordinates for a
1081
+ single segmentation mask. Each array has shape (N, 2), where N is the number of points in the segment.
1158
1082
 
1159
1083
  Examples:
1160
1084
  >>> results = model("image.jpg")
@@ -1170,12 +1094,10 @@ class Masks(BaseTensor):
1170
1094
 
1171
1095
 
1172
1096
  class Keypoints(BaseTensor):
1173
- """
1174
- A class for storing and manipulating detection keypoints.
1097
+ """A class for storing and manipulating detection keypoints.
1175
1098
 
1176
- This class encapsulates functionality for handling keypoint data, including coordinate manipulation,
1177
- normalization, and confidence values. It supports keypoint detection results with optional visibility
1178
- information.
1099
+ This class encapsulates functionality for handling keypoint data, including coordinate manipulation, normalization,
1100
+ and confidence values. It supports keypoint detection results with optional visibility information.
1179
1101
 
1180
1102
  Attributes:
1181
1103
  data (torch.Tensor): The raw tensor containing keypoint data.
@@ -1203,22 +1125,16 @@ class Keypoints(BaseTensor):
1203
1125
  """
1204
1126
 
1205
1127
  def __init__(self, keypoints: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
1206
- """
1207
- Initialize the Keypoints object with detection keypoints and original image dimensions.
1128
+ """Initialize the Keypoints object with detection keypoints and original image dimensions.
1208
1129
 
1209
- This method processes the input keypoints tensor, handling both 2D and 3D formats. For 3D tensors
1210
- (x, y, confidence), it masks out low-confidence keypoints by setting their coordinates to zero.
1130
+ This method processes the input keypoints tensor, handling both 2D and 3D formats. For 3D tensors (x, y,
1131
+ confidence), it masks out low-confidence keypoints by setting their coordinates to zero.
1211
1132
 
1212
1133
  Args:
1213
1134
  keypoints (torch.Tensor): A tensor containing keypoint data. Shape can be either:
1214
1135
  - (num_objects, num_keypoints, 2) for x, y coordinates only
1215
1136
  - (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores
1216
1137
  orig_shape (tuple[int, int]): The original image dimensions (height, width).
1217
-
1218
- Examples:
1219
- >>> kpts = torch.rand(1, 17, 3) # 1 object, 17 keypoints (COCO format), x,y,conf
1220
- >>> orig_shape = (720, 1280) # Original image height, width
1221
- >>> keypoints = Keypoints(kpts, orig_shape)
1222
1138
  """
1223
1139
  if keypoints.ndim == 2:
1224
1140
  keypoints = keypoints[None, :]
@@ -1228,12 +1144,11 @@ class Keypoints(BaseTensor):
1228
1144
  @property
1229
1145
  @lru_cache(maxsize=1)
1230
1146
  def xy(self) -> torch.Tensor | np.ndarray:
1231
- """
1232
- Return x, y coordinates of keypoints.
1147
+ """Return x, y coordinates of keypoints.
1233
1148
 
1234
1149
  Returns:
1235
- (torch.Tensor): A tensor containing the x, y coordinates of keypoints with shape (N, K, 2), where N is
1236
- the number of detections and K is the number of keypoints per detection.
1150
+ (torch.Tensor): A tensor containing the x, y coordinates of keypoints with shape (N, K, 2), where N is the
1151
+ number of detections and K is the number of keypoints per detection.
1237
1152
 
1238
1153
  Examples:
1239
1154
  >>> results = model("image.jpg")
@@ -1252,13 +1167,12 @@ class Keypoints(BaseTensor):
1252
1167
  @property
1253
1168
  @lru_cache(maxsize=1)
1254
1169
  def xyn(self) -> torch.Tensor | np.ndarray:
1255
- """
1256
- Return normalized coordinates (x, y) of keypoints relative to the original image size.
1170
+ """Return normalized coordinates (x, y) of keypoints relative to the original image size.
1257
1171
 
1258
1172
  Returns:
1259
1173
  (torch.Tensor | np.ndarray): A tensor or array of shape (N, K, 2) containing normalized keypoint
1260
- coordinates, where N is the number of instances, K is the number of keypoints, and the last
1261
- dimension contains [x, y] values in the range [0, 1].
1174
+ coordinates, where N is the number of instances, K is the number of keypoints, and the last dimension
1175
+ contains [x, y] values in the range [0, 1].
1262
1176
 
1263
1177
  Examples:
1264
1178
  >>> keypoints = Keypoints(torch.rand(1, 17, 2), orig_shape=(480, 640))
@@ -1274,13 +1188,11 @@ class Keypoints(BaseTensor):
1274
1188
  @property
1275
1189
  @lru_cache(maxsize=1)
1276
1190
  def conf(self) -> torch.Tensor | np.ndarray | None:
1277
- """
1278
- Return confidence values for each keypoint.
1191
+ """Return confidence values for each keypoint.
1279
1192
 
1280
1193
  Returns:
1281
- (torch.Tensor | None): A tensor containing confidence scores for each keypoint if available,
1282
- otherwise None. Shape is (num_detections, num_keypoints) for batched data or (num_keypoints,)
1283
- for single detection.
1194
+ (torch.Tensor | None): A tensor containing confidence scores for each keypoint if available, otherwise None.
1195
+ Shape is (num_detections, num_keypoints) for batched data or (num_keypoints,) for single detection.
1284
1196
 
1285
1197
  Examples:
1286
1198
  >>> keypoints = Keypoints(torch.rand(1, 17, 3), orig_shape=(640, 640)) # 1 detection, 17 keypoints
@@ -1291,11 +1203,10 @@ class Keypoints(BaseTensor):
1291
1203
 
1292
1204
 
1293
1205
  class Probs(BaseTensor):
1294
- """
1295
- A class for storing and manipulating classification probabilities.
1206
+ """A class for storing and manipulating classification probabilities.
1296
1207
 
1297
- This class extends BaseTensor and provides methods for accessing and manipulating
1298
- classification probabilities, including top-1 and top-5 predictions.
1208
+ This class extends BaseTensor and provides methods for accessing and manipulating classification probabilities,
1209
+ including top-1 and top-5 predictions.
1299
1210
 
1300
1211
  Attributes:
1301
1212
  data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
@@ -1325,42 +1236,22 @@ class Probs(BaseTensor):
1325
1236
  """
1326
1237
 
1327
1238
  def __init__(self, probs: torch.Tensor | np.ndarray, orig_shape: tuple[int, int] | None = None) -> None:
1328
- """
1329
- Initialize the Probs class with classification probabilities.
1239
+ """Initialize the Probs class with classification probabilities.
1330
1240
 
1331
1241
  This class stores and manages classification probabilities, providing easy access to top predictions and their
1332
1242
  confidences.
1333
1243
 
1334
1244
  Args:
1335
1245
  probs (torch.Tensor | np.ndarray): A 1D tensor or array of classification probabilities.
1336
- orig_shape (tuple | None): The original image shape as (height, width). Not used in this class but kept
1337
- for consistency with other result classes.
1338
-
1339
- Attributes:
1340
- data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
1341
- top1 (int): Index of the top 1 class.
1342
- top5 (list[int]): Indices of the top 5 classes.
1343
- top1conf (torch.Tensor | np.ndarray): Confidence of the top 1 class.
1344
- top5conf (torch.Tensor | np.ndarray): Confidences of the top 5 classes.
1345
-
1346
- Examples:
1347
- >>> import torch
1348
- >>> probs = torch.tensor([0.1, 0.3, 0.2, 0.4])
1349
- >>> p = Probs(probs)
1350
- >>> print(p.top1)
1351
- 3
1352
- >>> print(p.top1conf)
1353
- tensor(0.4000)
1354
- >>> print(p.top5)
1355
- [3, 1, 2, 0]
1246
+ orig_shape (tuple | None): The original image shape as (height, width). Not used in this class but kept for
1247
+ consistency with other result classes.
1356
1248
  """
1357
1249
  super().__init__(probs, orig_shape)
1358
1250
 
1359
1251
  @property
1360
1252
  @lru_cache(maxsize=1)
1361
1253
  def top1(self) -> int:
1362
- """
1363
- Return the index of the class with the highest probability.
1254
+ """Return the index of the class with the highest probability.
1364
1255
 
1365
1256
  Returns:
1366
1257
  (int): Index of the class with the highest probability.
@@ -1375,8 +1266,7 @@ class Probs(BaseTensor):
1375
1266
  @property
1376
1267
  @lru_cache(maxsize=1)
1377
1268
  def top5(self) -> list[int]:
1378
- """
1379
- Return the indices of the top 5 class probabilities.
1269
+ """Return the indices of the top 5 class probabilities.
1380
1270
 
1381
1271
  Returns:
1382
1272
  (list[int]): A list containing the indices of the top 5 class probabilities, sorted in descending order.
@@ -1391,8 +1281,7 @@ class Probs(BaseTensor):
1391
1281
  @property
1392
1282
  @lru_cache(maxsize=1)
1393
1283
  def top1conf(self) -> torch.Tensor | np.ndarray:
1394
- """
1395
- Return the confidence score of the highest probability class.
1284
+ """Return the confidence score of the highest probability class.
1396
1285
 
1397
1286
  This property retrieves the confidence score (probability) of the class with the highest predicted probability
1398
1287
  from the classification results.
@@ -1411,16 +1300,15 @@ class Probs(BaseTensor):
1411
1300
  @property
1412
1301
  @lru_cache(maxsize=1)
1413
1302
  def top5conf(self) -> torch.Tensor | np.ndarray:
1414
- """
1415
- Return confidence scores for the top 5 classification predictions.
1303
+ """Return confidence scores for the top 5 classification predictions.
1416
1304
 
1417
- This property retrieves the confidence scores corresponding to the top 5 class probabilities
1418
- predicted by the model. It provides a quick way to access the most likely class predictions
1419
- along with their associated confidence levels.
1305
+ This property retrieves the confidence scores corresponding to the top 5 class probabilities predicted by the
1306
+ model. It provides a quick way to access the most likely class predictions along with their associated
1307
+ confidence levels.
1420
1308
 
1421
1309
  Returns:
1422
- (torch.Tensor | np.ndarray): A tensor or array containing the confidence scores for the
1423
- top 5 predicted classes, sorted in descending order of probability.
1310
+ (torch.Tensor | np.ndarray): A tensor or array containing the confidence scores for the top 5 predicted
1311
+ classes, sorted in descending order of probability.
1424
1312
 
1425
1313
  Examples:
1426
1314
  >>> results = model("image.jpg")
@@ -1432,12 +1320,10 @@ class Probs(BaseTensor):
1432
1320
 
1433
1321
 
1434
1322
  class OBB(BaseTensor):
1435
- """
1436
- A class for storing and manipulating Oriented Bounding Boxes (OBB).
1323
+ """A class for storing and manipulating Oriented Bounding Boxes (OBB).
1437
1324
 
1438
- This class provides functionality to handle oriented bounding boxes, including conversion between
1439
- different formats, normalization, and access to various properties of the boxes. It supports
1440
- both tracking and non-tracking scenarios.
1325
+ This class provides functionality to handle oriented bounding boxes, including conversion between different formats,
1326
+ normalization, and access to various properties of the boxes. It supports both tracking and non-tracking scenarios.
1441
1327
 
1442
1328
  Attributes:
1443
1329
  data (torch.Tensor): The raw OBB tensor containing box coordinates and associated data.
@@ -1466,32 +1352,19 @@ class OBB(BaseTensor):
1466
1352
  """
1467
1353
 
1468
1354
  def __init__(self, boxes: torch.Tensor | np.ndarray, orig_shape: tuple[int, int]) -> None:
1469
- """
1470
- Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape.
1355
+ """Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape.
1471
1356
 
1472
- This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides
1473
- various properties and methods to access and transform the OBB data.
1357
+ This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides various
1358
+ properties and methods to access and transform the OBB data.
1474
1359
 
1475
1360
  Args:
1476
- boxes (torch.Tensor | np.ndarray): A tensor or numpy array containing the detection boxes,
1477
- with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
1478
- If present, the third last column contains track IDs, and the fifth column contains rotation.
1361
+ boxes (torch.Tensor | np.ndarray): A tensor or numpy array containing the detection boxes, with shape
1362
+ (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values. If present,
1363
+ the third last column contains track IDs, and the fifth column contains rotation.
1479
1364
  orig_shape (tuple[int, int]): Original image size, in the format (height, width).
1480
1365
 
1481
- Attributes:
1482
- data (torch.Tensor | np.ndarray): The raw OBB tensor.
1483
- orig_shape (tuple[int, int]): The original image shape.
1484
- is_track (bool): Whether the boxes include tracking IDs.
1485
-
1486
1366
  Raises:
1487
1367
  AssertionError: If the number of values per box is not 7 or 8.
1488
-
1489
- Examples:
1490
- >>> import torch
1491
- >>> boxes = torch.rand(3, 7) # 3 boxes with 7 values each
1492
- >>> orig_shape = (640, 480)
1493
- >>> obb = OBB(boxes, orig_shape)
1494
- >>> print(obb.xywhr) # Access the boxes in xywhr format
1495
1368
  """
1496
1369
  if boxes.ndim == 1:
1497
1370
  boxes = boxes[None, :]
@@ -1503,8 +1376,7 @@ class OBB(BaseTensor):
1503
1376
 
1504
1377
  @property
1505
1378
  def xywhr(self) -> torch.Tensor | np.ndarray:
1506
- """
1507
- Return boxes in [x_center, y_center, width, height, rotation] format.
1379
+ """Return boxes in [x_center, y_center, width, height, rotation] format.
1508
1380
 
1509
1381
  Returns:
1510
1382
  (torch.Tensor | np.ndarray): A tensor or numpy array containing the oriented bounding boxes with format
@@ -1521,15 +1393,14 @@ class OBB(BaseTensor):
1521
1393
 
1522
1394
  @property
1523
1395
  def conf(self) -> torch.Tensor | np.ndarray:
1524
- """
1525
- Return the confidence scores for Oriented Bounding Boxes (OBBs).
1396
+ """Return the confidence scores for Oriented Bounding Boxes (OBBs).
1526
1397
 
1527
1398
  This property retrieves the confidence values associated with each OBB detection. The confidence score
1528
1399
  represents the model's certainty in the detection.
1529
1400
 
1530
1401
  Returns:
1531
- (torch.Tensor | np.ndarray): A tensor or numpy array of shape (N,) containing confidence scores
1532
- for N detections, where each score is in the range [0, 1].
1402
+ (torch.Tensor | np.ndarray): A tensor or numpy array of shape (N,) containing confidence scores for N
1403
+ detections, where each score is in the range [0, 1].
1533
1404
 
1534
1405
  Examples:
1535
1406
  >>> results = model("image.jpg")
@@ -1541,12 +1412,11 @@ class OBB(BaseTensor):
1541
1412
 
1542
1413
  @property
1543
1414
  def cls(self) -> torch.Tensor | np.ndarray:
1544
- """
1545
- Return the class values of the oriented bounding boxes.
1415
+ """Return the class values of the oriented bounding boxes.
1546
1416
 
1547
1417
  Returns:
1548
- (torch.Tensor | np.ndarray): A tensor or numpy array containing the class values for each oriented
1549
- bounding box. The shape is (N,), where N is the number of boxes.
1418
+ (torch.Tensor | np.ndarray): A tensor or numpy array containing the class values for each oriented bounding
1419
+ box. The shape is (N,), where N is the number of boxes.
1550
1420
 
1551
1421
  Examples:
1552
1422
  >>> results = model("image.jpg")
@@ -1559,12 +1429,11 @@ class OBB(BaseTensor):
1559
1429
 
1560
1430
  @property
1561
1431
  def id(self) -> torch.Tensor | np.ndarray | None:
1562
- """
1563
- Return the tracking IDs of the oriented bounding boxes (if available).
1432
+ """Return the tracking IDs of the oriented bounding boxes (if available).
1564
1433
 
1565
1434
  Returns:
1566
- (torch.Tensor | np.ndarray | None): A tensor or numpy array containing the tracking IDs for each
1567
- oriented bounding box. Returns None if tracking IDs are not available.
1435
+ (torch.Tensor | np.ndarray | None): A tensor or numpy array containing the tracking IDs for each oriented
1436
+ bounding box. Returns None if tracking IDs are not available.
1568
1437
 
1569
1438
  Examples:
1570
1439
  >>> results = model("image.jpg", tracker=True) # Run inference with tracking
@@ -1579,12 +1448,11 @@ class OBB(BaseTensor):
1579
1448
  @property
1580
1449
  @lru_cache(maxsize=2)
1581
1450
  def xyxyxyxy(self) -> torch.Tensor | np.ndarray:
1582
- """
1583
- Convert OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
1451
+ """Convert OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
1584
1452
 
1585
1453
  Returns:
1586
- (torch.Tensor | np.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is
1587
- the number of boxes. Each box is represented by 4 points (x, y), starting from the top-left corner and
1454
+ (torch.Tensor | np.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is the
1455
+ number of boxes. Each box is represented by 4 points (x, y), starting from the top-left corner and
1588
1456
  moving clockwise.
1589
1457
 
1590
1458
  Examples:
@@ -1598,13 +1466,12 @@ class OBB(BaseTensor):
1598
1466
  @property
1599
1467
  @lru_cache(maxsize=2)
1600
1468
  def xyxyxyxyn(self) -> torch.Tensor | np.ndarray:
1601
- """
1602
- Convert rotated bounding boxes to normalized xyxyxyxy format.
1469
+ """Convert rotated bounding boxes to normalized xyxyxyxy format.
1603
1470
 
1604
1471
  Returns:
1605
1472
  (torch.Tensor | np.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2),
1606
- where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to
1607
- the original image dimensions.
1473
+ where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to the
1474
+ original image dimensions.
1608
1475
 
1609
1476
  Examples:
1610
1477
  >>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480)) # 10 random OBBs
@@ -1620,16 +1487,15 @@ class OBB(BaseTensor):
1620
1487
  @property
1621
1488
  @lru_cache(maxsize=2)
1622
1489
  def xyxy(self) -> torch.Tensor | np.ndarray:
1623
- """
1624
- Convert oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format.
1490
+ """Convert oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format.
1625
1491
 
1626
- This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in
1627
- xyxy format (x1, y1, x2, y2). This is useful for operations that require axis-aligned bounding boxes, such
1628
- as IoU calculation with non-rotated boxes.
1492
+ This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in xyxy
1493
+ format (x1, y1, x2, y2). This is useful for operations that require axis-aligned bounding boxes, such as IoU
1494
+ calculation with non-rotated boxes.
1629
1495
 
1630
1496
  Returns:
1631
- (torch.Tensor | np.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N
1632
- is the number of boxes. Each row contains [x1, y1, x2, y2] coordinates.
1497
+ (torch.Tensor | np.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N is the
1498
+ number of boxes. Each row contains [x1, y1, x2, y2] coordinates.
1633
1499
 
1634
1500
  Examples:
1635
1501
  >>> import torch