dgenerate_ultralytics_headless-8.3.189-py3-none-any.whl → dgenerate_ultralytics_headless-8.3.191-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/RECORD +111 -109
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +31 -25
- ultralytics/engine/exporter.py +7 -4
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +9 -7
- ultralytics/engine/results.py +59 -57
- ultralytics/engine/trainer.py +7 -0
- ultralytics/engine/tuner.py +4 -3
- ultralytics/engine/validator.py +3 -1
- ultralytics/hub/__init__.py +6 -2
- ultralytics/hub/auth.py +2 -2
- ultralytics/hub/google/__init__.py +9 -8
- ultralytics/hub/session.py +11 -11
- ultralytics/hub/utils.py +8 -9
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +7 -5
- ultralytics/models/yolo/classify/val.py +10 -8
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +23 -21
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +13 -10
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +11 -9
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +12 -10
- ultralytics/models/yolo/world/train.py +9 -7
- ultralytics/models/yolo/yoloe/train.py +7 -6
- ultralytics/models/yolo/yoloe/val.py +10 -8
- ultralytics/nn/autobackend.py +40 -52
- ultralytics/nn/modules/__init__.py +3 -3
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +46 -38
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +27 -77
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/callbacks/platform.py +2 -1
- ultralytics/utils/checks.py +20 -29
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/logger.py +7 -6
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/nms.py +346 -0
- ultralytics/utils/ops.py +83 -251
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +18 -16
- ultralytics/utils/tal.py +1 -1
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +47 -33
- ultralytics/utils/triton.py +3 -2
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/top_level.txt +0 -0
ultralytics/utils/ops.py
CHANGED
```diff
@@ -1,18 +1,18 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import contextlib
 import math
 import re
 import time
-from typing import Optional
 
 import cv2
 import numpy as np
 import torch
 import torch.nn.functional as F
 
-from ultralytics.utils import LOGGER
-from ultralytics.utils.metrics import batch_probiou
+from ultralytics.utils import NOT_MACOS14
 
 
 class Profile(contextlib.ContextDecorator):
@@ -39,7 +39,7 @@ class Profile(contextlib.ContextDecorator):
         ...     time.sleep(0.1)
     """
 
-    def __init__(self, t: float = 0.0, device: torch.device = None):
+    def __init__(self, t: float = 0.0, device: torch.device | None = None):
         """
         Initialize the Profile class.
 
@@ -122,20 +122,18 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = T
     """
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
-        pad = (
-            round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1),
-            round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1),
-        )  # wh padding
+        pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)
+        pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)
     else:
         gain = ratio_pad[0][0]
-        pad = ratio_pad[1]
+        pad_x, pad_y = ratio_pad[1]
 
     if padding:
-        boxes[..., 0] -= pad[0]  # x padding
-        boxes[..., 1] -= pad[1]  # y padding
+        boxes[..., 0] -= pad_x  # x padding
+        boxes[..., 1] -= pad_y  # y padding
         if not xywh:
-            boxes[..., 2] -= pad[0]  # x padding
-            boxes[..., 3] -= pad[1]  # y padding
+            boxes[..., 2] -= pad_x  # x padding
+            boxes[..., 3] -= pad_y  # y padding
     boxes[..., :4] /= gain
     return clip_boxes(boxes, img0_shape)
 
```
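Note: the `pad` tuple becomes explicit `pad_x`/`pad_y` scalars; the arithmetic is unchanged. A minimal standalone sketch of the same letterbox un-padding math (function and variable names here are illustrative, not from the package):

```python
import numpy as np

def unletterbox_boxes(boxes: np.ndarray, model_shape: tuple, orig_shape: tuple) -> np.ndarray:
    """Map xyxy boxes from a letterboxed model_shape (h, w) back to orig_shape (h, w)."""
    gain = min(model_shape[0] / orig_shape[0], model_shape[1] / orig_shape[1])  # gain = old / new
    pad_x = round((model_shape[1] - orig_shape[1] * gain) / 2 - 0.1)
    pad_y = round((model_shape[0] - orig_shape[0] * gain) / 2 - 0.1)
    boxes = boxes.copy()
    boxes[..., [0, 2]] -= pad_x  # remove x padding
    boxes[..., [1, 3]] -= pad_y  # remove y padding
    boxes[..., :4] /= gain  # rescale to the original resolution
    return boxes

# 640x640 letterboxed inference on a 480x640 source image: only y is padded
print(unletterbox_boxes(np.array([[80.0, 120.0, 560.0, 520.0]]), (640, 640), (480, 640)))
# -> [[ 80.  40. 560. 440.]]
```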
```diff
@@ -156,207 +154,32 @@ def make_divisible(x: int, divisor):
     return math.ceil(x / divisor) * divisor
 
 
-def nms_rotated(boxes, scores, threshold: float = 0.45, use_triu: bool = True):
-    """
-    Perform NMS on oriented bounding boxes using probiou and fast-nms.
-
-    Args:
-        boxes (torch.Tensor): Rotated bounding boxes with shape (N, 5) in xywhr format.
-        scores (torch.Tensor): Confidence scores with shape (N,).
-        threshold (float): IoU threshold for NMS.
-        use_triu (bool): Whether to use torch.triu operator for upper triangular matrix operations.
-
-    Returns:
-        (torch.Tensor): Indices of boxes to keep after NMS.
-    """
-    sorted_idx = torch.argsort(scores, descending=True)
-    boxes = boxes[sorted_idx]
-    ious = batch_probiou(boxes, boxes)
-    if use_triu:
-        ious = ious.triu_(diagonal=1)
-        # NOTE: handle the case when len(boxes) hence exportable by eliminating if-else condition
-        pick = torch.nonzero((ious >= threshold).sum(0) <= 0).squeeze_(-1)
-    else:
-        n = boxes.shape[0]
-        row_idx = torch.arange(n, device=boxes.device).view(-1, 1).expand(-1, n)
-        col_idx = torch.arange(n, device=boxes.device).view(1, -1).expand(n, -1)
-        upper_mask = row_idx < col_idx
-        ious = ious * upper_mask
-        # Zeroing these scores ensures the additional indices would not affect the final results
-        scores[~((ious >= threshold).sum(0) <= 0)] = 0
-        # NOTE: return indices with fixed length to avoid TFLite reshape error
-        pick = torch.topk(scores, scores.shape[0]).indices
-    return sorted_idx[pick]
-
-
-def non_max_suppression(
-    prediction,
-    conf_thres: float = 0.25,
-    iou_thres: float = 0.45,
-    classes=None,
-    agnostic: bool = False,
-    multi_label: bool = False,
-    labels=(),
-    max_det: int = 300,
-    nc: int = 0,  # number of classes (optional)
-    max_time_img: float = 0.05,
-    max_nms: int = 30000,
-    max_wh: int = 7680,
-    in_place: bool = True,
-    rotated: bool = False,
-    end2end: bool = False,
-    return_idxs: bool = False,
-):
-    """
-    Perform non-maximum suppression (NMS) on prediction results.
-
-    Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple
-    detection formats including standard boxes, rotated boxes, and masks.
-
-    Args:
-        prediction (torch.Tensor): Predictions with shape (batch_size, num_classes + 4 + num_masks, num_boxes)
-            containing boxes, classes, and optional masks.
-        conf_thres (float): Confidence threshold for filtering detections. Valid values are between 0.0 and 1.0.
-        iou_thres (float): IoU threshold for NMS filtering. Valid values are between 0.0 and 1.0.
-        classes (List[int], optional): List of class indices to consider. If None, all classes are considered.
-        agnostic (bool): Whether to perform class-agnostic NMS.
-        multi_label (bool): Whether each box can have multiple labels.
-        labels (List[List[Union[int, float, torch.Tensor]]]): A priori labels for each image.
-        max_det (int): Maximum number of detections to keep per image.
-        nc (int): Number of classes. Indices after this are considered masks.
-        max_time_img (float): Maximum time in seconds for processing one image.
-        max_nms (int): Maximum number of boxes for torchvision.ops.nms().
-        max_wh (int): Maximum box width and height in pixels.
-        in_place (bool): Whether to modify the input prediction tensor in place.
-        rotated (bool): Whether to handle Oriented Bounding Boxes (OBB).
-        end2end (bool): Whether the model is end-to-end and doesn't require NMS.
-        return_idxs (bool): Whether to return the indices of kept detections.
-
-    Returns:
-        output (List[torch.Tensor]): List of detections per image with shape (num_boxes, 6 + num_masks)
-            containing (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
-        keepi (List[torch.Tensor]): Indices of kept detections if return_idxs=True.
-    """
-    import torchvision  # scope for faster 'import ultralytics'
-
-    # Checks
-    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
-    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
-    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
-        prediction = prediction[0]  # select only inference output
-    if classes is not None:
-        classes = torch.tensor(classes, device=prediction.device)
-
-    if prediction.shape[-1] == 6 or end2end:  # end-to-end model (BNC, i.e. 1,300,6)
-        output = [pred[pred[:, 4] > conf_thres][:max_det] for pred in prediction]
-        if classes is not None:
-            output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
-        return output
-
-    bs = prediction.shape[0]  # batch size (BCN, i.e. 1,84,6300)
-    nc = nc or (prediction.shape[1] - 4)  # number of classes
-    extra = prediction.shape[1] - nc - 4  # number of extra info
-    mi = 4 + nc  # mask start index
-    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
-    xinds = torch.stack([torch.arange(len(i), device=prediction.device) for i in xc])[..., None]  # to track idxs
-
-    # Settings
-    # min_wh = 2  # (pixels) minimum box width and height
-    time_limit = 2.0 + max_time_img * bs  # seconds to quit after
-    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
-
-    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
-    if not rotated:
-        if in_place:
-            prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy
-        else:
-            prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy
-
-    t = time.time()
-    output = [torch.zeros((0, 6 + extra), device=prediction.device)] * bs
-    keepi = [torch.zeros((0, 1), device=prediction.device)] * bs  # to store the kept idxs
-    for xi, (x, xk) in enumerate(zip(prediction, xinds)):  # image index, (preds, preds indices)
-        # Apply constraints
-        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
-        filt = xc[xi]  # confidence
-        x, xk = x[filt], xk[filt]
-
-        # Cat apriori labels if autolabelling
-        if labels and len(labels[xi]) and not rotated:
-            lb = labels[xi]
-            v = torch.zeros((len(lb), nc + extra + 4), device=x.device)
-            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
-            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
-            x = torch.cat((x, v), 0)
-
-        # If none remain process next image
-        if not x.shape[0]:
-            continue
-
-        # Detections matrix nx6 (xyxy, conf, cls)
-        box, cls, mask = x.split((4, nc, extra), 1)
-
-        if multi_label:
-            i, j = torch.where(cls > conf_thres)
-            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
-            xk = xk[i]
-        else:  # best class only
-            conf, j = cls.max(1, keepdim=True)
-            filt = conf.view(-1) > conf_thres
-            x = torch.cat((box, conf, j.float(), mask), 1)[filt]
-            xk = xk[filt]
-
-        # Filter by class
-        if classes is not None:
-            filt = (x[:, 5:6] == classes).any(1)
-            x, xk = x[filt], xk[filt]
-
-        # Check shape
-        n = x.shape[0]  # number of boxes
-        if not n:  # no boxes
-            continue
-        if n > max_nms:  # excess boxes
-            filt = x[:, 4].argsort(descending=True)[:max_nms]  # sort by confidence and remove excess boxes
-            x, xk = x[filt], xk[filt]
-
-        # Batched NMS
-        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
-        scores = x[:, 4]  # scores
-        if rotated:
-            boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1)  # xywhr
-            i = nms_rotated(boxes, scores, iou_thres)
-        else:
-            boxes = x[:, :4] + c  # boxes (offset by class)
-            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
-        i = i[:max_det]  # limit detections
-
-        output[xi], keepi[xi] = x[i], xk[i].reshape(-1)
-        if (time.time() - t) > time_limit:
-            LOGGER.warning(f"NMS time limit {time_limit:.3f}s exceeded")
-            break  # time limit exceeded
-
-    return (output, keepi) if return_idxs else output
-
-
 def clip_boxes(boxes, shape):
     """
     Clip bounding boxes to image boundaries.
 
     Args:
         boxes (torch.Tensor | np.ndarray): Bounding boxes to clip.
-        shape (tuple): Image shape as (height, width).
+        shape (tuple): Image shape as HWC or HW (supports both).
 
     Returns:
         (torch.Tensor | np.ndarray): Clipped bounding boxes.
     """
-    if isinstance(boxes, torch.Tensor):  # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
-        boxes[..., 0] = boxes[..., 0].clamp(0, shape[1])  # x1
-        boxes[..., 1] = boxes[..., 1].clamp(0, shape[0])  # y1
-        boxes[..., 2] = boxes[..., 2].clamp(0, shape[1])  # x2
-        boxes[..., 3] = boxes[..., 3].clamp(0, shape[0])  # y2
+    h, w = shape[:2]  # supports both HWC or HW shapes
+    if isinstance(boxes, torch.Tensor):  # faster individually
+        if NOT_MACOS14:
+            boxes[..., 0].clamp_(0, w)  # x1
+            boxes[..., 1].clamp_(0, h)  # y1
+            boxes[..., 2].clamp_(0, w)  # x2
+            boxes[..., 3].clamp_(0, h)  # y2
+        else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
+            boxes[..., 0] = boxes[..., 0].clamp(0, w)
+            boxes[..., 1] = boxes[..., 1].clamp(0, h)
+            boxes[..., 2] = boxes[..., 2].clamp(0, w)
+            boxes[..., 3] = boxes[..., 3].clamp(0, h)
     else:  # np.array (faster grouped)
-        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, w)  # x1, x2
+        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, h)  # y1, y2
     return boxes
 
 
```
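`nms_rotated` and `non_max_suppression` are deleted from ops.py outright; given the new 346-line `ultralytics/utils/nms.py` in the file list above, they presumably move there (the new module is not shown in this diff, so treat that as an assumption). The fast-NMS pattern the removed `nms_rotated` used — keep a box only if no higher-scoring box overlaps it past the threshold, computed in one shot from an upper-triangular IoU matrix — can be sketched standalone, with plain axis-aligned IoU standing in for `batch_probiou`:

```python
import torch

def fast_nms(boxes: torch.Tensor, scores: torch.Tensor, thr: float = 0.45) -> torch.Tensor:
    """Matrix-form NMS on xyxy boxes: no sequential suppression loop."""
    order = scores.argsort(descending=True)
    b = boxes[order]
    area = (b[:, 2] - b[:, 0]).clamp(0) * (b[:, 3] - b[:, 1]).clamp(0)
    lt = torch.maximum(b[:, None, :2], b[None, :, :2])  # pairwise intersection corners
    rb = torch.minimum(b[:, None, 2:], b[None, :, 2:])
    inter = (rb - lt).clamp(0).prod(-1)
    iou = inter / (area[:, None] + area[None, :] - inter + 1e-7)
    iou = iou.triu_(diagonal=1)  # each box compared only against higher-scoring ones
    keep = (iou >= thr).sum(0) == 0  # survives if no better box suppresses it
    return order[keep.nonzero().squeeze(-1)]

boxes = torch.tensor([[0.0, 0, 10, 10], [1, 1, 10, 10], [20, 20, 30, 30]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(fast_nms(boxes, scores))  # tensor([0, 2]): box 1 is suppressed by box 0
```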
```diff
@@ -366,17 +189,22 @@ def clip_coords(coords, shape):
 
     Args:
         coords (torch.Tensor | np.ndarray): Line coordinates to clip.
-        shape (tuple): Image shape as (height, width).
+        shape (tuple): Image shape as HWC or HW (supports both).
 
     Returns:
         (torch.Tensor | np.ndarray): Clipped coordinates.
     """
-    if isinstance(coords, torch.Tensor):  # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
-        coords[..., 0] = coords[..., 0].clamp(0, shape[1])  # x
-        coords[..., 1] = coords[..., 1].clamp(0, shape[0])  # y
-    else:  # np.array (faster grouped)
-        coords[..., 0] = coords[..., 0].clip(0, shape[1])  # x
-        coords[..., 1] = coords[..., 1].clip(0, shape[0])  # y
+    h, w = shape[:2]  # supports both HWC or HW shapes
+    if isinstance(coords, torch.Tensor):
+        if NOT_MACOS14:
+            coords[..., 0].clamp_(0, w)  # x
+            coords[..., 1].clamp_(0, h)  # y
+        else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
+            coords[..., 0] = coords[..., 0].clamp(0, w)
+            coords[..., 1] = coords[..., 1].clamp(0, h)
+    else:  # np.array
+        coords[..., 0] = coords[..., 0].clip(0, w)  # x
+        coords[..., 1] = coords[..., 1].clip(0, h)  # y
     return coords
 
 
```
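Both `clip_boxes` and `clip_coords` now take the fast in-place `clamp_` path except on macOS 14, where the linked MPS bug forces out-of-place `clamp`. The two forms are numerically identical, as a quick check shows:

```python
import torch

coords = torch.tensor([[-5.0, 3.0], [712.0, 700.0]])
h, w = 480, 640

fast = coords.clone()
fast[..., 0].clamp_(0, w)  # in-place path (NOT_MACOS14)
fast[..., 1].clamp_(0, h)

safe = coords.clone()
safe[..., 0] = safe[..., 0].clamp(0, w)  # out-of-place macOS 14 fallback
safe[..., 1] = safe[..., 1].clamp(0, h)

assert torch.equal(fast, safe)  # same result; only the kernel path differs
```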
```diff
@@ -389,32 +217,34 @@ def scale_image(masks, im0_shape, ratio_pad=None):
 
     Args:
         masks (np.ndarray): Resized and padded masks with shape [H, W, N] or [H, W, 3].
-        im0_shape (tuple): Original image shape as (height, width).
+        im0_shape (tuple): Original image shape as HWC or HW (supports both).
         ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
 
     Returns:
         (np.ndarray): Rescaled masks with shape [H, W, N] matching original image dimensions.
     """
     # Rescale coordinates (xyxy) from im1_shape to im0_shape
-    im1_shape = masks.shape
-    if im1_shape == im0_shape:
+    im0_h, im0_w = im0_shape[:2]  # supports both HWC or HW shapes
+    im1_h, im1_w, _ = masks.shape
+    if im1_h == im0_h and im1_w == im0_w:
         return masks
+
     if ratio_pad is None:  # calculate from im0_shape
-        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
-        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
+        gain = min(im1_h / im0_h, im1_w / im0_w)  # gain = old / new
+        pad = (im1_w - im0_w * gain) / 2, (im1_h - im0_h * gain) / 2  # wh padding
     else:
         pad = ratio_pad[1]
 
-    top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))
-    bottom, right = (
-        im1_shape[0] - int(round(pad[1] + 0.1)),
-        im1_shape[1] - int(round(pad[0] + 0.1)),
-    )
+    pad_w, pad_h = pad
+    top = int(round(pad_h - 0.1))
+    left = int(round(pad_w - 0.1))
+    bottom = im1_h - int(round(pad_h + 0.1))
+    right = im1_w - int(round(pad_w + 0.1))
 
     if len(masks.shape) < 2:
         raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
     masks = masks[top:bottom, left:right]
-    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
+    masks = cv2.resize(masks, (im0_w, im0_h))
     if len(masks.shape) == 2:
         masks = masks[:, :, None]
 
@@ -434,10 +264,11 @@ def xyxy2xywh(x):
     """
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = empty_like(x)  # faster than clone/copy
-    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
-    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
-    y[..., 2] = x[..., 2] - x[..., 0]  # width
-    y[..., 3] = x[..., 3] - x[..., 1]  # height
+    x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+    y[..., 0] = (x1 + x2) / 2  # x center
+    y[..., 1] = (y1 + y2) / 2  # y center
+    y[..., 2] = x2 - x1  # width
+    y[..., 3] = y2 - y1  # height
     return y
 
 
@@ -478,10 +309,12 @@ def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
     """
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = empty_like(x)  # faster than clone/copy
-    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
-    y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
-    y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
-    y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
+    xc, yc, xw, xh = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+    half_w, half_h = xw / 2, xh / 2
+    y[..., 0] = w * (xc - half_w) + padw  # top left x
+    y[..., 1] = h * (yc - half_h) + padh  # top left y
+    y[..., 2] = w * (xc + half_w) + padw  # bottom right x
+    y[..., 3] = h * (yc + half_h) + padh  # bottom right y
     return y
 
 
@@ -504,10 +337,11 @@ def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0
         x = clip_boxes(x, (h - eps, w - eps))
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = empty_like(x)  # faster than clone/copy
-    y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center
-    y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center
-    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width
-    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height
+    x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+    y[..., 0] = ((x1 + x2) / 2) / w  # x center
+    y[..., 1] = ((y1 + y2) / 2) / h  # y center
+    y[..., 2] = (x2 - x1) / w  # width
+    y[..., 3] = (y2 - y1) / h  # height
    return y
 
 
```
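The three box-format converters now unpack coordinates once instead of re-indexing `x` per assignment; the math is untouched. A standalone round-trip check of the same arithmetic (plain numpy stand-ins, not the package functions, which write into an `empty_like` buffer):

```python
import numpy as np

def xyxy2xywhn(x, w=640, h=640):
    x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    return np.stack([((x1 + x2) / 2) / w, ((y1 + y2) / 2) / h, (x2 - x1) / w, (y2 - y1) / h], -1)

def xywhn2xyxy(x, w=640, h=640):
    xc, yc, bw, bh = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    return np.stack([w * (xc - bw / 2), h * (yc - bh / 2), w * (xc + bw / 2), h * (yc + bh / 2)], -1)

boxes = np.array([[100.0, 50.0, 300.0, 250.0]])  # xyxy, pixels
assert np.allclose(xywhn2xyxy(xyxy2xywhn(boxes)), boxes)  # lossless round trip
```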
```diff
@@ -756,19 +590,15 @@ def scale_masks(masks, shape, padding: bool = True):
     """
     mh, mw = masks.shape[2:]
     gain = min(mh / shape[0], mw / shape[1])  # gain = old / new
-    pad = [mw - shape[1] * gain, mh - shape[0] * gain]  # wh padding
+    pad_w = mw - shape[1] * gain
+    pad_h = mh - shape[0] * gain
     if padding:
-        pad[0] /= 2
-        pad[1] /= 2
-    top, left = (int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))) if padding else (0, 0)  # y, x
-    bottom, right = (
-        mh - int(round(pad[1] + 0.1)),
-        mw - int(round(pad[0] + 0.1)),
-    )
-    masks = masks[..., top:bottom, left:right]
-
-    masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False)  # NCHW
-    return masks
+        pad_w /= 2
+        pad_h /= 2
+    top, left = (int(round(pad_h - 0.1)), int(round(pad_w - 0.1))) if padding else (0, 0)
+    bottom = mh - int(round(pad_h + 0.1))
+    right = mw - int(round(pad_w + 0.1))
+    return F.interpolate(masks[..., top:bottom, left:right], shape, mode="bilinear", align_corners=False)  # NCHW masks
 
 
 def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
```
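`scale_masks` collapses to a single `F.interpolate` over the de-padded crop. A runnable sketch of the same crop-then-resize flow (assuming `padding=True`, so padding is split evenly between the two sides):

```python
import torch
import torch.nn.functional as F

masks = torch.rand(1, 1, 640, 640)  # NCHW masks from a letterboxed forward pass
shape = (480, 640)  # target (h, w)

mh, mw = masks.shape[2:]
gain = min(mh / shape[0], mw / shape[1])  # gain = old / new
pad_w = (mw - shape[1] * gain) / 2  # halved: padding was applied symmetrically
pad_h = (mh - shape[0] * gain) / 2
top, left = int(round(pad_h - 0.1)), int(round(pad_w - 0.1))  # -0.1/+0.1 guards rounding
bottom, right = mh - int(round(pad_h + 0.1)), mw - int(round(pad_w + 0.1))

out = F.interpolate(masks[..., top:bottom, left:right], shape, mode="bilinear", align_corners=False)
print(out.shape)  # torch.Size([1, 1, 480, 640])
```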
```diff
@@ -776,9 +606,9 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool
     Rescale segment coordinates from img1_shape to img0_shape.
 
     Args:
-        img1_shape (tuple): The shape of the image that the coords are from.
+        img1_shape (tuple): Source image shape as HWC or HW (supports both).
         coords (torch.Tensor): Coordinates to scale with shape (N, 2).
-        img0_shape (tuple): The shape of the image that the segmentation is being applied to.
+        img0_shape (tuple): Image 0 shape as HWC or HW (supports both).
         ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
         normalize (bool): Whether to normalize coordinates to range [0, 1].
         padding (bool): Whether coordinates are based on YOLO-style augmented images with padding.
@@ -786,9 +616,11 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool
     Returns:
         (torch.Tensor): Scaled coordinates.
     """
+    img0_h, img0_w = img0_shape[:2]  # supports both HWC or HW shapes
     if ratio_pad is None:  # calculate from img0_shape
-        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
-        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+        img1_h, img1_w = img1_shape[:2]  # supports both HWC or HW shapes
+        gain = min(img1_h / img0_h, img1_w / img0_w)  # gain = old / new
+        pad = (img1_w - img0_w * gain) / 2, (img1_h - img0_h * gain) / 2  # wh padding
     else:
         gain = ratio_pad[0][0]
         pad = ratio_pad[1]
@@ -800,8 +632,8 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool
     coords[..., 1] /= gain
     coords = clip_coords(coords, img0_shape)
     if normalize:
-        coords[..., 0] /= img0_shape[1]  # width
-        coords[..., 1] /= img0_shape[0]  # height
+        coords[..., 0] /= img0_w  # width
+        coords[..., 1] /= img0_h  # height
     return coords
 
 
```
ultralytics/utils/patches.py
CHANGED
```diff
@@ -1,11 +1,13 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Monkey patches to update/extend functionality of existing functions."""
 
+from __future__ import annotations
+
 import time
 from contextlib import contextmanager
 from copy import copy
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 import cv2
 import numpy as np
@@ -15,7 +17,7 @@ import torch
 _imshow = cv2.imshow  # copy to avoid recursion errors
 
 
-def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> Optional[np.ndarray]:
+def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
     """
     Read an image from a file with multilanguage filename support.
 
@@ -42,7 +44,7 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> Optional[np.ndarray]
     return im[..., None] if im is not None and im.ndim == 2 else im  # Always ensure 3 dimensions
 
 
-def imwrite(filename: str, img: np.ndarray, params: Optional[List[int]] = None) -> bool:
+def imwrite(filename: str, img: np.ndarray, params: list[int] | None = None) -> bool:
     """
     Write an image to a file with multilanguage filename support.
 
@@ -164,7 +166,7 @@ def arange_patch(args):
 
 
 @contextmanager
-def override_configs(args, overrides: Optional[Dict[str, Any]] = None):
+def override_configs(args, overrides: dict[str, Any] | None = None):
     """
     Context manager to temporarily override configurations in args.
 
```
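The `Optional[X]` → `X | None` rewrites across these files stay compatible with older interpreters because each module now starts with `from __future__ import annotations`: annotations become lazy strings (PEP 563), so the PEP 604 union syntax is never evaluated at definition time. A self-contained illustration (`imread_like` is a stub for this note, not the package's `imread`):

```python
from __future__ import annotations  # annotations stored as strings, never evaluated

import numpy as np

def imread_like(filename: str, flags: int = 0) -> np.ndarray | None:
    return None  # stub: the real imread returns the decoded image or None

# Works even on Python < 3.10, where evaluating `np.ndarray | None` would raise:
print(imread_like.__annotations__["return"])  # 'np.ndarray | None'
```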
ultralytics/utils/plotting.py
CHANGED
```diff
@@ -1,9 +1,11 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import math
 import warnings
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable
 
 import cv2
 import numpy as np
@@ -142,12 +144,12 @@ class Colors:
             dtype=np.uint8,
         )
 
-    def __call__(self, i: int, bgr: bool = False) -> tuple:
+    def __call__(self, i: int | torch.Tensor, bgr: bool = False) -> tuple:
         """
         Convert hex color codes to RGB values.
 
         Args:
-            i (int): Color index.
+            i (int | torch.Tensor): Color index.
             bgr (bool, optional): Whether to return BGR format instead of RGB.
 
         Returns:
@@ -190,8 +192,8 @@ class Annotator:
     def __init__(
         self,
         im,
-        line_width: Optional[int] = None,
-        font_size: Optional[int] = None,
+        line_width: int | None = None,
+        font_size: int | None = None,
         font: str = "Arial.ttf",
         pil: bool = False,
         example: str = "abc",
@@ -409,10 +411,10 @@ class Annotator:
         self,
         kpts,
         shape: tuple = (640, 640),
-        radius: Optional[int] = None,
+        radius: int | None = None,
         kpt_line: bool = True,
         conf_thres: float = 0.25,
-        kpt_color: Optional[tuple] = None,
+        kpt_color: tuple | None = None,
     ):
         """
         Plot keypoints on the image.
@@ -517,7 +519,7 @@ class Annotator:
         """Return annotated image as array."""
         return np.asarray(self.im)
 
-    def show(self, title: Optional[str] = None):
+    def show(self, title: str | None = None):
         """Show the annotated image."""
         im = Image.fromarray(np.asarray(self.im)[..., ::-1])  # Convert numpy array to PIL Image with RGB to BGR
         if IS_COLAB or IS_KAGGLE:  # can not use IS_JUPYTER as will run for all ipython environments
@@ -533,7 +535,7 @@ class Annotator:
         cv2.imwrite(filename, np.asarray(self.im))
 
     @staticmethod
-    def get_bbox_dimension(bbox: Optional[tuple] = None):
+    def get_bbox_dimension(bbox: tuple | None = None):
         """
         Calculate the dimensions and area of a bounding box.
 
@@ -678,17 +680,17 @@ def save_one_box(
 
 @threaded
 def plot_images(
-    labels: Dict[str, Any],
-    images: Union[torch.Tensor, np.ndarray] = np.zeros((0, 3, 640, 640), dtype=np.float32),
-    paths: Optional[List[str]] = None,
+    labels: dict[str, Any],
+    images: torch.Tensor | np.ndarray = np.zeros((0, 3, 640, 640), dtype=np.float32),
+    paths: list[str] | None = None,
     fname: str = "images.jpg",
-    names: Optional[Dict[int, str]] = None,
-    on_plot: Optional[Callable] = None,
+    names: dict[int, str] | None = None,
+    on_plot: Callable | None = None,
     max_size: int = 1920,
     max_subplots: int = 16,
     save: bool = True,
     conf_thres: float = 0.25,
-) -> Optional[np.ndarray]:
+) -> np.ndarray | None:
     """
     Plot image grid with labels, bounding boxes, masks, and keypoints.
 
@@ -851,7 +853,7 @@ def plot_results(
     segment: bool = False,
     pose: bool = False,
     classify: bool = False,
-    on_plot: Optional[Callable] = None,
+    on_plot: Callable | None = None,
 ):
     """
     Plot training results from a results CSV file. The function supports various types of data including segmentation,
```
ultralytics/utils/tal.py
CHANGED
```diff
@@ -387,7 +387,7 @@ def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
     if xywh:
         c_xy = (x1y1 + x2y2) / 2
         wh = x2y2 - x1y1
-        return torch.cat((c_xy, wh), dim)  # xywh bbox
+        return torch.cat([c_xy, wh], dim)  # xywh bbox
     return torch.cat((x1y1, x2y2), dim)  # xyxy bbox
 
 
```
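Only the `torch.cat` argument container changes here, tuple to list; `torch.cat` accepts any sequence of tensors, so the result is identical (quick check below). The diff doesn't say why the list form was preferred — possibly an export/tracing nicety.

```python
import torch

c_xy = torch.tensor([[3.0, 4.0]])  # box center, as in dist2bbox
wh = torch.tensor([[2.0, 2.0]])  # box width/height
assert torch.equal(torch.cat((c_xy, wh), -1), torch.cat([c_xy, wh], -1))  # xywh either way
```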
ultralytics/utils/torch_utils.py
CHANGED
```diff
@@ -1,5 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import functools
 import gc
 import math
@@ -10,7 +12,7 @@ from contextlib import contextmanager
 from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, Union
+from typing import Any
 
 import numpy as np
 import torch
@@ -708,7 +710,7 @@ class ModelEMA:
         copy_attr(self.ema, model, include, exclude)
 
 
-def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "", updates: Dict[str, Any] = None) -> Dict[str, Any]:
+def strip_optimizer(f: str | Path = "best.pt", s: str = "", updates: dict[str, Any] = None) -> dict[str, Any]:
     """
     Strip optimizer from 'f' to finalize training, optionally save as 's'.
 
```