dgenerate-ultralytics-headless 8.3.187-py3-none-any.whl → 8.3.190-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.187.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/METADATA +3 -2
- {dgenerate_ultralytics_headless-8.3.187.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/RECORD +38 -37
- ultralytics/__init__.py +1 -1
- ultralytics/data/utils.py +2 -2
- ultralytics/engine/exporter.py +9 -6
- ultralytics/engine/predictor.py +1 -1
- ultralytics/engine/results.py +5 -5
- ultralytics/engine/trainer.py +2 -0
- ultralytics/engine/validator.py +3 -1
- ultralytics/hub/__init__.py +6 -2
- ultralytics/hub/auth.py +2 -2
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +3 -5
- ultralytics/hub/utils.py +5 -5
- ultralytics/models/rtdetr/val.py +3 -1
- ultralytics/models/yolo/detect/predict.py +2 -2
- ultralytics/models/yolo/detect/val.py +15 -4
- ultralytics/models/yolo/obb/val.py +5 -2
- ultralytics/models/yolo/segment/val.py +0 -3
- ultralytics/nn/autobackend.py +29 -36
- ultralytics/nn/modules/__init__.py +3 -3
- ultralytics/nn/modules/head.py +5 -1
- ultralytics/nn/tasks.py +2 -2
- ultralytics/utils/__init__.py +49 -14
- ultralytics/utils/benchmarks.py +12 -6
- ultralytics/utils/callbacks/platform.py +2 -1
- ultralytics/utils/checks.py +3 -3
- ultralytics/utils/downloads.py +46 -40
- ultralytics/utils/logger.py +7 -6
- ultralytics/utils/nms.py +346 -0
- ultralytics/utils/ops.py +80 -249
- ultralytics/utils/tal.py +1 -1
- ultralytics/utils/torch_utils.py +50 -47
- ultralytics/utils/tqdm.py +58 -59
- {dgenerate_ultralytics_headless-8.3.187.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.187.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.187.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.187.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/top_level.txt +0 -0
ultralytics/utils/ops.py
CHANGED
@@ -11,8 +11,7 @@ import numpy as np
 import torch
 import torch.nn.functional as F

-from ultralytics.utils import LOGGER
-from ultralytics.utils.metrics import batch_probiou
+from ultralytics.utils import NOT_MACOS14


 class Profile(contextlib.ContextDecorator):
@@ -122,20 +121,18 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = True
     """
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
-        pad = (
-            round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1),
-            round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1),
-        )  # wh padding
+        pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)
+        pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)
     else:
         gain = ratio_pad[0][0]
-        pad = ratio_pad[1]
+        pad_x, pad_y = ratio_pad[1]

     if padding:
-        boxes[..., 0] -= pad[0]  # x padding
-        boxes[..., 1] -= pad[1]  # y padding
+        boxes[..., 0] -= pad_x  # x padding
+        boxes[..., 1] -= pad_y  # y padding
         if not xywh:
-            boxes[..., 2] -= pad[0]  # x padding
-            boxes[..., 3] -= pad[1]  # y padding
+            boxes[..., 2] -= pad_x  # x padding
+            boxes[..., 3] -= pad_y  # y padding
     boxes[..., :4] /= gain
     return clip_boxes(boxes, img0_shape)
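Note: pad_x/pad_y here undo the letterbox padding added at inference time. A minimal standalone sketch of the same arithmetic (the 640x640 canvas and 480x640 source shapes are hypothetical):

    import numpy as np

    img1_shape, img0_shape = (640, 640), (480, 640)  # letterboxed canvas, original image
    gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # 1.0
    pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)  # 0
    pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)  # 80

    boxes = np.array([[120.0, 160.0, 300.0, 400.0]])  # xyxy in letterboxed space
    boxes[..., [0, 2]] -= pad_x
    boxes[..., [1, 3]] -= pad_y
    boxes[..., :4] /= gain
    print(boxes)  # [[120.  80. 300. 320.]] back in original-image space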
@@ -156,207 +153,32 @@ def make_divisible(x: int, divisor):
     return math.ceil(x / divisor) * divisor


-def nms_rotated(boxes, scores, threshold: float = 0.45, use_triu: bool = True):
-    """
-    Perform NMS on oriented bounding boxes using probiou and fast-nms.
-
-    Args:
-        boxes (torch.Tensor): Rotated bounding boxes with shape (N, 5) in xywhr format.
-        scores (torch.Tensor): Confidence scores with shape (N,).
-        threshold (float): IoU threshold for NMS.
-        use_triu (bool): Whether to use torch.triu operator for upper triangular matrix operations.
-
-    Returns:
-        (torch.Tensor): Indices of boxes to keep after NMS.
-    """
-    sorted_idx = torch.argsort(scores, descending=True)
-    boxes = boxes[sorted_idx]
-    ious = batch_probiou(boxes, boxes)
-    if use_triu:
-        ious = ious.triu_(diagonal=1)
-        # NOTE: handle the case when len(boxes) hence exportable by eliminating if-else condition
-        pick = torch.nonzero((ious >= threshold).sum(0) <= 0).squeeze_(-1)
-    else:
-        n = boxes.shape[0]
-        row_idx = torch.arange(n, device=boxes.device).view(-1, 1).expand(-1, n)
-        col_idx = torch.arange(n, device=boxes.device).view(1, -1).expand(n, -1)
-        upper_mask = row_idx < col_idx
-        ious = ious * upper_mask
-        # Zeroing these scores ensures the additional indices would not affect the final results
-        scores[~((ious >= threshold).sum(0) <= 0)] = 0
-        # NOTE: return indices with fixed length to avoid TFLite reshape error
-        pick = torch.topk(scores, scores.shape[0]).indices
-    return sorted_idx[pick]
-
-
-def non_max_suppression(
-    prediction,
-    conf_thres: float = 0.25,
-    iou_thres: float = 0.45,
-    classes=None,
-    agnostic: bool = False,
-    multi_label: bool = False,
-    labels=(),
-    max_det: int = 300,
-    nc: int = 0,  # number of classes (optional)
-    max_time_img: float = 0.05,
-    max_nms: int = 30000,
-    max_wh: int = 7680,
-    in_place: bool = True,
-    rotated: bool = False,
-    end2end: bool = False,
-    return_idxs: bool = False,
-):
-    """
-    Perform non-maximum suppression (NMS) on prediction results.
-
-    Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple
-    detection formats including standard boxes, rotated boxes, and masks.
-
-    Args:
-        prediction (torch.Tensor): Predictions with shape (batch_size, num_classes + 4 + num_masks, num_boxes)
-            containing boxes, classes, and optional masks.
-        conf_thres (float): Confidence threshold for filtering detections. Valid values are between 0.0 and 1.0.
-        iou_thres (float): IoU threshold for NMS filtering. Valid values are between 0.0 and 1.0.
-        classes (List[int], optional): List of class indices to consider. If None, all classes are considered.
-        agnostic (bool): Whether to perform class-agnostic NMS.
-        multi_label (bool): Whether each box can have multiple labels.
-        labels (List[List[Union[int, float, torch.Tensor]]]): A priori labels for each image.
-        max_det (int): Maximum number of detections to keep per image.
-        nc (int): Number of classes. Indices after this are considered masks.
-        max_time_img (float): Maximum time in seconds for processing one image.
-        max_nms (int): Maximum number of boxes for torchvision.ops.nms().
-        max_wh (int): Maximum box width and height in pixels.
-        in_place (bool): Whether to modify the input prediction tensor in place.
-        rotated (bool): Whether to handle Oriented Bounding Boxes (OBB).
-        end2end (bool): Whether the model is end-to-end and doesn't require NMS.
-        return_idxs (bool): Whether to return the indices of kept detections.
-
-    Returns:
-        output (List[torch.Tensor]): List of detections per image with shape (num_boxes, 6 + num_masks)
-            containing (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
-        keepi (List[torch.Tensor]): Indices of kept detections if return_idxs=True.
-    """
-    import torchvision  # scope for faster 'import ultralytics'
-
-    # Checks
-    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
-    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
-    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
-        prediction = prediction[0]  # select only inference output
-    if classes is not None:
-        classes = torch.tensor(classes, device=prediction.device)
-
-    if prediction.shape[-1] == 6 or end2end:  # end-to-end model (BNC, i.e. 1,300,6)
-        output = [pred[pred[:, 4] > conf_thres][:max_det] for pred in prediction]
-        if classes is not None:
-            output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
-        return output
-
-    bs = prediction.shape[0]  # batch size (BCN, i.e. 1,84,6300)
-    nc = nc or (prediction.shape[1] - 4)  # number of classes
-    extra = prediction.shape[1] - nc - 4  # number of extra info
-    mi = 4 + nc  # mask start index
-    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
-    xinds = torch.stack([torch.arange(len(i), device=prediction.device) for i in xc])[..., None]  # to track idxs
-
-    # Settings
-    # min_wh = 2  # (pixels) minimum box width and height
-    time_limit = 2.0 + max_time_img * bs  # seconds to quit after
-    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
-
-    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
-    if not rotated:
-        if in_place:
-            prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy
-        else:
-            prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy
-
-    t = time.time()
-    output = [torch.zeros((0, 6 + extra), device=prediction.device)] * bs
-    keepi = [torch.zeros((0, 1), device=prediction.device)] * bs  # to store the kept idxs
-    for xi, (x, xk) in enumerate(zip(prediction, xinds)):  # image index, (preds, preds indices)
-        # Apply constraints
-        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
-        filt = xc[xi]  # confidence
-        x, xk = x[filt], xk[filt]
-
-        # Cat apriori labels if autolabelling
-        if labels and len(labels[xi]) and not rotated:
-            lb = labels[xi]
-            v = torch.zeros((len(lb), nc + extra + 4), device=x.device)
-            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
-            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
-            x = torch.cat((x, v), 0)
-
-        # If none remain process next image
-        if not x.shape[0]:
-            continue
-
-        # Detections matrix nx6 (xyxy, conf, cls)
-        box, cls, mask = x.split((4, nc, extra), 1)
-
-        if multi_label:
-            i, j = torch.where(cls > conf_thres)
-            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
-            xk = xk[i]
-        else:  # best class only
-            conf, j = cls.max(1, keepdim=True)
-            filt = conf.view(-1) > conf_thres
-            x = torch.cat((box, conf, j.float(), mask), 1)[filt]
-            xk = xk[filt]
-
-        # Filter by class
-        if classes is not None:
-            filt = (x[:, 5:6] == classes).any(1)
-            x, xk = x[filt], xk[filt]
-
-        # Check shape
-        n = x.shape[0]  # number of boxes
-        if not n:  # no boxes
-            continue
-        if n > max_nms:  # excess boxes
-            filt = x[:, 4].argsort(descending=True)[:max_nms]  # sort by confidence and remove excess boxes
-            x, xk = x[filt], xk[filt]
-
-        # Batched NMS
-        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
-        scores = x[:, 4]  # scores
-        if rotated:
-            boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1)  # xywhr
-            i = nms_rotated(boxes, scores, iou_thres)
-        else:
-            boxes = x[:, :4] + c  # boxes (offset by class)
-            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
-        i = i[:max_det]  # limit detections
-
-        output[xi], keepi[xi] = x[i], xk[i].reshape(-1)
-        if (time.time() - t) > time_limit:
-            LOGGER.warning(f"NMS time limit {time_limit:.3f}s exceeded")
-            break  # time limit exceeded
-
-    return (output, keepi) if return_idxs else output
-
-
 def clip_boxes(boxes, shape):
     """
     Clip bounding boxes to image boundaries.

     Args:
         boxes (torch.Tensor | np.ndarray): Bounding boxes to clip.
-        shape (tuple): Image shape as (height, width).
+        shape (tuple): Image shape as HWC or HW (supports both).

     Returns:
         (torch.Tensor | np.ndarray): Clipped bounding boxes.
     """
-    if isinstance(boxes, torch.Tensor):  # faster individually
-        boxes[..., 0] = boxes[..., 0].clamp(0, shape[1])  # x1
-        boxes[..., 1] = boxes[..., 1].clamp(0, shape[0])  # y1
-        boxes[..., 2] = boxes[..., 2].clamp(0, shape[1])  # x2
-        boxes[..., 3] = boxes[..., 3].clamp(0, shape[0])  # y2
+    h, w = shape[:2]  # supports both HWC or HW shapes
+    if isinstance(boxes, torch.Tensor):  # faster individually
+        if NOT_MACOS14:
+            boxes[..., 0].clamp_(0, w)  # x1
+            boxes[..., 1].clamp_(0, h)  # y1
+            boxes[..., 2].clamp_(0, w)  # x2
+            boxes[..., 3].clamp_(0, h)  # y2
+        else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
+            boxes[..., 0] = boxes[..., 0].clamp(0, w)
+            boxes[..., 1] = boxes[..., 1].clamp(0, h)
+            boxes[..., 2] = boxes[..., 2].clamp(0, w)
+            boxes[..., 3] = boxes[..., 3].clamp(0, h)
     else:  # np.array (faster grouped)
-        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, w)  # x1, x2
+        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, h)  # y1, y2
     return boxes

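Note: nms_rotated and non_max_suppression were not deleted outright; per the RECORD diff they moved to the new ultralytics/utils/nms.py (+346 lines). The batched-NMS core of the removed code offsets each box by class id * max_wh so torchvision's class-agnostic kernel never suppresses across classes. A minimal sketch of that trick, with made-up detections:

    import torch
    import torchvision

    boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0], [1.0, 1.0, 11.0, 11.0], [0.0, 0.0, 10.0, 10.0]])
    scores = torch.tensor([0.9, 0.8, 0.7])
    cls = torch.tensor([0.0, 0.0, 1.0])  # boxes 0 and 2 are identical but belong to different classes

    max_wh = 7680  # the same per-class offset constant the removed code used
    keep = torchvision.ops.nms(boxes + cls[:, None] * max_wh, scores, 0.45)
    print(keep)  # tensor([0, 2]): box 1 is suppressed by box 0; box 2 survives in its own class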
@@ -366,17 +188,22 @@ def clip_coords(coords, shape):

     Args:
         coords (torch.Tensor | np.ndarray): Line coordinates to clip.
-        shape (tuple): Image shape as (height, width).
+        shape (tuple): Image shape as HWC or HW (supports both).

     Returns:
         (torch.Tensor | np.ndarray): Clipped coordinates.
     """
-    if isinstance(coords, torch.Tensor):  # faster individually
-        coords[..., 0] = coords[..., 0].clamp(0, shape[1])  # x
-        coords[..., 1] = coords[..., 1].clamp(0, shape[0])  # y
-    else:  # np.array (faster grouped)
-        coords[..., 0] = coords[..., 0].clip(0, shape[1])  # x
-        coords[..., 1] = coords[..., 1].clip(0, shape[0])  # y
+    h, w = shape[:2]  # supports both HWC or HW shapes
+    if isinstance(coords, torch.Tensor):
+        if NOT_MACOS14:
+            coords[..., 0].clamp_(0, w)  # x
+            coords[..., 1].clamp_(0, h)  # y
+        else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
+            coords[..., 0] = coords[..., 0].clamp(0, w)
+            coords[..., 1] = coords[..., 1].clamp(0, h)
+    else:  # np.array
+        coords[..., 0] = coords[..., 0].clip(0, w)  # x
+        coords[..., 1] = coords[..., 1].clip(0, h)  # y
     return coords

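Note: both clip helpers now branch on NOT_MACOS14 because in-place clamp_ is affected by an Apple macOS 14 MPS bug (see the PR linked in the code). A standalone sketch of the pattern, where NOT_MACOS14 is a stand-in boolean rather than the real ultralytics.utils import:

    import torch

    NOT_MACOS14 = True  # stand-in for the platform check in ultralytics.utils

    def clip_xy(coords: torch.Tensor, h: int, w: int) -> torch.Tensor:
        """Clamp (x, y) coordinates into [0, w] x [0, h]."""
        if NOT_MACOS14:
            coords[..., 0].clamp_(0, w)  # in-place, avoids temporaries
            coords[..., 1].clamp_(0, h)
        else:  # out-of-place fallback where clamp_ is buggy
            coords[..., 0] = coords[..., 0].clamp(0, w)
            coords[..., 1] = coords[..., 1].clamp(0, h)
        return coords

    print(clip_xy(torch.tensor([[-5.0, 3.0], [700.0, 900.0]]), 480, 640))
    # tensor([[  0.,   3.],
    #         [640., 480.]])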
@@ -389,32 +216,34 @@ def scale_image(masks, im0_shape, ratio_pad=None):

     Args:
         masks (np.ndarray): Resized and padded masks with shape [H, W, N] or [H, W, 3].
-        im0_shape (tuple): Original image shape as (height, width).
+        im0_shape (tuple): Original image shape as HWC or HW (supports both).
         ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).

     Returns:
         (np.ndarray): Rescaled masks with shape [H, W, N] matching original image dimensions.
     """
     # Rescale coordinates (xyxy) from im1_shape to im0_shape
-    im1_shape = masks.shape
-    if im1_shape[:2] == im0_shape[:2]:
+    im0_h, im0_w = im0_shape[:2]  # supports both HWC or HW shapes
+    im1_h, im1_w, _ = masks.shape
+    if im1_h == im0_h and im1_w == im0_w:
         return masks
+
     if ratio_pad is None:  # calculate from im0_shape
-        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
-        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
+        gain = min(im1_h / im0_h, im1_w / im0_w)  # gain = old / new
+        pad = (im1_w - im0_w * gain) / 2, (im1_h - im0_h * gain) / 2  # wh padding
     else:
         pad = ratio_pad[1]

-    top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))  # y, x
-    bottom, right = (
-        im1_shape[0] - int(round(pad[1] + 0.1)),
-        im1_shape[1] - int(round(pad[0] + 0.1)),
-    )
+    pad_w, pad_h = pad
+    top = int(round(pad_h - 0.1))
+    left = int(round(pad_w - 0.1))
+    bottom = im1_h - int(round(pad_h + 0.1))
+    right = im1_w - int(round(pad_w + 0.1))

     if len(masks.shape) < 2:
         raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
     masks = masks[top:bottom, left:right]
-    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
+    masks = cv2.resize(masks, (im0_w, im0_h))
     if len(masks.shape) == 2:
         masks = masks[:, :, None]
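Note: a short usage sketch for the rewritten scale_image; the mask content and shapes are hypothetical:

    import numpy as np
    from ultralytics.utils.ops import scale_image

    masks = np.zeros((640, 640, 1), dtype=np.float32)  # one mask on a 640x640 letterboxed canvas
    masks[80:560, :, 0] = 1.0  # rows 0:80 and 560:640 are letterbox padding

    out = scale_image(masks, (480, 640, 3))  # im0_shape may now be HWC or HW
    print(out.shape)  # (480, 640, 1): padding cropped, mask resized to the original image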
@@ -434,10 +263,11 @@ def xyxy2xywh(x):
     """
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = empty_like(x)  # faster than clone/copy
-    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
-    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
-    y[..., 2] = x[..., 2] - x[..., 0]  # width
-    y[..., 3] = x[..., 3] - x[..., 1]  # height
+    x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+    y[..., 0] = (x1 + x2) / 2  # x center
+    y[..., 1] = (y1 + y2) / 2  # y center
+    y[..., 2] = x2 - x1  # width
+    y[..., 3] = y2 - y1  # height
     return y

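Note: the coordinate-conversion refactors in this release only name the intermediate terms; the arithmetic is unchanged, so the usual round trip still holds. A quick check against the public ops helpers:

    import numpy as np
    from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh

    boxes = np.array([[100.0, 80.0, 300.0, 320.0]])  # xyxy
    xywh = xyxy2xywh(boxes)
    print(xywh)  # [[200. 200. 200. 240.]]: center (200, 200), width 200, height 240
    print(np.allclose(xywh2xyxy(xywh), boxes))  # True: the conversions are inverses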
@@ -478,10 +308,12 @@ def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
     """
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = empty_like(x)  # faster than clone/copy
-    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
-    y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
-    y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
-    y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
+    xc, yc, xw, xh = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+    half_w, half_h = xw / 2, xh / 2
+    y[..., 0] = w * (xc - half_w) + padw  # top left x
+    y[..., 1] = h * (yc - half_h) + padh  # top left y
+    y[..., 2] = w * (xc + half_w) + padw  # bottom right x
+    y[..., 3] = h * (yc + half_h) + padh  # bottom right y
     return y

@@ -504,10 +336,11 @@ def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0):
     x = clip_boxes(x, (h - eps, w - eps))
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = empty_like(x)  # faster than clone/copy
-    y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center
-    y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center
-    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width
-    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height
+    x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+    y[..., 0] = ((x1 + x2) / 2) / w  # x center
+    y[..., 1] = ((y1 + y2) / 2) / h  # y center
+    y[..., 2] = (x2 - x1) / w  # width
+    y[..., 3] = (y2 - y1) / h  # height
     return y

@@ -756,19 +589,15 @@ def scale_masks(masks, shape, padding: bool = True):
     """
     mh, mw = masks.shape[2:]
     gain = min(mh / shape[0], mw / shape[1])  # gain = old / new
-    pad = [mw - shape[1] * gain, mh - shape[0] * gain]  # wh padding
+    pad_w = mw - shape[1] * gain
+    pad_h = mh - shape[0] * gain
     if padding:
-        pad[0] /= 2
-        pad[1] /= 2
-    top, left = (int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))) if padding else (0, 0)  # y, x
-    bottom, right = (
-        mh - int(round(pad[1] + 0.1)),
-        mw - int(round(pad[0] + 0.1)),
-    )
-    masks = masks[..., top:bottom, left:right]
-    masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False)  # NCHW
-    return masks
+        pad_w /= 2
+        pad_h /= 2
+    top, left = (int(round(pad_h - 0.1)), int(round(pad_w - 0.1))) if padding else (0, 0)
+    bottom = mh - int(round(pad_h + 0.1))
+    right = mw - int(round(pad_w + 0.1))
+    return F.interpolate(masks[..., top:bottom, left:right], shape, mode="bilinear", align_corners=False)  # NCHW masks


 def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
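Note: scale_masks now returns the F.interpolate result directly instead of staging it in a local. A short sketch with a dummy NCHW batch:

    import torch
    from ultralytics.utils.ops import scale_masks

    masks = torch.rand(1, 2, 640, 640)  # 2 masks on a 640x640 letterboxed canvas
    out = scale_masks(masks, (480, 640))  # target original-image shape
    print(out.shape)  # torch.Size([1, 2, 480, 640]): padding cropped, bilinear-resized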
@@ -776,9 +605,9 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
     Rescale segment coordinates from img1_shape to img0_shape.

     Args:
-        img1_shape (tuple): Shape of the source image as (height, width).
+        img1_shape (tuple): Source image shape as HWC or HW (supports both).
         coords (torch.Tensor): Coordinates to scale with shape (N, 2).
-        img0_shape (tuple): Shape of the target image as (height, width).
+        img0_shape (tuple): Image 0 shape as HWC or HW (supports both).
         ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
         normalize (bool): Whether to normalize coordinates to range [0, 1].
         padding (bool): Whether coordinates are based on YOLO-style augmented images with padding.
@@ -786,9 +615,11 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
     Returns:
         (torch.Tensor): Scaled coordinates.
     """
+    img0_h, img0_w = img0_shape[:2]  # supports both HWC or HW shapes
     if ratio_pad is None:  # calculate from img0_shape
-        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
-        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+        img1_h, img1_w = img1_shape[:2]  # supports both HWC or HW shapes
+        gain = min(img1_h / img0_h, img1_w / img0_w)  # gain = old / new
+        pad = (img1_w - img0_w * gain) / 2, (img1_h - img0_h * gain) / 2  # wh padding
     else:
         gain = ratio_pad[0][0]
         pad = ratio_pad[1]
@@ -800,8 +631,8 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
     coords[..., 1] /= gain
     coords = clip_coords(coords, img0_shape)
     if normalize:
-        coords[..., 0] /= img0_shape[1]  # width
-        coords[..., 1] /= img0_shape[0]  # height
+        coords[..., 0] /= img0_w  # width
+        coords[..., 1] /= img0_h  # height
     return coords

ultralytics/utils/tal.py
CHANGED
@@ -387,7 +387,7 @@ def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
     if xywh:
         c_xy = (x1y1 + x2y2) / 2
         wh = x2y2 - x1y1
-        return torch.cat((c_xy, wh), dim)  # xywh bbox
+        return torch.cat([c_xy, wh], dim)  # xywh bbox
     return torch.cat((x1y1, x2y2), dim)  # xyxy bbox

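Note: dist2bbox decodes (left, top, right, bottom) distances around an anchor point; the change merely swaps a tuple for a list inside torch.cat. A tiny worked example:

    import torch
    from ultralytics.utils.tal import dist2bbox

    anchor_points = torch.tensor([[10.0, 10.0]])  # one anchor at (10, 10)
    distance = torch.tensor([[2.0, 3.0, 4.0, 5.0]])  # (left, top, right, bottom)

    print(dist2bbox(distance, anchor_points, xywh=False))  # tensor([[ 8.,  7., 14., 15.]]) xyxy
    print(dist2bbox(distance, anchor_points, xywh=True))   # tensor([[11., 11.,  6.,  8.]]) xywh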
ultralytics/utils/torch_utils.py
CHANGED
@@ -250,68 +250,71 @@ def time_sync():


 def fuse_conv_and_bn(conv, bn):
-    """Fuse Conv2d() and BatchNorm2d() layers."""
-    fusedconv = (
-        nn.Conv2d(
-            conv.in_channels,
-            conv.out_channels,
-            kernel_size=conv.kernel_size,
-            stride=conv.stride,
-            padding=conv.padding,
-            dilation=conv.dilation,
-            groups=conv.groups,
-            bias=True,
-        )
-        .requires_grad_(False)
-        .to(conv.weight.device)
-    )
+    """
+    Fuse Conv2d and BatchNorm2d layers for inference optimization.
+
+    Args:
+        conv (nn.Conv2d): Convolutional layer to fuse.
+        bn (nn.BatchNorm2d): Batch normalization layer to fuse.
+
+    Returns:
+        (nn.Conv2d): The fused convolutional layer with gradients disabled.

-    # Prepare filters
+    Example:
+        >>> conv = nn.Conv2d(3, 16, 3)
+        >>> bn = nn.BatchNorm2d(16)
+        >>> fused_conv = fuse_conv_and_bn(conv, bn)
+    """
+    # Compute fused weights
     w_conv = conv.weight.view(conv.out_channels, -1)
     w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
-    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
+    conv.weight.data = torch.mm(w_bn, w_conv).view(conv.weight.shape)

-    # Prepare spatial bias
-    b_conv = (
-        torch.zeros(conv.weight.shape[0], dtype=conv.weight.dtype, device=conv.weight.device)
-        if conv.bias is None
-        else conv.bias
-    )
+    # Compute fused bias
+    b_conv = torch.zeros(conv.out_channels, device=conv.weight.device) if conv.bias is None else conv.bias
     b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
-    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
+    fused_bias = torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn
+
+    if conv.bias is None:
+        conv.register_parameter("bias", nn.Parameter(fused_bias))
+    else:
+        conv.bias.data = fused_bias

-    return fusedconv
+    return conv.requires_grad_(False)


 def fuse_deconv_and_bn(deconv, bn):
-    """Fuse ConvTranspose2d() and BatchNorm2d() layers."""
-    fuseddconv = (
-        nn.ConvTranspose2d(
-            deconv.in_channels,
-            deconv.out_channels,
-            kernel_size=deconv.kernel_size,
-            stride=deconv.stride,
-            padding=deconv.padding,
-            output_padding=deconv.output_padding,
-            dilation=deconv.dilation,
-            groups=deconv.groups,
-            bias=True,
-        )
-        .requires_grad_(False)
-        .to(deconv.weight.device)
-    )
+    """
+    Fuse ConvTranspose2d and BatchNorm2d layers for inference optimization.

-    # Prepare filters
+    Args:
+        deconv (nn.ConvTranspose2d): Transposed convolutional layer to fuse.
+        bn (nn.BatchNorm2d): Batch normalization layer to fuse.
+
+    Returns:
+        (nn.ConvTranspose2d): The fused transposed convolutional layer with gradients disabled.
+
+    Example:
+        >>> deconv = nn.ConvTranspose2d(16, 3, 3)
+        >>> bn = nn.BatchNorm2d(3)
+        >>> fused_deconv = fuse_deconv_and_bn(deconv, bn)
+    """
+    # Compute fused weights
     w_deconv = deconv.weight.view(deconv.out_channels, -1)
     w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
-    fuseddconv.weight.copy_(torch.mm(w_bn, w_deconv).view(fuseddconv.weight.shape))
+    deconv.weight.data = torch.mm(w_bn, w_deconv).view(deconv.weight.shape)

-    # Prepare spatial bias
-    b_conv = torch.zeros(deconv.weight.shape[1], device=deconv.weight.device) if deconv.bias is None else deconv.bias
+    # Compute fused bias
+    b_conv = torch.zeros(deconv.out_channels, device=deconv.weight.device) if deconv.bias is None else deconv.bias
     b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
-    fuseddconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
+    fused_bias = torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn
+
+    if deconv.bias is None:
+        deconv.register_parameter("bias", nn.Parameter(fused_bias))
+    else:
+        deconv.bias.data = fused_bias

-    return fuseddconv
+    return deconv.requires_grad_(False)


 def model_info(model, detailed=False, verbose=True, imgsz=640):
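Note: fuse_conv_and_bn now folds the BatchNorm into the existing Conv2d in place (registering a bias parameter when the conv had none) rather than constructing a fresh layer. A quick equivalence check, sketched against the public torch_utils import:

    import torch
    import torch.nn as nn
    from ultralytics.utils.torch_utils import fuse_conv_and_bn

    conv = nn.Conv2d(3, 16, 3, bias=False)
    bn = nn.BatchNorm2d(16).eval()
    bn.running_mean.uniform_(-1, 1)  # non-trivial statistics so fusion does real work
    bn.running_var.uniform_(0.5, 1.5)

    x = torch.randn(1, 3, 32, 32)
    with torch.no_grad():
        y_ref = bn(conv(x))  # unfused reference (eval mode uses running stats)
        fused = fuse_conv_and_bn(conv, bn)  # mutates conv and returns it
        y_fused = fused(x)
    print(torch.allclose(y_ref, y_fused, atol=1e-5))  # True: fusion preserves outputs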
|