dgenerate-ultralytics-headless 8.3.196__py3-none-any.whl → 8.3.198__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/RECORD +46 -45
- tests/test_engine.py +9 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +0 -1
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/default.yaml +96 -94
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/augment.py +1 -1
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +36 -35
- ultralytics/engine/model.py +1 -2
- ultralytics/engine/predictor.py +1 -2
- ultralytics/engine/results.py +1 -1
- ultralytics/engine/trainer.py +8 -10
- ultralytics/engine/tuner.py +54 -32
- ultralytics/models/sam/modules/decoders.py +3 -3
- ultralytics/models/sam/modules/sam.py +5 -5
- ultralytics/models/sam/predict.py +11 -11
- ultralytics/models/yolo/classify/train.py +2 -7
- ultralytics/models/yolo/classify/val.py +2 -2
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -11
- ultralytics/models/yolo/detect/val.py +4 -4
- ultralytics/models/yolo/obb/val.py +3 -3
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +0 -7
- ultralytics/models/yolo/pose/val.py +2 -2
- ultralytics/models/yolo/segment/predict.py +2 -2
- ultralytics/models/yolo/segment/train.py +0 -6
- ultralytics/models/yolo/segment/val.py +13 -11
- ultralytics/models/yolo/yoloe/val.py +1 -1
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/modules/head.py +1 -2
- ultralytics/nn/tasks.py +2 -2
- ultralytics/utils/checks.py +1 -1
- ultralytics/utils/loss.py +1 -2
- ultralytics/utils/metrics.py +6 -6
- ultralytics/utils/nms.py +8 -14
- ultralytics/utils/plotting.py +22 -36
- ultralytics/utils/torch_utils.py +9 -27
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/predict.py CHANGED

@@ -423,7 +423,7 @@ class Predictor(BasePredictor):
             pred_masks.append(crop_masks)
             pred_bboxes.append(crop_bboxes)
             pred_scores.append(crop_scores)
-            region_areas.append(area.expand(len(crop_masks)))
+            region_areas.append(area.expand(crop_masks.shape[0]))

         pred_masks = torch.cat(pred_masks)
         pred_bboxes = torch.cat(pred_bboxes)
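Nearly every hunk in this release makes the same mechanical swap: `len(tensor)` becomes `tensor.shape[0]`. The two are interchangeable for tensors with at least one dimension, but `.shape[0]` reads the size straight from metadata and avoids `len()`'s `TypeError` on 0-d tensors. A quick sanity check in plain PyTorch, independent of this package:

```python
import torch

t = torch.zeros(8, 3, 640, 640)
assert len(t) == t.shape[0] == 8  # equivalent whenever ndim >= 1

scalar = torch.tensor(1.0)  # 0-d tensor
print(scalar.shape)  # torch.Size([]) -- shape is still usable metadata
try:
    len(scalar)
except TypeError as e:
    print(e)  # len() of a 0-d tensor
```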
@@ -504,14 +504,14 @@ class Predictor(BasePredictor):
         # (N, 1, H, W), (N, 1)
         pred_masks, pred_scores = preds[:2]
         pred_bboxes = preds[2] if self.segment_all else None
-        names = dict(enumerate(str(i) for i in range(len(pred_masks))))
+        names = dict(enumerate(str(i) for i in range(pred_masks.shape[0])))

         if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

         results = []
         for masks, orig_img, img_path in zip([pred_masks], orig_imgs, self.batch[0]):
-            if len(masks) == 0:
+            if masks.shape[0] == 0:
                 masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
             else:
                 masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0]
@@ -521,7 +521,7 @@ class Predictor(BasePredictor):
             else:
                 pred_bboxes = batched_mask_to_box(masks)
                 # NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
-                cls = torch.arange(len(pred_masks), dtype=torch.int32, device=pred_masks.device)
+                cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
                 idx = pred_scores > self.args.conf
                 pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)[idx]
                 masks = masks[idx]
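Since SAM has no classification head, the placeholder `cls` is just one index per mask so the downstream `Results` plumbing sees a consistent `(x1, y1, x2, y2, conf, cls)` layout. The construction in isolation, as a standalone sketch rather than package code:

```python
import torch

pred_masks = torch.rand(4, 640, 640) > 0.5  # four binary masks
cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
print(cls)  # tensor([0, 1, 2, 3], dtype=torch.int32) -- one placeholder id per mask
```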
@@ -633,7 +633,7 @@ class Predictor(BasePredictor):
         """
         import torchvision  # scope for faster 'import ultralytics'

-        if len(masks) == 0:
+        if masks.shape[0] == 0:
             return masks

         # Filter small disconnected regions and holes
@@ -693,14 +693,14 @@ class Predictor(BasePredictor):
         dst_shape = dst_shape or (self.args.imgsz, self.args.imgsz)
         prompts = self._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
         pred_masks, pred_scores = self._inference_features(features, *prompts, multimask_output)
-        if len(pred_masks) == 0:
+        if pred_masks.shape[0] == 0:
             pred_masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
         else:
             pred_masks = ops.scale_masks(pred_masks[None].float(), src_shape, padding=False)[0]
             pred_masks = pred_masks > self.model.mask_threshold  # to bool
             pred_bboxes = batched_mask_to_box(pred_masks)
             # NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
-            cls = torch.arange(len(pred_masks), dtype=torch.int32, device=pred_masks.device)
+            cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
             pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)
         return pred_masks, pred_bboxes
@@ -770,7 +770,7 @@ class SAM2Predictor(Predictor):
         bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
         if bboxes is not None:
             bboxes = bboxes.view(-1, 2, 2)
-            bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(len(bboxes), -1)
+            bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(bboxes.shape[0], -1)
             # NOTE: merge "boxes" and "points" into a single "points" input
             # (where boxes are added at the beginning) to model.sam_prompt_encoder
             if points is not None:
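SAM 2 encodes a box prompt as two labeled points (labels 2 and 3 for the corner points), so the single `[[2, 3]]` template is broadcast to one row per box. The expand in isolation, as a standalone sketch:

```python
import torch

bboxes = torch.tensor([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 20.0, 20.0]]).view(-1, 2, 2)
bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(bboxes.shape[0], -1)
print(bbox_labels)  # tensor([[2, 3], [2, 3]], dtype=torch.int32) -- a view, no data copied
```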
@@ -1025,7 +1025,7 @@ class SAM2VideoPredictor(SAM2Predictor):
         pred_masks = current_out["pred_masks"].flatten(0, 1)
         pred_masks = pred_masks[(pred_masks > self.model.mask_threshold).sum((1, 2)) > 0]  # filter blank masks

-        return pred_masks, torch.ones(len(pred_masks), dtype=pred_masks.dtype, device=pred_masks.device)
+        return pred_masks, torch.ones(pred_masks.shape[0], dtype=pred_masks.dtype, device=pred_masks.device)

    def postprocess(self, preds, img, orig_imgs):
        """
@@ -1465,7 +1465,7 @@ class SAM2VideoPredictor(SAM2Predictor):
         else:
             maskmem_pos_enc = model_constants["maskmem_pos_enc"]
             # expand the cached maskmem_pos_enc to the actual batch size
-            batch_size = out_maskmem_pos_enc[0].size(0)
+            batch_size = out_maskmem_pos_enc[0].shape[0]
             if batch_size > 1:
                 out_maskmem_pos_enc = [x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc]
         return out_maskmem_pos_enc
@@ -2028,7 +2028,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
             point_inputs={"point_coords": point, "point_labels": label} if obj_idx is not None else None,
             mask_inputs=mask,
             multimask_output=False,
-            high_res_features=[feat[: pix_feat_with_mem.size(0)] for feat in self.high_res_features],
+            high_res_features=[feat[: pix_feat_with_mem.shape[0]] for feat in self.high_res_features],
         )
         return {
             "pred_masks": low_res_masks,
ultralytics/models/yolo/classify/train.py CHANGED

@@ -12,7 +12,7 @@ from ultralytics.engine.trainer import BaseTrainer
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import ClassificationModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.plotting import plot_images, plot_results
+from ultralytics.utils.plotting import plot_images
 from ultralytics.utils.torch_utils import is_parallel, strip_optimizer, torch_distributed_zero_first
@@ -39,7 +39,6 @@ class ClassificationTrainer(BaseTrainer):
         progress_string: Return a formatted string showing training progress.
         get_validator: Return an instance of ClassificationValidator.
         label_loss_items: Return a loss dict with labelled training loss items.
-        plot_metrics: Plot metrics from a CSV file.
         final_eval: Evaluate trained model and save validation results.
         plot_training_samples: Plot training samples with their annotations.
@@ -195,10 +194,6 @@ class ClassificationTrainer(BaseTrainer):
         loss_items = [round(float(loss_items), 5)]
         return dict(zip(keys, loss_items))

-    def plot_metrics(self):
-        """Plot metrics from a CSV file."""
-        plot_results(file=self.csv, classify=True, on_plot=self.on_plot)  # save results.png
-
     def final_eval(self):
         """Evaluate trained model and save validation results."""
         for f in self.last, self.best:
@@ -220,7 +215,7 @@ class ClassificationTrainer(BaseTrainer):
             batch (dict[str, torch.Tensor]): Batch containing images and class labels.
             ni (int): Number of iterations.
         """
-        batch["batch_idx"] = torch.arange(len(batch["img"]))  # add batch index for plotting
+        batch["batch_idx"] = torch.arange(batch["img"].shape[0])  # add batch index for plotting
         plot_images(
             labels=batch,
             fname=self.save_dir / f"train_batch{ni}.jpg",
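The `batch_idx` tensor added before plotting simply tags each label with the image it belongs to, so `plot_images` can group annotations per image in the mosaic. Reproducing the idea on a toy classification batch:

```python
import torch

batch = {"img": torch.rand(16, 3, 224, 224), "cls": torch.randint(0, 10, (16,))}
batch["batch_idx"] = torch.arange(batch["img"].shape[0])  # one index per image: 0..15
print(batch["batch_idx"][:4])  # tensor([0, 1, 2, 3])
```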
ultralytics/models/yolo/classify/val.py CHANGED

@@ -178,7 +178,7 @@ class ClassificationValidator(BaseValidator):
         >>> batch = {"img": torch.rand(16, 3, 224, 224), "cls": torch.randint(0, 10, (16,))}
         >>> validator.plot_val_samples(batch, 0)
         """
-        batch["batch_idx"] = torch.arange(len(batch["img"]))  # add batch index for plotting
+        batch["batch_idx"] = torch.arange(batch["img"].shape[0])  # add batch index for plotting
         plot_images(
             labels=batch,
             fname=self.save_dir / f"val_batch{ni}_labels.jpg",
@@ -203,7 +203,7 @@ class ClassificationValidator(BaseValidator):
         """
         batched_preds = dict(
             img=batch["img"],
-            batch_idx=torch.arange(len(batch["img"])),
+            batch_idx=torch.arange(batch["img"].shape[0]),
             cls=torch.argmax(preds, dim=1),
         )
         plot_images(
ultralytics/models/yolo/detect/predict.py CHANGED

@@ -89,7 +89,7 @@ class DetectionPredictor(BasePredictor):
         obj_feats = torch.cat(
             [x.permute(0, 2, 3, 1).reshape(x.shape[0], -1, s, x.shape[1] // s).mean(dim=-1) for x in feat_maps], dim=1
         )  # mean reduce all vectors to same length
-        return [feats[idx] if len(idx) else [] for feats, idx in zip(obj_feats, idxs)]  # for each img in batch
+        return [feats[idx] if idx.shape[0] else [] for feats, idx in zip(obj_feats, idxs)]  # for each img in batch

     def construct_results(self, preds, img, orig_imgs):
         """
ultralytics/models/yolo/detect/train.py CHANGED

@@ -17,7 +17,7 @@ from ultralytics.models import yolo
 from ultralytics.nn.tasks import DetectionModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
 from ultralytics.utils.patches import override_configs
-from ultralytics.utils.plotting import plot_images, plot_labels, plot_results
+from ultralytics.utils.plotting import plot_images, plot_labels
 from ultralytics.utils.torch_utils import torch_distributed_zero_first, unwrap_model
@@ -43,7 +43,6 @@ class DetectionTrainer(BaseTrainer):
         label_loss_items: Return a loss dictionary with labeled training loss items.
         progress_string: Return a formatted string of training progress.
         plot_training_samples: Plot training samples with their annotations.
-        plot_metrics: Plot metrics from a CSV file.
         plot_training_labels: Create a labeled training plot of the YOLO model.
         auto_batch: Calculate optimal batch size based on model memory requirements.
@@ -64,7 +63,6 @@ class DetectionTrainer(BaseTrainer):
             _callbacks (list, optional): List of callback functions to be executed during training.
         """
         super().__init__(cfg, overrides, _callbacks)
-        self.dynamic_tensors = ["batch_idx", "cls", "bboxes"]

     def build_dataset(self, img_path: str, mode: str = "train", batch: int | None = None):
         """
@@ -138,10 +136,6 @@ class DetectionTrainer(BaseTrainer):
         ]  # new shape (stretched to gs-multiple)
         imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
         batch["img"] = imgs
-
-        if self.args.compile:
-            for k in self.dynamic_tensors:
-                torch._dynamo.maybe_mark_dynamic(batch[k], 0)
         return batch

     def set_model_attributes(self):
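The deleted block hinted to `torch.compile` that label tensors vary in length from batch to batch. `torch._dynamo.maybe_mark_dynamic(t, dim)` marks a dimension as dynamic so the compiler does not re-specialize for every new object count; a minimal sketch of what the removed code was doing:

```python
import torch

labels = torch.zeros(37, 5)  # 37 objects this batch; the count changes every batch
torch._dynamo.maybe_mark_dynamic(labels, 0)  # hint: dim 0 may vary between calls
# Harmless no-op when run eagerly; it only influences torch.compile tracing.
```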
@@ -222,10 +216,6 @@ class DetectionTrainer(BaseTrainer):
             on_plot=self.on_plot,
         )

-    def plot_metrics(self):
-        """Plot metrics from a CSV file."""
-        plot_results(file=self.csv, on_plot=self.on_plot)  # save results.png
-
     def plot_training_labels(self):
         """Create a labeled training plot of the YOLO model."""
         boxes = np.concatenate([lb["bboxes"] for lb in self.train_loader.dataset.labels], 0)
ultralytics/models/yolo/detect/val.py CHANGED

@@ -146,7 +146,7 @@ class DetectionValidator(BaseValidator):
         ori_shape = batch["ori_shape"][si]
         imgsz = batch["img"].shape[2:]
         ratio_pad = batch["ratio_pad"][si]
-        if len(cls):
+        if cls.shape[0]:
             bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
         return {
             "cls": cls,
@@ -185,7 +185,7 @@ class DetectionValidator(BaseValidator):
         predn = self._prepare_pred(pred)

         cls = pbatch["cls"].cpu().numpy()
-        no_pred = len(predn["cls"]) == 0
+        no_pred = predn["cls"].shape[0] == 0
         self.metrics.update_stats(
             {
                 **self._process_batch(predn, pbatch),
@@ -268,8 +268,8 @@ class DetectionValidator(BaseValidator):
         Returns:
             (dict[str, np.ndarray]): Dictionary containing 'tp' key with correct prediction matrix of shape (N, 10) for 10 IoU levels.
         """
-        if len(batch["cls"]) == 0 or len(preds["cls"]) == 0:
-            return {"tp": np.zeros((len(preds["cls"]), self.niou), dtype=bool)}
+        if batch["cls"].shape[0] == 0 or preds["cls"].shape[0] == 0:
+            return {"tp": np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)}
         iou = box_iou(batch["bboxes"], preds["bboxes"])
         return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
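The empty-case return keeps the true-positive matrix at a well-defined `(0, niou)` shape, so later stacking and mAP accumulation need no special-casing. A tiny demonstration with the same shapes (the variable names are illustrative):

```python
import numpy as np

niou = 10  # 10 IoU thresholds, 0.50:0.95
pred_cls = np.empty(0)  # no predictions on this image
tp = np.zeros((pred_cls.shape[0], niou), dtype=bool)
print(tp.shape)  # (0, 10) -- concatenates cleanly with non-empty (N, 10) matrices
```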
ultralytics/models/yolo/obb/val.py CHANGED

@@ -93,8 +93,8 @@ class OBBValidator(DetectionValidator):
         >>> gt_cls = torch.randint(0, 5, (50,))  # 50 ground truth class labels
         >>> correct_matrix = validator._process_batch(detections, gt_bboxes, gt_cls)
         """
-        if len(batch["cls"]) == 0 or len(preds["cls"]) == 0:
-            return {"tp": np.zeros((len(preds["cls"]), self.niou), dtype=bool)}
+        if batch["cls"].shape[0] == 0 or preds["cls"].shape[0] == 0:
+            return {"tp": np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)}
         iou = batch_probiou(batch["bboxes"], preds["bboxes"])
         return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
@@ -134,7 +134,7 @@ class OBBValidator(DetectionValidator):
         ori_shape = batch["ori_shape"][si]
         imgsz = batch["img"].shape[2:]
         ratio_pad = batch["ratio_pad"][si]
-        if len(cls):
+        if cls.shape[0]:
             bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]])  # target boxes
         return {
             "cls": cls,
ultralytics/models/yolo/pose/predict.py CHANGED

@@ -73,7 +73,7 @@ class PosePredictor(DetectionPredictor):
         """
         result = super().construct_result(pred, img, orig_img, img_path)
         # Extract keypoints from prediction and reshape according to model's keypoint shape
-        pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape)
+        pred_kpts = pred[:, 6:].view(pred.shape[0], *self.model.kpt_shape)
         # Scale keypoints coordinates to match the original image dimensions
         pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
         result.update(keypoints=pred_kpts)
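The reshape splits the flat tail of each prediction row back into `(num_keypoints, dims)` using the model's `kpt_shape`. With the COCO pose default of 17 keypoints × (x, y, visibility):

```python
import torch

kpt_shape = (17, 3)  # COCO pose default: 17 keypoints, each (x, y, visibility)
pred = torch.rand(5, 6 + 17 * 3)  # 5 detections: box(4) + conf + cls + flattened keypoints
pred_kpts = pred[:, 6:].view(pred.shape[0], *kpt_shape)
print(pred_kpts.shape)  # torch.Size([5, 17, 3])
```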
ultralytics/models/yolo/pose/train.py CHANGED

@@ -9,7 +9,6 @@ from typing import Any
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import PoseModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER
-from ultralytics.utils.plotting import plot_results


 class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -30,7 +29,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
         set_model_attributes: Set keypoints shape attribute on the model.
         get_validator: Create a validator instance for model evaluation.
         plot_training_samples: Visualize training samples with keypoints.
-        plot_metrics: Generate and save training/validation metric plots.
         get_dataset: Retrieve the dataset and ensure it contains required kpt_shape key.

     Examples:
@@ -57,7 +55,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
             overrides = {}
         overrides["task"] = "pose"
         super().__init__(cfg, overrides, _callbacks)
-        self.dynamic_tensors = ["batch_idx", "cls", "bboxes", "keypoints"]

         if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
             LOGGER.warning(
@@ -102,10 +99,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )

-    def plot_metrics(self):
-        """Plot training/validation metrics."""
-        plot_results(file=self.csv, pose=True, on_plot=self.on_plot)  # save results.png
-
     def get_dataset(self) -> dict[str, Any]:
         """
         Retrieve the dataset and ensure it contains the required `kpt_shape` key.
ultralytics/models/yolo/pose/val.py CHANGED

@@ -192,8 +192,8 @@ class PoseValidator(DetectionValidator):
         """
         tp = super()._process_batch(preds, batch)
         gt_cls = batch["cls"]
-        if len(gt_cls) == 0 or len(preds["cls"]) == 0:
-            tp_p = np.zeros((len(preds["cls"]), self.niou), dtype=bool)
+        if gt_cls.shape[0] == 0 or preds["cls"].shape[0] == 0:
+            tp_p = np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)
         else:
             # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384
             area = ops.xyxy2xywh(batch["bboxes"])[:, 2:].prod(1) * 0.53
ultralytics/models/yolo/segment/predict.py CHANGED

@@ -90,7 +90,7 @@ class SegmentationPredictor(DetectionPredictor):
         Construct a single result object from the prediction.

         Args:
-            pred (np.ndarray): The predicted bounding boxes, scores, and masks.
+            pred (torch.Tensor): The predicted bounding boxes, scores, and masks.
             img (torch.Tensor): The image after preprocessing.
             orig_img (np.ndarray): The original image before preprocessing.
             img_path (str): The path to the original image.
@@ -99,7 +99,7 @@ class SegmentationPredictor(DetectionPredictor):
         Returns:
             (Results): Result object containing the original image, image path, class names, bounding boxes, and masks.
         """
-        if len(pred) == 0:  # save empty boxes
+        if pred.shape[0] == 0:  # save empty boxes
             masks = None
         elif self.args.retina_masks:
             pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
ultralytics/models/yolo/segment/train.py CHANGED

@@ -8,7 +8,6 @@ from pathlib import Path
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import SegmentationModel
 from ultralytics.utils import DEFAULT_CFG, RANK
-from ultralytics.utils.plotting import plot_results


 class SegmentationTrainer(yolo.detect.DetectionTrainer):
@@ -41,7 +40,6 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
             overrides = {}
         overrides["task"] = "segment"
         super().__init__(cfg, overrides, _callbacks)
-        self.dynamic_tensors = ["batch_idx", "cls", "bboxes", "masks"]

     def get_model(self, cfg: dict | str | None = None, weights: str | Path | None = None, verbose: bool = True):
         """
@@ -72,7 +70,3 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
         return yolo.segment.SegmentationValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
-
-    def plot_metrics(self):
-        """Plot training/validation metrics."""
-        plot_results(file=self.csv, segment=True, on_plot=self.on_plot)  # save results.png
ultralytics/models/yolo/segment/val.py CHANGED

@@ -112,7 +112,7 @@ class SegmentationValidator(DetectionValidator):
             coefficient = pred.pop("extra")
             pred["masks"] = (
                 self.process(proto[i], coefficient, pred["bboxes"], shape=imgsz)
-                if len(coefficient)
+                if coefficient.shape[0]
                 else torch.zeros(
                     (0, *(imgsz if self.process is ops.process_mask_native else proto.shape[2:])),
                     dtype=torch.uint8,
@@ -133,16 +133,18 @@ class SegmentationValidator(DetectionValidator):
             (dict[str, Any]): Prepared batch with processed annotations.
         """
         prepared_batch = super()._prepare_batch(si, batch)
-        nl = len(prepared_batch["cls"])
+        nl = prepared_batch["cls"].shape[0]
         if self.args.overlap_mask:
             masks = batch["masks"][si]
             index = torch.arange(1, nl + 1, device=masks.device).view(nl, 1, 1)
             masks = (masks == index).float()
         else:
             masks = batch["masks"][batch["batch_idx"] == si]
-        if nl and self.process is ops.process_mask_native:
-            masks = F.interpolate(masks[None], prepared_batch["imgsz"], mode="bilinear", align_corners=False)[0]
-            masks = masks.gt_(0.5)
+        if nl:
+            mask_size = [s if self.process is ops.process_mask_native else s // 4 for s in prepared_batch["imgsz"]]
+            if masks.shape[1:] != mask_size:
+                masks = F.interpolate(masks[None], mask_size, mode="bilinear", align_corners=False)[0]
+            masks = masks.gt_(0.5)
         prepared_batch["masks"] = masks
         return prepared_batch
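The new `_prepare_batch` logic resizes ground-truth masks only when they do not already match the target resolution (full image size for the native-mask path, a 4× downsampled size otherwise), then binarizes in place. The resize-and-threshold step in isolation, with an assumed 640×640 target:

```python
import torch
import torch.nn.functional as F

masks = torch.rand(3, 160, 160)  # ground-truth masks at prototype resolution
mask_size = (640, 640)  # assumed target size for the native-mask path
if masks.shape[1:] != mask_size:
    masks = F.interpolate(masks[None], mask_size, mode="bilinear", align_corners=False)[0]
masks = masks.gt_(0.5)  # in-place threshold to 0./1.
print(masks.shape)  # torch.Size([3, 640, 640])
```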
@@ -168,8 +170,8 @@ class SegmentationValidator(DetectionValidator):
         """
         tp = super()._process_batch(preds, batch)
         gt_cls = batch["cls"]
-        if len(gt_cls) == 0 or len(preds["cls"]) == 0:
-            tp_m = np.zeros((len(preds["cls"]), self.niou), dtype=bool)
+        if gt_cls.shape[0] == 0 or preds["cls"].shape[0] == 0:
+            tp_m = np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)
         else:
             iou = mask_iou(batch["masks"].flatten(1), preds["masks"].flatten(1))
             tp_m = self.match_predictions(preds["cls"], gt_cls, iou).cpu().numpy()
@@ -187,10 +189,10 @@ class SegmentationValidator(DetectionValidator):
         """
         for p in preds:
             masks = p["masks"]
-            if masks.shape[0] > 50:
-                LOGGER.warning("Limiting validation plots to first 50 items.")
-                p["masks"] = torch.as_tensor(masks[:50], dtype=torch.uint8).cpu()
-        super().plot_predictions(batch, preds, ni, max_det=50)  # plot bboxes
+            if masks.shape[0] > self.args.max_det:
+                LOGGER.warning(f"Limiting validation plots to 'max_det={self.args.max_det}' items.")
+                p["masks"] = torch.as_tensor(masks[: self.args.max_det], dtype=torch.uint8).cpu()
+        super().plot_predictions(batch, preds, ni, max_det=self.args.max_det)  # plot bboxes

     def save_one_txt(self, predn: torch.Tensor, save_conf: bool, shape: tuple[int, int], file: Path) -> None:
         """
ultralytics/models/yolo/yoloe/val.py CHANGED

@@ -89,7 +89,7 @@ class YOLOEDetectValidator(DetectionValidator):
         for i in range(preds.shape[0]):
             cls = batch["cls"][batch_idx == i].squeeze(-1).to(torch.int).unique(sorted=True)
             pad_cls = torch.ones(preds.shape[1], device=self.device) * -1
-            pad_cls[: len(cls)] = cls
+            pad_cls[: cls.shape[0]] = cls
             for c in cls:
                 visual_pe[c] += preds[i][pad_cls == c].sum(0) / cls_visual_num[c]
ultralytics/nn/modules/block.py CHANGED

@@ -1921,7 +1921,7 @@ class A2C2f(nn.Module):
         y.extend(m(y[-1]) for m in self.m)
         y = self.cv2(torch.cat(y, 1))
         if self.gamma is not None:
-            return x + self.gamma.view(-1, len(self.gamma), 1, 1) * y
+            return x + self.gamma.view(-1, self.gamma.shape[0], 1, 1) * y
         return y
ultralytics/nn/modules/head.py CHANGED

@@ -13,7 +13,7 @@ from torch.nn.init import constant_, xavier_uniform_

 from ultralytics.utils import NOT_MACOS14
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
-from ultralytics.utils.torch_utils import disable_dynamo, fuse_conv_and_bn, smart_inference_mode
+from ultralytics.utils.torch_utils import fuse_conv_and_bn, smart_inference_mode

 from .block import DFL, SAVPE, BNContrastiveHead, ContrastiveHead, Proto, Residual, SwiGLUFFN
 from .conv import Conv, DWConv
@@ -149,7 +149,6 @@ class Detect(nn.Module):
         y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})

-    @disable_dynamo
     def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
         """
         Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
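`disable_dynamo` (previously imported above from `ultralytics.utils.torch_utils`) kept `_inference` out of `torch.compile` graphs; dropping the decorator lets the decode step be traced. A rough stand-in for what such a helper typically does, assuming it delegates to `torch._dynamo.disable`:

```python
import torch

def disable_dynamo(obj):
    """Hypothetical sketch: keep `obj` out of torch.compile graphs when Dynamo exists."""
    return torch._dynamo.disable(obj) if hasattr(torch, "_dynamo") else obj

@disable_dynamo
def decode(x):
    return x.sigmoid()  # runs eagerly, even inside a compiled caller

print(decode(torch.zeros(2)))  # tensor([0.5000, 0.5000])
```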
ultralytics/nn/tasks.py CHANGED

@@ -766,7 +766,7 @@ class RTDETRDetectionModel(DetectionModel):

         img = batch["img"]
         # NOTE: preprocess gt_bbox and gt_labels to list.
-        bs = len(img)
+        bs = img.shape[0]
         batch_idx = batch["batch_idx"]
         gt_groups = [(batch_idx == i).sum().item() for i in range(bs)]
         targets = {
@@ -923,7 +923,7 @@ class WorldModel(DetectionModel):
             (torch.Tensor): Model's output tensor.
         """
         txt_feats = (self.txt_feats if txt_feats is None else txt_feats).to(device=x.device, dtype=x.dtype)
-        if len(txt_feats) != len(x) or self.model[-1].export:
+        if txt_feats.shape[0] != x.shape[0] or self.model[-1].export:
             txt_feats = txt_feats.expand(x.shape[0], -1, -1)
         ori_txt_feats = txt_feats.clone()
         y, dt, embeddings = [], [], []  # outputs
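`expand` here broadcasts a single text-embedding set across the image batch without copying data, and the guard now compares first dimensions explicitly. The same pattern standalone:

```python
import torch

txt_feats = torch.rand(1, 80, 512)  # one set of class text embeddings
x = torch.rand(4, 3, 640, 640)  # batch of four images
if txt_feats.shape[0] != x.shape[0]:
    txt_feats = txt_feats.expand(x.shape[0], -1, -1)  # a broadcast view, no copy
print(txt_feats.shape)  # torch.Size([4, 80, 512])
```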
ultralytics/utils/checks.py CHANGED

@@ -907,7 +907,7 @@ def is_intel():
     try:
         result = subprocess.run(["xpu-smi", "discovery"], capture_output=True, text=True, timeout=5)
         return "intel" in result.stdout.lower()
-    except (subprocess.SubprocessError, FileNotFoundError):
+    except Exception:  # broad clause to capture all Intel GPU exception types
         return False
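The broadened clause trades precision for robustness: any failure of the `xpu-smi` probe (missing binary, timeout, permission error) now simply means "not Intel". The general shape of the pattern, as a standalone sketch with a hypothetical helper name:

```python
import subprocess

def vendor_probe(cmd: list[str], needle: str) -> bool:
    """Hypothetical sketch: True only if a vendor CLI runs and its output mentions `needle`."""
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
        return needle in result.stdout.lower()
    except Exception:  # missing binary, timeout, permissions, ...
        return False

print(vendor_probe(["xpu-smi", "discovery"], "intel"))
```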
ultralytics/utils/loss.py CHANGED

@@ -11,7 +11,7 @@ import torch.nn.functional as F
 from ultralytics.utils.metrics import OKS_SIGMA
 from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh
 from ultralytics.utils.tal import RotatedTaskAlignedAssigner, TaskAlignedAssigner, dist2bbox, dist2rbox, make_anchors
-from ultralytics.utils.torch_utils import autocast, disable_dynamo
+from ultralytics.utils.torch_utils import autocast

 from .metrics import bbox_iou, probiou
 from .tal import bbox2dist
@@ -215,7 +215,6 @@ class v8DetectionLoss:
         self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0)
         self.bbox_loss = BboxLoss(m.reg_max).to(device)
         self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)
-        disable_dynamo(self.__class__)  # exclude from compile

     def preprocess(self, targets: torch.Tensor, batch_size: int, scale_tensor: torch.Tensor) -> torch.Tensor:
         """Preprocess targets by converting to tensor format and scaling coordinates."""
ultralytics/utils/metrics.py CHANGED

@@ -397,11 +397,11 @@ class ConfusionMatrix(DataExportMixin):
         gt_cls, gt_bboxes = batch["cls"], batch["bboxes"]
         if self.matches is not None:  # only if visualization is enabled
             self.matches = {k: defaultdict(list) for k in {"TP", "FP", "FN", "GT"}}
-            for i in range(len(gt_cls)):
+            for i in range(gt_cls.shape[0]):
                 self._append_matches("GT", batch, i)  # store GT
         is_obb = gt_bboxes.shape[1] == 5  # check if boxes contains angle for OBB
         conf = 0.25 if conf in {None, 0.01 if is_obb else 0.001} else conf  # apply 0.25 if default val conf is passed
-        no_pred = len(detections["cls"]) == 0
+        no_pred = detections["cls"].shape[0] == 0
         if gt_cls.shape[0] == 0:  # Check if labels is empty
             if not no_pred:
                 detections = {k: detections[k][detections["conf"] > conf] for k in detections}
@@ -491,13 +491,13 @@ class ConfusionMatrix(DataExportMixin):
         for i, mtype in enumerate(["GT", "FP", "TP", "FN"]):
             mbatch = self.matches[mtype]
             if "conf" not in mbatch:
-                mbatch["conf"] = torch.tensor([1.0] * len(mbatch["bboxes"]), device=img.device)
-                mbatch["batch_idx"] = torch.ones(len(mbatch["bboxes"]), device=img.device) * i
+                mbatch["conf"] = torch.tensor([1.0] * mbatch["bboxes"].shape[0], device=img.device)
+                mbatch["batch_idx"] = torch.ones(mbatch["bboxes"].shape[0], device=img.device) * i
             for k in mbatch.keys():
                 labels[k] += mbatch[k]

         labels = {k: torch.stack(v, 0) if len(v) else v for k, v in labels.items()}
-        if self.task != "obb" and len(labels["bboxes"]):
+        if self.task != "obb" and labels["bboxes"].shape[0]:
             labels["bboxes"] = xyxy2xywh(labels["bboxes"])
         (save_dir / "visualizations").mkdir(parents=True, exist_ok=True)
         plot_images(
@@ -980,7 +980,7 @@ class Metric(SimpleClass):

     def fitness(self) -> float:
         """Return model fitness as a weighted combination of metrics."""
-        w = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
+        w = [0.0, 0.0, 0.0, 1.0]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
         return (np.nan_to_num(np.array(self.mean_results())) * w).sum()

     def update(self, results: tuple):
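The weight change makes fitness equal to mAP@0.5:0.95 alone instead of the long-standing 0.1/0.9 blend with mAP@0.5. A worked example with made-up metric values:

```python
import numpy as np

results = np.array([0.82, 0.74, 0.79, 0.58])  # [P, R, mAP@0.5, mAP@0.5:0.95], illustrative
old_w = np.array([0.0, 0.0, 0.1, 0.9])
new_w = np.array([0.0, 0.0, 0.0, 1.0])
print((results * old_w).sum())  # ~0.601 under the old blend
print((results * new_w).sum())  # 0.58 -- fitness is now pure mAP@0.5:0.95
```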
ultralytics/utils/nms.py CHANGED

@@ -192,6 +192,7 @@ class TorchNMS:
         iou_threshold: float,
         use_triu: bool = True,
         iou_func=box_iou,
+        exit_early: bool = True,
     ) -> torch.Tensor:
         """
         Fast-NMS implementation from https://arxiv.org/pdf/1904.02689 using upper triangular matrix operations.
@@ -202,6 +203,7 @@ class TorchNMS:
             iou_threshold (float): IoU threshold for suppression.
             use_triu (bool): Whether to use torch.triu operator for upper triangular matrix operations.
             iou_func (callable): Function to compute IoU between boxes.
+            exit_early (bool): Whether to exit early if there are no boxes.

         Returns:
             (torch.Tensor): Indices of boxes to keep after NMS.
@@ -212,7 +214,7 @@ class TorchNMS:
             >>> scores = torch.tensor([0.9, 0.8])
             >>> keep = TorchNMS.nms(boxes, scores, 0.5)
         """
-        if boxes.numel() == 0:
+        if boxes.numel() == 0 and exit_early:
             return torch.empty((0,), dtype=torch.int64, device=boxes.device)

         sorted_idx = torch.argsort(scores, descending=True)
@@ -261,12 +263,11 @@ class TorchNMS:
         areas = (x2 - x1) * (y2 - y1)

         # Sort by scores descending
-        order = scores.argsort(descending=True)
+        order = scores.argsort(0, descending=True)

         # Pre-allocate keep list with maximum possible size
         keep = torch.zeros(order.numel(), dtype=torch.int64, device=boxes.device)
         keep_idx = 0
-
         while order.numel() > 0:
             i = order[0]
             keep[keep_idx] = i
@@ -274,7 +275,6 @@ class TorchNMS:

             if order.numel() == 1:
                 break
-
             # Vectorized IoU calculation for remaining boxes
             rest = order[1:]
             xx1 = torch.maximum(x1[i], x1[rest])
@@ -286,20 +286,14 @@ class TorchNMS:
             w = (xx2 - xx1).clamp_(min=0)
             h = (yy2 - yy1).clamp_(min=0)
             inter = w * h
-
-            # Early termination: skip IoU calculation if no intersection
+            # Early exit: skip IoU calculation if no intersection
             if inter.sum() == 0:
                 # No overlaps with current box, keep all remaining boxes
-                remaining_count = rest.numel()
-                keep[keep_idx : keep_idx + remaining_count] = rest
-                keep_idx += remaining_count
-                break
-
+                order = rest
+                continue
             iou = inter / (areas[i] + areas[rest] - inter)
-
             # Keep boxes with IoU <= threshold
-            mask = iou <= iou_threshold
-            order = rest[mask]
+            order = rest[iou <= iou_threshold]

         return keep[:keep_idx]
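The restructured loop replaces the bulk keep-and-break shortcut with `order = rest; continue`, so when the current head box overlaps nothing, the remaining boxes are still checked against each other on later iterations instead of being kept wholesale. The calling convention is unchanged; reusing the docstring example above with one extra box:

```python
import torch
from ultralytics.utils.nms import TorchNMS

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0], [1.0, 1.0, 11.0, 11.0], [50.0, 50.0, 60.0, 60.0]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = TorchNMS.nms(boxes, scores, 0.5)
print(keep)  # box 1 overlaps box 0 heavily (IoU ~0.68) and is suppressed; distant box 2 survives
```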