dgenerate-ultralytics-headless 8.3.194__py3-none-any.whl → 8.3.195__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/METADATA +1 -2
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/RECORD +97 -96
- tests/test_python.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +8 -8
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +75 -75
- ultralytics/data/base.py +12 -12
- ultralytics/data/converter.py +4 -4
- ultralytics/data/dataset.py +7 -7
- ultralytics/data/loaders.py +15 -15
- ultralytics/data/split_dota.py +10 -10
- ultralytics/data/utils.py +12 -12
- ultralytics/engine/model.py +13 -13
- ultralytics/engine/predictor.py +13 -13
- ultralytics/engine/results.py +21 -21
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +7 -7
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +11 -11
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +2 -2
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/amg.py +6 -6
- ultralytics/models/sam/build.py +9 -9
- ultralytics/models/sam/model.py +7 -7
- ultralytics/models/sam/modules/blocks.py +6 -6
- ultralytics/models/sam/modules/decoders.py +1 -1
- ultralytics/models/sam/modules/encoders.py +27 -27
- ultralytics/models/sam/modules/sam.py +4 -4
- ultralytics/models/sam/modules/tiny_encoder.py +18 -18
- ultralytics/models/sam/modules/utils.py +8 -8
- ultralytics/models/sam/predict.py +63 -63
- ultralytics/models/utils/loss.py +22 -22
- ultralytics/models/utils/ops.py +8 -8
- ultralytics/models/yolo/classify/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +8 -8
- ultralytics/models/yolo/classify/val.py +4 -4
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +6 -6
- ultralytics/models/yolo/detect/val.py +32 -32
- ultralytics/models/yolo/model.py +6 -6
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/obb/val.py +13 -13
- ultralytics/models/yolo/pose/val.py +11 -11
- ultralytics/models/yolo/segment/predict.py +4 -4
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +14 -14
- ultralytics/models/yolo/world/train.py +9 -9
- ultralytics/models/yolo/world/train_world.py +1 -1
- ultralytics/models/yolo/yoloe/predict.py +4 -4
- ultralytics/models/yolo/yoloe/train.py +4 -4
- ultralytics/nn/autobackend.py +2 -2
- ultralytics/nn/modules/block.py +6 -6
- ultralytics/nn/modules/conv.py +2 -2
- ultralytics/nn/modules/head.py +4 -4
- ultralytics/nn/tasks.py +13 -13
- ultralytics/nn/text_model.py +3 -3
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +3 -3
- ultralytics/solutions/config.py +5 -5
- ultralytics/solutions/distance_calculation.py +2 -2
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +4 -4
- ultralytics/solutions/object_counter.py +4 -4
- ultralytics/solutions/parking_management.py +7 -7
- ultralytics/solutions/queue_management.py +3 -3
- ultralytics/solutions/region_counter.py +4 -4
- ultralytics/solutions/similarity_search.py +2 -2
- ultralytics/solutions/solutions.py +48 -48
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/trackers/byte_tracker.py +11 -11
- ultralytics/trackers/utils/gmc.py +3 -3
- ultralytics/trackers/utils/matching.py +5 -5
- ultralytics/utils/autodevice.py +2 -2
- ultralytics/utils/benchmarks.py +10 -10
- ultralytics/utils/callbacks/clearml.py +1 -1
- ultralytics/utils/callbacks/comet.py +5 -5
- ultralytics/utils/checks.py +5 -5
- ultralytics/utils/cpu.py +90 -0
- ultralytics/utils/dist.py +1 -1
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +5 -5
- ultralytics/utils/instance.py +2 -2
- ultralytics/utils/metrics.py +35 -35
- ultralytics/utils/nms.py +4 -4
- ultralytics/utils/ops.py +1 -1
- ultralytics/utils/patches.py +2 -2
- ultralytics/utils/plotting.py +9 -9
- ultralytics/utils/torch_utils.py +2 -6
- ultralytics/utils/triton.py +5 -5
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/top_level.txt +0 -0
@@ -29,7 +29,7 @@ class DetectionTrainer(BaseTrainer):
|
|
29
29
|
|
30
30
|
Attributes:
|
31
31
|
model (DetectionModel): The YOLO detection model being trained.
|
32
|
-
data (
|
32
|
+
data (dict): Dictionary containing dataset information including class names and number of classes.
|
33
33
|
loss_names (tuple): Names of the loss components used in training (box_loss, cls_loss, dfl_loss).
|
34
34
|
|
35
35
|
Methods:
|
@@ -96,10 +96,10 @@ class DetectionTrainer(BaseTrainer):
|
|
96
96
|
Preprocess a batch of images by scaling and converting to float.
|
97
97
|
|
98
98
|
Args:
|
99
|
-
batch (
|
99
|
+
batch (dict): Dictionary containing batch data with 'img' tensor.
|
100
100
|
|
101
101
|
Returns:
|
102
|
-
(
|
102
|
+
(dict): Preprocessed batch with normalized images.
|
103
103
|
"""
|
104
104
|
batch["img"] = batch["img"].to(self.device, non_blocking=True).float() / 255
|
105
105
|
if self.args.multi_scale:
|
@@ -158,11 +158,11 @@ class DetectionTrainer(BaseTrainer):
|
|
158
158
|
Return a loss dict with labeled training loss items tensor.
|
159
159
|
|
160
160
|
Args:
|
161
|
-
loss_items (
|
161
|
+
loss_items (list[float], optional): List of loss values.
|
162
162
|
prefix (str): Prefix for keys in the returned dictionary.
|
163
163
|
|
164
164
|
Returns:
|
165
|
-
(
|
165
|
+
(dict | list): Dictionary of labeled loss items if loss_items is provided, otherwise list of keys.
|
166
166
|
"""
|
167
167
|
keys = [f"{prefix}/{x}" for x in self.loss_names]
|
168
168
|
if loss_items is not None:
|
@@ -186,7 +186,7 @@ class DetectionTrainer(BaseTrainer):
|
|
186
186
|
Plot training samples with their annotations.
|
187
187
|
|
188
188
|
Args:
|
189
|
-
batch (
|
189
|
+
batch (dict[str, Any]): Dictionary containing batch data.
|
190
190
|
ni (int): Number of iterations.
|
191
191
|
"""
|
192
192
|
plot_images(
|
@@ -27,13 +27,13 @@ class DetectionValidator(BaseValidator):
|
|
27
27
|
Attributes:
|
28
28
|
is_coco (bool): Whether the dataset is COCO.
|
29
29
|
is_lvis (bool): Whether the dataset is LVIS.
|
30
|
-
class_map (
|
30
|
+
class_map (list[int]): Mapping from model class indices to dataset class indices.
|
31
31
|
metrics (DetMetrics): Object detection metrics calculator.
|
32
32
|
iouv (torch.Tensor): IoU thresholds for mAP calculation.
|
33
33
|
niou (int): Number of IoU thresholds.
|
34
|
-
lb (
|
35
|
-
jdict (
|
36
|
-
stats (
|
34
|
+
lb (list[Any]): List for storing ground truth labels for hybrid saving.
|
35
|
+
jdict (list[dict[str, Any]]): List for storing JSON detection results.
|
36
|
+
stats (dict[str, list[torch.Tensor]]): Dictionary for storing statistics during validation.
|
37
37
|
|
38
38
|
Examples:
|
39
39
|
>>> from ultralytics.models.yolo.detect import DetectionValidator
|
@@ -49,8 +49,8 @@ class DetectionValidator(BaseValidator):
|
|
49
49
|
Args:
|
50
50
|
dataloader (torch.utils.data.DataLoader, optional): Dataloader to use for validation.
|
51
51
|
save_dir (Path, optional): Directory to save results.
|
52
|
-
args (
|
53
|
-
_callbacks (
|
52
|
+
args (dict[str, Any], optional): Arguments for the validator.
|
53
|
+
_callbacks (list[Any], optional): List of callback functions.
|
54
54
|
"""
|
55
55
|
super().__init__(dataloader, save_dir, args, _callbacks)
|
56
56
|
self.is_coco = False
|
@@ -66,10 +66,10 @@ class DetectionValidator(BaseValidator):
|
|
66
66
|
Preprocess batch of images for YOLO validation.
|
67
67
|
|
68
68
|
Args:
|
69
|
-
batch (
|
69
|
+
batch (dict[str, Any]): Batch containing images and annotations.
|
70
70
|
|
71
71
|
Returns:
|
72
|
-
(
|
72
|
+
(dict[str, Any]): Preprocessed batch.
|
73
73
|
"""
|
74
74
|
batch["img"] = batch["img"].to(self.device, non_blocking=True)
|
75
75
|
batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
|
@@ -114,7 +114,7 @@ class DetectionValidator(BaseValidator):
|
|
114
114
|
preds (torch.Tensor): Raw predictions from the model.
|
115
115
|
|
116
116
|
Returns:
|
117
|
-
(
|
117
|
+
(list[dict[str, torch.Tensor]]): Processed predictions after NMS, where each dict contains
|
118
118
|
'bboxes', 'conf', 'cls', and 'extra' tensors.
|
119
119
|
"""
|
120
120
|
outputs = nms.non_max_suppression(
|
@@ -136,10 +136,10 @@ class DetectionValidator(BaseValidator):
|
|
136
136
|
|
137
137
|
Args:
|
138
138
|
si (int): Batch index.
|
139
|
-
batch (
|
139
|
+
batch (dict[str, Any]): Batch data containing images and annotations.
|
140
140
|
|
141
141
|
Returns:
|
142
|
-
(
|
142
|
+
(dict[str, Any]): Prepared batch with processed annotations.
|
143
143
|
"""
|
144
144
|
idx = batch["batch_idx"] == si
|
145
145
|
cls = batch["cls"][idx].squeeze(-1)
|
@@ -163,10 +163,10 @@ class DetectionValidator(BaseValidator):
|
|
163
163
|
Prepare predictions for evaluation against ground truth.
|
164
164
|
|
165
165
|
Args:
|
166
|
-
pred (
|
166
|
+
pred (dict[str, torch.Tensor]): Post-processed predictions from the model.
|
167
167
|
|
168
168
|
Returns:
|
169
|
-
(
|
169
|
+
(dict[str, torch.Tensor]): Prepared predictions in native space.
|
170
170
|
"""
|
171
171
|
if self.args.single_cls:
|
172
172
|
pred["cls"] *= 0
|
@@ -177,8 +177,8 @@ class DetectionValidator(BaseValidator):
|
|
177
177
|
Update metrics with new predictions and ground truth.
|
178
178
|
|
179
179
|
Args:
|
180
|
-
preds (
|
181
|
-
batch (
|
180
|
+
preds (list[dict[str, torch.Tensor]]): List of predictions from the model.
|
181
|
+
batch (dict[str, Any]): Batch data containing ground truth.
|
182
182
|
"""
|
183
183
|
for si, pred in enumerate(preds):
|
184
184
|
self.seen += 1
|
@@ -232,7 +232,7 @@ class DetectionValidator(BaseValidator):
|
|
232
232
|
Calculate and return metrics statistics.
|
233
233
|
|
234
234
|
Returns:
|
235
|
-
(
|
235
|
+
(dict[str, Any]): Dictionary containing metrics results.
|
236
236
|
"""
|
237
237
|
self.metrics.process(save_dir=self.save_dir, plot=self.args.plots, on_plot=self.on_plot)
|
238
238
|
self.metrics.clear_stats()
|
@@ -263,11 +263,11 @@ class DetectionValidator(BaseValidator):
|
|
263
263
|
Return correct prediction matrix.
|
264
264
|
|
265
265
|
Args:
|
266
|
-
preds (
|
267
|
-
batch (
|
266
|
+
preds (dict[str, torch.Tensor]): Dictionary containing prediction data with 'bboxes' and 'cls' keys.
|
267
|
+
batch (dict[str, Any]): Batch dictionary containing ground truth data with 'bboxes' and 'cls' keys.
|
268
268
|
|
269
269
|
Returns:
|
270
|
-
(
|
270
|
+
(dict[str, np.ndarray]): Dictionary containing 'tp' key with correct prediction matrix of shape (N, 10) for 10 IoU levels.
|
271
271
|
"""
|
272
272
|
if len(batch["cls"]) == 0 or len(preds["cls"]) == 0:
|
273
273
|
return {"tp": np.zeros((len(preds["cls"]), self.niou), dtype=bool)}
|
@@ -307,7 +307,7 @@ class DetectionValidator(BaseValidator):
|
|
307
307
|
Plot validation image samples.
|
308
308
|
|
309
309
|
Args:
|
310
|
-
batch (
|
310
|
+
batch (dict[str, Any]): Batch containing images and annotations.
|
311
311
|
ni (int): Batch index.
|
312
312
|
"""
|
313
313
|
plot_images(
|
@@ -325,8 +325,8 @@ class DetectionValidator(BaseValidator):
|
|
325
325
|
Plot predicted bounding boxes on input images and save the result.
|
326
326
|
|
327
327
|
Args:
|
328
|
-
batch (
|
329
|
-
preds (
|
328
|
+
batch (dict[str, Any]): Batch containing images and annotations.
|
329
|
+
preds (list[dict[str, torch.Tensor]]): List of predictions from the model.
|
330
330
|
ni (int): Batch index.
|
331
331
|
max_det (Optional[int]): Maximum number of detections to plot.
|
332
332
|
"""
|
@@ -352,9 +352,9 @@ class DetectionValidator(BaseValidator):
|
|
352
352
|
Save YOLO detections to a txt file in normalized coordinates in a specific format.
|
353
353
|
|
354
354
|
Args:
|
355
|
-
predn (
|
355
|
+
predn (dict[str, torch.Tensor]): Dictionary containing predictions with keys 'bboxes', 'conf', and 'cls'.
|
356
356
|
save_conf (bool): Whether to save confidence scores.
|
357
|
-
shape (
|
357
|
+
shape (tuple[int, int]): Shape of the original image (height, width).
|
358
358
|
file (Path): File path to save the detections.
|
359
359
|
"""
|
360
360
|
from ultralytics.engine.results import Results
|
@@ -371,9 +371,9 @@ class DetectionValidator(BaseValidator):
|
|
371
371
|
Serialize YOLO predictions to COCO json format.
|
372
372
|
|
373
373
|
Args:
|
374
|
-
predn (
|
374
|
+
predn (dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys
|
375
375
|
with bounding box coordinates, confidence scores, and class predictions.
|
376
|
-
pbatch (
|
376
|
+
pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
|
377
377
|
|
378
378
|
Examples:
|
379
379
|
>>> result = {
|
@@ -417,10 +417,10 @@ class DetectionValidator(BaseValidator):
|
|
417
417
|
Evaluate YOLO output in JSON format and return performance statistics.
|
418
418
|
|
419
419
|
Args:
|
420
|
-
stats (
|
420
|
+
stats (dict[str, Any]): Current statistics dictionary.
|
421
421
|
|
422
422
|
Returns:
|
423
|
-
(
|
423
|
+
(dict[str, Any]): Updated statistics dictionary with COCO/LVIS evaluation results.
|
424
424
|
"""
|
425
425
|
pred_json = self.save_dir / "predictions.json" # predictions
|
426
426
|
anno_json = (
|
@@ -446,16 +446,16 @@ class DetectionValidator(BaseValidator):
|
|
446
446
|
including mAP50, mAP50-95, and LVIS-specific metrics if applicable.
|
447
447
|
|
448
448
|
Args:
|
449
|
-
stats (
|
449
|
+
stats (dict[str, Any]): Dictionary to store computed metrics and statistics.
|
450
450
|
pred_json (str | Path): Path to JSON file containing predictions in COCO format.
|
451
451
|
anno_json (str | Path): Path to JSON file containing ground truth annotations in COCO format.
|
452
|
-
iou_types (str |
|
452
|
+
iou_types (str | list[str]): IoU type(s) for evaluation. Can be single string or list of strings.
|
453
453
|
Common values include "bbox", "segm", "keypoints". Defaults to "bbox".
|
454
|
-
suffix (str |
|
454
|
+
suffix (str | list[str]): Suffix to append to metric names in stats dictionary. Should correspond
|
455
455
|
to iou_types if multiple types provided. Defaults to "Box".
|
456
456
|
|
457
457
|
Returns:
|
458
|
-
(
|
458
|
+
(dict[str, Any]): Updated stats dictionary containing the computed COCO/LVIS evaluation metrics.
|
459
459
|
"""
|
460
460
|
if self.args.save_json and (self.is_coco or self.is_lvis) and len(self.jdict):
|
461
461
|
LOGGER.info(f"\nEvaluating faster-coco-eval mAP using {pred_json} and {anno_json}...")
|
ultralytics/models/yolo/model.py
CHANGED
@@ -185,7 +185,7 @@ class YOLOWorld(Model):
|
|
185
185
|
Set the model's class names for detection.
|
186
186
|
|
187
187
|
Args:
|
188
|
-
classes (
|
188
|
+
classes (list[str]): A list of categories, e.g. ["person"].
|
189
189
|
"""
|
190
190
|
self.model.set_classes(classes)
|
191
191
|
# Remove background if it's given
|
@@ -299,8 +299,8 @@ class YOLOE(Model):
|
|
299
299
|
classification tasks. The model must be an instance of YOLOEModel.
|
300
300
|
|
301
301
|
Args:
|
302
|
-
vocab (
|
303
|
-
names (
|
302
|
+
vocab (list[str]): Vocabulary list containing tokens or words used by the model for text processing.
|
303
|
+
names (list[str]): List of class names that the model can detect or classify.
|
304
304
|
|
305
305
|
Raises:
|
306
306
|
AssertionError: If the model is not an instance of YOLOEModel.
|
@@ -322,7 +322,7 @@ class YOLOE(Model):
|
|
322
322
|
Set the model's class names and embeddings for detection.
|
323
323
|
|
324
324
|
Args:
|
325
|
-
classes (
|
325
|
+
classes (list[str]): A list of categories, e.g. ["person"].
|
326
326
|
embeddings (torch.Tensor): Embeddings corresponding to the classes.
|
327
327
|
"""
|
328
328
|
assert isinstance(self.model, YOLOEModel)
|
@@ -381,7 +381,7 @@ class YOLOE(Model):
|
|
381
381
|
directory paths, URL/YouTube streams, PIL images, numpy arrays, or webcam indices.
|
382
382
|
stream (bool): Whether to stream the prediction results. If True, results are yielded as a
|
383
383
|
generator as they are computed.
|
384
|
-
visual_prompts (
|
384
|
+
visual_prompts (dict[str, list]): Dictionary containing visual prompts for the model. Must include
|
385
385
|
'bboxes' and 'cls' keys when non-empty.
|
386
386
|
refer_image (str | PIL.Image | np.ndarray, optional): Reference image for visual prompts.
|
387
387
|
predictor (callable, optional): Custom predictor function. If None, a predictor is automatically
|
@@ -389,7 +389,7 @@ class YOLOE(Model):
|
|
389
389
|
**kwargs (Any): Additional keyword arguments passed to the predictor.
|
390
390
|
|
391
391
|
Returns:
|
392
|
-
(
|
392
|
+
(list | generator): List of Results objects or generator of Results objects if stream=True.
|
393
393
|
|
394
394
|
Examples:
|
395
395
|
>>> model = YOLOE("yoloe-11s-seg.pt")
|
@@ -45,7 +45,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
|
|
45
45
|
model configuration.
|
46
46
|
overrides (dict, optional): Dictionary of parameter overrides for the configuration. Any values here
|
47
47
|
will take precedence over those in cfg.
|
48
|
-
_callbacks (
|
48
|
+
_callbacks (list[Any], optional): List of callback functions to be invoked during training.
|
49
49
|
|
50
50
|
Examples:
|
51
51
|
>>> from ultralytics.models.yolo.obb import OBBTrainer
|
@@ -77,13 +77,13 @@ class OBBValidator(DetectionValidator):
|
|
77
77
|
Compute the correct prediction matrix for a batch of detections and ground truth bounding boxes.
|
78
78
|
|
79
79
|
Args:
|
80
|
-
preds (
|
80
|
+
preds (dict[str, torch.Tensor]): Prediction dictionary containing 'cls' and 'bboxes' keys with detected
|
81
81
|
class labels and bounding boxes.
|
82
|
-
batch (
|
82
|
+
batch (dict[str, torch.Tensor]): Batch dictionary containing 'cls' and 'bboxes' keys with ground truth
|
83
83
|
class labels and bounding boxes.
|
84
84
|
|
85
85
|
Returns:
|
86
|
-
(
|
86
|
+
(dict[str, np.ndarray]): Dictionary containing 'tp' key with the correct prediction matrix as a numpy
|
87
87
|
array with shape (N, 10), which includes 10 IoU levels for each detection, indicating the accuracy
|
88
88
|
of predictions compared to the ground truth.
|
89
89
|
|
@@ -104,7 +104,7 @@ class OBBValidator(DetectionValidator):
|
|
104
104
|
preds (torch.Tensor): Raw predictions from the model.
|
105
105
|
|
106
106
|
Returns:
|
107
|
-
(
|
107
|
+
(list[dict[str, torch.Tensor]]): Processed predictions with angle information concatenated to bboxes.
|
108
108
|
"""
|
109
109
|
preds = super().postprocess(preds)
|
110
110
|
for pred in preds:
|
@@ -117,7 +117,7 @@ class OBBValidator(DetectionValidator):
|
|
117
117
|
|
118
118
|
Args:
|
119
119
|
si (int): Batch index to process.
|
120
|
-
batch (
|
120
|
+
batch (dict[str, Any]): Dictionary containing batch data with keys:
|
121
121
|
- batch_idx: Tensor of batch indices
|
122
122
|
- cls: Tensor of class labels
|
123
123
|
- bboxes: Tensor of bounding boxes
|
@@ -126,7 +126,7 @@ class OBBValidator(DetectionValidator):
|
|
126
126
|
- ratio_pad: Ratio and padding information
|
127
127
|
|
128
128
|
Returns:
|
129
|
-
(
|
129
|
+
(dict[str, Any]): Prepared batch data with scaled bounding boxes and metadata.
|
130
130
|
"""
|
131
131
|
idx = batch["batch_idx"] == si
|
132
132
|
cls = batch["cls"][idx].squeeze(-1)
|
@@ -150,8 +150,8 @@ class OBBValidator(DetectionValidator):
|
|
150
150
|
Plot predicted bounding boxes on input images and save the result.
|
151
151
|
|
152
152
|
Args:
|
153
|
-
batch (
|
154
|
-
preds (
|
153
|
+
batch (dict[str, Any]): Batch data containing images, file paths, and other metadata.
|
154
|
+
preds (list[torch.Tensor]): List of prediction tensors for each image in the batch.
|
155
155
|
ni (int): Batch index used for naming the output file.
|
156
156
|
|
157
157
|
Examples:
|
@@ -170,9 +170,9 @@ class OBBValidator(DetectionValidator):
|
|
170
170
|
Convert YOLO predictions to COCO JSON format with rotated bounding box information.
|
171
171
|
|
172
172
|
Args:
|
173
|
-
predn (
|
173
|
+
predn (dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes', 'conf', and 'cls' keys
|
174
174
|
with bounding box coordinates, confidence scores, and class predictions.
|
175
|
-
pbatch (
|
175
|
+
pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
|
176
176
|
|
177
177
|
Notes:
|
178
178
|
This method processes rotated bounding box predictions and converts them to both rbox format
|
@@ -204,7 +204,7 @@ class OBBValidator(DetectionValidator):
|
|
204
204
|
predn (torch.Tensor): Predicted detections with shape (N, 7) containing bounding boxes, confidence scores,
|
205
205
|
class predictions, and angles in format (x, y, w, h, conf, cls, angle).
|
206
206
|
save_conf (bool): Whether to save confidence scores in the text file.
|
207
|
-
shape (
|
207
|
+
shape (tuple[int, int]): Original image shape in format (height, width).
|
208
208
|
file (Path): Output file path to save detections.
|
209
209
|
|
210
210
|
Examples:
|
@@ -237,10 +237,10 @@ class OBBValidator(DetectionValidator):
|
|
237
237
|
Evaluate YOLO output in JSON format and save predictions in DOTA format.
|
238
238
|
|
239
239
|
Args:
|
240
|
-
stats (
|
240
|
+
stats (dict[str, Any]): Performance statistics dictionary.
|
241
241
|
|
242
242
|
Returns:
|
243
|
-
(
|
243
|
+
(dict[str, Any]): Updated performance statistics.
|
244
244
|
"""
|
245
245
|
if self.args.save_json and self.is_dota and len(self.jdict):
|
246
246
|
import json
|
@@ -22,7 +22,7 @@ class PoseValidator(DetectionValidator):
|
|
22
22
|
|
23
23
|
Attributes:
|
24
24
|
sigma (np.ndarray): Sigma values for OKS calculation, either OKS_SIGMA or ones divided by number of keypoints.
|
25
|
-
kpt_shape (
|
25
|
+
kpt_shape (list[int]): Shape of the keypoints, typically [17, 3] for COCO format.
|
26
26
|
args (dict): Arguments for the validator including task set to "pose".
|
27
27
|
metrics (PoseMetrics): Metrics object for pose evaluation.
|
28
28
|
|
@@ -132,7 +132,7 @@ class PoseValidator(DetectionValidator):
|
|
132
132
|
bounding boxes, confidence scores, class predictions, and keypoint data.
|
133
133
|
|
134
134
|
Returns:
|
135
|
-
(
|
135
|
+
(dict[str, torch.Tensor]): Dict of processed prediction dictionaries, each containing:
|
136
136
|
- 'bboxes': Bounding box coordinates
|
137
137
|
- 'conf': Confidence scores
|
138
138
|
- 'cls': Class predictions
|
@@ -154,10 +154,10 @@ class PoseValidator(DetectionValidator):
|
|
154
154
|
|
155
155
|
Args:
|
156
156
|
si (int): Batch index.
|
157
|
-
batch (
|
157
|
+
batch (dict[str, Any]): Dictionary containing batch data with keys like 'keypoints', 'batch_idx', etc.
|
158
158
|
|
159
159
|
Returns:
|
160
|
-
(
|
160
|
+
(dict[str, Any]): Prepared batch with keypoints scaled to original image dimensions.
|
161
161
|
|
162
162
|
Notes:
|
163
163
|
This method extends the parent class's _prepare_batch method by adding keypoint processing.
|
@@ -177,13 +177,13 @@ class PoseValidator(DetectionValidator):
|
|
177
177
|
Return correct prediction matrix by computing Intersection over Union (IoU) between detections and ground truth.
|
178
178
|
|
179
179
|
Args:
|
180
|
-
preds (
|
180
|
+
preds (dict[str, torch.Tensor]): Dictionary containing prediction data with keys 'cls' for class predictions
|
181
181
|
and 'keypoints' for keypoint predictions.
|
182
|
-
batch (
|
182
|
+
batch (dict[str, Any]): Dictionary containing ground truth data with keys 'cls' for class labels,
|
183
183
|
'bboxes' for bounding boxes, and 'keypoints' for keypoint annotations.
|
184
184
|
|
185
185
|
Returns:
|
186
|
-
(
|
186
|
+
(dict[str, np.ndarray]): Dictionary containing the correct prediction matrix including 'tp_p' for pose
|
187
187
|
true positives across 10 IoU levels.
|
188
188
|
|
189
189
|
Notes:
|
@@ -207,9 +207,9 @@ class PoseValidator(DetectionValidator):
|
|
207
207
|
Save YOLO pose detections to a text file in normalized coordinates.
|
208
208
|
|
209
209
|
Args:
|
210
|
-
predn (
|
210
|
+
predn (dict[str, torch.Tensor]): Dictionary containing predictions with keys 'bboxes', 'conf', 'cls', and 'keypoints'.
|
211
211
|
save_conf (bool): Whether to save confidence scores.
|
212
|
-
shape (
|
212
|
+
shape (tuple[int, int]): Shape of the original image (height, width).
|
213
213
|
file (Path): Output file path to save detections.
|
214
214
|
|
215
215
|
Notes:
|
@@ -234,9 +234,9 @@ class PoseValidator(DetectionValidator):
|
|
234
234
|
to COCO format, and appends the results to the internal JSON dictionary (self.jdict).
|
235
235
|
|
236
236
|
Args:
|
237
|
-
predn (
|
237
|
+
predn (dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes', 'conf', 'cls',
|
238
238
|
and 'keypoints' tensors.
|
239
|
-
pbatch (
|
239
|
+
pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
|
240
240
|
|
241
241
|
Notes:
|
242
242
|
The method extracts the image ID from the filename stem (either as an integer if numeric, or as a string),
|
@@ -71,13 +71,13 @@ class SegmentationPredictor(DetectionPredictor):
|
|
71
71
|
Construct a list of result objects from the predictions.
|
72
72
|
|
73
73
|
Args:
|
74
|
-
preds (
|
74
|
+
preds (list[torch.Tensor]): List of predicted bounding boxes, scores, and masks.
|
75
75
|
img (torch.Tensor): The image after preprocessing.
|
76
|
-
orig_imgs (
|
77
|
-
protos (
|
76
|
+
orig_imgs (list[np.ndarray]): List of original images before preprocessing.
|
77
|
+
protos (list[torch.Tensor]): List of prototype masks.
|
78
78
|
|
79
79
|
Returns:
|
80
|
-
(
|
80
|
+
(list[Results]): List of result objects containing the original images, image paths, class names,
|
81
81
|
bounding boxes, and masks.
|
82
82
|
"""
|
83
83
|
return [
|
@@ -19,7 +19,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
|
|
19
19
|
functionality including model initialization, validation, and visualization.
|
20
20
|
|
21
21
|
Attributes:
|
22
|
-
loss_names (
|
22
|
+
loss_names (tuple[str]): Names of the loss components used during training.
|
23
23
|
|
24
24
|
Examples:
|
25
25
|
>>> from ultralytics.models.yolo.segment import SegmentationTrainer
|
@@ -57,10 +57,10 @@ class SegmentationValidator(DetectionValidator):
|
|
57
57
|
Preprocess batch of images for YOLO segmentation validation.
|
58
58
|
|
59
59
|
Args:
|
60
|
-
batch (
|
60
|
+
batch (dict[str, Any]): Batch containing images and annotations.
|
61
61
|
|
62
62
|
Returns:
|
63
|
-
(
|
63
|
+
(dict[str, Any]): Preprocessed batch.
|
64
64
|
"""
|
65
65
|
batch = super().preprocess(batch)
|
66
66
|
batch["masks"] = batch["masks"].to(self.device, non_blocking=True).float()
|
@@ -100,10 +100,10 @@ class SegmentationValidator(DetectionValidator):
|
|
100
100
|
Post-process YOLO predictions and return output detections with proto.
|
101
101
|
|
102
102
|
Args:
|
103
|
-
preds (
|
103
|
+
preds (list[torch.Tensor]): Raw predictions from the model.
|
104
104
|
|
105
105
|
Returns:
|
106
|
-
|
106
|
+
(list[dict[str, torch.Tensor]]): Processed detection predictions with masks.
|
107
107
|
"""
|
108
108
|
proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
|
109
109
|
preds = super().postprocess(preds[0])
|
@@ -127,10 +127,10 @@ class SegmentationValidator(DetectionValidator):
|
|
127
127
|
|
128
128
|
Args:
|
129
129
|
si (int): Batch index.
|
130
|
-
batch (
|
130
|
+
batch (dict[str, Any]): Batch data containing images and annotations.
|
131
131
|
|
132
132
|
Returns:
|
133
|
-
(
|
133
|
+
(dict[str, Any]): Prepared batch with processed annotations.
|
134
134
|
"""
|
135
135
|
prepared_batch = super()._prepare_batch(si, batch)
|
136
136
|
nl = len(prepared_batch["cls"])
|
@@ -151,11 +151,11 @@ class SegmentationValidator(DetectionValidator):
|
|
151
151
|
Compute correct prediction matrix for a batch based on bounding boxes and optional masks.
|
152
152
|
|
153
153
|
Args:
|
154
|
-
preds (
|
155
|
-
batch (
|
154
|
+
preds (dict[str, torch.Tensor]): Dictionary containing predictions with keys like 'cls' and 'masks'.
|
155
|
+
batch (dict[str, Any]): Dictionary containing batch data with keys like 'cls' and 'masks'.
|
156
156
|
|
157
157
|
Returns:
|
158
|
-
(
|
158
|
+
(dict[str, np.ndarray]): A dictionary containing correct prediction matrices including 'tp_m' for mask IoU.
|
159
159
|
|
160
160
|
Notes:
|
161
161
|
- If `masks` is True, the function computes IoU between predicted and ground truth masks.
|
@@ -181,8 +181,8 @@ class SegmentationValidator(DetectionValidator):
|
|
181
181
|
Plot batch predictions with masks and bounding boxes.
|
182
182
|
|
183
183
|
Args:
|
184
|
-
batch (
|
185
|
-
preds (
|
184
|
+
batch (dict[str, Any]): Batch containing images and annotations.
|
185
|
+
preds (list[dict[str, torch.Tensor]]): List of predictions from the model.
|
186
186
|
ni (int): Batch index.
|
187
187
|
"""
|
188
188
|
for p in preds:
|
@@ -199,7 +199,7 @@ class SegmentationValidator(DetectionValidator):
|
|
199
199
|
Args:
|
200
200
|
predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
|
201
201
|
save_conf (bool): Whether to save confidence scores.
|
202
|
-
shape (
|
202
|
+
shape (tuple[int, int]): Shape of the original image.
|
203
203
|
file (Path): File path to save the detections.
|
204
204
|
"""
|
205
205
|
from ultralytics.engine.results import Results
|
@@ -217,8 +217,8 @@ class SegmentationValidator(DetectionValidator):
|
|
217
217
|
Save one JSON result for COCO evaluation.
|
218
218
|
|
219
219
|
Args:
|
220
|
-
predn (
|
221
|
-
pbatch (
|
220
|
+
predn (dict[str, torch.Tensor]): Predictions containing bboxes, masks, confidence scores, and classes.
|
221
|
+
pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
|
222
222
|
"""
|
223
223
|
from faster_coco_eval.core.mask import encode # noqa
|
224
224
|
|
@@ -32,10 +32,10 @@ class WorldTrainer(DetectionTrainer):
|
|
32
32
|
accelerate training with multi-modal data.
|
33
33
|
|
34
34
|
Attributes:
|
35
|
-
text_embeddings (
|
35
|
+
text_embeddings (dict[str, torch.Tensor] | None): Cached text embeddings for category names to accelerate
|
36
36
|
training.
|
37
37
|
model (WorldModel): The YOLO World model being trained.
|
38
|
-
data (
|
38
|
+
data (dict[str, Any]): Dataset configuration containing class information.
|
39
39
|
args (Any): Training arguments and configuration.
|
40
40
|
|
41
41
|
Methods:
|
@@ -58,9 +58,9 @@ class WorldTrainer(DetectionTrainer):
|
|
58
58
|
Initialize a WorldTrainer object with given arguments.
|
59
59
|
|
60
60
|
Args:
|
61
|
-
cfg (
|
62
|
-
overrides (
|
63
|
-
_callbacks (
|
61
|
+
cfg (dict[str, Any]): Configuration for the trainer.
|
62
|
+
overrides (dict[str, Any], optional): Configuration overrides.
|
63
|
+
_callbacks (list[Any], optional): List of callback functions.
|
64
64
|
"""
|
65
65
|
if overrides is None:
|
66
66
|
overrides = {}
|
@@ -72,7 +72,7 @@ class WorldTrainer(DetectionTrainer):
|
|
72
72
|
Return WorldModel initialized with specified config and weights.
|
73
73
|
|
74
74
|
Args:
|
75
|
-
cfg (
|
75
|
+
cfg (dict[str, Any] | str, optional): Model configuration.
|
76
76
|
weights (str, optional): Path to pretrained weights.
|
77
77
|
verbose (bool): Whether to display model info.
|
78
78
|
|
@@ -121,7 +121,7 @@ class WorldTrainer(DetectionTrainer):
|
|
121
121
|
for these categories to improve training efficiency.
|
122
122
|
|
123
123
|
Args:
|
124
|
-
datasets (
|
124
|
+
datasets (list[Any]): List of datasets from which to extract category names.
|
125
125
|
batch (int | None): Batch size used for processing.
|
126
126
|
|
127
127
|
Notes:
|
@@ -144,12 +144,12 @@ class WorldTrainer(DetectionTrainer):
|
|
144
144
|
Generate text embeddings for a list of text samples.
|
145
145
|
|
146
146
|
Args:
|
147
|
-
texts (
|
147
|
+
texts (list[str]): List of text samples to encode.
|
148
148
|
batch (int): Batch size for processing.
|
149
149
|
cache_dir (Path): Directory to save/load cached embeddings.
|
150
150
|
|
151
151
|
Returns:
|
152
|
-
(
|
152
|
+
(dict[str, torch.Tensor]): Dictionary mapping text samples to their embeddings.
|
153
153
|
"""
|
154
154
|
model = "clip:ViT-B/32"
|
155
155
|
cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
|
@@ -94,7 +94,7 @@ class WorldTrainerFromScratch(WorldTrainer):
|
|
94
94
|
standard YOLO datasets and grounding datasets with different formats.
|
95
95
|
|
96
96
|
Args:
|
97
|
-
img_path (
|
97
|
+
img_path (list[str] | str): Path to the folder containing images or list of paths.
|
98
98
|
mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
|
99
99
|
batch (int, optional): Size of batches, used for rectangular training/validation.
|
100
100
|
|