dgenerate-ultralytics-headless 8.3.194__py3-none-any.whl → 8.3.196__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/METADATA +1 -2
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/RECORD +107 -106
- tests/test_python.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +9 -8
- ultralytics/cfg/default.yaml +1 -0
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +76 -76
- ultralytics/data/base.py +12 -12
- ultralytics/data/build.py +5 -1
- ultralytics/data/converter.py +4 -4
- ultralytics/data/dataset.py +7 -7
- ultralytics/data/loaders.py +15 -15
- ultralytics/data/split_dota.py +10 -10
- ultralytics/data/utils.py +12 -12
- ultralytics/engine/exporter.py +19 -31
- ultralytics/engine/model.py +13 -13
- ultralytics/engine/predictor.py +16 -14
- ultralytics/engine/results.py +21 -21
- ultralytics/engine/trainer.py +15 -4
- ultralytics/engine/validator.py +6 -2
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +7 -7
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +11 -11
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +2 -2
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/amg.py +6 -6
- ultralytics/models/sam/build.py +9 -9
- ultralytics/models/sam/model.py +7 -7
- ultralytics/models/sam/modules/blocks.py +6 -6
- ultralytics/models/sam/modules/decoders.py +1 -1
- ultralytics/models/sam/modules/encoders.py +27 -27
- ultralytics/models/sam/modules/sam.py +4 -4
- ultralytics/models/sam/modules/tiny_encoder.py +18 -18
- ultralytics/models/sam/modules/utils.py +8 -8
- ultralytics/models/sam/predict.py +63 -63
- ultralytics/models/utils/loss.py +22 -22
- ultralytics/models/utils/ops.py +8 -8
- ultralytics/models/yolo/classify/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +9 -19
- ultralytics/models/yolo/classify/val.py +4 -4
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +38 -12
- ultralytics/models/yolo/detect/val.py +38 -37
- ultralytics/models/yolo/model.py +6 -6
- ultralytics/models/yolo/obb/train.py +1 -10
- ultralytics/models/yolo/obb/val.py +13 -13
- ultralytics/models/yolo/pose/train.py +1 -9
- ultralytics/models/yolo/pose/val.py +12 -12
- ultralytics/models/yolo/segment/predict.py +4 -4
- ultralytics/models/yolo/segment/train.py +2 -10
- ultralytics/models/yolo/segment/val.py +15 -15
- ultralytics/models/yolo/world/train.py +13 -13
- ultralytics/models/yolo/world/train_world.py +3 -3
- ultralytics/models/yolo/yoloe/predict.py +4 -4
- ultralytics/models/yolo/yoloe/train.py +7 -16
- ultralytics/models/yolo/yoloe/val.py +0 -7
- ultralytics/nn/autobackend.py +2 -2
- ultralytics/nn/modules/block.py +6 -6
- ultralytics/nn/modules/conv.py +2 -2
- ultralytics/nn/modules/head.py +6 -5
- ultralytics/nn/tasks.py +17 -15
- ultralytics/nn/text_model.py +3 -3
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +3 -3
- ultralytics/solutions/config.py +5 -5
- ultralytics/solutions/distance_calculation.py +2 -2
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +4 -4
- ultralytics/solutions/object_counter.py +4 -4
- ultralytics/solutions/parking_management.py +7 -7
- ultralytics/solutions/queue_management.py +3 -3
- ultralytics/solutions/region_counter.py +4 -4
- ultralytics/solutions/similarity_search.py +2 -2
- ultralytics/solutions/solutions.py +48 -48
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/trackers/byte_tracker.py +11 -11
- ultralytics/trackers/utils/gmc.py +3 -3
- ultralytics/trackers/utils/matching.py +5 -5
- ultralytics/utils/__init__.py +30 -19
- ultralytics/utils/autodevice.py +2 -2
- ultralytics/utils/benchmarks.py +10 -10
- ultralytics/utils/callbacks/clearml.py +1 -1
- ultralytics/utils/callbacks/comet.py +5 -5
- ultralytics/utils/callbacks/tensorboard.py +2 -2
- ultralytics/utils/checks.py +7 -5
- ultralytics/utils/cpu.py +90 -0
- ultralytics/utils/dist.py +1 -1
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +5 -5
- ultralytics/utils/instance.py +2 -2
- ultralytics/utils/loss.py +14 -8
- ultralytics/utils/metrics.py +35 -35
- ultralytics/utils/nms.py +4 -4
- ultralytics/utils/ops.py +1 -1
- ultralytics/utils/patches.py +2 -2
- ultralytics/utils/plotting.py +10 -9
- ultralytics/utils/torch_utils.py +113 -15
- ultralytics/utils/triton.py +5 -5
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/top_level.txt +0 -0
ultralytics/engine/results.py
CHANGED
@@ -30,7 +30,7 @@ class BaseTensor(SimpleClass):
 
     Attributes:
         data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
-        orig_shape (
+        orig_shape (tuple[int, int]): Original shape of the image, typically in the format (height, width).
 
     Methods:
         cpu: Return a copy of the tensor stored in CPU memory.
@@ -54,7 +54,7 @@ class BaseTensor(SimpleClass):
 
     Args:
         data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
-        orig_shape (
+        orig_shape (tuple[int, int]): Original shape of the image in (height, width) format.
 
     Examples:
         >>> import torch
@@ -72,7 +72,7 @@ class BaseTensor(SimpleClass):
     Return the shape of the underlying data tensor.
 
     Returns:
-        (
+        (tuple[int, ...]): The shape of the data tensor.
 
     Examples:
         >>> data = torch.rand(100, 4)
@@ -174,7 +174,7 @@ class BaseTensor(SimpleClass):
     Return a new BaseTensor instance containing the specified indexed elements of the data tensor.
 
     Args:
-        idx (int |
+        idx (int | list[int] | torch.Tensor): Index or indices to select from the data tensor.
 
     Returns:
         (BaseTensor): A new BaseTensor instance containing the indexed data.
@@ -199,7 +199,7 @@ class Results(SimpleClass, DataExportMixin):
 
     Attributes:
         orig_img (np.ndarray): The original image as a numpy array.
-        orig_shape (
+        orig_shape (tuple[int, int]): Original image shape in (height, width) format.
         boxes (Boxes | None): Detected bounding boxes.
         masks (Masks | None): Segmentation masks.
         probs (Probs | None): Classification probabilities.
@@ -261,7 +261,7 @@ class Results(SimpleClass, DataExportMixin):
         probs (torch.Tensor | None): A 1D tensor of probabilities of each class for classification task.
         keypoints (torch.Tensor | None): A 2D tensor of keypoint coordinates for each detection.
         obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection.
-        speed (
+        speed (dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
 
     Examples:
         >>> results = model("path/to/image.jpg")
@@ -799,7 +799,7 @@ class Results(SimpleClass, DataExportMixin):
         decimals (int): Number of decimal places to round the output values to.
 
     Returns:
-        (
+        (list[dict[str, Any]]): A list of dictionaries, each containing summarized information for a single detection
             or classification result. The structure of each dictionary varies based on the task type
             (classification or detection) and available information (boxes, masks, keypoints).
 
@@ -862,7 +862,7 @@ class Boxes(BaseTensor):
 
     Attributes:
         data (torch.Tensor | np.ndarray): The raw tensor containing detection boxes and associated data.
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image dimensions (height, width).
         is_track (bool): Indicates whether tracking IDs are included in the box data.
         xyxy (torch.Tensor | np.ndarray): Boxes in [x1, y1, x2, y2] format.
         conf (torch.Tensor | np.ndarray): Confidence scores for each box.
@@ -901,11 +901,11 @@ class Boxes(BaseTensor):
         boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape
             (num_boxes, 6) or (num_boxes, 7). Columns should contain
             [x1, y1, x2, y2, confidence, class, (optional) track_id].
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image shape as (height, width). Used for normalization.
 
     Attributes:
         data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image size, used for normalization.
         is_track (bool): Indicates whether tracking IDs are included in the box data.
 
     Examples:
@@ -1081,8 +1081,8 @@ class Masks(BaseTensor):
     Attributes:
         data (torch.Tensor | np.ndarray): The raw tensor or array containing mask data.
         orig_shape (tuple): Original image shape in (height, width) format.
-        xy (
-        xyn (
+        xy (list[np.ndarray]): A list of segments in pixel coordinates.
+        xyn (list[np.ndarray]): A list of normalized segments.
 
     Methods:
         cpu: Return a copy of the Masks object with the mask tensor on CPU memory.
@@ -1127,7 +1127,7 @@ class Masks(BaseTensor):
         are normalized relative to the original image shape.
 
     Returns:
-        (
+        (list[np.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates
             of a single segmentation mask. Each array has shape (N, 2), where N is the number of points in the
             mask contour.
 
@@ -1152,7 +1152,7 @@ class Masks(BaseTensor):
         Masks object. The coordinates are scaled to match the original image dimensions.
 
     Returns:
-        (
+        (list[np.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel
             coordinates for a single segmentation mask. Each array has shape (N, 2), where N is the
             number of points in the segment.
 
@@ -1179,7 +1179,7 @@ class Keypoints(BaseTensor):
 
     Attributes:
         data (torch.Tensor): The raw tensor containing keypoint data.
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image dimensions (height, width).
         has_visible (bool): Indicates whether visibility information is available for keypoints.
         xy (torch.Tensor): Keypoint coordinates in [x, y] format.
         xyn (torch.Tensor): Normalized keypoint coordinates in [x, y] format, relative to orig_shape.
@@ -1213,7 +1213,7 @@ class Keypoints(BaseTensor):
         keypoints (torch.Tensor): A tensor containing keypoint data. Shape can be either:
             - (num_objects, num_keypoints, 2) for x, y coordinates only
            - (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image dimensions (height, width).
 
     Examples:
         >>> kpts = torch.rand(1, 17, 3)  # 1 object, 17 keypoints (COCO format), x,y,conf
@@ -1301,7 +1301,7 @@ class Probs(BaseTensor):
         data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
         orig_shape (tuple | None): The original image shape as (height, width). Not used in this class.
         top1 (int): Index of the class with the highest probability.
-        top5 (
+        top5 (list[int]): Indices of the top 5 classes by probability.
         top1conf (torch.Tensor | np.ndarray): Confidence score of the top 1 class.
         top5conf (torch.Tensor | np.ndarray): Confidence scores of the top 5 classes.
 
@@ -1339,7 +1339,7 @@ class Probs(BaseTensor):
     Attributes:
         data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
         top1 (int): Index of the top 1 class.
-        top5 (
+        top5 (list[int]): Indices of the top 5 classes.
         top1conf (torch.Tensor | np.ndarray): Confidence of the top 1 class.
         top5conf (torch.Tensor | np.ndarray): Confidences of the top 5 classes.
 
@@ -1379,7 +1379,7 @@ class Probs(BaseTensor):
     Return the indices of the top 5 class probabilities.
 
     Returns:
-        (
+        (list[int]): A list containing the indices of the top 5 class probabilities, sorted in descending order.
 
     Examples:
         >>> probs = Probs(torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]))
@@ -1476,11 +1476,11 @@ class OBB(BaseTensor):
         boxes (torch.Tensor | np.ndarray): A tensor or numpy array containing the detection boxes,
             with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
             If present, the third last column contains track IDs, and the fifth column contains rotation.
-        orig_shape (
+        orig_shape (tuple[int, int]): Original image size, in the format (height, width).
 
     Attributes:
         data (torch.Tensor | np.ndarray): The raw OBB tensor.
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image shape.
         is_track (bool): Whether the boxes include tracking IDs.
 
     Raises:
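All of the results.py edits above are docstring type-annotation fixes, so the runtime Results API is unchanged. As a quick orientation to the attributes being documented, here is a minimal usage sketch (not part of the diff; the weight file name is illustrative):

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")  # any detection weights; name is illustrative
    results = model("path/to/image.jpg")
    for r in results:
        print(r.orig_shape)  # tuple[int, int] as (height, width)
        print(r.speed)  # dict of preprocess/inference/postprocess times in ms/image
        print(r.boxes.xyxy)  # boxes in [x1, y1, x2, y2] format
        print(r.summary(decimals=3))  # list[dict[str, Any]], one entry per result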
ultralytics/engine/trainer.py
CHANGED
@@ -46,6 +46,7 @@ from ultralytics.utils.torch_utils import (
     TORCH_2_4,
     EarlyStopping,
     ModelEMA,
+    attempt_compile,
     autocast,
     convert_optimizer_state_dict_to_fp16,
     init_seeds,
@@ -54,6 +55,7 @@ from ultralytics.utils.torch_utils import (
     strip_optimizer,
     torch_distributed_zero_first,
     unset_deterministic,
+    unwrap_model,
 )
 
 
@@ -256,6 +258,14 @@ class BaseTrainer:
         self.model = self.model.to(self.device)
         self.set_model_attributes()
 
+        # Initialize loss criterion before compilation for torch.compile compatibility
+        if hasattr(self.model, "init_criterion"):
+            self.model.criterion = self.model.init_criterion()
+
+        # Compile model
+        if self.args.compile:
+            self.model = attempt_compile(self.model, device=self.device)
+
         # Freeze layers
         freeze_list = (
             self.args.freeze
@@ -404,6 +414,7 @@ class BaseTrainer:
                 # Forward
                 with autocast(self.amp):
                     batch = self.preprocess_batch(batch)
+                    metadata = {k: batch.pop(k, None) for k in ["im_file", "ori_shape", "resized_shape"]}
                     loss, self.loss_items = self.model(batch)
                     self.loss = loss.sum()
                     if RANK != -1:
@@ -445,6 +456,7 @@ class BaseTrainer:
                 )
                 self.run_callbacks("on_batch_end")
                 if self.args.plots and ni in self.plot_idx:
+                    batch = {**batch, **metadata}
                     self.plot_training_samples(batch, ni)
 
                 self.run_callbacks("on_train_batch_end")
@@ -565,7 +577,7 @@ class BaseTrainer:
             "epoch": self.epoch,
             "best_fitness": self.best_fitness,
             "model": None,  # resume and final checkpoints derive from EMA
-            "ema": deepcopy(self.ema.ema).half(),
+            "ema": deepcopy(unwrap_model(self.ema.ema)).half(),
             "updates": self.ema.updates,
             "optimizer": convert_optimizer_state_dict_to_fp16(deepcopy(self.optimizer.state_dict())),
             "train_args": vars(self.args),  # save as dict
@@ -592,8 +604,6 @@ class BaseTrainer:
             self.best.write_bytes(serialized_ckpt)  # save best.pt
         if (self.save_period > 0) and (self.epoch % self.save_period == 0):
             (self.wdir / f"epoch{self.epoch}.pt").write_bytes(serialized_ckpt)  # save epoch, i.e. 'epoch3.pt'
-        # if self.args.close_mosaic and self.epoch == (self.epochs - self.args.close_mosaic - 1):
-        #     (self.wdir / "last_mosaic.pt").write_bytes(serialized_ckpt)  # save mosaic checkpoint
 
     def get_dataset(self):
         """
@@ -667,7 +677,7 @@ class BaseTrainer:
 
     def validate(self):
        """
-        Run validation on
+        Run validation on val set using self.validator.
 
        Returns:
            metrics (dict): Dictionary of validation metrics.
@@ -755,6 +765,7 @@ class BaseTrainer:
                     strip_optimizer(f, updates={k: ckpt[k]} if k in ckpt else None)
                     LOGGER.info(f"\nValidating {f}...")
                     self.validator.args.plots = self.args.plots
+                    self.validator.args.compile = False  # disable final val compile as too slow
                     self.metrics = self.validator(model=f)
                     self.metrics.pop("fitness", None)
                     self.run_callbacks("on_fit_epoch_end")
ultralytics/engine/validator.py
CHANGED
@@ -36,7 +36,7 @@ from ultralytics.nn.autobackend import AutoBackend
 from ultralytics.utils import LOGGER, TQDM, callbacks, colorstr, emojis
 from ultralytics.utils.checks import check_imgsz
 from ultralytics.utils.ops import Profile
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import attempt_compile, select_device, smart_inference_mode, unwrap_model
 
 
 class BaseValidator:
@@ -148,6 +148,8 @@ class BaseValidator:
             # Force FP16 val during training
             self.args.half = self.device.type != "cpu" and trainer.amp
             model = trainer.ema.ema or trainer.model
+            if trainer.args.compile and hasattr(model, "_orig_mod"):
+                model = model._orig_mod  # validate non-compiled original model to avoid issues
             model = model.half() if self.args.half else model.float()
             self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device)
             self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
@@ -186,6 +188,8 @@ class BaseValidator:
             self.dataloader = self.dataloader or self.get_dataloader(self.data.get(self.args.split), self.args.batch)
 
             model.eval()
+            if self.args.compile:
+                model = attempt_compile(model, device=self.device)
             model.warmup(imgsz=(1 if pt else self.args.batch, self.data["channels"], imgsz, imgsz))  # warmup
 
         self.run_callbacks("on_val_start")
@@ -196,7 +200,7 @@ class BaseValidator:
             Profile(device=self.device),
         )
         bar = TQDM(self.dataloader, desc=self.get_desc(), total=len(self.dataloader))
-        self.init_metrics(
+        self.init_metrics(unwrap_model(model))
         self.jdict = []  # empty before each val
         for batch_i, batch in enumerate(bar):
             self.run_callbacks("on_val_batch_start")
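Both files above lean on the new unwrap_model helper (and, during training-time validation, on torch.compile's _orig_mod attribute) to get back to the original nn.Module before saving weights or collecting metrics. The helper itself is not shown in this diff; a plausible one-line equivalent, offered only as a sketch of the assumed behavior, would be:

    import torch.nn as nn

    def unwrap_model(model: nn.Module) -> nn.Module:
        """Return the underlying module if `model` was wrapped by torch.compile, else the model itself."""
        return getattr(model, "_orig_mod", model)  # assumption: mirrors the helper added in torch_utils.py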
ultralytics/hub/google/__init__.py
CHANGED
@@ -15,7 +15,7 @@ class GCPRegions:
     geographical location, tier classification, and network latency.
 
     Attributes:
-        regions (
+        regions (dict[str, tuple[int, str, str]]): A dictionary of GCP regions with their tier, city, and country.
 
     Methods:
         tier1: Returns a list of tier 1 GCP regions.
@@ -136,7 +136,7 @@ class GCPRegions:
         attempts (int, optional): Number of ping attempts per region.
 
     Returns:
-        (
+        (list[tuple[str, float, float, float, float]]): List of tuples containing region information and
             latency statistics. Each tuple contains (region, mean_latency, std_dev, min_latency, max_latency).
 
     Examples:
ultralytics/hub/session.py
CHANGED
@@ -28,13 +28,13 @@ class HUBTrainingSession:
     Attributes:
         model_id (str): Identifier for the YOLO model being trained.
         model_url (str): URL for the model in Ultralytics HUB.
-        rate_limits (
-        timers (
-        metrics_queue (
-        metrics_upload_failed_queue (
+        rate_limits (dict[str, int]): Rate limits for different API calls in seconds.
+        timers (dict[str, Any]): Timers for rate limiting.
+        metrics_queue (dict[str, Any]): Queue for the model's metrics.
+        metrics_upload_failed_queue (dict[str, Any]): Queue for metrics that failed to upload.
         model (Any): Model data fetched from Ultralytics HUB.
         model_file (str): Path to the model file.
-        train_args (
+        train_args (dict[str, Any]): Arguments for training the model.
         client (Any): Client for interacting with Ultralytics HUB.
         filename (str): Filename of the model.
 
@@ -98,7 +98,7 @@ class HUBTrainingSession:
 
     Args:
         identifier (str): Model identifier used to initialize the HUB training session.
-        args (
+        args (dict[str, Any], optional): Arguments for creating a new model if identifier is not a HUB model URL.
 
     Returns:
         session (HUBTrainingSession | None): An authenticated session or None if creation fails.
@@ -144,7 +144,7 @@ class HUBTrainingSession:
     Initialize a HUB training session with the specified model arguments.
 
     Args:
-        model_args (
+        model_args (dict[str, Any]): Arguments for creating the model, including batch size, epochs, image size,
             etc.
 
     Returns:
ultralytics/models/fastsam/model.py
CHANGED
@@ -63,14 +63,14 @@ class FastSAM(Model):
         source (str | PIL.Image | np.ndarray): Input source for prediction, can be a file path, URL, PIL image,
             or numpy array.
         stream (bool): Whether to enable real-time streaming mode for video inputs.
-        bboxes (
-        points (
-        labels (
-        texts (
+        bboxes (list, optional): Bounding box coordinates for prompted segmentation in format [[x1, y1, x2, y2]].
+        points (list, optional): Point coordinates for prompted segmentation in format [[x, y]].
+        labels (list, optional): Class labels for prompted segmentation.
+        texts (list, optional): Text prompts for segmentation guidance.
         **kwargs (Any): Additional keyword arguments passed to the predictor.
 
     Returns:
-        (
+        (list): List of Results objects containing the prediction results.
     """
     prompts = dict(bboxes=bboxes, points=points, labels=labels, texts=texts)
     return super().predict(source, stream, prompts=prompts, **kwargs)
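The updated FastSAM.predict docstring spells out the prompt types it forwards to the predictor. A short usage sketch of that documented interface (weight name and prompt values are illustrative only):

    from ultralytics import FastSAM

    model = FastSAM("FastSAM-s.pt")
    # Any subset of box, point/label, or text prompts may be supplied
    results = model.predict(
        "path/to/image.jpg",
        bboxes=[[100, 120, 400, 360]],  # [[x1, y1, x2, y2]] in pixels
        texts="a photo of a dog",
    )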
ultralytics/models/fastsam/predict.py
CHANGED
@@ -52,12 +52,12 @@ class FastSAMPredictor(SegmentationPredictor):
     Apply postprocessing to FastSAM predictions and handle prompts.
 
     Args:
-        preds (
+        preds (list[torch.Tensor]): Raw predictions from the model.
         img (torch.Tensor): Input image tensor that was fed to the model.
-        orig_imgs (
+        orig_imgs (list[np.ndarray]): Original images before preprocessing.
 
     Returns:
-        (
+        (list[Results]): Processed results with prompts applied.
     """
     bboxes = self.prompts.pop("bboxes", None)
     points = self.prompts.pop("points", None)
@@ -80,14 +80,14 @@ class FastSAMPredictor(SegmentationPredictor):
     Perform image segmentation inference based on cues like bounding boxes, points, and text prompts.
 
     Args:
-        results (Results |
-        bboxes (np.ndarray |
-        points (np.ndarray |
-        labels (np.ndarray |
-        texts (str |
+        results (Results | list[Results]): Original inference results from FastSAM models without any prompts.
+        bboxes (np.ndarray | list, optional): Bounding boxes with shape (N, 4), in XYXY format.
+        points (np.ndarray | list, optional): Points indicating object locations with shape (N, 2), in pixels.
+        labels (np.ndarray | list, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+        texts (str | list[str], optional): Textual prompts, a list containing string objects.
 
     Returns:
-        (
+        (list[Results]): Output results filtered and determined by the provided prompts.
     """
     if bboxes is None and points is None and texts is None:
         return results
@@ -154,8 +154,8 @@ class FastSAMPredictor(SegmentationPredictor):
     Perform CLIP inference to calculate similarity between images and text prompts.
 
     Args:
-        images (
-        texts (
+        images (list[PIL.Image]): List of source images, each should be PIL.Image with RGB channel order.
+        texts (list[str]): List of prompt texts, each should be a string object.
 
     Returns:
         (torch.Tensor): Similarity matrix between given images and texts with shape (M, N).
ultralytics/models/nas/model.py
CHANGED
@@ -91,7 +91,7 @@ class NAS(Model):
         verbose (bool): Controls verbosity.
 
     Returns:
-        (
+        (dict[str, Any]): Model information dictionary.
     """
     return model_info(self.model, detailed=detailed, verbose=verbose, imgsz=640)
 
ultralytics/models/rtdetr/predict.py
CHANGED
@@ -47,7 +47,7 @@ class RTDETRPredictor(BasePredictor):
         orig_imgs (list | torch.Tensor): Original, unprocessed images.
 
     Returns:
-        results (
+        results (list[Results]): A list of Results objects containing the post-processed bounding boxes,
             confidence scores, and class labels.
     """
     if not isinstance(preds, (list, tuple)):  # list for PyTorch inference but list[0] Tensor for export inference
@@ -82,7 +82,7 @@ class RTDETRPredictor(BasePredictor):
         (640) and scale_filled.
 
     Args:
-        im (
+        im (list[np.ndarray] | torch.Tensor): Input images of shape (N, 3, H, W) for tensor,
             [(H, W, 3) x N] for list.
 
     Returns:
ultralytics/models/rtdetr/val.py
CHANGED
@@ -163,11 +163,11 @@ class RTDETRValidator(DetectionValidator):
     Apply Non-maximum suppression to prediction outputs.
 
     Args:
-        preds (torch.Tensor |
+        preds (torch.Tensor | list | tuple): Raw predictions from the model. If tensor, should have shape
             (batch_size, num_predictions, num_classes + 4) where last dimension contains bbox coords and class scores.
 
     Returns:
-        (
+        (list[dict[str, torch.Tensor]]): List of dictionaries for each image, each containing:
             - 'bboxes': Tensor of shape (N, 4) with bounding box coordinates
             - 'conf': Tensor of shape (N,) with confidence scores
             - 'cls': Tensor of shape (N,) with class indices
@@ -194,9 +194,9 @@ class RTDETRValidator(DetectionValidator):
     Serialize YOLO predictions to COCO json format.
 
     Args:
-        predn (
+        predn (dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys
             with bounding box coordinates, confidence scores, and class predictions.
-        pbatch (
+        pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
     """
     path = Path(pbatch["im_file"])
     stem = path.stem
ultralytics/models/sam/amg.py
CHANGED
@@ -19,8 +19,8 @@ def is_box_near_crop_edge(
 
     Args:
         boxes (torch.Tensor): Bounding boxes in XYXY format.
-        crop_box (
-        orig_box (
+        crop_box (list[int]): Crop box coordinates in [x0, y0, x1, y1] format.
+        orig_box (list[int]): Original image box coordinates in [x0, y0, x1, y1] format.
         atol (float, optional): Absolute tolerance for edge proximity detection.
 
     Returns:
@@ -53,7 +53,7 @@ def batch_iterator(batch_size: int, *args) -> Generator[list[Any]]:
         *args (Any): Variable length input iterables to batch. All iterables must have the same length.
 
     Yields:
-        (
+        (list[Any]): A list of batched elements from each input iterable.
 
     Examples:
         >>> data = [1, 2, 3, 4, 5]
@@ -121,13 +121,13 @@ def generate_crop_boxes(
     Generate crop boxes of varying sizes for multiscale image processing, with layered overlapping regions.
 
     Args:
-        im_size (
+        im_size (tuple[int, ...]): Height and width of the input image.
         n_layers (int): Number of layers to generate crop boxes for.
         overlap_ratio (float): Ratio of overlap between adjacent crop boxes.
 
     Returns:
-        crop_boxes (
-        layer_idxs (
+        crop_boxes (list[list[int]]): List of crop boxes in [x0, y0, x1, y1] format.
+        layer_idxs (list[int]): List of layer indices corresponding to each crop box.
 
     Examples:
         >>> im_size = (800, 1200)  # Height, width
ultralytics/models/sam/build.py
CHANGED
@@ -130,10 +130,10 @@ def _build_sam(
     Build a Segment Anything Model (SAM) with specified encoder parameters.
 
     Args:
-        encoder_embed_dim (int |
-        encoder_depth (int |
-        encoder_num_heads (int |
-        encoder_global_attn_indexes (
+        encoder_embed_dim (int | list[int]): Embedding dimension for the encoder.
+        encoder_depth (int | list[int]): Depth of the encoder.
+        encoder_num_heads (int | list[int]): Number of attention heads in the encoder.
+        encoder_global_attn_indexes (list[int] | None): Indexes for global attention in the encoder.
         checkpoint (str | None, optional): Path to the model checkpoint file.
         mobile_sam (bool, optional): Whether to build a Mobile-SAM model.
 
@@ -228,12 +228,12 @@ def _build_sam2(
 
     Args:
         encoder_embed_dim (int, optional): Embedding dimension for the encoder.
-        encoder_stages (
+        encoder_stages (list[int], optional): Number of blocks in each stage of the encoder.
         encoder_num_heads (int, optional): Number of attention heads in the encoder.
-        encoder_global_att_blocks (
-        encoder_backbone_channel_list (
-        encoder_window_spatial_size (
-        encoder_window_spec (
+        encoder_global_att_blocks (list[int], optional): Indices of global attention blocks in the encoder.
+        encoder_backbone_channel_list (list[int], optional): Channel dimensions for each level of the encoder backbone.
+        encoder_window_spatial_size (list[int], optional): Spatial size of the window for position embeddings.
+        encoder_window_spec (list[int], optional): Window specifications for each stage of the encoder.
         checkpoint (str | None, optional): Path to the checkpoint file for loading pre-trained weights.
 
     Returns:
ultralytics/models/sam/model.py
CHANGED
@@ -91,9 +91,9 @@ class SAM(Model):
         source (str | PIL.Image | np.ndarray): Path to the image or video file, or a PIL.Image object, or
             a np.ndarray object.
         stream (bool): If True, enables real-time streaming.
-        bboxes (
-        points (
-        labels (
+        bboxes (list[list[float]] | None): List of bounding box coordinates for prompted segmentation.
+        points (list[list[float]] | None): List of points for prompted segmentation.
+        labels (list[int] | None): List of labels for prompted segmentation.
         **kwargs (Any): Additional keyword arguments for prediction.
 
     Returns:
@@ -121,9 +121,9 @@ class SAM(Model):
         source (str | PIL.Image | np.ndarray | None): Path to the image or video file, or a PIL.Image
             object, or a np.ndarray object.
         stream (bool): If True, enables real-time streaming.
-        bboxes (
-        points (
-        labels (
+        bboxes (list[list[float]] | None): List of bounding box coordinates for prompted segmentation.
+        points (list[list[float]] | None): List of points for prompted segmentation.
+        labels (list[int] | None): List of labels for prompted segmentation.
         **kwargs (Any): Additional keyword arguments to be passed to the predict method.
 
     Returns:
@@ -160,7 +160,7 @@ class SAM(Model):
     Provide a mapping from the 'segment' task to its corresponding 'Predictor'.
 
     Returns:
-        (
+        (dict[str, dict[str, Type[Predictor]]]): A dictionary mapping the 'segment' task to its corresponding
             Predictor class. For SAM2 models, it maps to SAM2Predictor, otherwise to the standard Predictor.
 
     Examples:
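As with FastSAM above, the SAM docstring fixes only document the existing prompt types rather than change behavior. A brief sketch of the documented call (weight name and prompt values are illustrative):

    from ultralytics import SAM

    model = SAM("sam2.1_b.pt")
    # Box and point prompts; labels mark points as foreground (1) or background (0)
    results = model("path/to/image.jpg", bboxes=[[100, 120, 400, 360]], points=[[250, 240]], labels=[1])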
ultralytics/models/sam/modules/blocks.py
CHANGED
@@ -593,7 +593,7 @@ class MultiScaleBlock(nn.Module):
     norm1 (nn.Module): First normalization layer.
     window_size (int): Size of the window for partitioning.
     pool (nn.Module | None): Pooling layer for query downsampling.
-    q_stride (
+    q_stride (tuple[int, int] | None): Stride for query pooling.
     attn (MultiScaleAttention): Multi-scale attention module.
     drop_path (nn.Module): Drop path layer for regularization.
     norm2 (nn.Module): Second normalization layer.
@@ -934,7 +934,7 @@ class Block(nn.Module):
     use_rel_pos (bool): If True, uses relative positional embeddings in attention.
     rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
     window_size (int): Size of attention window. If 0, uses global attention.
-    input_size (
+    input_size (tuple[int, int] | None): Input resolution for calculating relative positional parameter size.
 
     Examples:
         >>> block = Block(dim=256, num_heads=8, window_size=7)
@@ -1026,7 +1026,7 @@ class REAttention(nn.Module):
     qkv_bias (bool): If True, adds a learnable bias to query, key, value projections.
     use_rel_pos (bool): If True, uses relative positional encodings.
     rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
-    input_size (
+    input_size (tuple[int, int] | None): Input resolution for calculating relative positional parameter size.
         Required if use_rel_pos is True.
 
     Examples:
@@ -1106,9 +1106,9 @@ class PatchEmbed(nn.Module):
     image data into a suitable format for subsequent transformer blocks.
 
     Args:
-        kernel_size (
-        stride (
-        padding (
+        kernel_size (tuple[int, int]): Size of the convolutional kernel for patch extraction.
+        stride (tuple[int, int]): Stride of the convolutional operation.
+        padding (tuple[int, int]): Padding applied to the input before convolution.
         in_chans (int): Number of input image channels.
         embed_dim (int): Dimensionality of the output patch embeddings.
 
ultralytics/models/sam/modules/decoders.py
CHANGED
@@ -329,7 +329,7 @@ class SAM2MaskDecoder(nn.Module):
     dense_prompt_embeddings (torch.Tensor): Embeddings of the mask inputs with shape (B, C, H, W).
     multimask_output (bool): Whether to return multiple masks or a single mask.
     repeat_image (bool): Flag to repeat the image embeddings.
-    high_res_features (
+    high_res_features (list[torch.Tensor] | None, optional): Optional high-resolution features.
 
     Returns:
         masks (torch.Tensor): Batched predicted masks with shape (B, N, H, W).
|