dgenerate-ultralytics-headless 8.3.236__py3-none-any.whl → 8.3.239__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/RECORD +117 -105
- tests/test_exports.py +3 -1
- tests/test_python.py +2 -2
- tests/test_solutions.py +6 -6
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +4 -4
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/kitti.yaml +1 -1
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/data/augment.py +1 -1
- ultralytics/data/base.py +4 -2
- ultralytics/data/build.py +4 -4
- ultralytics/data/loaders.py +17 -12
- ultralytics/data/utils.py +4 -4
- ultralytics/engine/exporter.py +40 -25
- ultralytics/engine/predictor.py +8 -6
- ultralytics/engine/results.py +12 -13
- ultralytics/engine/trainer.py +10 -2
- ultralytics/engine/tuner.py +2 -3
- ultralytics/engine/validator.py +2 -2
- ultralytics/models/fastsam/model.py +2 -2
- ultralytics/models/fastsam/predict.py +2 -3
- ultralytics/models/fastsam/val.py +4 -4
- ultralytics/models/rtdetr/predict.py +2 -3
- ultralytics/models/rtdetr/val.py +10 -5
- ultralytics/models/sam/__init__.py +14 -1
- ultralytics/models/sam/build.py +22 -13
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +13 -5
- ultralytics/models/sam/modules/blocks.py +20 -8
- ultralytics/models/sam/modules/decoders.py +2 -3
- ultralytics/models/sam/modules/encoders.py +4 -1
- ultralytics/models/sam/modules/memory_attention.py +6 -2
- ultralytics/models/sam/modules/sam.py +159 -10
- ultralytics/models/sam/modules/utils.py +134 -4
- ultralytics/models/sam/predict.py +2073 -139
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +535 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +198 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +546 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -1
- ultralytics/models/yolo/detect/val.py +7 -7
- ultralytics/models/yolo/obb/val.py +19 -8
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/val.py +1 -1
- ultralytics/nn/autobackend.py +9 -9
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/modules/transformer.py +21 -1
- ultralytics/nn/tasks.py +3 -3
- ultralytics/nn/text_model.py +2 -7
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +6 -6
- ultralytics/solutions/config.py +1 -1
- ultralytics/solutions/distance_calculation.py +1 -1
- ultralytics/solutions/object_counter.py +1 -1
- ultralytics/solutions/object_cropper.py +3 -6
- ultralytics/solutions/parking_management.py +21 -17
- ultralytics/solutions/queue_management.py +5 -5
- ultralytics/solutions/region_counter.py +2 -2
- ultralytics/solutions/security_alarm.py +1 -1
- ultralytics/solutions/solutions.py +45 -22
- ultralytics/solutions/speed_estimation.py +1 -1
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/bot_sort.py +4 -3
- ultralytics/trackers/byte_tracker.py +4 -4
- ultralytics/trackers/utils/gmc.py +6 -7
- ultralytics/trackers/utils/kalman_filter.py +2 -1
- ultralytics/trackers/utils/matching.py +4 -3
- ultralytics/utils/__init__.py +12 -3
- ultralytics/utils/benchmarks.py +2 -2
- ultralytics/utils/callbacks/tensorboard.py +19 -25
- ultralytics/utils/checks.py +4 -3
- ultralytics/utils/downloads.py +1 -1
- ultralytics/utils/export/tensorflow.py +16 -2
- ultralytics/utils/files.py +13 -12
- ultralytics/utils/logger.py +62 -27
- ultralytics/utils/metrics.py +1 -1
- ultralytics/utils/ops.py +7 -9
- ultralytics/utils/patches.py +3 -3
- ultralytics/utils/plotting.py +7 -12
- ultralytics/utils/tuner.py +1 -1
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/top_level.txt +0 -0
ultralytics/engine/exporter.py
CHANGED
@@ -66,7 +66,6 @@ import re
 import shutil
 import subprocess
 import time
-import warnings
 from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
@@ -128,7 +127,15 @@ from ultralytics.utils.metrics import batch_probiou
 from ultralytics.utils.nms import TorchNMS
 from ultralytics.utils.ops import Profile
 from ultralytics.utils.patches import arange_patch
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import (
+    TORCH_1_10,
+    TORCH_1_11,
+    TORCH_1_13,
+    TORCH_2_1,
+    TORCH_2_4,
+    TORCH_2_9,
+    select_device,
+)


 def export_formats():
@@ -306,7 +313,11 @@ class Exporter:
         callbacks.add_integration_callbacks(self)

     def __call__(self, model=None) -> str:
-        """
+        """Export a model and return the final exported path as a string.
+
+        Returns:
+            (str): Path to the exported file or directory (the last export artifact).
+        """
         t = time.time()
         fmt = self.args.format.lower()  # to lowercase
         if fmt in {"tensorrt", "trt"}:  # 'engine' aliases
@@ -356,9 +367,10 @@ class Exporter:
             LOGGER.warning("TensorRT requires GPU export, automatically assigning device=0")
             self.args.device = "0"
         if engine and "dla" in str(self.args.device):  # convert int/list to str first
-
+            device_str = str(self.args.device)
+            dla = device_str.rsplit(":", 1)[-1]
             self.args.device = "0"  # update device to "0"
-            assert dla in {"0", "1"}, f"Expected
+            assert dla in {"0", "1"}, f"Expected device 'dla:0' or 'dla:1', but got {device_str}."
         if imx and self.args.device is None and torch.cuda.is_available():
             LOGGER.warning("Exporting on CPU while CUDA is available, setting device=0 for faster export on GPU.")
             self.args.device = "0"  # update device to "0"
@@ -369,7 +381,7 @@ class Exporter:
         validate_args(fmt, self.args, fmt_keys)
         if axelera:
             if not IS_PYTHON_3_10:
-                SystemError("Axelera export only supported on Python 3.10.")
+                raise SystemError("Axelera export only supported on Python 3.10.")
             if not self.args.int8:
                 LOGGER.warning("Setting int8=True for Axelera mixed-precision export.")
                 self.args.int8 = True
@@ -392,8 +404,10 @@ class Exporter:
         if self.args.half and self.args.int8:
             LOGGER.warning("half=True and int8=True are mutually exclusive, setting half=False.")
             self.args.half = False
-        if self.args.half and
-            LOGGER.warning(
+        if self.args.half and jit and self.device.type == "cpu":
+            LOGGER.warning(
+                "half=True only compatible with GPU export for TorchScript, i.e. use device=0, setting half=False."
+            )
             self.args.half = False
         self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2)  # check image size
         if self.args.optimize:
@@ -503,11 +517,6 @@ class Exporter:
         if self.args.half and (onnx or jit) and self.device.type != "cpu":
            im, model = im.half(), model.half()  # to FP16

-        # Filter warnings
-        warnings.filterwarnings("ignore", category=torch.jit.TracerWarning)  # suppress TracerWarning
-        warnings.filterwarnings("ignore", category=UserWarning)  # suppress shape prim::Constant missing ONNX warning
-        warnings.filterwarnings("ignore", category=DeprecationWarning)  # suppress CoreML np.bool deprecation warning
-
         # Assign
         self.im = im
         self.model = model
@@ -608,7 +617,7 @@ class Exporter:
         )

         self.run_callbacks("on_export_end")
-        return f  #
+        return f  # path to final export artifact

     def get_int8_calibration_dataloader(self, prefix=""):
         """Build and return a dataloader for calibration of INT8 models."""
@@ -655,7 +664,7 @@ class Exporter:
     @try_export
     def export_onnx(self, prefix=colorstr("ONNX:")):
         """Export YOLO model to ONNX format."""
-        requirements = ["onnx>=1.12.0
+        requirements = ["onnx>=1.12.0,<2.0.0"]
         if self.args.simplify:
             requirements += ["onnxslim>=0.1.71", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
         check_requirements(requirements)
@@ -716,6 +725,16 @@ class Exporter:
             LOGGER.info(f"{prefix} limiting IR version {model_onnx.ir_version} to 10 for ONNXRuntime compatibility...")
             model_onnx.ir_version = 10

+        # FP16 conversion for CPU export (GPU exports are already FP16 from model.half() during tracing)
+        if self.args.half and self.args.format == "onnx" and self.device.type == "cpu":
+            try:
+                from onnxruntime.transformers import float16
+
+                LOGGER.info(f"{prefix} converting to FP16...")
+                model_onnx = float16.convert_float_to_float16(model_onnx, keep_io_types=True)
+            except Exception as e:
+                LOGGER.warning(f"{prefix} FP16 conversion failure: {e}")
+
         onnx.save(model_onnx, f)
         return f

@@ -821,6 +840,7 @@ class Exporter:
     @try_export
     def export_mnn(self, prefix=colorstr("MNN:")):
         """Export YOLO model to MNN format using MNN https://github.com/alibaba/MNN."""
+        assert TORCH_1_10, "MNN export requires torch>=1.10.0 to avoid segmentation faults"
         f_onnx = self.export_onnx()  # get onnx model first

         check_requirements("MNN>=2.9.6")
@@ -930,7 +950,7 @@ class Exporter:

         # Based on apple's documentation it is better to leave out the minimum_deployment target and let that get set
         # Internally based on the model conversion and output type.
-        # Setting
+        # Setting minimum_deployment_target >= iOS16 will require setting compute_precision=ct.precision.FLOAT32.
         # iOS16 adds in better support for FP16, but none of the CoreML NMS specifications handle FP16 as input.
         ct_model = ct.convert(
             ts,
@@ -1025,7 +1045,7 @@ class Exporter:
             "sng4onnx>=1.0.1",  # required by 'onnx2tf' package
             "onnx_graphsurgeon>=0.3.26",  # required by 'onnx2tf' package
             "ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""),  # required by 'onnx2tf' package
-            "onnx>=1.12.0
+            "onnx>=1.12.0,<2.0.0",
             "onnx2tf>=1.26.3",
             "onnxslim>=0.1.71",
             "onnxruntime-gpu" if cuda else "onnxruntime",
@@ -1220,10 +1240,9 @@ class Exporter:
                 f"{sudo}mkdir -p /etc/apt/keyrings",
                 f"curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | {sudo}gpg --dearmor -o /etc/apt/keyrings/google.gpg",
                 f'echo "deb [signed-by=/etc/apt/keyrings/google.gpg] https://packages.cloud.google.com/apt coral-edgetpu-stable main" | {sudo}tee /etc/apt/sources.list.d/coral-edgetpu.list',
-                f"{sudo}apt-get update",
-                f"{sudo}apt-get install -y edgetpu-compiler",
             ):
                 subprocess.run(c, shell=True, check=True)
+            check_apt_requirements(["edgetpu-compiler"])

         ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().rsplit(maxsplit=1)[-1]
         LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...")
@@ -1301,16 +1320,12 @@ class Exporter:
             java_version = int(version_match.group(1)) if version_match else 0
             assert java_version >= 17, "Java version too old"
         except (FileNotFoundError, subprocess.CalledProcessError, AssertionError):
-            cmd = None
             if IS_UBUNTU or IS_DEBIAN_TRIXIE:
                 LOGGER.info(f"\n{prefix} installing Java 21 for Ubuntu...")
-
+                check_apt_requirements(["openjdk-21-jre"])
             elif IS_RASPBERRYPI or IS_DEBIAN_BOOKWORM:
                 LOGGER.info(f"\n{prefix} installing Java 17 for Raspberry Pi or Debian ...")
-
-
-            if cmd:
-                subprocess.run(cmd, check=True)
+                check_apt_requirements(["openjdk-17-jre"])

         return torch2imx(
             self.model,
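The exporter changes above add a CPU-side FP16 conversion for ONNX output and document the return value of `Exporter.__call__`. A minimal sketch of how these surface through the public API (model name and paths are illustrative, not taken from this diff):

```python
from ultralytics import YOLO

# With half=True on a CPU device, 8.3.239 attempts FP16 conversion of the saved ONNX graph
# via onnxruntime.transformers.float16 (see the export_onnx hunk above); on GPU the model is
# already traced in FP16. The export call itself is standard Ultralytics usage.
model = YOLO("yolo11n.pt")
onnx_file = model.export(format="onnx", half=True, device="cpu", simplify=True)
print(onnx_file)  # Exporter.__call__ now documents this: the path to the final export artifact
```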
ultralytics/engine/predictor.py
CHANGED
@@ -55,8 +55,8 @@ from ultralytics.utils.files import increment_path
 from ultralytics.utils.torch_utils import attempt_compile, select_device, smart_inference_mode

 STREAM_WARNING = """
-
-
+Inference results will accumulate in RAM unless `stream=True` is passed, which can cause out-of-memory errors for large
+sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

 Example:
     results = model(source=..., stream=True)  # generator of Results objects
@@ -222,7 +222,7 @@ class BasePredictor:
         if stream:
             return self.stream_inference(source, model, *args, **kwargs)
         else:
-            return list(self.stream_inference(source, model, *args, **kwargs))  # merge list of
+            return list(self.stream_inference(source, model, *args, **kwargs))  # merge list of Results into one

     def predict_cli(self, source=None, model=None):
         """Method used for Command Line Interface (CLI) prediction.
@@ -244,14 +244,15 @@ class BasePredictor:
         for _ in gen:  # sourcery skip: remove-empty-nested-block, noqa
             pass

-    def setup_source(self, source):
+    def setup_source(self, source, stride: int | None = None):
         """Set up source and inference mode.

         Args:
             source (str | Path | list[str] | list[Path] | list[np.ndarray] | np.ndarray | torch.Tensor): Source for
                 inference.
+            stride (int, optional): Model stride for image size checking.
         """
-        self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2)  # check image size
+        self.imgsz = check_imgsz(self.args.imgsz, stride=stride or self.model.stride, min_dim=2)  # check image size
         self.dataset = load_inference_source(
             source=source,
             batch=self.args.batch,
@@ -315,7 +316,8 @@ class BasePredictor:
                 ops.Profile(device=self.device),
             )
             self.run_callbacks("on_predict_start")
-            for
+            for batch in self.dataset:
+                self.batch = batch
                 self.run_callbacks("on_predict_batch_start")
                 paths, im0s, s = self.batch
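The reworded STREAM_WARNING above spells out why `stream=True` matters for long sources; the snippet below simply restates the documented usage (file name is illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# stream=True yields one Results object at a time instead of accumulating the whole
# list in RAM, which is the out-of-memory scenario the warning describes.
for result in model.predict("long_video.mp4", stream=True):
    boxes = result.boxes  # handle each frame here; nothing accumulates across iterations
```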
ultralytics/engine/results.py
CHANGED
@@ -91,17 +91,17 @@ class BaseTensor(SimpleClass):
         return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)

     def numpy(self):
-        """Return a copy of
+        """Return a copy of this object with its data converted to a NumPy array.

         Returns:
-            (
+            (BaseTensor): A new instance with `data` as a NumPy array.

         Examples:
             >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
             >>> orig_shape = (720, 1280)
             >>> base_tensor = BaseTensor(data, orig_shape)
-            >>>
-            >>> print(type(
+            >>> numpy_tensor = base_tensor.numpy()
+            >>> print(type(numpy_tensor.data))
             <class 'numpy.ndarray'>
         """
         return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
@@ -110,8 +110,7 @@ class BaseTensor(SimpleClass):
         """Move the tensor to GPU memory.

         Returns:
-            (BaseTensor): A new BaseTensor instance with the data moved to GPU memory
-                otherwise returns self.
+            (BaseTensor): A new BaseTensor instance with the data moved to GPU memory.

         Examples:
             >>> import torch
@@ -201,14 +200,14 @@ class Results(SimpleClass, DataExportMixin):
         cuda: Move all tensors in the Results object to GPU memory.
         to: Move all tensors to the specified device and dtype.
         new: Create a new Results object with the same image, path, names, and speed attributes.
-        plot: Plot detection results on an input
+        plot: Plot detection results on an input BGR image.
         show: Display the image with annotated inference results.
         save: Save annotated inference results image to file.
         verbose: Return a log string for each task in the results.
         save_txt: Save detection results to a text file.
         save_crop: Save cropped detection images to specified directory.
         summary: Convert inference results to a summarized dictionary.
-        to_df: Convert detection results to a Polars
+        to_df: Convert detection results to a Polars DataFrame.
         to_json: Convert detection results to JSON format.
         to_csv: Convert detection results to a CSV format.

@@ -461,7 +460,7 @@ class Results(SimpleClass, DataExportMixin):
         color_mode: str = "class",
         txt_color: tuple[int, int, int] = (255, 255, 255),
     ) -> np.ndarray:
-        """Plot detection results on an input
+        """Plot detection results on an input BGR image.

         Args:
             conf (bool): Whether to plot detection confidence scores.
@@ -481,10 +480,10 @@ class Results(SimpleClass, DataExportMixin):
             save (bool): Whether to save the annotated image.
             filename (str | None): Filename to save image if save is True.
             color_mode (str): Specify the color mode, e.g., 'instance' or 'class'.
-            txt_color (tuple[int, int, int]):
+            txt_color (tuple[int, int, int]): Text color in BGR format for classification output.

         Returns:
-            (np.ndarray): Annotated image as a
+            (np.ndarray | PIL.Image.Image): Annotated image as a NumPy array (BGR) or PIL image (RGB) if `pil=True`.

         Examples:
             >>> results = model("image.jpg")
@@ -734,10 +733,10 @@ class Results(SimpleClass, DataExportMixin):
             - Original image is copied before cropping to avoid modifying the original.
         """
         if self.probs is not None:
-            LOGGER.warning("Classify task
+            LOGGER.warning("Classify task does not support `save_crop`.")
             return
         if self.obb is not None:
-            LOGGER.warning("OBB task
+            LOGGER.warning("OBB task does not support `save_crop`.")
             return
         for d in self.boxes:
             save_one_box(
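The docstring fixes above clarify that `plot()` operates on the BGR input image and that `BaseTensor.numpy()` returns a copy with NumPy data. A short sketch tying those together (model and image are illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
results = model("image.jpg")

annotated = results[0].plot()        # np.ndarray annotated image in BGR
pil_img = results[0].plot(pil=True)  # PIL.Image in RGB, per the updated Returns docstring
boxes_np = results[0].boxes.numpy()  # BaseTensor copy whose .data is a NumPy array
```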
ultralytics/engine/trainer.py
CHANGED
@@ -714,11 +714,11 @@ class BaseTrainer:
         raise NotImplementedError("This task trainer doesn't support loading cfg files")

     def get_validator(self):
-        """
+        """Raise NotImplementedError (must be implemented by subclasses)."""
         raise NotImplementedError("get_validator function not implemented in trainer")

     def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
-        """
+        """Raise NotImplementedError (must return a `torch.utils.data.DataLoader` in subclasses)."""
         raise NotImplementedError("get_dataloader function not implemented in trainer")

     def build_dataset(self, img_path, mode="train", batch=None):
@@ -812,6 +812,14 @@ class BaseTrainer:
            "device",
            "close_mosaic",
            "augmentations",
+           "save_period",
+           "workers",
+           "cache",
+           "patience",
+           "time",
+           "freeze",
+           "val",
+           "plots",
         ):  # allow arg updates to reduce memory or update device on resume
             if k in overrides:
                 setattr(self.args, k, overrides[k])
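The expanded tuple above widens which arguments may be overridden when a run is resumed. A hedged sketch of what that should allow (checkpoint path and values are illustrative; the behavior is inferred from the `# allow arg updates ... on resume` comment in the hunk):

```python
from ultralytics import YOLO

# Resume an interrupted run while adjusting now-whitelisted args such as workers,
# save_period, patience or plots; keys outside this list still come from the checkpoint.
model = YOLO("runs/detect/train/weights/last.pt")
model.train(resume=True, workers=2, save_period=5, patience=50, plots=False)
```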
ultralytics/engine/tuner.py
CHANGED
@@ -8,7 +8,7 @@ that yield the best model performance. This is particularly crucial in deep lear
 where small changes in hyperparameters can lead to significant differences in model accuracy and efficiency.

 Examples:
-    Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=
+    Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=10 for 300 tuning iterations.
     >>> from ultralytics import YOLO
     >>> model = YOLO("yolo11n.pt")
     >>> model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
@@ -55,7 +55,7 @@ class Tuner:
         __call__: Execute the hyperparameter evolution across multiple iterations.

     Examples:
-        Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=
+        Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=10 for 300 tuning iterations.
         >>> from ultralytics import YOLO
         >>> model = YOLO("yolo11n.pt")
         >>> model.tune(
@@ -283,7 +283,6 @@ class Tuner:
         """Mutate hyperparameters based on bounds and scaling factors specified in `self.space`.

         Args:
-            parent (str): Parent selection method (kept for API compatibility, unused in BLX mode).
             n (int): Number of top parents to consider.
             mutation (float): Probability of a parameter mutation in any given iteration.
             sigma (float): Standard deviation for Gaussian random number generator.
ultralytics/engine/validator.py
CHANGED
@@ -48,7 +48,7 @@ class BaseValidator:

     Attributes:
         args (SimpleNamespace): Configuration for the validator.
-        dataloader (DataLoader):
+        dataloader (DataLoader): DataLoader to use for validation.
         model (nn.Module): Model to validate.
         data (dict): Data dictionary containing dataset information.
         device (torch.device): Device to use for validation.
@@ -95,7 +95,7 @@ class BaseValidator:
         """Initialize a BaseValidator instance.

         Args:
-            dataloader (torch.utils.data.DataLoader, optional):
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
             save_dir (Path, optional): Directory to save results.
             args (SimpleNamespace, optional): Configuration for the validator.
             _callbacks (dict, optional): Dictionary to store various callback functions.
ultralytics/models/fastsam/model.py
CHANGED
@@ -12,7 +12,7 @@ from .val import FastSAMValidator


 class FastSAM(Model):
-    """FastSAM model interface for
+    """FastSAM model interface for Segment Anything tasks.

     This class extends the base Model class to provide specific functionality for the FastSAM (Fast Segment Anything
     Model) implementation, allowing for efficient and accurate image segmentation with optional prompting support.
@@ -39,7 +39,7 @@ class FastSAM(Model):
         """Initialize the FastSAM model with the specified pre-trained weights."""
         if str(model) == "FastSAM.pt":
             model = "FastSAM-x.pt"
-        assert Path(model).suffix not in {".yaml", ".yml"}, "FastSAM
+        assert Path(model).suffix not in {".yaml", ".yml"}, "FastSAM only supports pre-trained weights."
         super().__init__(model=model, task="segment")

     def predict(
ultralytics/models/fastsam/predict.py
CHANGED
@@ -22,8 +22,7 @@ class FastSAMPredictor(SegmentationPredictor):
     Attributes:
         prompts (dict): Dictionary containing prompt information for segmentation (bboxes, points, labels, texts).
         device (torch.device): Device on which model and tensors are processed.
-
-        clip_preprocess (Any, optional): CLIP preprocessing function for images, loaded on demand.
+        clip (Any, optional): CLIP model used for text-based prompting, loaded on demand.

     Methods:
         postprocess: Apply postprocessing to FastSAM predictions and handle prompts.
@@ -116,7 +115,7 @@ class FastSAMPredictor(SegmentationPredictor):
                 labels = torch.ones(points.shape[0])
             labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
             assert len(labels) == len(points), (
-                f"Expected `labels`
+                f"Expected `labels` to have the same length as `points`, but got {len(labels)} and {len(points)}."
             )
             point_idx = (
                 torch.ones(len(result), dtype=torch.bool, device=self.device)
ultralytics/models/fastsam/val.py
CHANGED
@@ -4,9 +4,9 @@ from ultralytics.models.yolo.segment import SegmentationValidator


 class FastSAMValidator(SegmentationValidator):
-    """Custom validation class for
+    """Custom validation class for FastSAM (Segment Anything Model) segmentation in the Ultralytics YOLO framework.

-    Extends the SegmentationValidator class, customizing the validation process specifically for
+    Extends the SegmentationValidator class, customizing the validation process specifically for FastSAM. This class
     sets the task to 'segment' and uses the SegmentMetrics for evaluation. Additionally, plotting features are disabled
     to avoid errors during validation.

@@ -18,14 +18,14 @@ class FastSAMValidator(SegmentationValidator):
         metrics (SegmentMetrics): Segmentation metrics calculator for evaluation.

     Methods:
-        __init__: Initialize the FastSAMValidator with custom settings for
+        __init__: Initialize the FastSAMValidator with custom settings for FastSAM.
     """

     def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None):
         """Initialize the FastSAMValidator class, setting the task to 'segment' and metrics to SegmentMetrics.

         Args:
-            dataloader (torch.utils.data.DataLoader, optional):
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
             save_dir (Path, optional): Directory to save results.
             args (SimpleNamespace, optional): Configuration for the validator.
             _callbacks (list, optional): List of callback functions to be invoked during validation.
ultralytics/models/rtdetr/predict.py
CHANGED
@@ -75,11 +75,10 @@ class RTDETRPredictor(BasePredictor):
     def pre_transform(self, im):
         """Pre-transform input images before feeding them into the model for inference.

-        The input images are letterboxed to ensure a square aspect ratio and scale-filled.
-        and scale_filled.
+        The input images are letterboxed to ensure a square aspect ratio and scale-filled.

         Args:
-            im (list[np.ndarray]
+            im (list[np.ndarray] | torch.Tensor): Input images of shape (N, 3, H, W) for tensor, [(H, W, 3) x N] for
                 list.

         Returns:
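Relating to the FastSAM hunks above (weights-only loading and the documented prompt attributes), a brief usage sketch; the image path and text prompt are illustrative:

```python
from ultralytics import FastSAM

model = FastSAM("FastSAM-x.pt")  # YAML/YML configs are rejected by the assert shown above
# Optional prompts mirror the FastSAMPredictor attributes (bboxes, points, labels, texts)
results = model("image.jpg", texts="a photo of a dog")
```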
ultralytics/models/rtdetr/val.py
CHANGED
@@ -35,7 +35,7 @@ class RTDETRDataset(YOLODataset):
     Examples:
         Initialize an RT-DETR dataset
         >>> dataset = RTDETRDataset(img_path="path/to/images", imgsz=640)
-        >>> image, hw = dataset.load_image(0)
+        >>> image, hw0, hw = dataset.load_image(0)
     """

     def __init__(self, *args, data=None, **kwargs):
@@ -59,13 +59,14 @@ class RTDETRDataset(YOLODataset):
             rect_mode (bool, optional): Whether to use rectangular mode for batch inference.

         Returns:
-            im (
-
+            im (np.ndarray): Loaded image as a NumPy array.
+            hw_original (tuple[int, int]): Original image dimensions in (height, width) format.
+            hw_resized (tuple[int, int]): Resized image dimensions in (height, width) format.

         Examples:
             Load an image from the dataset
             >>> dataset = RTDETRDataset(img_path="path/to/images")
-            >>> image, hw = dataset.load_image(0)
+            >>> image, hw0, hw = dataset.load_image(0)
         """
         return super().load_image(i=i, rect_mode=rect_mode)

@@ -85,7 +86,7 @@ class RTDETRDataset(YOLODataset):
             transforms = v8_transforms(self, self.imgsz, hyp, stretch=True)
         else:
             # transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scale_fill=True)])
-            transforms = Compose([
+            transforms = Compose([])
         transforms.append(
             Format(
                 bbox_format="xywh",
@@ -150,6 +151,10 @@ class RTDETRValidator(DetectionValidator):
             data=self.data,
         )

+    def scale_preds(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> dict[str, torch.Tensor]:
+        """Scales predictions to the original image size."""
+        return predn
+
     def postprocess(
         self, preds: torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor]
     ) -> list[dict[str, torch.Tensor]]:
ultralytics/models/sam/__init__.py
CHANGED
@@ -1,7 +1,16 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 from .model import SAM
-from .predict import
+from .predict import (
+    Predictor,
+    SAM2DynamicInteractivePredictor,
+    SAM2Predictor,
+    SAM2VideoPredictor,
+    SAM3Predictor,
+    SAM3SemanticPredictor,
+    SAM3VideoPredictor,
+    SAM3VideoSemanticPredictor,
+)

 __all__ = (
     "SAM",
@@ -9,4 +18,8 @@ __all__ = (
     "SAM2DynamicInteractivePredictor",
     "SAM2Predictor",
     "SAM2VideoPredictor",
+    "SAM3Predictor",
+    "SAM3SemanticPredictor",
+    "SAM3VideoPredictor",
+    "SAM3VideoSemanticPredictor",
 )  # tuple or list of exportable items
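The `__init__.py` hunks above simply widen the public surface of `ultralytics.models.sam`; after this release the SAM3 predictors can be imported alongside the existing ones:

```python
from ultralytics.models.sam import (
    SAM,
    SAM2Predictor,
    SAM3Predictor,
    SAM3SemanticPredictor,
    SAM3VideoPredictor,
    SAM3VideoSemanticPredictor,
)
```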
ultralytics/models/sam/build.py
CHANGED
@@ -21,6 +21,21 @@ from .modules.tiny_encoder import TinyViT
 from .modules.transformer import TwoWayTransformer


+def _load_checkpoint(model, checkpoint):
+    """Load checkpoint into model from file path."""
+    if checkpoint is None:
+        return model
+
+    checkpoint = attempt_download_asset(checkpoint)
+    with open(checkpoint, "rb") as f:
+        state_dict = torch_load(f)
+    # Handle nested "model" key
+    if "model" in state_dict and isinstance(state_dict["model"], dict):
+        state_dict = state_dict["model"]
+    model.load_state_dict(state_dict)
+    return model
+
+
 def build_sam_vit_h(checkpoint=None):
     """Build and return a Segment Anything Model (SAM) h-size model with specified encoder parameters."""
     return _build_sam(
@@ -205,22 +220,19 @@ def _build_sam(
         pixel_std=[58.395, 57.12, 57.375],
     )
     if checkpoint is not None:
-
-        with open(checkpoint, "rb") as f:
-            state_dict = torch_load(f)
-        sam.load_state_dict(state_dict)
+        sam = _load_checkpoint(sam, checkpoint)
     sam.eval()
     return sam


 def _build_sam2(
     encoder_embed_dim=1280,
-    encoder_stages=
+    encoder_stages=(2, 6, 36, 4),
     encoder_num_heads=2,
-    encoder_global_att_blocks=
-    encoder_backbone_channel_list=
-    encoder_window_spatial_size=
-    encoder_window_spec=
+    encoder_global_att_blocks=(7, 15, 23, 31),
+    encoder_backbone_channel_list=(1152, 576, 288, 144),
+    encoder_window_spatial_size=(7, 7),
+    encoder_window_spec=(8, 4, 16, 8),
     checkpoint=None,
 ):
     """Build and return a Segment Anything Model 2 (SAM2) with specified architecture parameters.
@@ -299,10 +311,7 @@ def _build_sam2(
     )

     if checkpoint is not None:
-
-        with open(checkpoint, "rb") as f:
-            state_dict = torch_load(f)["model"]
-        sam2.load_state_dict(state_dict)
+        sam2 = _load_checkpoint(sam2, checkpoint)
     sam2.eval()
     return sam2
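The new `_load_checkpoint` helper above unifies SAM and SAM2 weight loading: download the asset if needed, then unwrap a nested {"model": state_dict} checkpoint before `load_state_dict`. A small sketch of the builder API it backs; the checkpoint filename is illustrative:

```python
from ultralytics.models.sam.build import build_sam_vit_b

# build_sam_vit_b delegates to _build_sam, which now routes weight loading through
# _load_checkpoint so plain and nested checkpoints are handled the same way.
sam = build_sam_vit_b(checkpoint="sam_b.pt")
sam.eval()
```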