dgenerate-ultralytics-headless 8.3.236__py3-none-any.whl → 8.3.239__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/RECORD +117 -105
  3. tests/test_exports.py +3 -1
  4. tests/test_python.py +2 -2
  5. tests/test_solutions.py +6 -6
  6. ultralytics/__init__.py +1 -1
  7. ultralytics/cfg/__init__.py +4 -4
  8. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  9. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  10. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  11. ultralytics/cfg/datasets/VOC.yaml +15 -16
  12. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  13. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  14. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  15. ultralytics/cfg/datasets/dota8.yaml +2 -2
  16. ultralytics/cfg/datasets/kitti.yaml +1 -1
  17. ultralytics/cfg/datasets/xView.yaml +16 -16
  18. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  19. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  20. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  21. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  22. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  23. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  24. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  25. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  26. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  27. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  28. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  29. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  30. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  31. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  32. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  33. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  34. ultralytics/data/augment.py +1 -1
  35. ultralytics/data/base.py +4 -2
  36. ultralytics/data/build.py +4 -4
  37. ultralytics/data/loaders.py +17 -12
  38. ultralytics/data/utils.py +4 -4
  39. ultralytics/engine/exporter.py +40 -25
  40. ultralytics/engine/predictor.py +8 -6
  41. ultralytics/engine/results.py +12 -13
  42. ultralytics/engine/trainer.py +10 -2
  43. ultralytics/engine/tuner.py +2 -3
  44. ultralytics/engine/validator.py +2 -2
  45. ultralytics/models/fastsam/model.py +2 -2
  46. ultralytics/models/fastsam/predict.py +2 -3
  47. ultralytics/models/fastsam/val.py +4 -4
  48. ultralytics/models/rtdetr/predict.py +2 -3
  49. ultralytics/models/rtdetr/val.py +10 -5
  50. ultralytics/models/sam/__init__.py +14 -1
  51. ultralytics/models/sam/build.py +22 -13
  52. ultralytics/models/sam/build_sam3.py +377 -0
  53. ultralytics/models/sam/model.py +13 -5
  54. ultralytics/models/sam/modules/blocks.py +20 -8
  55. ultralytics/models/sam/modules/decoders.py +2 -3
  56. ultralytics/models/sam/modules/encoders.py +4 -1
  57. ultralytics/models/sam/modules/memory_attention.py +6 -2
  58. ultralytics/models/sam/modules/sam.py +159 -10
  59. ultralytics/models/sam/modules/utils.py +134 -4
  60. ultralytics/models/sam/predict.py +2073 -139
  61. ultralytics/models/sam/sam3/__init__.py +3 -0
  62. ultralytics/models/sam/sam3/decoder.py +546 -0
  63. ultralytics/models/sam/sam3/encoder.py +535 -0
  64. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  65. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  66. ultralytics/models/sam/sam3/model_misc.py +198 -0
  67. ultralytics/models/sam/sam3/necks.py +129 -0
  68. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  69. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  70. ultralytics/models/sam/sam3/vitdet.py +546 -0
  71. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  72. ultralytics/models/yolo/classify/val.py +1 -1
  73. ultralytics/models/yolo/detect/train.py +1 -1
  74. ultralytics/models/yolo/detect/val.py +7 -7
  75. ultralytics/models/yolo/obb/val.py +19 -8
  76. ultralytics/models/yolo/pose/val.py +1 -1
  77. ultralytics/models/yolo/segment/val.py +1 -1
  78. ultralytics/nn/autobackend.py +9 -9
  79. ultralytics/nn/modules/block.py +1 -1
  80. ultralytics/nn/modules/transformer.py +21 -1
  81. ultralytics/nn/tasks.py +3 -3
  82. ultralytics/nn/text_model.py +2 -7
  83. ultralytics/solutions/ai_gym.py +1 -1
  84. ultralytics/solutions/analytics.py +6 -6
  85. ultralytics/solutions/config.py +1 -1
  86. ultralytics/solutions/distance_calculation.py +1 -1
  87. ultralytics/solutions/object_counter.py +1 -1
  88. ultralytics/solutions/object_cropper.py +3 -6
  89. ultralytics/solutions/parking_management.py +21 -17
  90. ultralytics/solutions/queue_management.py +5 -5
  91. ultralytics/solutions/region_counter.py +2 -2
  92. ultralytics/solutions/security_alarm.py +1 -1
  93. ultralytics/solutions/solutions.py +45 -22
  94. ultralytics/solutions/speed_estimation.py +1 -1
  95. ultralytics/trackers/basetrack.py +1 -1
  96. ultralytics/trackers/bot_sort.py +4 -3
  97. ultralytics/trackers/byte_tracker.py +4 -4
  98. ultralytics/trackers/utils/gmc.py +6 -7
  99. ultralytics/trackers/utils/kalman_filter.py +2 -1
  100. ultralytics/trackers/utils/matching.py +4 -3
  101. ultralytics/utils/__init__.py +12 -3
  102. ultralytics/utils/benchmarks.py +2 -2
  103. ultralytics/utils/callbacks/tensorboard.py +19 -25
  104. ultralytics/utils/checks.py +4 -3
  105. ultralytics/utils/downloads.py +1 -1
  106. ultralytics/utils/export/tensorflow.py +16 -2
  107. ultralytics/utils/files.py +13 -12
  108. ultralytics/utils/logger.py +62 -27
  109. ultralytics/utils/metrics.py +1 -1
  110. ultralytics/utils/ops.py +7 -9
  111. ultralytics/utils/patches.py +3 -3
  112. ultralytics/utils/plotting.py +7 -12
  113. ultralytics/utils/tuner.py +1 -1
  114. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/WHEEL +0 -0
  115. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/entry_points.txt +0 -0
  116. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/licenses/LICENSE +0 -0
  117. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/top_level.txt +0 -0
ultralytics/engine/exporter.py

@@ -66,7 +66,6 @@ import re
  import shutil
  import subprocess
  import time
- import warnings
  from copy import deepcopy
  from datetime import datetime
  from pathlib import Path
@@ -128,7 +127,15 @@ from ultralytics.utils.metrics import batch_probiou
  from ultralytics.utils.nms import TorchNMS
  from ultralytics.utils.ops import Profile
  from ultralytics.utils.patches import arange_patch
- from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13, TORCH_2_1, TORCH_2_4, TORCH_2_9, select_device
+ from ultralytics.utils.torch_utils import (
+ TORCH_1_10,
+ TORCH_1_11,
+ TORCH_1_13,
+ TORCH_2_1,
+ TORCH_2_4,
+ TORCH_2_9,
+ select_device,
+ )


  def export_formats():
@@ -306,7 +313,11 @@ class Exporter:
  callbacks.add_integration_callbacks(self)

  def __call__(self, model=None) -> str:
- """Return list of exported files/dirs after running callbacks."""
+ """Export a model and return the final exported path as a string.
+
+ Returns:
+ (str): Path to the exported file or directory (the last export artifact).
+ """
  t = time.time()
  fmt = self.args.format.lower() # to lowercase
  if fmt in {"tensorrt", "trt"}: # 'engine' aliases
@@ -356,9 +367,10 @@ class Exporter:
  LOGGER.warning("TensorRT requires GPU export, automatically assigning device=0")
  self.args.device = "0"
  if engine and "dla" in str(self.args.device): # convert int/list to str first
- dla = self.args.device.rsplit(":", 1)[-1]
+ device_str = str(self.args.device)
+ dla = device_str.rsplit(":", 1)[-1]
  self.args.device = "0" # update device to "0"
- assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}."
+ assert dla in {"0", "1"}, f"Expected device 'dla:0' or 'dla:1', but got {device_str}."
  if imx and self.args.device is None and torch.cuda.is_available():
  LOGGER.warning("Exporting on CPU while CUDA is available, setting device=0 for faster export on GPU.")
  self.args.device = "0" # update device to "0"
@@ -369,7 +381,7 @@ class Exporter:
  validate_args(fmt, self.args, fmt_keys)
  if axelera:
  if not IS_PYTHON_3_10:
- SystemError("Axelera export only supported on Python 3.10.")
+ raise SystemError("Axelera export only supported on Python 3.10.")
  if not self.args.int8:
  LOGGER.warning("Setting int8=True for Axelera mixed-precision export.")
  self.args.int8 = True
@@ -392,8 +404,10 @@ class Exporter:
  if self.args.half and self.args.int8:
  LOGGER.warning("half=True and int8=True are mutually exclusive, setting half=False.")
  self.args.half = False
- if self.args.half and (onnx or jit) and self.device.type == "cpu":
- LOGGER.warning("half=True only compatible with GPU export, i.e. use device=0, setting half=False.")
+ if self.args.half and jit and self.device.type == "cpu":
+ LOGGER.warning(
+ "half=True only compatible with GPU export for TorchScript, i.e. use device=0, setting half=False."
+ )
  self.args.half = False
  self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size
  if self.args.optimize:
@@ -503,11 +517,6 @@ class Exporter:
  if self.args.half and (onnx or jit) and self.device.type != "cpu":
  im, model = im.half(), model.half() # to FP16

- # Filter warnings
- warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # suppress TracerWarning
- warnings.filterwarnings("ignore", category=UserWarning) # suppress shape prim::Constant missing ONNX warning
- warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress CoreML np.bool deprecation warning
-
  # Assign
  self.im = im
  self.model = model
@@ -608,7 +617,7 @@ class Exporter:
  )

  self.run_callbacks("on_export_end")
- return f # return list of exported files/dirs
+ return f # path to final export artifact

  def get_int8_calibration_dataloader(self, prefix=""):
  """Build and return a dataloader for calibration of INT8 models."""
@@ -655,7 +664,7 @@ class Exporter:
  @try_export
  def export_onnx(self, prefix=colorstr("ONNX:")):
  """Export YOLO model to ONNX format."""
- requirements = ["onnx>=1.12.0,<=1.19.1"]
+ requirements = ["onnx>=1.12.0,<2.0.0"]
  if self.args.simplify:
  requirements += ["onnxslim>=0.1.71", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
  check_requirements(requirements)
@@ -716,6 +725,16 @@ class Exporter:
  LOGGER.info(f"{prefix} limiting IR version {model_onnx.ir_version} to 10 for ONNXRuntime compatibility...")
  model_onnx.ir_version = 10

+ # FP16 conversion for CPU export (GPU exports are already FP16 from model.half() during tracing)
+ if self.args.half and self.args.format == "onnx" and self.device.type == "cpu":
+ try:
+ from onnxruntime.transformers import float16
+
+ LOGGER.info(f"{prefix} converting to FP16...")
+ model_onnx = float16.convert_float_to_float16(model_onnx, keep_io_types=True)
+ except Exception as e:
+ LOGGER.warning(f"{prefix} FP16 conversion failure: {e}")
+
  onnx.save(model_onnx, f)
  return f
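Note: a minimal usage sketch of the new CPU FP16 ONNX path shown above (checkpoint name and output path are illustrative):

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")  # any Ultralytics checkpoint
    # With half=True on a CPU device, the exporter now converts the ONNX weights via
    # onnxruntime.transformers.float16 while keeping FP32 inputs/outputs (keep_io_types=True)
    onnx_path = model.export(format="onnx", half=True, device="cpu")
    print(onnx_path)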
@@ -821,6 +840,7 @@ class Exporter:
  @try_export
  def export_mnn(self, prefix=colorstr("MNN:")):
  """Export YOLO model to MNN format using MNN https://github.com/alibaba/MNN."""
+ assert TORCH_1_10, "MNN export requires torch>=1.10.0 to avoid segmentation faults"
  f_onnx = self.export_onnx() # get onnx model first

  check_requirements("MNN>=2.9.6")
@@ -930,7 +950,7 @@ class Exporter:

  # Based on apple's documentation it is better to leave out the minimum_deployment target and let that get set
  # Internally based on the model conversion and output type.
- # Setting minimum_depoloyment_target >= iOS16 will require setting compute_precision=ct.precision.FLOAT32.
+ # Setting minimum_deployment_target >= iOS16 will require setting compute_precision=ct.precision.FLOAT32.
  # iOS16 adds in better support for FP16, but none of the CoreML NMS specifications handle FP16 as input.
  ct_model = ct.convert(
  ts,
@@ -1025,7 +1045,7 @@ class Exporter:
  "sng4onnx>=1.0.1", # required by 'onnx2tf' package
  "onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
  "ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""), # required by 'onnx2tf' package
- "onnx>=1.12.0,<=1.19.1",
+ "onnx>=1.12.0,<2.0.0",
  "onnx2tf>=1.26.3",
  "onnxslim>=0.1.71",
  "onnxruntime-gpu" if cuda else "onnxruntime",
@@ -1220,10 +1240,9 @@ class Exporter:
  f"{sudo}mkdir -p /etc/apt/keyrings",
  f"curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | {sudo}gpg --dearmor -o /etc/apt/keyrings/google.gpg",
  f'echo "deb [signed-by=/etc/apt/keyrings/google.gpg] https://packages.cloud.google.com/apt coral-edgetpu-stable main" | {sudo}tee /etc/apt/sources.list.d/coral-edgetpu.list',
- f"{sudo}apt-get update",
- f"{sudo}apt-get install -y edgetpu-compiler",
  ):
  subprocess.run(c, shell=True, check=True)
+ check_apt_requirements(["edgetpu-compiler"])

  ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().rsplit(maxsplit=1)[-1]
  LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...")
@@ -1301,16 +1320,12 @@ class Exporter:
  java_version = int(version_match.group(1)) if version_match else 0
  assert java_version >= 17, "Java version too old"
  except (FileNotFoundError, subprocess.CalledProcessError, AssertionError):
- cmd = None
  if IS_UBUNTU or IS_DEBIAN_TRIXIE:
  LOGGER.info(f"\n{prefix} installing Java 21 for Ubuntu...")
- cmd = (["sudo"] if is_sudo_available() else []) + ["apt-get", "install", "-y", "openjdk-21-jre"]
+ check_apt_requirements(["openjdk-21-jre"])
  elif IS_RASPBERRYPI or IS_DEBIAN_BOOKWORM:
  LOGGER.info(f"\n{prefix} installing Java 17 for Raspberry Pi or Debian ...")
- cmd = (["sudo"] if is_sudo_available() else []) + ["apt-get", "install", "-y", "openjdk-17-jre"]
-
- if cmd:
- subprocess.run(cmd, check=True)
+ check_apt_requirements(["openjdk-17-jre"])

  return torch2imx(
  self.model,
ultralytics/engine/predictor.py

@@ -55,8 +55,8 @@ from ultralytics.utils.files import increment_path
  from ultralytics.utils.torch_utils import attempt_compile, select_device, smart_inference_mode

  STREAM_WARNING = """
- inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
- errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.
+ Inference results will accumulate in RAM unless `stream=True` is passed, which can cause out-of-memory errors for large
+ sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

  Example:
  results = model(source=..., stream=True) # generator of Results objects
@@ -222,7 +222,7 @@ class BasePredictor:
  if stream:
  return self.stream_inference(source, model, *args, **kwargs)
  else:
- return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Result into one
+ return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Results into one

  def predict_cli(self, source=None, model=None):
  """Method used for Command Line Interface (CLI) prediction.
@@ -244,14 +244,15 @@ class BasePredictor:
  for _ in gen: # sourcery skip: remove-empty-nested-block, noqa
  pass

- def setup_source(self, source):
+ def setup_source(self, source, stride: int | None = None):
  """Set up source and inference mode.

  Args:
  source (str | Path | list[str] | list[Path] | list[np.ndarray] | np.ndarray | torch.Tensor): Source for
  inference.
+ stride (int, optional): Model stride for image size checking.
  """
- self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2) # check image size
+ self.imgsz = check_imgsz(self.args.imgsz, stride=stride or self.model.stride, min_dim=2) # check image size
  self.dataset = load_inference_source(
  source=source,
  batch=self.args.batch,
@@ -315,7 +316,8 @@ class BasePredictor:
  ops.Profile(device=self.device),
  )
  self.run_callbacks("on_predict_start")
- for self.batch in self.dataset:
+ for batch in self.dataset:
+ self.batch = batch
  self.run_callbacks("on_predict_batch_start")
  paths, im0s, s = self.batch
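Note: an illustrative sketch of the streaming mode referenced by STREAM_WARNING above (the source path is hypothetical):

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    # stream=True returns a generator, so Results objects are not accumulated in RAM
    for result in model.predict("video.mp4", stream=True):
        boxes = result.boxes  # process each frame's Results as it is produced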
ultralytics/engine/results.py

@@ -91,17 +91,17 @@ class BaseTensor(SimpleClass):
  return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)

  def numpy(self):
- """Return a copy of the tensor as a numpy array.
+ """Return a copy of this object with its data converted to a NumPy array.

  Returns:
- (np.ndarray): A numpy array containing the same data as the original tensor.
+ (BaseTensor): A new instance with `data` as a NumPy array.

  Examples:
  >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
  >>> orig_shape = (720, 1280)
  >>> base_tensor = BaseTensor(data, orig_shape)
- >>> numpy_array = base_tensor.numpy()
- >>> print(type(numpy_array))
+ >>> numpy_tensor = base_tensor.numpy()
+ >>> print(type(numpy_tensor.data))
  <class 'numpy.ndarray'>
  """
  return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
@@ -110,8 +110,7 @@ class BaseTensor(SimpleClass):
  """Move the tensor to GPU memory.

  Returns:
- (BaseTensor): A new BaseTensor instance with the data moved to GPU memory if it's not already a numpy array,
- otherwise returns self.
+ (BaseTensor): A new BaseTensor instance with the data moved to GPU memory.

  Examples:
  >>> import torch
@@ -201,14 +200,14 @@ class Results(SimpleClass, DataExportMixin):
  cuda: Move all tensors in the Results object to GPU memory.
  to: Move all tensors to the specified device and dtype.
  new: Create a new Results object with the same image, path, names, and speed attributes.
- plot: Plot detection results on an input RGB image.
+ plot: Plot detection results on an input BGR image.
  show: Display the image with annotated inference results.
  save: Save annotated inference results image to file.
  verbose: Return a log string for each task in the results.
  save_txt: Save detection results to a text file.
  save_crop: Save cropped detection images to specified directory.
  summary: Convert inference results to a summarized dictionary.
- to_df: Convert detection results to a Polars Dataframe.
+ to_df: Convert detection results to a Polars DataFrame.
  to_json: Convert detection results to JSON format.
  to_csv: Convert detection results to a CSV format.

@@ -461,7 +460,7 @@ class Results(SimpleClass, DataExportMixin):
  color_mode: str = "class",
  txt_color: tuple[int, int, int] = (255, 255, 255),
  ) -> np.ndarray:
- """Plot detection results on an input RGB image.
+ """Plot detection results on an input BGR image.

  Args:
  conf (bool): Whether to plot detection confidence scores.
@@ -481,10 +480,10 @@ class Results(SimpleClass, DataExportMixin):
  save (bool): Whether to save the annotated image.
  filename (str | None): Filename to save image if save is True.
  color_mode (str): Specify the color mode, e.g., 'instance' or 'class'.
- txt_color (tuple[int, int, int]): Specify the RGB text color for classification task.
+ txt_color (tuple[int, int, int]): Text color in BGR format for classification output.

  Returns:
- (np.ndarray): Annotated image as a numpy array.
+ (np.ndarray | PIL.Image.Image): Annotated image as a NumPy array (BGR) or PIL image (RGB) if `pil=True`.

  Examples:
  >>> results = model("image.jpg")
@@ -734,10 +733,10 @@ class Results(SimpleClass, DataExportMixin):
  - Original image is copied before cropping to avoid modifying the original.
  """
  if self.probs is not None:
- LOGGER.warning("Classify task do not support `save_crop`.")
+ LOGGER.warning("Classify task does not support `save_crop`.")
  return
  if self.obb is not None:
- LOGGER.warning("OBB task do not support `save_crop`.")
+ LOGGER.warning("OBB task does not support `save_crop`.")
  return
  for d in self.boxes:
  save_one_box(
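Note: a small sketch of the clarified numpy() behavior documented above, where a new object with NumPy-backed data is returned rather than a bare array (the image path is illustrative):

    from ultralytics import YOLO

    results = YOLO("yolo11n.pt")("bus.jpg")
    r = results[0].numpy()       # new Results-style object with NumPy-backed data
    print(type(r.boxes.data))    # <class 'numpy.ndarray'>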
ultralytics/engine/trainer.py

@@ -714,11 +714,11 @@ class BaseTrainer:
  raise NotImplementedError("This task trainer doesn't support loading cfg files")

  def get_validator(self):
- """Return a NotImplementedError when the get_validator function is called."""
+ """Raise NotImplementedError (must be implemented by subclasses)."""
  raise NotImplementedError("get_validator function not implemented in trainer")

  def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
- """Return dataloader derived from torch.data.Dataloader."""
+ """Raise NotImplementedError (must return a `torch.utils.data.DataLoader` in subclasses)."""
  raise NotImplementedError("get_dataloader function not implemented in trainer")

  def build_dataset(self, img_path, mode="train", batch=None):
@@ -812,6 +812,14 @@ class BaseTrainer:
  "device",
  "close_mosaic",
  "augmentations",
+ "save_period",
+ "workers",
+ "cache",
+ "patience",
+ "time",
+ "freeze",
+ "val",
+ "plots",
  ): # allow arg updates to reduce memory or update device on resume
  if k in overrides:
  setattr(self.args, k, overrides[k])
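Note: a hedged sketch of how the expanded resume whitelist above would be exercised; the checkpoint path is illustrative, and only whitelisted keys (e.g. device, workers, patience, save_period) are expected to override the values stored in the checkpoint:

    from ultralytics import YOLO

    model = YOLO("runs/detect/train/weights/last.pt")
    # resume training while updating a few whitelisted arguments from the checkpoint's saved args
    model.train(resume=True, device=0, workers=2, patience=50)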
ultralytics/engine/tuner.py

@@ -8,7 +8,7 @@ that yield the best model performance. This is particularly crucial in deep lear
  where small changes in hyperparameters can lead to significant differences in model accuracy and efficiency.

  Examples:
- Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
+ Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=10 for 300 tuning iterations.
  >>> from ultralytics import YOLO
  >>> model = YOLO("yolo11n.pt")
  >>> model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
@@ -55,7 +55,7 @@ class Tuner:
  __call__: Execute the hyperparameter evolution across multiple iterations.

  Examples:
- Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
+ Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=10 for 300 tuning iterations.
  >>> from ultralytics import YOLO
  >>> model = YOLO("yolo11n.pt")
  >>> model.tune(
@@ -283,7 +283,6 @@ class Tuner:
  """Mutate hyperparameters based on bounds and scaling factors specified in `self.space`.

  Args:
- parent (str): Parent selection method (kept for API compatibility, unused in BLX mode).
  n (int): Number of top parents to consider.
  mutation (float): Probability of a parameter mutation in any given iteration.
  sigma (float): Standard deviation for Gaussian random number generator.
ultralytics/engine/validator.py

@@ -48,7 +48,7 @@ class BaseValidator:

  Attributes:
  args (SimpleNamespace): Configuration for the validator.
- dataloader (DataLoader): Dataloader to use for validation.
+ dataloader (DataLoader): DataLoader to use for validation.
  model (nn.Module): Model to validate.
  data (dict): Data dictionary containing dataset information.
  device (torch.device): Device to use for validation.
@@ -95,7 +95,7 @@ class BaseValidator:
  """Initialize a BaseValidator instance.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
  save_dir (Path, optional): Directory to save results.
  args (SimpleNamespace, optional): Configuration for the validator.
  _callbacks (dict, optional): Dictionary to store various callback functions.
ultralytics/models/fastsam/model.py

@@ -12,7 +12,7 @@ from .val import FastSAMValidator


  class FastSAM(Model):
- """FastSAM model interface for segment anything tasks.
+ """FastSAM model interface for Segment Anything tasks.

  This class extends the base Model class to provide specific functionality for the FastSAM (Fast Segment Anything
  Model) implementation, allowing for efficient and accurate image segmentation with optional prompting support.
@@ -39,7 +39,7 @@ class FastSAM(Model):
  """Initialize the FastSAM model with the specified pre-trained weights."""
  if str(model) == "FastSAM.pt":
  model = "FastSAM-x.pt"
- assert Path(model).suffix not in {".yaml", ".yml"}, "FastSAM models only support pre-trained models."
+ assert Path(model).suffix not in {".yaml", ".yml"}, "FastSAM only supports pre-trained weights."
  super().__init__(model=model, task="segment")

  def predict(
ultralytics/models/fastsam/predict.py

@@ -22,8 +22,7 @@ class FastSAMPredictor(SegmentationPredictor):
  Attributes:
  prompts (dict): Dictionary containing prompt information for segmentation (bboxes, points, labels, texts).
  device (torch.device): Device on which model and tensors are processed.
- clip_model (Any, optional): CLIP model for text-based prompting, loaded on demand.
- clip_preprocess (Any, optional): CLIP preprocessing function for images, loaded on demand.
+ clip (Any, optional): CLIP model used for text-based prompting, loaded on demand.

  Methods:
  postprocess: Apply postprocessing to FastSAM predictions and handle prompts.
@@ -116,7 +115,7 @@ class FastSAMPredictor(SegmentationPredictor):
  labels = torch.ones(points.shape[0])
  labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
  assert len(labels) == len(points), (
- f"Expected `labels` with same size as `point`, but got {len(labels)} and {len(points)}"
+ f"Expected `labels` to have the same length as `points`, but got {len(labels)} and {len(points)}."
  )
  point_idx = (
  torch.ones(len(result), dtype=torch.bool, device=self.device)
ultralytics/models/fastsam/val.py

@@ -4,9 +4,9 @@ from ultralytics.models.yolo.segment import SegmentationValidator


  class FastSAMValidator(SegmentationValidator):
- """Custom validation class for Fast SAM (Segment Anything Model) segmentation in Ultralytics YOLO framework.
+ """Custom validation class for FastSAM (Segment Anything Model) segmentation in the Ultralytics YOLO framework.

- Extends the SegmentationValidator class, customizing the validation process specifically for Fast SAM. This class
+ Extends the SegmentationValidator class, customizing the validation process specifically for FastSAM. This class
  sets the task to 'segment' and uses the SegmentMetrics for evaluation. Additionally, plotting features are disabled
  to avoid errors during validation.

@@ -18,14 +18,14 @@ class FastSAMValidator(SegmentationValidator):
  metrics (SegmentMetrics): Segmentation metrics calculator for evaluation.

  Methods:
- __init__: Initialize the FastSAMValidator with custom settings for Fast SAM.
+ __init__: Initialize the FastSAMValidator with custom settings for FastSAM.
  """

  def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None):
  """Initialize the FastSAMValidator class, setting the task to 'segment' and metrics to SegmentMetrics.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
  save_dir (Path, optional): Directory to save results.
  args (SimpleNamespace, optional): Configuration for the validator.
  _callbacks (list, optional): List of callback functions to be invoked during validation.
ultralytics/models/rtdetr/predict.py

@@ -75,11 +75,10 @@ class RTDETRPredictor(BasePredictor):
  def pre_transform(self, im):
  """Pre-transform input images before feeding them into the model for inference.

- The input images are letterboxed to ensure a square aspect ratio and scale-filled. The size must be square (640)
- and scale_filled.
+ The input images are letterboxed to ensure a square aspect ratio and scale-filled.

  Args:
- im (list[np.ndarray] | torch.Tensor): Input images of shape (N, 3, H, W) for tensor, [(H, W, 3) x N] for
+ im (list[np.ndarray] | torch.Tensor): Input images of shape (N, 3, H, W) for tensor, [(H, W, 3) x N] for
  list.

  Returns:
ultralytics/models/rtdetr/val.py

@@ -35,7 +35,7 @@ class RTDETRDataset(YOLODataset):
  Examples:
  Initialize an RT-DETR dataset
  >>> dataset = RTDETRDataset(img_path="path/to/images", imgsz=640)
- >>> image, hw = dataset.load_image(0)
+ >>> image, hw0, hw = dataset.load_image(0)
  """

  def __init__(self, *args, data=None, **kwargs):
@@ -59,13 +59,14 @@ class RTDETRDataset(YOLODataset):
  rect_mode (bool, optional): Whether to use rectangular mode for batch inference.

  Returns:
- im (torch.Tensor): The loaded image.
- resized_hw (tuple): Height and width of the resized image with shape (2,).
+ im (np.ndarray): Loaded image as a NumPy array.
+ hw_original (tuple[int, int]): Original image dimensions in (height, width) format.
+ hw_resized (tuple[int, int]): Resized image dimensions in (height, width) format.

  Examples:
  Load an image from the dataset
  >>> dataset = RTDETRDataset(img_path="path/to/images")
- >>> image, hw = dataset.load_image(0)
+ >>> image, hw0, hw = dataset.load_image(0)
  """
  return super().load_image(i=i, rect_mode=rect_mode)

@@ -85,7 +86,7 @@ class RTDETRDataset(YOLODataset):
  transforms = v8_transforms(self, self.imgsz, hyp, stretch=True)
  else:
  # transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scale_fill=True)])
- transforms = Compose([lambda x: {**x, **{"ratio_pad": [x["ratio_pad"], [0, 0]]}}])
+ transforms = Compose([])
  transforms.append(
  Format(
  bbox_format="xywh",
@@ -150,6 +151,10 @@ class RTDETRValidator(DetectionValidator):
  data=self.data,
  )

+ def scale_preds(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> dict[str, torch.Tensor]:
+ """Scales predictions to the original image size."""
+ return predn
+
  def postprocess(
  self, preds: torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor]
  ) -> list[dict[str, torch.Tensor]]:
ultralytics/models/sam/__init__.py

@@ -1,7 +1,16 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

  from .model import SAM
- from .predict import Predictor, SAM2DynamicInteractivePredictor, SAM2Predictor, SAM2VideoPredictor
+ from .predict import (
+ Predictor,
+ SAM2DynamicInteractivePredictor,
+ SAM2Predictor,
+ SAM2VideoPredictor,
+ SAM3Predictor,
+ SAM3SemanticPredictor,
+ SAM3VideoPredictor,
+ SAM3VideoSemanticPredictor,
+ )

  __all__ = (
  "SAM",
@@ -9,4 +18,8 @@ __all__ = (
  "SAM2DynamicInteractivePredictor",
  "SAM2Predictor",
  "SAM2VideoPredictor",
+ "SAM3Predictor",
+ "SAM3SemanticPredictor",
+ "SAM3VideoPredictor",
+ "SAM3VideoSemanticPredictor",
  ) # tuple or list of exportable items
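Note: the new SAM3 predictors are exported at the package level, so they can be imported directly (a minimal sketch; construction arguments and checkpoints are omitted here):

    from ultralytics.models.sam import (
        SAM3Predictor,
        SAM3SemanticPredictor,
        SAM3VideoPredictor,
        SAM3VideoSemanticPredictor,
    )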
ultralytics/models/sam/build.py

@@ -21,6 +21,21 @@ from .modules.tiny_encoder import TinyViT
  from .modules.transformer import TwoWayTransformer


+ def _load_checkpoint(model, checkpoint):
+ """Load checkpoint into model from file path."""
+ if checkpoint is None:
+ return model
+
+ checkpoint = attempt_download_asset(checkpoint)
+ with open(checkpoint, "rb") as f:
+ state_dict = torch_load(f)
+ # Handle nested "model" key
+ if "model" in state_dict and isinstance(state_dict["model"], dict):
+ state_dict = state_dict["model"]
+ model.load_state_dict(state_dict)
+ return model
+
+
  def build_sam_vit_h(checkpoint=None):
  """Build and return a Segment Anything Model (SAM) h-size model with specified encoder parameters."""
  return _build_sam(
@@ -205,22 +220,19 @@ def _build_sam(
  pixel_std=[58.395, 57.12, 57.375],
  )
  if checkpoint is not None:
- checkpoint = attempt_download_asset(checkpoint)
- with open(checkpoint, "rb") as f:
- state_dict = torch_load(f)
- sam.load_state_dict(state_dict)
+ sam = _load_checkpoint(sam, checkpoint)
  sam.eval()
  return sam


  def _build_sam2(
  encoder_embed_dim=1280,
- encoder_stages=[2, 6, 36, 4],
+ encoder_stages=(2, 6, 36, 4),
  encoder_num_heads=2,
- encoder_global_att_blocks=[7, 15, 23, 31],
- encoder_backbone_channel_list=[1152, 576, 288, 144],
- encoder_window_spatial_size=[7, 7],
- encoder_window_spec=[8, 4, 16, 8],
+ encoder_global_att_blocks=(7, 15, 23, 31),
+ encoder_backbone_channel_list=(1152, 576, 288, 144),
+ encoder_window_spatial_size=(7, 7),
+ encoder_window_spec=(8, 4, 16, 8),
  checkpoint=None,
  ):
  """Build and return a Segment Anything Model 2 (SAM2) with specified architecture parameters.
@@ -299,10 +311,7 @@ def _build_sam2(
  )

  if checkpoint is not None:
- checkpoint = attempt_download_asset(checkpoint)
- with open(checkpoint, "rb") as f:
- state_dict = torch_load(f)["model"]
- sam2.load_state_dict(state_dict)
+ sam2 = _load_checkpoint(sam2, checkpoint)
  sam2.eval()
  return sam2