dgenerate-ultralytics-headless 8.3.237__py3-none-any.whl → 8.3.239__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (105)
  1. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/RECORD +104 -105
  3. tests/test_exports.py +3 -1
  4. tests/test_python.py +2 -2
  5. tests/test_solutions.py +6 -6
  6. ultralytics/__init__.py +1 -1
  7. ultralytics/cfg/__init__.py +4 -4
  8. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  9. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  10. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  11. ultralytics/cfg/datasets/VOC.yaml +15 -16
  12. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  13. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  14. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  15. ultralytics/cfg/datasets/dota8.yaml +2 -2
  16. ultralytics/cfg/datasets/kitti.yaml +1 -1
  17. ultralytics/cfg/datasets/xView.yaml +16 -16
  18. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  19. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  20. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  21. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  22. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  23. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  24. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  25. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  26. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  27. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  28. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  29. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  30. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  31. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  32. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  33. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  34. ultralytics/data/augment.py +1 -1
  35. ultralytics/data/base.py +4 -2
  36. ultralytics/data/build.py +4 -4
  37. ultralytics/data/loaders.py +17 -12
  38. ultralytics/data/utils.py +4 -4
  39. ultralytics/engine/exporter.py +24 -16
  40. ultralytics/engine/predictor.py +5 -4
  41. ultralytics/engine/results.py +12 -13
  42. ultralytics/engine/trainer.py +2 -2
  43. ultralytics/engine/tuner.py +2 -3
  44. ultralytics/engine/validator.py +2 -2
  45. ultralytics/models/fastsam/model.py +2 -2
  46. ultralytics/models/fastsam/predict.py +2 -3
  47. ultralytics/models/fastsam/val.py +4 -4
  48. ultralytics/models/rtdetr/predict.py +2 -3
  49. ultralytics/models/rtdetr/val.py +5 -4
  50. ultralytics/models/sam/build.py +5 -5
  51. ultralytics/models/sam/build_sam3.py +9 -6
  52. ultralytics/models/sam/model.py +1 -1
  53. ultralytics/models/sam/modules/sam.py +10 -5
  54. ultralytics/models/sam/predict.py +24 -48
  55. ultralytics/models/sam/sam3/encoder.py +4 -4
  56. ultralytics/models/sam/sam3/geometry_encoders.py +3 -3
  57. ultralytics/models/sam/sam3/necks.py +17 -17
  58. ultralytics/models/sam/sam3/sam3_image.py +3 -21
  59. ultralytics/models/sam/sam3/vl_combiner.py +1 -6
  60. ultralytics/models/yolo/classify/val.py +1 -1
  61. ultralytics/models/yolo/detect/train.py +1 -1
  62. ultralytics/models/yolo/detect/val.py +7 -7
  63. ultralytics/models/yolo/obb/val.py +1 -1
  64. ultralytics/models/yolo/pose/val.py +1 -1
  65. ultralytics/models/yolo/segment/val.py +1 -1
  66. ultralytics/nn/autobackend.py +9 -9
  67. ultralytics/nn/modules/block.py +1 -1
  68. ultralytics/nn/tasks.py +3 -3
  69. ultralytics/nn/text_model.py +2 -7
  70. ultralytics/solutions/ai_gym.py +1 -1
  71. ultralytics/solutions/analytics.py +6 -6
  72. ultralytics/solutions/config.py +1 -1
  73. ultralytics/solutions/distance_calculation.py +1 -1
  74. ultralytics/solutions/object_counter.py +1 -1
  75. ultralytics/solutions/object_cropper.py +3 -6
  76. ultralytics/solutions/parking_management.py +21 -17
  77. ultralytics/solutions/queue_management.py +5 -5
  78. ultralytics/solutions/region_counter.py +2 -2
  79. ultralytics/solutions/security_alarm.py +1 -1
  80. ultralytics/solutions/solutions.py +45 -22
  81. ultralytics/solutions/speed_estimation.py +1 -1
  82. ultralytics/trackers/basetrack.py +1 -1
  83. ultralytics/trackers/bot_sort.py +4 -3
  84. ultralytics/trackers/byte_tracker.py +4 -4
  85. ultralytics/trackers/utils/gmc.py +6 -7
  86. ultralytics/trackers/utils/kalman_filter.py +2 -1
  87. ultralytics/trackers/utils/matching.py +4 -3
  88. ultralytics/utils/__init__.py +12 -3
  89. ultralytics/utils/benchmarks.py +2 -2
  90. ultralytics/utils/callbacks/tensorboard.py +19 -25
  91. ultralytics/utils/checks.py +2 -1
  92. ultralytics/utils/downloads.py +1 -1
  93. ultralytics/utils/export/tensorflow.py +16 -2
  94. ultralytics/utils/files.py +13 -12
  95. ultralytics/utils/logger.py +62 -27
  96. ultralytics/utils/metrics.py +1 -1
  97. ultralytics/utils/ops.py +6 -6
  98. ultralytics/utils/patches.py +3 -3
  99. ultralytics/utils/plotting.py +7 -12
  100. ultralytics/utils/tuner.py +1 -1
  101. ultralytics/models/sam/sam3/tokenizer_ve.py +0 -242
  102. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/WHEEL +0 -0
  103. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/entry_points.txt +0 -0
  104. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/licenses/LICENSE +0 -0
  105. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/top_level.txt +0 -0
@@ -66,7 +66,6 @@ import re
  import shutil
  import subprocess
  import time
- import warnings
  from copy import deepcopy
  from datetime import datetime
  from pathlib import Path
@@ -128,7 +127,15 @@ from ultralytics.utils.metrics import batch_probiou
  from ultralytics.utils.nms import TorchNMS
  from ultralytics.utils.ops import Profile
  from ultralytics.utils.patches import arange_patch
- from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13, TORCH_2_1, TORCH_2_4, TORCH_2_9, select_device
+ from ultralytics.utils.torch_utils import (
+ TORCH_1_10,
+ TORCH_1_11,
+ TORCH_1_13,
+ TORCH_2_1,
+ TORCH_2_4,
+ TORCH_2_9,
+ select_device,
+ )


  def export_formats():
@@ -306,7 +313,11 @@ class Exporter:
  callbacks.add_integration_callbacks(self)

  def __call__(self, model=None) -> str:
- """Return list of exported files/dirs after running callbacks."""
+ """Export a model and return the final exported path as a string.
+
+ Returns:
+ (str): Path to the exported file or directory (the last export artifact).
+ """
  t = time.time()
  fmt = self.args.format.lower() # to lowercase
  if fmt in {"tensorrt", "trt"}: # 'engine' aliases
@@ -356,9 +367,10 @@ class Exporter:
  LOGGER.warning("TensorRT requires GPU export, automatically assigning device=0")
  self.args.device = "0"
  if engine and "dla" in str(self.args.device): # convert int/list to str first
- dla = self.args.device.rsplit(":", 1)[-1]
+ device_str = str(self.args.device)
+ dla = device_str.rsplit(":", 1)[-1]
  self.args.device = "0" # update device to "0"
- assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}."
+ assert dla in {"0", "1"}, f"Expected device 'dla:0' or 'dla:1', but got {device_str}."
  if imx and self.args.device is None and torch.cuda.is_available():
  LOGGER.warning("Exporting on CPU while CUDA is available, setting device=0 for faster export on GPU.")
  self.args.device = "0" # update device to "0"
@@ -369,7 +381,7 @@ class Exporter:
  validate_args(fmt, self.args, fmt_keys)
  if axelera:
  if not IS_PYTHON_3_10:
- SystemError("Axelera export only supported on Python 3.10.")
+ raise SystemError("Axelera export only supported on Python 3.10.")
  if not self.args.int8:
  LOGGER.warning("Setting int8=True for Axelera mixed-precision export.")
  self.args.int8 = True
@@ -505,11 +517,6 @@ class Exporter:
  if self.args.half and (onnx or jit) and self.device.type != "cpu":
  im, model = im.half(), model.half() # to FP16

- # Filter warnings
- warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # suppress TracerWarning
- warnings.filterwarnings("ignore", category=UserWarning) # suppress shape prim::Constant missing ONNX warning
- warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress CoreML np.bool deprecation warning
-
  # Assign
  self.im = im
  self.model = model
@@ -610,7 +617,7 @@ class Exporter:
  )

  self.run_callbacks("on_export_end")
- return f # return list of exported files/dirs
+ return f # path to final export artifact

  def get_int8_calibration_dataloader(self, prefix=""):
  """Build and return a dataloader for calibration of INT8 models."""
@@ -657,7 +664,7 @@ class Exporter:
  @try_export
  def export_onnx(self, prefix=colorstr("ONNX:")):
  """Export YOLO model to ONNX format."""
- requirements = ["onnx>=1.12.0,<=1.19.1"]
+ requirements = ["onnx>=1.12.0,<2.0.0"]
  if self.args.simplify:
  requirements += ["onnxslim>=0.1.71", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
  check_requirements(requirements)
@@ -719,7 +726,7 @@ class Exporter:
  model_onnx.ir_version = 10

  # FP16 conversion for CPU export (GPU exports are already FP16 from model.half() during tracing)
- if self.args.half and self.device.type == "cpu":
+ if self.args.half and self.args.format == "onnx" and self.device.type == "cpu":
  try:
  from onnxruntime.transformers import float16

@@ -833,6 +840,7 @@ class Exporter:
  @try_export
  def export_mnn(self, prefix=colorstr("MNN:")):
  """Export YOLO model to MNN format using MNN https://github.com/alibaba/MNN."""
+ assert TORCH_1_10, "MNN export requires torch>=1.10.0 to avoid segmentation faults"
  f_onnx = self.export_onnx() # get onnx model first

  check_requirements("MNN>=2.9.6")
@@ -942,7 +950,7 @@ class Exporter:

  # Based on apple's documentation it is better to leave out the minimum_deployment target and let that get set
  # Internally based on the model conversion and output type.
- # Setting minimum_depoloyment_target >= iOS16 will require setting compute_precision=ct.precision.FLOAT32.
+ # Setting minimum_deployment_target >= iOS16 will require setting compute_precision=ct.precision.FLOAT32.
  # iOS16 adds in better support for FP16, but none of the CoreML NMS specifications handle FP16 as input.
  ct_model = ct.convert(
  ts,
@@ -1037,7 +1045,7 @@ class Exporter:
  "sng4onnx>=1.0.1", # required by 'onnx2tf' package
  "onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
  "ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""), # required by 'onnx2tf' package
- "onnx>=1.12.0,<=1.19.1",
+ "onnx>=1.12.0,<2.0.0",
  "onnx2tf>=1.26.3",
  "onnxslim>=0.1.71",
  "onnxruntime-gpu" if cuda else "onnxruntime",
@@ -55,8 +55,8 @@ from ultralytics.utils.files import increment_path
  from ultralytics.utils.torch_utils import attempt_compile, select_device, smart_inference_mode

  STREAM_WARNING = """
- inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
- errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.
+ Inference results will accumulate in RAM unless `stream=True` is passed, which can cause out-of-memory errors for large
+ sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

  Example:
  results = model(source=..., stream=True) # generator of Results objects
@@ -222,7 +222,7 @@ class BasePredictor:
  if stream:
  return self.stream_inference(source, model, *args, **kwargs)
  else:
- return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Result into one
+ return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Results into one

  def predict_cli(self, source=None, model=None):
  """Method used for Command Line Interface (CLI) prediction.
@@ -316,7 +316,8 @@ class BasePredictor:
  ops.Profile(device=self.device),
  )
  self.run_callbacks("on_predict_start")
- for self.batch in self.dataset:
+ for batch in self.dataset:
+ self.batch = batch
  self.run_callbacks("on_predict_batch_start")
  paths, im0s, s = self.batch
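
Aside (not part of the diff): the reworded STREAM_WARNING above refers to the generator pattern sketched below, where predictions are consumed with stream=True so Results objects are handled one at a time instead of accumulating in RAM. A minimal sketch, assuming a standard video source:

from ultralytics import YOLO

model = YOLO("yolo11n.pt")
for result in model("path/to/video.mp4", stream=True):  # generator of Results objects
    boxes = result.boxes  # handle each frame here; nothing is retained between iterations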
 
@@ -91,17 +91,17 @@ class BaseTensor(SimpleClass):
  return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)

  def numpy(self):
- """Return a copy of the tensor as a numpy array.
+ """Return a copy of this object with its data converted to a NumPy array.

  Returns:
- (np.ndarray): A numpy array containing the same data as the original tensor.
+ (BaseTensor): A new instance with `data` as a NumPy array.

  Examples:
  >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
  >>> orig_shape = (720, 1280)
  >>> base_tensor = BaseTensor(data, orig_shape)
- >>> numpy_array = base_tensor.numpy()
- >>> print(type(numpy_array))
+ >>> numpy_tensor = base_tensor.numpy()
+ >>> print(type(numpy_tensor.data))
  <class 'numpy.ndarray'>
  """
  return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
@@ -110,8 +110,7 @@ class BaseTensor(SimpleClass):
  """Move the tensor to GPU memory.

  Returns:
- (BaseTensor): A new BaseTensor instance with the data moved to GPU memory if it's not already a numpy array,
- otherwise returns self.
+ (BaseTensor): A new BaseTensor instance with the data moved to GPU memory.

  Examples:
  >>> import torch
@@ -201,14 +200,14 @@ class Results(SimpleClass, DataExportMixin):
  cuda: Move all tensors in the Results object to GPU memory.
  to: Move all tensors to the specified device and dtype.
  new: Create a new Results object with the same image, path, names, and speed attributes.
- plot: Plot detection results on an input RGB image.
+ plot: Plot detection results on an input BGR image.
  show: Display the image with annotated inference results.
  save: Save annotated inference results image to file.
  verbose: Return a log string for each task in the results.
  save_txt: Save detection results to a text file.
  save_crop: Save cropped detection images to specified directory.
  summary: Convert inference results to a summarized dictionary.
- to_df: Convert detection results to a Polars Dataframe.
+ to_df: Convert detection results to a Polars DataFrame.
  to_json: Convert detection results to JSON format.
  to_csv: Convert detection results to a CSV format.

@@ -461,7 +460,7 @@ class Results(SimpleClass, DataExportMixin):
  color_mode: str = "class",
  txt_color: tuple[int, int, int] = (255, 255, 255),
  ) -> np.ndarray:
- """Plot detection results on an input RGB image.
+ """Plot detection results on an input BGR image.

  Args:
  conf (bool): Whether to plot detection confidence scores.
@@ -481,10 +480,10 @@ class Results(SimpleClass, DataExportMixin):
  save (bool): Whether to save the annotated image.
  filename (str | None): Filename to save image if save is True.
  color_mode (str): Specify the color mode, e.g., 'instance' or 'class'.
- txt_color (tuple[int, int, int]): Specify the RGB text color for classification task.
+ txt_color (tuple[int, int, int]): Text color in BGR format for classification output.

  Returns:
- (np.ndarray): Annotated image as a numpy array.
+ (np.ndarray | PIL.Image.Image): Annotated image as a NumPy array (BGR) or PIL image (RGB) if `pil=True`.

  Examples:
  >>> results = model("image.jpg")
@@ -734,10 +733,10 @@ class Results(SimpleClass, DataExportMixin):
  - Original image is copied before cropping to avoid modifying the original.
  """
  if self.probs is not None:
- LOGGER.warning("Classify task does not support `save_crop`.")
+ LOGGER.warning("Classify task does not support `save_crop`.")
  return
  if self.obb is not None:
- LOGGER.warning("OBB task do not support `save_crop`.")
+ LOGGER.warning("OBB task does not support `save_crop`.")
  return
  for d in self.boxes:
  save_one_box(
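
Aside (not part of the diff): the docstring fixes above clarify that BaseTensor.numpy() returns a new instance whose .data is a NumPy array, and that plot() returns a BGR array (or a PIL image when pil=True). A minimal sketch of both behaviors, assuming an OpenCV-compatible environment:

import cv2
from ultralytics import YOLO

results = YOLO("yolo11n.pt")("image.jpg")
boxes_np = results[0].boxes.numpy()  # new Boxes instance; boxes_np.data is a numpy.ndarray
annotated = results[0].plot()  # numpy.ndarray in BGR channel order
cv2.imwrite("annotated.jpg", annotated)  # BGR is what cv2.imwrite expects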
@@ -714,11 +714,11 @@ class BaseTrainer:
  raise NotImplementedError("This task trainer doesn't support loading cfg files")

  def get_validator(self):
- """Return a NotImplementedError when the get_validator function is called."""
+ """Raise NotImplementedError (must be implemented by subclasses)."""
  raise NotImplementedError("get_validator function not implemented in trainer")

  def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
- """Return dataloader derived from torch.data.Dataloader."""
+ """Raise NotImplementedError (must return a `torch.utils.data.DataLoader` in subclasses)."""
  raise NotImplementedError("get_dataloader function not implemented in trainer")

  def build_dataset(self, img_path, mode="train", batch=None):
@@ -8,7 +8,7 @@ that yield the best model performance. This is particularly crucial in deep lear
  where small changes in hyperparameters can lead to significant differences in model accuracy and efficiency.

  Examples:
- Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
+ Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=10 for 300 tuning iterations.
  >>> from ultralytics import YOLO
  >>> model = YOLO("yolo11n.pt")
  >>> model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
@@ -55,7 +55,7 @@ class Tuner:
  __call__: Execute the hyperparameter evolution across multiple iterations.

  Examples:
- Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
+ Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=10 for 300 tuning iterations.
  >>> from ultralytics import YOLO
  >>> model = YOLO("yolo11n.pt")
  >>> model.tune(
@@ -283,7 +283,6 @@ class Tuner:
  """Mutate hyperparameters based on bounds and scaling factors specified in `self.space`.

  Args:
- parent (str): Parent selection method (kept for API compatibility, unused in BLX mode).
  n (int): Number of top parents to consider.
  mutation (float): Probability of a parameter mutation in any given iteration.
  sigma (float): Standard deviation for Gaussian random number generator.
@@ -48,7 +48,7 @@ class BaseValidator:

  Attributes:
  args (SimpleNamespace): Configuration for the validator.
- dataloader (DataLoader): Dataloader to use for validation.
+ dataloader (DataLoader): DataLoader to use for validation.
  model (nn.Module): Model to validate.
  data (dict): Data dictionary containing dataset information.
  device (torch.device): Device to use for validation.
@@ -95,7 +95,7 @@ class BaseValidator:
  """Initialize a BaseValidator instance.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
  save_dir (Path, optional): Directory to save results.
  args (SimpleNamespace, optional): Configuration for the validator.
  _callbacks (dict, optional): Dictionary to store various callback functions.
@@ -12,7 +12,7 @@ from .val import FastSAMValidator


  class FastSAM(Model):
- """FastSAM model interface for segment anything tasks.
+ """FastSAM model interface for Segment Anything tasks.

  This class extends the base Model class to provide specific functionality for the FastSAM (Fast Segment Anything
  Model) implementation, allowing for efficient and accurate image segmentation with optional prompting support.
@@ -39,7 +39,7 @@ class FastSAM(Model):
  """Initialize the FastSAM model with the specified pre-trained weights."""
  if str(model) == "FastSAM.pt":
  model = "FastSAM-x.pt"
- assert Path(model).suffix not in {".yaml", ".yml"}, "FastSAM models only support pre-trained models."
+ assert Path(model).suffix not in {".yaml", ".yml"}, "FastSAM only supports pre-trained weights."
  super().__init__(model=model, task="segment")

  def predict(
@@ -22,8 +22,7 @@ class FastSAMPredictor(SegmentationPredictor):
  Attributes:
  prompts (dict): Dictionary containing prompt information for segmentation (bboxes, points, labels, texts).
  device (torch.device): Device on which model and tensors are processed.
- clip_model (Any, optional): CLIP model for text-based prompting, loaded on demand.
- clip_preprocess (Any, optional): CLIP preprocessing function for images, loaded on demand.
+ clip (Any, optional): CLIP model used for text-based prompting, loaded on demand.

  Methods:
  postprocess: Apply postprocessing to FastSAM predictions and handle prompts.
@@ -116,7 +115,7 @@ class FastSAMPredictor(SegmentationPredictor):
  labels = torch.ones(points.shape[0])
  labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
  assert len(labels) == len(points), (
- f"Expected `labels` with same size as `point`, but got {len(labels)} and {len(points)}"
+ f"Expected `labels` to have the same length as `points`, but got {len(labels)} and {len(points)}."
  )
  point_idx = (
  torch.ones(len(result), dtype=torch.bool, device=self.device)
@@ -4,9 +4,9 @@ from ultralytics.models.yolo.segment import SegmentationValidator


  class FastSAMValidator(SegmentationValidator):
- """Custom validation class for Fast SAM (Segment Anything Model) segmentation in Ultralytics YOLO framework.
+ """Custom validation class for FastSAM (Segment Anything Model) segmentation in the Ultralytics YOLO framework.

- Extends the SegmentationValidator class, customizing the validation process specifically for Fast SAM. This class
+ Extends the SegmentationValidator class, customizing the validation process specifically for FastSAM. This class
  sets the task to 'segment' and uses the SegmentMetrics for evaluation. Additionally, plotting features are disabled
  to avoid errors during validation.

@@ -18,14 +18,14 @@ class FastSAMValidator(SegmentationValidator):
  metrics (SegmentMetrics): Segmentation metrics calculator for evaluation.

  Methods:
- __init__: Initialize the FastSAMValidator with custom settings for Fast SAM.
+ __init__: Initialize the FastSAMValidator with custom settings for FastSAM.
  """

  def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None):
  """Initialize the FastSAMValidator class, setting the task to 'segment' and metrics to SegmentMetrics.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
  save_dir (Path, optional): Directory to save results.
  args (SimpleNamespace, optional): Configuration for the validator.
  _callbacks (list, optional): List of callback functions to be invoked during validation.
@@ -75,11 +75,10 @@ class RTDETRPredictor(BasePredictor):
  def pre_transform(self, im):
  """Pre-transform input images before feeding them into the model for inference.

- The input images are letterboxed to ensure a square aspect ratio and scale-filled. The size must be square (640)
- and scale_filled.
+ The input images are letterboxed to ensure a square aspect ratio and scale-filled.

  Args:
- im (list[np.ndarray] | torch.Tensor): Input images of shape (N, 3, H, W) for tensor, [(H, W, 3) x N] for
+ im (list[np.ndarray] | torch.Tensor): Input images of shape (N, 3, H, W) for tensor, [(H, W, 3) x N] for
  list.

  Returns:
@@ -35,7 +35,7 @@ class RTDETRDataset(YOLODataset):
  Examples:
  Initialize an RT-DETR dataset
  >>> dataset = RTDETRDataset(img_path="path/to/images", imgsz=640)
- >>> image, hw = dataset.load_image(0)
+ >>> image, hw0, hw = dataset.load_image(0)
  """

  def __init__(self, *args, data=None, **kwargs):
@@ -59,13 +59,14 @@ class RTDETRDataset(YOLODataset):
  rect_mode (bool, optional): Whether to use rectangular mode for batch inference.

  Returns:
- im (torch.Tensor): The loaded image.
- resized_hw (tuple): Height and width of the resized image with shape (2,).
+ im (np.ndarray): Loaded image as a NumPy array.
+ hw_original (tuple[int, int]): Original image dimensions in (height, width) format.
+ hw_resized (tuple[int, int]): Resized image dimensions in (height, width) format.

  Examples:
  Load an image from the dataset
  >>> dataset = RTDETRDataset(img_path="path/to/images")
- >>> image, hw = dataset.load_image(0)
+ >>> image, hw0, hw = dataset.load_image(0)
  """
  return super().load_image(i=i, rect_mode=rect_mode)

@@ -227,12 +227,12 @@ def _build_sam(

  def _build_sam2(
  encoder_embed_dim=1280,
- encoder_stages=[2, 6, 36, 4],
+ encoder_stages=(2, 6, 36, 4),
  encoder_num_heads=2,
- encoder_global_att_blocks=[7, 15, 23, 31],
- encoder_backbone_channel_list=[1152, 576, 288, 144],
- encoder_window_spatial_size=[7, 7],
- encoder_window_spec=[8, 4, 16, 8],
+ encoder_global_att_blocks=(7, 15, 23, 31),
+ encoder_backbone_channel_list=(1152, 576, 288, 144),
+ encoder_window_spatial_size=(7, 7),
+ encoder_window_spec=(8, 4, 16, 8),
  checkpoint=None,
  ):
  """Build and return a Segment Anything Model 2 (SAM2) with specified architecture parameters.
@@ -19,7 +19,6 @@ from .sam3.model_misc import DotProductScoring, TransformerWrapper
  from .sam3.necks import Sam3DualViTDetNeck
  from .sam3.sam3_image import SAM3SemanticModel
  from .sam3.text_encoder_ve import VETextEncoder
- from .sam3.tokenizer_ve import SimpleTokenizer
  from .sam3.vitdet import ViT
  from .sam3.vl_combiner import SAM3VLBackbone

@@ -133,27 +132,31 @@ def _create_sam3_transformer() -> TransformerWrapper:
  return TransformerWrapper(encoder=encoder, decoder=decoder, d_model=256)


- def build_sam3_image_model(
- checkpoint_path: str, bpe_path: str, enable_segmentation: bool = True, compile: bool = False
- ):
+ def build_sam3_image_model(checkpoint_path: str, enable_segmentation: bool = True, compile: bool = False):
  """Build SAM3 image model.

  Args:
  checkpoint_path: Optional path to model checkpoint
- bpe_path: Path to the BPE tokenizer vocabulary
  enable_segmentation: Whether to enable segmentation head
  compile: To enable compilation, set to "default"

  Returns:
  A SAM3 image model
  """
+ try:
+ import clip
+ except ImportError:
+ from ultralytics.utils.checks import check_requirements
+
+ check_requirements("git+https://github.com/ultralytics/CLIP.git")
+ import clip
  # Create visual components
  compile_mode = "default" if compile else None
  vision_encoder = _create_vision_backbone(compile_mode=compile_mode, enable_inst_interactivity=True)

  # Create text components
  text_encoder = VETextEncoder(
- tokenizer=SimpleTokenizer(bpe_path=bpe_path),
+ tokenizer=clip.simple_tokenizer.SimpleTokenizer(),
  d_model=256,
  width=1024,
  heads=16,
@@ -44,7 +44,7 @@ class SAM(Model):
  >>> sam = SAM("sam_b.pt")
  >>> results = sam.predict("image.jpg", points=[[500, 375]])
  >>> for r in results:
- >>> print(f"Detected {len(r.masks)} masks")
+ ... print(f"Detected {len(r.masks)} masks")
  """

  def __init__(self, model: str = "sam_b.pt") -> None:
@@ -607,8 +607,14 @@ class SAM2Model(torch.nn.Module):
  backbone_out["backbone_fpn"][1] = self.sam_mask_decoder.conv_s1(backbone_out["backbone_fpn"][1])
  return backbone_out

- def _prepare_backbone_features(self, backbone_out):
+ def _prepare_backbone_features(self, backbone_out, batch=1):
  """Prepare and flatten visual features from the image backbone output for further processing."""
+ if batch > 1: # expand features if there's more than one prompt
+ backbone_out = {
+ **backbone_out,
+ "backbone_fpn": [feat.expand(batch, -1, -1, -1) for feat in backbone_out["backbone_fpn"]],
+ "vision_pos_enc": [pos.expand(batch, -1, -1, -1) for pos in backbone_out["vision_pos_enc"]],
+ }
  assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"])
  assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels

@@ -619,7 +625,6 @@ class SAM2Model(torch.nn.Module):
  # flatten NxCxHxW to HWxNxC
  vision_feats = [x.flatten(2).permute(2, 0, 1) for x in feature_maps]
  vision_pos_embeds = [x.flatten(2).permute(2, 0, 1) for x in vision_pos_embeds]
-
  return backbone_out, vision_feats, vision_pos_embeds, feat_sizes

  def _prepare_memory_conditioned_features(
@@ -782,7 +787,7 @@ class SAM2Model(torch.nn.Module):
  memory_pos=memory_pos_embed,
  num_obj_ptr_tokens=num_obj_ptr_tokens,
  )
- # reshape the output (HW)BC => BCHW
+ # Reshape output (HW)BC => BCHW
  pix_feat_with_mem = pix_feat_with_mem.permute(1, 2, 0).view(B, C, H, W)
  return pix_feat_with_mem

@@ -859,7 +864,7 @@ class SAM2Model(torch.nn.Module):
  pix_feat = pix_feat.view(-1, self.hidden_dim, *feat_sizes[-1])
  sam_outputs = self._use_mask_as_output(mask_inputs, pix_feat, high_res_features)
  else:
- # fused the visual feature with previous memory features in the memory bank
+ # Fuse visual features with previous memory features in the memory bank
  pix_feat = self._prepare_memory_conditioned_features(
  frame_idx=frame_idx,
  is_init_cond_frame=is_init_cond_frame,
@@ -1150,6 +1155,6 @@ class SAM3Model(SAM2Model):
  # Apply pixel-wise non-overlapping constraint based on mask scores
  pixel_level_non_overlapping_masks = self._apply_non_overlapping_constraints(pred_masks)
  # Fully suppress masks with high shrinkage (probably noisy) based on the pixel wise non-overlapping constraints
- # NOTE: The output of this function can be a no op if none of the masks shrinked by a large factor.
+ # NOTE: The output of this function can be a no op if none of the masks shrink by a large factor.
  pred_masks = self._suppress_shrinked_masks(pred_masks, pixel_level_non_overlapping_masks)
  return pred_masks
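
Aside (not part of the diff): the _prepare_backbone_features change above broadcasts cached single-image features across multiple prompts with Tensor.expand. A minimal sketch of what expand does, using made-up shapes:

import torch

feat = torch.randn(1, 256, 64, 64)  # backbone feature map computed once for one image
batch = 3  # e.g. three prompts applied to the same image
expanded = feat.expand(batch, -1, -1, -1)  # view of shape (3, 256, 64, 64); -1 keeps existing sizes
print(expanded.shape, expanded.data_ptr() == feat.data_ptr())  # same underlying storage, no copy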