dgenerate-ultralytics-headless 8.3.236__py3-none-any.whl → 8.3.239__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/RECORD +117 -105
  3. tests/test_exports.py +3 -1
  4. tests/test_python.py +2 -2
  5. tests/test_solutions.py +6 -6
  6. ultralytics/__init__.py +1 -1
  7. ultralytics/cfg/__init__.py +4 -4
  8. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  9. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  10. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  11. ultralytics/cfg/datasets/VOC.yaml +15 -16
  12. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  13. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  14. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  15. ultralytics/cfg/datasets/dota8.yaml +2 -2
  16. ultralytics/cfg/datasets/kitti.yaml +1 -1
  17. ultralytics/cfg/datasets/xView.yaml +16 -16
  18. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  19. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  20. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  21. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  22. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  23. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  24. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  25. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  26. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  27. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  28. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  29. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  30. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  31. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  32. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  33. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  34. ultralytics/data/augment.py +1 -1
  35. ultralytics/data/base.py +4 -2
  36. ultralytics/data/build.py +4 -4
  37. ultralytics/data/loaders.py +17 -12
  38. ultralytics/data/utils.py +4 -4
  39. ultralytics/engine/exporter.py +40 -25
  40. ultralytics/engine/predictor.py +8 -6
  41. ultralytics/engine/results.py +12 -13
  42. ultralytics/engine/trainer.py +10 -2
  43. ultralytics/engine/tuner.py +2 -3
  44. ultralytics/engine/validator.py +2 -2
  45. ultralytics/models/fastsam/model.py +2 -2
  46. ultralytics/models/fastsam/predict.py +2 -3
  47. ultralytics/models/fastsam/val.py +4 -4
  48. ultralytics/models/rtdetr/predict.py +2 -3
  49. ultralytics/models/rtdetr/val.py +10 -5
  50. ultralytics/models/sam/__init__.py +14 -1
  51. ultralytics/models/sam/build.py +22 -13
  52. ultralytics/models/sam/build_sam3.py +377 -0
  53. ultralytics/models/sam/model.py +13 -5
  54. ultralytics/models/sam/modules/blocks.py +20 -8
  55. ultralytics/models/sam/modules/decoders.py +2 -3
  56. ultralytics/models/sam/modules/encoders.py +4 -1
  57. ultralytics/models/sam/modules/memory_attention.py +6 -2
  58. ultralytics/models/sam/modules/sam.py +159 -10
  59. ultralytics/models/sam/modules/utils.py +134 -4
  60. ultralytics/models/sam/predict.py +2073 -139
  61. ultralytics/models/sam/sam3/__init__.py +3 -0
  62. ultralytics/models/sam/sam3/decoder.py +546 -0
  63. ultralytics/models/sam/sam3/encoder.py +535 -0
  64. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  65. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  66. ultralytics/models/sam/sam3/model_misc.py +198 -0
  67. ultralytics/models/sam/sam3/necks.py +129 -0
  68. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  69. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  70. ultralytics/models/sam/sam3/vitdet.py +546 -0
  71. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  72. ultralytics/models/yolo/classify/val.py +1 -1
  73. ultralytics/models/yolo/detect/train.py +1 -1
  74. ultralytics/models/yolo/detect/val.py +7 -7
  75. ultralytics/models/yolo/obb/val.py +19 -8
  76. ultralytics/models/yolo/pose/val.py +1 -1
  77. ultralytics/models/yolo/segment/val.py +1 -1
  78. ultralytics/nn/autobackend.py +9 -9
  79. ultralytics/nn/modules/block.py +1 -1
  80. ultralytics/nn/modules/transformer.py +21 -1
  81. ultralytics/nn/tasks.py +3 -3
  82. ultralytics/nn/text_model.py +2 -7
  83. ultralytics/solutions/ai_gym.py +1 -1
  84. ultralytics/solutions/analytics.py +6 -6
  85. ultralytics/solutions/config.py +1 -1
  86. ultralytics/solutions/distance_calculation.py +1 -1
  87. ultralytics/solutions/object_counter.py +1 -1
  88. ultralytics/solutions/object_cropper.py +3 -6
  89. ultralytics/solutions/parking_management.py +21 -17
  90. ultralytics/solutions/queue_management.py +5 -5
  91. ultralytics/solutions/region_counter.py +2 -2
  92. ultralytics/solutions/security_alarm.py +1 -1
  93. ultralytics/solutions/solutions.py +45 -22
  94. ultralytics/solutions/speed_estimation.py +1 -1
  95. ultralytics/trackers/basetrack.py +1 -1
  96. ultralytics/trackers/bot_sort.py +4 -3
  97. ultralytics/trackers/byte_tracker.py +4 -4
  98. ultralytics/trackers/utils/gmc.py +6 -7
  99. ultralytics/trackers/utils/kalman_filter.py +2 -1
  100. ultralytics/trackers/utils/matching.py +4 -3
  101. ultralytics/utils/__init__.py +12 -3
  102. ultralytics/utils/benchmarks.py +2 -2
  103. ultralytics/utils/callbacks/tensorboard.py +19 -25
  104. ultralytics/utils/checks.py +4 -3
  105. ultralytics/utils/downloads.py +1 -1
  106. ultralytics/utils/export/tensorflow.py +16 -2
  107. ultralytics/utils/files.py +13 -12
  108. ultralytics/utils/logger.py +62 -27
  109. ultralytics/utils/metrics.py +1 -1
  110. ultralytics/utils/ops.py +7 -9
  111. ultralytics/utils/patches.py +3 -3
  112. ultralytics/utils/plotting.py +7 -12
  113. ultralytics/utils/tuner.py +1 -1
  114. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/WHEEL +0 -0
  115. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/entry_points.txt +0 -0
  116. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/licenses/LICENSE +0 -0
  117. {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/sam3/vl_combiner.py ADDED
@@ -0,0 +1,160 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+ """Provides utility to combine a vision backbone with a language backbone."""
+
+ from __future__ import annotations
+
+ from copy import copy
+
+ import torch
+ import torch.nn as nn
+ from torch.nn.attention import SDPBackend, sdpa_kernel
+
+ from .necks import Sam3DualViTDetNeck
+
+
+ class SAM3VLBackbone(nn.Module):
+     """This backbone combines a vision backbone and a language backbone without fusion. As such it is more of a
+     convenience wrapper to handle the two backbones together.
+
+     It adds support for activation checkpointing and compilation.
+     """
+
+     def __init__(
+         self,
+         visual: Sam3DualViTDetNeck,
+         text,
+         compile_visual: bool = False,
+         act_ckpt_whole_vision_backbone: bool = False,
+         act_ckpt_whole_language_backbone: bool = False,
+         scalp=0,
+     ):
+         """Initialize the backbone combiner.
+
+         :param visual: The vision backbone to use
+         :param text: The text encoder to use
+         """
+         super().__init__()
+         self.vision_backbone: Sam3DualViTDetNeck = torch.compile(visual) if compile_visual else visual
+         self.language_backbone = text
+         self.scalp = scalp
+         # allow running activation checkpointing on the entire vision and language backbones
+         self.act_ckpt_whole_vision_backbone = act_ckpt_whole_vision_backbone
+         self.act_ckpt_whole_language_backbone = act_ckpt_whole_language_backbone
+
+     def forward(
+         self,
+         samples: torch.Tensor,
+         captions: list[str],
+         input_boxes: torch.Tensor = None,
+         additional_text: list[str] | None = None,
+     ):
+         """Forward pass of the backbone combiner.
+
+         :param samples: The input images
+         :param captions: The input captions
+         :param input_boxes: If the text contains place-holders for boxes, this
+             parameter contains the tensor containing their spatial features
+         :param additional_text: This can be used to encode some additional text
+             (different from the captions) in the same forward of the backbone
+         :return: Output dictionary with the following keys:
+             - vision_features: The output of the vision backbone
+             - language_features: The output of the language backbone
+             - language_mask: The attention mask of the language backbone
+             - vision_pos_enc: The positional encoding of the vision backbone
+             - (optional) additional_text_features: The output of the language
+               backbone for the additional text
+             - (optional) additional_text_mask: The attention mask of the
+               language backbone for the additional text
+         """
+         output = self.forward_image(samples)
+         output.update(self.forward_text(captions, input_boxes, additional_text))
+         return output
+
+     def forward_image(self, samples: torch.Tensor):
+         """Forward pass of the vision backbone and get both SAM3 and SAM2 features."""
+         # Forward through backbone
+         sam3_features, sam3_pos, sam2_features, sam2_pos = self.vision_backbone.forward(samples)
+         if self.scalp > 0:
+             # Discard the lowest resolution features
+             sam3_features, sam3_pos = (
+                 sam3_features[: -self.scalp],
+                 sam3_pos[: -self.scalp],
+             )
+             if sam2_features is not None and sam2_pos is not None:
+                 sam2_features, sam2_pos = (
+                     sam2_features[: -self.scalp],
+                     sam2_pos[: -self.scalp],
+                 )
+
+         sam2_output = None
+
+         if sam2_features is not None and sam2_pos is not None:
+             sam2_src = sam2_features[-1]
+             sam2_output = {
+                 "vision_features": sam2_src,
+                 "vision_pos_enc": sam2_pos,
+                 "backbone_fpn": sam2_features,
+             }
+
+         sam3_src = sam3_features[-1]
+         return {
+             "vision_features": sam3_src,
+             "vision_pos_enc": sam3_pos,
+             "backbone_fpn": sam3_features,
+             "sam2_backbone_out": sam2_output,
+         }
+
+     def forward_image_sam2(self, samples: torch.Tensor):
+         """Forward pass of the vision backbone to get SAM2 features only."""
+         xs = self.vision_backbone.trunk(samples)
+         x = xs[-1]  # simpleFPN
+
+         assert self.vision_backbone.sam2_convs is not None, "SAM2 neck is not available."
+         sam2_features, sam2_pos = self.vision_backbone.sam_forward_feature_levels(x, self.vision_backbone.sam2_convs)
+
+         if self.scalp > 0:
+             # Discard the lowest resolution features
+             sam2_features, sam2_pos = (
+                 sam2_features[: -self.scalp],
+                 sam2_pos[: -self.scalp],
+             )
+
+         return {
+             "vision_features": sam2_features[-1],
+             "vision_pos_enc": sam2_pos,
+             "backbone_fpn": sam2_features,
+         }
+
+     def forward_text(self, captions, input_boxes=None, additional_text=None):
+         """Forward pass of the text encoder."""
+         output = {}
+
+         # Forward through text_encoder
+         text_to_encode = copy(captions)
+         if additional_text is not None:
+             # if there are additional_text, we piggy-back them into this forward.
+             # They'll be used later for output alignment
+             text_to_encode += additional_text
+
+         with sdpa_kernel([SDPBackend.MATH, SDPBackend.EFFICIENT_ATTENTION, SDPBackend.FLASH_ATTENTION]):
+             text_attention_mask, text_memory, text_embeds = self.language_backbone(text_to_encode, input_boxes)
+
+         if additional_text is not None:
+             output["additional_text_features"] = text_memory[:, -len(additional_text) :]
+             output["additional_text_mask"] = text_attention_mask[-len(additional_text) :]
+
+         text_memory = text_memory[:, : len(captions)]
+         text_attention_mask = text_attention_mask[: len(captions)]
+         text_embeds = text_embeds[:, : len(captions)]
+         output["language_features"] = text_memory
+         output["language_mask"] = text_attention_mask
+         output["language_embeds"] = text_embeds  # Text embeddings before forward to the encoder
+
+         return output
+
+     def set_imgsz(self, imgsz: list[int] = [1008, 1008]):
+         """Set the image size for the vision backbone."""
+         self.vision_backbone.set_imgsz(imgsz)
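Note: the combiner above is a thin wrapper, so its behavior can be exercised with stub backbones. A minimal sketch; `StubVision` and `StubText` are hypothetical stand-ins (not the real `Sam3DualViTDetNeck` or text encoder), and the tensor shapes are illustrative:

```python
import torch
import torch.nn as nn

from ultralytics.models.sam.sam3.vl_combiner import SAM3VLBackbone


class StubVision(nn.Module):
    """Stand-in for Sam3DualViTDetNeck: one SAM3 feature level plus its positional encoding, no SAM2 branch."""

    def forward(self, x):
        feat = torch.randn(x.shape[0], 256, 63, 63)
        return [feat], [torch.zeros_like(feat)], None, None  # sam3_features, sam3_pos, sam2_features, sam2_pos


class StubText(nn.Module):
    """Stand-in for the language backbone: returns (mask, memory, embeds) in the layout forward_text slices."""

    def forward(self, texts, input_boxes=None):
        n, seq, dim = len(texts), 16, 256
        return torch.ones(n, seq, dtype=torch.bool), torch.randn(seq, n, dim), torch.randn(seq, n, dim)


backbone = SAM3VLBackbone(visual=StubVision(), text=StubText())
out = backbone(torch.randn(2, 3, 1008, 1008), captions=["a red car", "a dog"])
print(sorted(out))  # backbone_fpn, language_*, sam2_backbone_out, vision_features, vision_pos_enc
```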
ultralytics/models/yolo/classify/val.py CHANGED
@@ -57,7 +57,7 @@ class ClassificationValidator(BaseValidator):
  """Initialize ClassificationValidator with dataloader, save directory, and other parameters.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to use for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
  save_dir (str | Path, optional): Directory to save results.
  args (dict, optional): Arguments containing model and validation configuration.
  _callbacks (list, optional): List of callback functions to be called during validation.
ultralytics/models/yolo/detect/train.py CHANGED
@@ -53,7 +53,7 @@ class DetectionTrainer(BaseTrainer):
  """

  def __init__(self, cfg=DEFAULT_CFG, overrides: dict[str, Any] | None = None, _callbacks=None):
- """Initialize a DetectionTrainer object for training YOLO object detection model training.
+ """Initialize a DetectionTrainer object for training YOLO object detection models.

  Args:
  cfg (dict, optional): Default configuration dictionary containing training parameters.
ultralytics/models/yolo/detect/val.py CHANGED
@@ -46,7 +46,7 @@ class DetectionValidator(BaseValidator):
  """Initialize detection validator with necessary variables and settings.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to use for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
  save_dir (Path, optional): Directory to save results.
  args (dict[str, Any], optional): Arguments for the validator.
  _callbacks (list[Any], optional): List of callback functions.
@@ -256,7 +256,7 @@ class DetectionValidator(BaseValidator):
  pf = "%22s" + "%11i" * 2 + "%11.3g" * len(self.metrics.keys)  # print format
  LOGGER.info(pf % ("all", self.seen, self.metrics.nt_per_class.sum(), *self.metrics.mean_results()))
  if self.metrics.nt_per_class.sum() == 0:
- LOGGER.warning(f"no labels found in {self.args.task} set, can not compute metrics without labels")
+ LOGGER.warning(f"no labels found in {self.args.task} set, cannot compute metrics without labels")

  # Print results per class
  if self.args.verbose and not self.training and self.nc > 1 and len(self.metrics.stats):
@@ -308,7 +308,7 @@ class DetectionValidator(BaseValidator):
  batch_size (int): Size of each batch.

  Returns:
- (torch.utils.data.DataLoader): Dataloader for validation.
+ (torch.utils.data.DataLoader): DataLoader for validation.
  """
  dataset = self.build_dataset(dataset_path, batch=batch_size, mode="val")
  return build_dataloader(
@@ -460,11 +460,11 @@ class DetectionValidator(BaseValidator):

  Args:
  stats (dict[str, Any]): Dictionary to store computed metrics and statistics.
- pred_json (str | Path]): Path to JSON file containing predictions in COCO format.
- anno_json (str | Path]): Path to JSON file containing ground truth annotations in COCO format.
- iou_types (str | list[str]]): IoU type(s) for evaluation. Can be single string or list of strings. Common
+ pred_json (str | Path): Path to JSON file containing predictions in COCO format.
+ anno_json (str | Path): Path to JSON file containing ground truth annotations in COCO format.
+ iou_types (str | list[str]): IoU type(s) for evaluation. Can be single string or list of strings. Common
  values include "bbox", "segm", "keypoints". Defaults to "bbox".
- suffix (str | list[str]]): Suffix to append to metric names in stats dictionary. Should correspond to
+ suffix (str | list[str]): Suffix to append to metric names in stats dictionary. Should correspond to
  iou_types if multiple types provided. Defaults to "Box".

  Returns:
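Note: the corrected docstring above allows `iou_types` and `suffix` to be a single string or aligned lists. A sketch of the normalization this implies (hypothetical helper, not the validator's actual code):

```python
def pair_iou_types(iou_types="bbox", suffix="Box"):
    """Normalize str-or-list inputs into aligned (iou_type, suffix) pairs."""
    iou_types = [iou_types] if isinstance(iou_types, str) else list(iou_types)
    suffix = [suffix] if isinstance(suffix, str) else list(suffix)
    assert len(iou_types) == len(suffix), "provide one suffix per IoU type"
    return list(zip(iou_types, suffix))


print(pair_iou_types())  # [('bbox', 'Box')]
print(pair_iou_types(["bbox", "segm"], ["Box", "Mask"]))  # [('bbox', 'Box'), ('segm', 'Mask')]
```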
ultralytics/models/yolo/obb/val.py CHANGED
@@ -12,6 +12,7 @@ from ultralytics.models.yolo.detect import DetectionValidator
  from ultralytics.utils import LOGGER, ops
  from ultralytics.utils.metrics import OBBMetrics, batch_probiou
  from ultralytics.utils.nms import TorchNMS
+ from ultralytics.utils.plotting import plot_images


  class OBBValidator(DetectionValidator):
@@ -49,7 +50,7 @@ class OBBValidator(DetectionValidator):
  extends the DetectionValidator class and configures it specifically for the OBB task.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
  save_dir (str | Path, optional): Directory to save results.
  args (dict | SimpleNamespace, optional): Arguments containing validation parameters.
  _callbacks (list, optional): List of callback functions to be called during validation.
@@ -141,24 +142,34 @@ class OBBValidator(DetectionValidator):
  "im_file": batch["im_file"][si],
  }

- def plot_predictions(self, batch: dict[str, Any], preds: list[torch.Tensor], ni: int) -> None:
+ def plot_predictions(self, batch: dict[str, Any], preds: list[dict[str, torch.Tensor]], ni: int) -> None:
  """Plot predicted bounding boxes on input images and save the result.

  Args:
  batch (dict[str, Any]): Batch data containing images, file paths, and other metadata.
- preds (list[torch.Tensor]): List of prediction tensors for each image in the batch.
+ preds (list[dict[str, torch.Tensor]]): List of prediction dictionaries for each image in the batch.
  ni (int): Batch index used for naming the output file.

  Examples:
  >>> validator = OBBValidator()
  >>> batch = {"img": images, "im_file": paths}
- >>> preds = [torch.rand(10, 7)]  # Example predictions for one image
+ >>> preds = [{"bboxes": torch.rand(10, 5), "cls": torch.zeros(10), "conf": torch.rand(10)}]
  >>> validator.plot_predictions(batch, preds, 0)
  """
- for p in preds:
-     # TODO: fix this duplicated `xywh2xyxy`
-     p["bboxes"][:, :4] = ops.xywh2xyxy(p["bboxes"][:, :4])  # convert to xyxy format for plotting
- super().plot_predictions(batch, preds, ni)  # plot bboxes
+ if not preds:
+     return
+ for i, pred in enumerate(preds):
+     pred["batch_idx"] = torch.ones_like(pred["conf"]) * i
+ keys = preds[0].keys()
+ batched_preds = {k: torch.cat([x[k] for x in preds], dim=0) for k in keys}
+ plot_images(
+     images=batch["img"],
+     labels=batched_preds,
+     paths=batch["im_file"],
+     fname=self.save_dir / f"val_batch{ni}_pred.jpg",
+     names=self.names,
+     on_plot=self.on_plot,
+ )

  def pred_to_json(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> None:
  """Convert YOLO predictions to COCO JSON format with rotated bounding box information.
ultralytics/models/yolo/pose/val.py CHANGED
@@ -59,7 +59,7 @@ class PoseValidator(DetectionValidator):
  specialized metrics for pose evaluation.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
  save_dir (Path | str, optional): Directory to save results.
  args (dict, optional): Arguments for the validator including task set to "pose".
  _callbacks (list, optional): List of callback functions to be executed during validation.
ultralytics/models/yolo/segment/val.py CHANGED
@@ -39,7 +39,7 @@ class SegmentationValidator(DetectionValidator):
  """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics.

  Args:
- dataloader (torch.utils.data.DataLoader, optional): Dataloader to use for validation.
+ dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
  save_dir (Path, optional): Directory to save results.
  args (namespace, optional): Arguments for the validator.
  _callbacks (list, optional): List of callback functions.
ultralytics/nn/autobackend.py CHANGED
@@ -127,7 +127,7 @@ class AutoBackend(nn.Module):

  Methods:
  forward: Run inference on an input image.
- from_numpy: Convert numpy array to tensor.
+ from_numpy: Convert NumPy arrays to tensors on the model device.
  warmup: Warm up the model with a dummy input.
  _model_type: Determine the model type from file path.

@@ -182,7 +182,7 @@ class AutoBackend(nn.Module):
  triton,
  ) = self._model_type("" if nn_module else model)
  fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
- nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch BCWH)
+ nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch BCHW)
  stride, ch = 32, 3  # default stride and channels
  end2end, dynamic = False, False
  metadata, task = None, None
@@ -894,14 +894,14 @@ class AutoBackend(nn.Module):
  else:
  return self.from_numpy(y)

- def from_numpy(self, x: np.ndarray) -> torch.Tensor:
-     """Convert a numpy array to a tensor.
+ def from_numpy(self, x: np.ndarray | torch.Tensor) -> torch.Tensor:
+     """Convert a NumPy array to a torch tensor on the model device.

  Args:
- x (np.ndarray): The array to be converted.
+ x (np.ndarray | torch.Tensor): Input array or tensor.

  Returns:
- (torch.Tensor): The converted tensor
+ (torch.Tensor): Tensor on `self.device`.
  """
  return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x

@@ -909,7 +909,7 @@ class AutoBackend(nn.Module):
  """Warm up the model by running one forward pass with a dummy input.

  Args:
- imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width)
+ imgsz (tuple[int, int, int, int]): Dummy input shape in (batch, channels, height, width) format.
  """
  warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
  if any(warmup_types) and (self.device.type != "cpu" or self.triton):
@@ -931,8 +931,8 @@ class AutoBackend(nn.Module):
  (list[bool]): List of booleans indicating the model type.

  Examples:
- >>> model = AutoBackend(model="path/to/model.onnx")
- >>> model_type = model._model_type()  # returns "onnx"
+ >>> types = AutoBackend._model_type("path/to/model.onnx")
+ >>> assert types[2]  # onnx
  """
  from ultralytics.engine.exporter import export_formats

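Note: `from_numpy` is now documented as a passthrough for tensors and a device-moving conversion for arrays. A standalone equivalent of the one-liner above (sketch; `device` defaults to CPU here for illustration):

```python
import numpy as np
import torch


def from_numpy(x, device=torch.device("cpu")):
    """Convert NumPy arrays to tensors on `device`; pass tensors through unchanged."""
    return torch.tensor(x).to(device) if isinstance(x, np.ndarray) else x


print(from_numpy(np.zeros((1, 3), dtype=np.float32)).dtype)  # torch.float32
t = torch.ones(2)
print(from_numpy(t) is t)  # True: tensors are not copied
```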
ultralytics/nn/modules/block.py CHANGED
@@ -1812,7 +1812,7 @@ class A2C2f(nn.Module):
  """
  super().__init__()
  c_ = int(c2 * e)  # hidden channels
- assert c_ % 32 == 0, "Dimension of ABlock be a multiple of 32."
+ assert c_ % 32 == 0, "Dimension of ABlock must be a multiple of 32."

  self.cv1 = Conv(c1, c_, 1, 1)
  self.cv2 = Conv((1 + n) * c_, c2, 1)
ultralytics/nn/modules/transformer.py CHANGED
@@ -359,7 +359,15 @@ class MLP(nn.Module):
  """

  def __init__(
-     self, input_dim: int, hidden_dim: int, output_dim: int, num_layers: int, act=nn.ReLU, sigmoid: bool = False
+     self,
+     input_dim: int,
+     hidden_dim: int,
+     output_dim: int,
+     num_layers: int,
+     act=nn.ReLU,
+     sigmoid: bool = False,
+     residual: bool = False,
+     out_norm: nn.Module = None,
  ):
  """Initialize the MLP with specified input, hidden, output dimensions and number of layers.

@@ -370,6 +378,8 @@ class MLP(nn.Module):
  num_layers (int): Number of layers.
  act (nn.Module): Activation function.
  sigmoid (bool): Whether to apply sigmoid to the output.
+ residual (bool): Whether to use residual connections.
+ out_norm (nn.Module, optional): Normalization layer for the output.
  """
  super().__init__()
  self.num_layers = num_layers
@@ -377,6 +387,12 @@ class MLP(nn.Module):
  self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim, *h], [*h, output_dim]))
  self.sigmoid = sigmoid
  self.act = act()
+ if residual and input_dim != output_dim:
+     raise ValueError("residual is only supported if input_dim == output_dim")
+ self.residual = residual
+ # whether to apply a normalization layer to the output
+ assert isinstance(out_norm, nn.Module) or out_norm is None
+ self.out_norm = out_norm or nn.Identity()

  def forward(self, x: torch.Tensor) -> torch.Tensor:
  """Forward pass for the entire MLP.
@@ -387,8 +403,12 @@ class MLP(nn.Module):
  Returns:
  (torch.Tensor): Output tensor after MLP.
  """
+ orig_x = x
  for i, layer in enumerate(self.layers):
      x = getattr(self, "act", nn.ReLU())(layer(x)) if i < self.num_layers - 1 else layer(x)
+ if getattr(self, "residual", False):
+     x = x + orig_x
+ x = getattr(self, "out_norm", nn.Identity())(x)
  return x.sigmoid() if getattr(self, "sigmoid", False) else x

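Note: the new `residual` and `out_norm` options require `input_dim == output_dim` for the skip connection, and invalid combinations now fail at construction time rather than inside forward. A usage sketch, assuming the 8.3.239 signature shown above:

```python
import torch
import torch.nn as nn

from ultralytics.nn.modules.transformer import MLP

mlp = MLP(input_dim=256, hidden_dim=512, output_dim=256, num_layers=3, residual=True, out_norm=nn.LayerNorm(256))
print(mlp(torch.randn(4, 256)).shape)  # torch.Size([4, 256])

try:
    MLP(input_dim=256, hidden_dim=512, output_dim=128, num_layers=3, residual=True)
except ValueError as e:
    print(e)  # residual is only supported if input_dim == output_dim
```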
ultralytics/nn/tasks.py CHANGED
@@ -866,7 +866,7 @@ class WorldModel(DetectionModel):
  self.model[-1].nc = len(text)

  def get_text_pe(self, text, batch=80, cache_clip_model=True):
- """Set classes in advance so that model could do offline-inference without clip model.
+ """Get text positional embeddings for offline inference without CLIP model.

  Args:
  text (list[str]): List of class names.
@@ -987,13 +987,13 @@ class YOLOEModel(DetectionModel):

  @smart_inference_mode()
  def get_text_pe(self, text, batch=80, cache_clip_model=False, without_reprta=False):
- """Set classes in advance so that model could do offline-inference without clip model.
+ """Get text positional embeddings for offline inference without CLIP model.

  Args:
  text (list[str]): List of class names.
  batch (int): Batch size for processing text tokens.
  cache_clip_model (bool): Whether to cache the CLIP model.
- without_reprta (bool): Whether to return text embeddings cooperated with reprta module.
+ without_reprta (bool): Whether to return text embeddings without reprta module processing.

  Returns:
  (torch.Tensor): Text positional embeddings.
ultralytics/nn/text_model.py CHANGED
@@ -196,12 +196,7 @@ class MobileCLIP(TextModel):
  device (torch.device): Device to load the model on.
  """
  try:
-     import warnings
-
-     # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
-     with warnings.catch_warnings():
-         warnings.filterwarnings("ignore", category=FutureWarning)
-         import mobileclip
+     import mobileclip
  except ImportError:
      # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
      checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
@@ -308,7 +303,7 @@ class MobileCLIPTS(TextModel):
  (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).

  Examples:
- >>> model = MobileCLIPTS("cpu")
+ >>> model = MobileCLIPTS(device=torch.device("cpu"))
  >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
  >>> strict_tokens = model.tokenize(
  ...     ["a very long caption"], truncate=False
ultralytics/solutions/ai_gym.py CHANGED
@@ -13,7 +13,7 @@ class AIGym(BaseSolution):
  repetitions of exercises based on predefined angle thresholds for up and down positions.

  Attributes:
- states (dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
+ states (dict[int, dict[str, float | int | str]]): Per-track angle, rep count, and stage for workout monitoring.
  up_angle (float): Angle threshold for considering the 'up' position of an exercise.
  down_angle (float): Angle threshold for considering the 'down' position of an exercise.
  kpts (list[int]): Indices of keypoints used for angle calculation.
ultralytics/solutions/analytics.py CHANGED
@@ -56,7 +56,7 @@ class Analytics(BaseSolution):
  from matplotlib.backends.backend_agg import FigureCanvasAgg
  from matplotlib.figure import Figure

- self.type = self.CFG["analytics_type"]  # type of analytics i.e "line", "pie", "bar" or "area" charts.
+ self.type = self.CFG["analytics_type"]  # Chart type: "line", "pie", "bar", or "area".
  self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#"
  self.y_label = "Total Counts"

@@ -66,10 +66,10 @@ class Analytics(BaseSolution):
  self.title = "Ultralytics Solutions"  # window name
  self.max_points = 45  # maximum points to be drawn on window
  self.fontsize = 25  # text font size for display
- figsize = self.CFG["figsize"]  # set output image size i.e (12.8, 7.2) -> w = 1280, h = 720
+ figsize = self.CFG["figsize"]  # Output size, e.g. (12.8, 7.2) -> 1280x720.
  self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])

- self.total_counts = 0  # count variable for storing total counts i.e. for line
+ self.total_counts = 0  # Stores total counts for line charts.
  self.clswise_count = {}  # dictionary for class-wise counts
  self.update_every = kwargs.get("update_every", 30)  # Only update graph every 30 frames by default
  self.last_plot_im = None  # Cache of the last rendered chart
@@ -104,7 +104,7 @@ class Analytics(BaseSolution):
  and 'classwise_count' (dict, per-class object count).

  Raises:
- ModuleNotFoundError: If an unsupported chart type is specified.
+ ValueError: If an unsupported chart type is specified.

  Examples:
  >>> analytics = Analytics(analytics_type="line")
@@ -131,9 +131,9 @@ class Analytics(BaseSolution):
  )
  plot_im = self.last_plot_im
  else:
- raise ModuleNotFoundError(f"{self.type} chart is not supported ")
+ raise ValueError(f"Unsupported analytics_type='{self.type}'. Supported types: line, bar, pie, area.")

- # return output dictionary with summary for more usage
+ # Return results for downstream use.
  return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)

  def update_graph(
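Note: the switch from `ModuleNotFoundError` to `ValueError` makes an unsupported chart type read as a configuration error rather than a missing dependency. The validation pattern in isolation (sketch, not the solution's exact code):

```python
SUPPORTED_CHARTS = {"line", "bar", "pie", "area"}


def check_chart_type(analytics_type: str) -> str:
    """Raise ValueError for chart types the Analytics solution does not support."""
    if analytics_type not in SUPPORTED_CHARTS:
        raise ValueError(f"Unsupported analytics_type='{analytics_type}'. Supported: {sorted(SUPPORTED_CHARTS)}")
    return analytics_type


print(check_chart_type("line"))  # line
```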
ultralytics/solutions/config.py CHANGED
@@ -35,7 +35,7 @@ class SolutionConfig:
  vision_point (tuple[int, int]): Reference point for directional tracking or perspective drawing.
  crop_dir (str): Directory path to save cropped detection images.
  json_file (str): Path to a JSON file containing data for parking areas.
- line_width (int): Width for visual display i.e. bounding boxes, keypoints, counts.
+ line_width (int): Width for visual display, e.g. bounding boxes, keypoints, and counts.
  records (int): Number of detection records to send email alerts.
  fps (float): Frame rate (Frames Per Second) for speed estimation calculation.
  max_hist (int): Maximum number of historical points or states stored per tracked object for speed estimation.
ultralytics/solutions/distance_calculation.py CHANGED
@@ -17,7 +17,7 @@ class DistanceCalculation(BaseSolution):

  Attributes:
  left_mouse_count (int): Counter for left mouse button clicks.
- selected_boxes (dict[int, list[float]]): Dictionary to store selected bounding boxes and their track IDs.
+ selected_boxes (dict[int, Any]): Dictionary to store selected bounding boxes keyed by track ID.
  centroids (list[list[int]]): List to store centroids of selected bounding boxes.

  Methods:
ultralytics/solutions/object_counter.py CHANGED
@@ -19,7 +19,7 @@ class ObjectCounter(BaseSolution):
  in_count (int): Counter for objects moving inward.
  out_count (int): Counter for objects moving outward.
  counted_ids (list[int]): List of IDs of objects that have been counted.
- classwise_counts (dict[str, dict[str, int]]): Dictionary for counts, categorized by object class.
+ classwise_count (dict[str, dict[str, int]]): Dictionary for counts, categorized by object class.
  region_initialized (bool): Flag indicating whether the counting region has been initialized.
  show_in (bool): Flag to control display of inward count.
  show_out (bool): Flag to control display of outward count.
ultralytics/solutions/object_cropper.py CHANGED
@@ -1,6 +1,5 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- import os
  from pathlib import Path
  from typing import Any

@@ -40,12 +39,10 @@ class ObjectCropper(BaseSolution):
  super().__init__(**kwargs)

  self.crop_dir = self.CFG["crop_dir"]  # Directory for storing cropped detections
- if not os.path.exists(self.crop_dir):
-     os.mkdir(self.crop_dir)  # Create directory if it does not exist
+ Path(self.crop_dir).mkdir(parents=True, exist_ok=True)
  if self.CFG["show"]:
-     self.LOGGER.warning(
-         f"show=True disabled for crop solution, results will be saved in the directory named: {self.crop_dir}"
-     )
+     self.LOGGER.warning(f"show=True is not supported for ObjectCropper; saving crops to '{self.crop_dir}'.")
+     self.CFG["show"] = False
  self.crop_idx = 0  # Initialize counter for total cropped objects
  self.iou = self.CFG["iou"]
  self.conf = self.CFG["conf"]
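Note: the `os.mkdir` to `Path.mkdir` change above trades two failure modes for idempotent behavior: `os.mkdir` raises if parent directories are missing or if the directory already exists, while `Path.mkdir(parents=True, exist_ok=True)` creates the whole tree and is safe to call repeatedly.

```python
from pathlib import Path

crop_dir = Path("runs/solutions/crops")  # illustrative path
crop_dir.mkdir(parents=True, exist_ok=True)  # creates missing parents
crop_dir.mkdir(parents=True, exist_ok=True)  # no-op on the second call
print(crop_dir.is_dir())  # True
```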