dgenerate-ultralytics-headless 8.3.178__py3-none-any.whl → 8.3.180__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/METADATA +3 -3
- {dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/RECORD +14 -14
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +4 -1
- ultralytics/cfg/datasets/VOC.yaml +1 -1
- ultralytics/cfg/datasets/VisDrone.yaml +1 -1
- ultralytics/models/sam/predict.py +141 -85
- ultralytics/models/yolo/model.py +5 -1
- ultralytics/utils/checks.py +2 -2
- ultralytics/utils/downloads.py +5 -3
- {dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/top_level.txt +0 -0
{dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dgenerate-ultralytics-headless
-Version: 8.3.178
+Version: 8.3.180
 Summary: Automatically built Ultralytics package with python-opencv-headless dependency instead of python-opencv
 Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -57,13 +57,13 @@ Requires-Dist: mkdocs-material>=9.5.9; extra == "dev"
 Requires-Dist: mkdocstrings[python]; extra == "dev"
 Requires-Dist: mkdocs-ultralytics-plugin>=0.1.26; extra == "dev"
 Requires-Dist: mkdocs-macros-plugin>=1.0.5; extra == "dev"
-Requires-Dist: click==8.2.1; extra == "dev"
 Provides-Extra: export
+Requires-Dist: numpy<2.0.0; extra == "export"
 Requires-Dist: onnx<1.18.0,>=1.12.0; extra == "export"
 Requires-Dist: coremltools>=8.0; (platform_system != "Windows" and python_version <= "3.13") and extra == "export"
 Requires-Dist: scikit-learn>=1.3.2; (platform_system != "Windows" and python_version <= "3.13") and extra == "export"
 Requires-Dist: openvino>=2024.0.0; extra == "export"
-Requires-Dist: tensorflow
+Requires-Dist: tensorflow<=2.19.0,>=2.0.0; extra == "export"
 Requires-Dist: tensorflowjs>=2.0.0; extra == "export"
 Requires-Dist: tensorstore>=0.1.63; (platform_machine == "aarch64" and python_version >= "3.9") and extra == "export"
 Requires-Dist: h5py!=3.11.0; platform_machine == "aarch64" and extra == "export"
```
{dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/RECORD CHANGED

```diff
@@ -1,4 +1,4 @@
-dgenerate_ultralytics_headless-8.3.178.dist-info/licenses/LICENSE,sha256=
+dgenerate_ultralytics_headless-8.3.180.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
 tests/__init__.py,sha256=b4KP5_q-2IO8Br8YHOSLYnn7IwZS81l_vfEF2YPa2lM,894
 tests/conftest.py,sha256=LXtQJcFNWPGuzauTGkiXgsvVC3llJKfg22WcmhRzuQc,2593
 tests/test_cli.py,sha256=EMf5gTAopOnIz8VvzaM-Qb044o7D0flnUHYQ-2ffOM4,5670
@@ -8,10 +8,10 @@ tests/test_exports.py,sha256=CY-4xVZlVM16vdyIC0mSR3Ix59aiZm1qjFGIhSNmB20,11007
 tests/test_integrations.py,sha256=kl_AKmE_Qs1GB0_91iVwbzNxofm_hFTt0zzU6JF-pg4,6323
 tests/test_python.py,sha256=-qvdeg-hEcKU5mWSDEU24iFZ-i8FAwQRznSXpkp6WQ4,27928
 tests/test_solutions.py,sha256=tuf6n_fsI8KvSdJrnc-cqP2qYdiYqCWuVrx0z9dOz3Q,13213
-ultralytics/__init__.py,sha256=
+ultralytics/__init__.py,sha256=fHQo0GhHdl2c_XkrtYaXY22EnCd1IZeIe5C59ZLWimc,730
 ultralytics/assets/bus.jpg,sha256=wCAZxJecGR63Od3ZRERe9Aja1Weayrb9Ug751DS_vGM,137419
 ultralytics/assets/zidane.jpg,sha256=Ftc4aeMmen1O0A3o6GCDO9FlfBslLpTAw0gnetx7bts,50427
-ultralytics/cfg/__init__.py,sha256=
+ultralytics/cfg/__init__.py,sha256=Uj1br3-NVFvP6VY5CL4PK63mAQAom93XFC5cqSbM6t4,39887
 ultralytics/cfg/default.yaml,sha256=1SspGAK_K_DT7DBfEScJh4jsJUTOxahehZYj92xmj7o,8347
 ultralytics/cfg/datasets/Argoverse.yaml,sha256=4SGaJio9JFUkrscHJTPnH_QSbYm48Wbk8EFwl39zntc,3262
 ultralytics/cfg/datasets/DOTAv1.5.yaml,sha256=VZ_KKFX0H2YvlFVJ8JHcLWYBZ2xiQ6Z-ROSTiKWpS7c,1211
@@ -21,8 +21,8 @@ ultralytics/cfg/datasets/HomeObjects-3K.yaml,sha256=xEtSqEad-rtfGuIrERjjhdISggmP
 ultralytics/cfg/datasets/ImageNet.yaml,sha256=GvDWypLVG_H3H67Ai8IC1pvK6fwcTtF5FRhzO1OXXDU,42530
 ultralytics/cfg/datasets/Objects365.yaml,sha256=vLzbT3xgpLR-bHhrHOiYyzYvDIniRdevgSyPetm8QHk,9354
 ultralytics/cfg/datasets/SKU-110K.yaml,sha256=a52le1-JQ2YH6b1WLMUxVz7RkZ36YsmXgWyw0z3q9nQ,2542
-ultralytics/cfg/datasets/VOC.yaml,sha256=
-ultralytics/cfg/datasets/VisDrone.yaml,sha256=
+ultralytics/cfg/datasets/VOC.yaml,sha256=o09FWAAsr1MH3ftBJ_n-4Tmc3zxnVJL1HqlqKRUYVTQ,3774
+ultralytics/cfg/datasets/VisDrone.yaml,sha256=dYAewe84CrGmxAA_z6UnZUAd7peaw5l3ARDcssojADk,3604
 ultralytics/cfg/datasets/african-wildlife.yaml,sha256=SuloMp9WAZBigGC8az-VLACsFhTM76_O29yhTvUqdnU,915
 ultralytics/cfg/datasets/brain-tumor.yaml,sha256=qrxPO_t9wxbn2kHFwP3vGTzSWj2ELTLelUwYL3_b6nc,800
 ultralytics/cfg/datasets/carparts-seg.yaml,sha256=A4e9hM1unTY2jjZIXGiKSarF6R-Ad9R99t57OgRJ37w,1253
@@ -151,7 +151,7 @@ ultralytics/models/sam/__init__.py,sha256=iR7B06rAEni21eptg8n4rLOP0Z_qV9y9PL-L93
 ultralytics/models/sam/amg.py,sha256=IpcuIfC5KBRiF4sdrsPl1ecWEJy75axo1yG23r5BFsw,11783
 ultralytics/models/sam/build.py,sha256=J6n-_QOYLa63jldEZmhRe9D3Is_AJE8xyZLUjzfRyTY,12629
 ultralytics/models/sam/model.py,sha256=j1TwsLmtxhiXyceU31VPzGVkjRXGylphKrdPSzUJRJc,7231
-ultralytics/models/sam/predict.py,sha256=
+ultralytics/models/sam/predict.py,sha256=VBClks5F74BX0uZWpMfUjaXM_H222BYJ09lRYF6A2u8,86184
 ultralytics/models/sam/modules/__init__.py,sha256=lm6MckFYCPTbqIoX7w0s_daxdjNeBeKW6DXppv1-QUM,70
 ultralytics/models/sam/modules/blocks.py,sha256=n8oe9sx91_RktsF2_2UYNKH7qk8bFXuJtEaIEpQQ3ws,46059
 ultralytics/models/sam/modules/decoders.py,sha256=-1fhBO47hA-3CzkU-PzkCK4Nsi_VJ_CH6Q9SMjydN4I,25609
@@ -165,7 +165,7 @@ ultralytics/models/utils/__init__.py,sha256=lm6MckFYCPTbqIoX7w0s_daxdjNeBeKW6DXp
 ultralytics/models/utils/loss.py,sha256=E-61TfLPc04IdeL6IlFDityDoPju-ov0ouWV_cNY4Kg,21254
 ultralytics/models/utils/ops.py,sha256=Pr77n8XW25SUEx4X3bBvXcVIbRdJPoaXJuG0KWWawRQ,15253
 ultralytics/models/yolo/__init__.py,sha256=or0j5xvcM0usMlsFTYhNAOcQUri7reD0cD9JR5b7zDk,307
-ultralytics/models/yolo/model.py,sha256=
+ultralytics/models/yolo/model.py,sha256=96PDREUJwDiPb3w4lp2HCesc3c3y1WGyLttOUhUYPxk,18715
 ultralytics/models/yolo/classify/__init__.py,sha256=9--HVaNOfI1K7rn_rRqclL8FUAnpfeBrRqEQIaQw2xM,383
 ultralytics/models/yolo/classify/predict.py,sha256=FqAC2YXe25bRwedMZhF3Lw0waoY-a60xMKELhxApP9I,4149
 ultralytics/models/yolo/classify/train.py,sha256=V-hevc6X7xemnpyru84OfTRA77eNnkVSMEz16_OUvo4,10244
@@ -239,9 +239,9 @@ ultralytics/utils/__init__.py,sha256=KyczXn2SLR0g8xkCVpfNaPkhDCQ4A8vLt99teYu7mss
 ultralytics/utils/autobatch.py,sha256=33m8YgggLIhltDqMXZ5OE-FGs2QiHrl2-LfgY1mI4cw,5119
 ultralytics/utils/autodevice.py,sha256=AvgXFt8c1Cg4icKh0Hbhhz8UmVQ2Wjyfdfkeb2C8zck,8855
 ultralytics/utils/benchmarks.py,sha256=btsi_B0mfLPfhE8GrsBpi79vl7SRam0YYngNFAsY8Ak,31035
-ultralytics/utils/checks.py,sha256=
+ultralytics/utils/checks.py,sha256=q64U5wKyejD-2W2fCPqJ0Oiaa4_4vq2pVxV9wp6lMz4,34707
 ultralytics/utils/dist.py,sha256=A9lDGtGefTjSVvVS38w86GOdbtLzNBDZuDGK0MT4PRI,4170
-ultralytics/utils/downloads.py,sha256=
+ultralytics/utils/downloads.py,sha256=A7r4LpWUojGkam9-VQ3Ylu-Cn1lAUGKyJE6VzwQbp7M,22016
 ultralytics/utils/errors.py,sha256=XT9Ru7ivoBgofK6PlnyigGoa7Fmf5nEhyHtnD-8TRXI,1584
 ultralytics/utils/export.py,sha256=LK-wlTlyb_zIKtSvOmfmvR70RcUU9Ct9UBDt5wn9_rY,9880
 ultralytics/utils/files.py,sha256=ZCbLGleiF0f-PqYfaxMFAWop88w7U1hpreHXl8b2ko0,8238
@@ -266,8 +266,8 @@ ultralytics/utils/callbacks/neptune.py,sha256=j8pecmlcsM8FGzLKWoBw5xUsi5t8E5HuxY
 ultralytics/utils/callbacks/raytune.py,sha256=S6Bq16oQDQ8BQgnZzA0zJHGN_BBr8iAM_WtGoLiEcwg,1283
 ultralytics/utils/callbacks/tensorboard.py,sha256=MDPBW7aDes-66OE6YqKXXvqA_EocjzEMHWGM-8z9vUQ,5281
 ultralytics/utils/callbacks/wb.py,sha256=Tm_-aRr2CN32MJkY9tylpMBJkb007-MSRNSQ7rDJ5QU,7521
-dgenerate_ultralytics_headless-8.3.178.dist-info/METADATA,sha256=
-dgenerate_ultralytics_headless-8.3.178.dist-info/WHEEL,sha256=
-dgenerate_ultralytics_headless-8.3.178.dist-info/entry_points.txt,sha256=
-dgenerate_ultralytics_headless-8.3.178.dist-info/top_level.txt,sha256=
-dgenerate_ultralytics_headless-8.3.178.dist-info/RECORD,,
+dgenerate_ultralytics_headless-8.3.180.dist-info/METADATA,sha256=-yv2HSf0JD7vFjkRFBIrwXoyHrcdbdQdhB5ocRW3_hk,38727
+dgenerate_ultralytics_headless-8.3.180.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dgenerate_ultralytics_headless-8.3.180.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
+dgenerate_ultralytics_headless-8.3.180.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
+dgenerate_ultralytics_headless-8.3.180.dist-info/RECORD,,
```
ultralytics/__init__.py CHANGED

ultralytics/cfg/__init__.py CHANGED
```diff
@@ -954,7 +954,10 @@ def entrypoint(debug: str = "") -> None:
         from ultralytics import YOLO
 
         model = YOLO(model, task=task)
-
+        if "yoloe" in stem or "world" in stem:
+            cls_list = overrides.pop("classes", DEFAULT_CFG.classes)
+            if cls_list is not None and isinstance(cls_list, str):
+                model.set_classes(cls_list.split(","))  # convert "person, bus" -> ['person', ' bus'].
     # Task Update
     if task != model.task:
         if task:
```
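For YOLOE and YOLO-World checkpoints, `entrypoint()` now forwards a CLI `classes` override straight to `set_classes()` before the task check. A minimal sketch of the equivalent Python call, assuming an illustrative YOLOE checkpoint name:

```python
from ultralytics import YOLO

# Roughly what `yolo predict model=yoloe-11s-seg.pt classes="person,bus"` now does:
model = YOLO("yoloe-11s-seg.pt")  # checkpoint name is illustrative
model.set_classes("person,bus".split(","))  # -> ["person", "bus"]
```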
ultralytics/cfg/datasets/VOC.yaml CHANGED

```diff
@@ -87,7 +87,7 @@ download: |
     f"{url}VOCtest_06-Nov-2007.zip",  # 438MB, 4953 images
     f"{url}VOCtrainval_11-May-2012.zip",  # 1.95GB, 17126 images
   ]
-  download(urls, dir=dir / "images",
+  download(urls, dir=dir / "images", threads=3, exist_ok=True)  # download and unzip over existing (required)
 
   # Convert
   path = dir / "images/VOCdevkit"
```
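The VOC script unpacks three archives into the same `images/` directory, so per the new inline comment the call must be allowed to unzip over an existing folder. A sketch of the pattern with placeholder URLs:

```python
from pathlib import Path

from ultralytics.utils.downloads import download

# Several archives extract into one shared directory; exist_ok=True lets the
# second and third unzip over the folder created by the first (placeholder URLs).
dir = Path("datasets/VOC")
urls = ["https://example.com/part1.zip", "https://example.com/part2.zip"]
download(urls, dir=dir / "images", threads=3, exist_ok=True)
```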
ultralytics/cfg/datasets/VisDrone.yaml CHANGED

```diff
@@ -78,7 +78,7 @@ download: |
     "https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-dev.zip",
     # "https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-challenge.zip",
   ]
-  download(urls, dir=dir,
+  download(urls, dir=dir, threads=4)
 
   # Convert
   splits = {"VisDrone2019-DET-train": "train", "VisDrone2019-DET-val": "val", "VisDrone2019-DET-test-dev": "test"}
```
ultralytics/models/sam/predict.py CHANGED

```diff
@@ -182,9 +182,8 @@ class Predictor(BasePredictor):
             **kwargs (Any): Additional keyword arguments.
 
         Returns:
-            pred_masks (
-            pred_scores (
-            pred_logits (np.ndarray): Low-resolution logits of shape (C, H, W) for subsequent inference, where H=W=256.
+            pred_masks (torch.Tensor): The output masks in shape (C, H, W), where C is the number of generated masks.
+            pred_scores (torch.Tensor): An array of length C containing quality scores predicted by the model for each mask.
 
         Examples:
             >>> predictor = Predictor()
@@ -219,8 +218,8 @@ class Predictor(BasePredictor):
             multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
 
         Returns:
-            pred_masks (
-            pred_scores (
+            pred_masks (torch.Tensor): Output masks with shape (C, H, W), where C is the number of generated masks.
+            pred_scores (torch.Tensor): Quality scores predicted by the model for each mask, with length C.
 
         Examples:
             >>> predictor = Predictor()
@@ -230,7 +229,34 @@ class Predictor(BasePredictor):
         """
        features = self.get_im_features(im) if self.features is None else self.features
 
-        bboxes, points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks)
+        prompts = self._prepare_prompts(im.shape[2:], self.batch[1][0].shape[:2], bboxes, points, labels, masks)
+        return self._inference_features(features, *prompts, multimask_output)
+
+    def _inference_features(
+        self,
+        features,
+        bboxes=None,
+        points=None,
+        labels=None,
+        masks=None,
+        multimask_output=False,
+    ):
+        """
+        Perform inference on image features using the SAM model.
+
+        Args:
+            features (torch.Tensor): Extracted image features with shape (B, C, H, W) from the SAM model image encoder.
+            bboxes (np.ndarray | List[List[float]] | None): Bounding boxes in XYXY format with shape (N, 4).
+            points (np.ndarray | List[List[float]] | None): Object location points with shape (N, 2), in pixels.
+            labels (np.ndarray | List[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
+            masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+            multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
+            img_idx (int): Index of the image in the batch to process.
+
+        Returns:
+            pred_masks (torch.Tensor): Output masks with shape (C, H, W), where C is the number of generated masks.
+            pred_scores (torch.Tensor): Quality scores for each mask, with length C.
+        """
         points = (points, labels) if points is not None else None
         # Embed prompts
         sparse_embeddings, dense_embeddings = self.model.prompt_encoder(points=points, boxes=bboxes, masks=masks)
@@ -248,12 +274,13 @@ class Predictor(BasePredictor):
         # `d` could be 1 or 3 depends on `multimask_output`.
         return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1)
 
-    def _prepare_prompts(self, dst_shape, bboxes=None, points=None, labels=None, masks=None):
+    def _prepare_prompts(self, dst_shape, src_shape, bboxes=None, points=None, labels=None, masks=None):
         """
         Prepare and transform the input prompts for processing based on the destination shape.
 
         Args:
-            dst_shape (
+            dst_shape (Tuple[int, int]): The target shape (height, width) for the prompts.
+            src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
             bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
             points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
             labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
@@ -268,7 +295,6 @@ class Predictor(BasePredictor):
         Raises:
             AssertionError: If the number of points don't match the number of labels, in case labels were passed.
         """
-        src_shape = self.batch[1][0].shape[:2]
         r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1])
         # Transform input prompts
         if points is not None:
@@ -543,7 +569,7 @@ class Predictor(BasePredictor):
         - The extracted features are stored in the `self.features` attribute for later use.
         """
         if self.model is None:
-            self.setup_model(
+            self.setup_model()
         self.setup_source(image)
         assert len(self.dataset) == 1, "`set_image` only supports setting one image!"
         for batch in self.dataset:
@@ -620,6 +646,53 @@ class Predictor(BasePredictor):
 
         return new_masks[keep].to(device=masks.device, dtype=masks.dtype), keep
 
+    @smart_inference_mode()
+    def inference_features(
+        self,
+        features,
+        src_shape,
+        dst_shape=None,
+        bboxes=None,
+        points=None,
+        labels=None,
+        masks=None,
+        multimask_output=False,
+    ):
+        """
+        Perform prompts preprocessing and inference on provided image features using the SAM model.
+
+        Args:
+            features (torch.Tensor | Dict[str, Any]): Extracted image features from the SAM/SAM2 model image encoder.
+            src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
+            dst_shape (Tuple[int, int] | None): The target shape (height, width) for the prompts. If None, defaults to (imgsz, imgsz).
+            bboxes (np.ndarray | List[List[float]] | None): Bounding boxes in xyxy format with shape (N, 4).
+            points (np.ndarray | List[List[float]] | None): Points indicating object locations with shape (N, 2), in pixels.
+            labels (np.ndarray | List[int] | None): Point prompt labels with shape (N, ).
+            masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+            multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
+
+        Returns:
+            pred_masks (torch.Tensor): The output masks in shape (C, H, W), where C is the number of generated masks.
+            pred_bboxes (torch.Tensor): Bounding boxes for each mask with shape (N, 6), where N is the number of boxes.
+                Each box is in xyxy format with additional columns for score and class.
+
+        Notes:
+            - The input features is a torch.Tensor of shape (B, C, H, W) if performing on SAM, or a Dict[str, Any] if performing on SAM2.
+        """
+        dst_shape = dst_shape or (self.args.imgsz, self.args.imgsz)
+        prompts = self._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
+        pred_masks, pred_scores = self._inference_features(features, *prompts, multimask_output)
+        if len(pred_masks) == 0:
+            pred_masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
+        else:
+            pred_masks = ops.scale_masks(pred_masks[None].float(), src_shape, padding=False)[0]
+            pred_masks = pred_masks > self.model.mask_threshold  # to bool
+            pred_bboxes = batched_mask_to_box(pred_masks)
+            # NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
+            cls = torch.arange(len(pred_masks), dtype=torch.int32, device=pred_masks.device)
+            pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)
+        return pred_masks, pred_bboxes
+
 
 class SAM2Predictor(Predictor):
     """
```
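The new public `inference_features()` makes the cached encoder output reusable across many prompt sets without re-running the image encoder. A minimal sketch of that workflow, assuming a standard SAM checkpoint name and the existing `set_image()` API:

```python
from ultralytics.models.sam import Predictor

predictor = Predictor(overrides={"model": "sam_b.pt"})  # checkpoint name assumed
predictor.set_image("ultralytics/assets/bus.jpg")       # image encoder runs once here

src_shape = predictor.batch[1][0].shape[:2]  # original (h, w) of the loaded image
# Prompt repeatedly against the same cached features, with no encoder re-run:
masks, boxes = predictor.inference_features(
    predictor.features,
    src_shape,
    bboxes=[[100, 100, 500, 600]],
)
```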
```diff
@@ -663,80 +736,13 @@ class SAM2Predictor(Predictor):
 
         return build_sam(self.args.model)
 
-    def _inference(
-        self,
-        im,
-        bboxes=None,
-        points=None,
-        labels=None,
-        masks=None,
-        multimask_output=False,
-        img_idx=-1,
-    ):
-        """
-        Perform image segmentation inference based on various prompts using SAM2 architecture.
-
-        This method leverages the Segment Anything Model 2 (SAM2) to generate segmentation masks for input images
-        based on provided prompts such as bounding boxes, points, or existing masks. It supports both single and
-        multi-object prediction scenarios.
-
-        Args:
-            im (torch.Tensor): Preprocessed input image tensor with shape (N, C, H, W).
-            bboxes (np.ndarray | List[List[float]] | None): Bounding boxes in XYXY format with shape (N, 4).
-            points (np.ndarray | List[List[float]] | None): Object location points with shape (N, 2), in pixels.
-            labels (np.ndarray | List[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
-            masks (np.ndarray | None): Low-resolution masks from previous predictions with shape (N, H, W).
-            multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
-            img_idx (int): Index of the image in the batch to process.
-
-        Returns:
-            pred_masks (np.ndarray): Output masks with shape (C, H, W), where C is the number of generated masks.
-            pred_scores (np.ndarray): Quality scores for each mask, with length C.
-
-        Examples:
-            >>> predictor = SAM2Predictor(cfg)
-            >>> image = torch.rand(1, 3, 640, 640)
-            >>> bboxes = [[100, 100, 200, 200]]
-            >>> result = predictor(image, bboxes=bboxes)[0]
-            >>> print(f"Generated {result.masks.shape[0]} masks with average score {result.boxes.conf.mean():.2f}")
-
-        Notes:
-            - The method supports batched inference for multiple objects when points or bboxes are provided.
-            - Input prompts (bboxes, points) are automatically scaled to match the input image dimensions.
-            - When both bboxes and points are provided, they are merged into a single 'points' input for the model.
-        """
-        features = self.get_im_features(im) if self.features is None else self.features
-
-        points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks)
-        points = (points, labels) if points is not None else None
-
-        sparse_embeddings, dense_embeddings = self.model.sam_prompt_encoder(
-            points=points,
-            boxes=None,
-            masks=masks,
-        )
-        # Predict masks
-        batched_mode = points is not None and points[0].shape[0] > 1  # multi object prediction
-        high_res_features = [feat_level[img_idx].unsqueeze(0) for feat_level in features["high_res_feats"]]
-        pred_masks, pred_scores, _, _ = self.model.sam_mask_decoder(
-            image_embeddings=features["image_embed"][img_idx].unsqueeze(0),
-            image_pe=self.model.sam_prompt_encoder.get_dense_pe(),
-            sparse_prompt_embeddings=sparse_embeddings,
-            dense_prompt_embeddings=dense_embeddings,
-            multimask_output=multimask_output,
-            repeat_image=batched_mode,
-            high_res_features=high_res_features,
-        )
-        # (N, d, H, W) --> (N*d, H, W), (N, d) --> (N*d, )
-        # `d` could be 1 or 3 depends on `multimask_output`.
-        return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1)
-
-    def _prepare_prompts(self, dst_shape, bboxes=None, points=None, labels=None, masks=None):
+    def _prepare_prompts(self, dst_shape, src_shape, bboxes=None, points=None, labels=None, masks=None):
         """
         Prepare and transform the input prompts for processing based on the destination shape.
 
         Args:
-            dst_shape (
+            dst_shape (Tuple[int, int]): The target shape (height, width) for the prompts.
+            src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
             bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
             points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
             labels (np.ndarray | List | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background.
@@ -750,7 +756,7 @@ class SAM2Predictor(Predictor):
         Raises:
             AssertionError: If the number of points don't match the number of labels, in case labels were passed.
         """
-        bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, bboxes, points, labels, masks)
+        bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
         if bboxes is not None:
             bboxes = bboxes.view(-1, 2, 2)
             bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(len(bboxes), -1)
```
```diff
@@ -813,6 +819,54 @@ class SAM2Predictor(Predictor):
         ][::-1]
         return {"image_embed": feats[-1], "high_res_feats": feats[:-1]}
 
+    def _inference_features(
+        self,
+        features,
+        points=None,
+        labels=None,
+        masks=None,
+        multimask_output=False,
+        img_idx=-1,
+    ):
+        """
+        Perform inference on image features using the SAM2 model.
+
+        Args:
+            features (Dict[str, Any]): Extracted image information from the SAM2 model image encoder, it's a dictionary including:
+                image_embed (torch.Tensor): Image embedding with shape (B, C, H, W).
+                high_res_feats (List[torch.Tensor]): List of high-resolution feature maps from the backbone, each with shape (B, C, H, W).
+            points (np.ndarray | List[List[float]] | None): Object location points with shape (N, 2), in pixels.
+            labels (np.ndarray | List[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
+            masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+            multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
+            img_idx (int): Index of the image in the batch to process.
+
+        Returns:
+            pred_masks (torch.Tensor): Output masks with shape (C, H, W), where C is the number of generated masks.
+            pred_scores (torch.Tensor): Quality scores for each mask, with length C.
+        """
+        points = (points, labels) if points is not None else None
+        sparse_embeddings, dense_embeddings = self.model.sam_prompt_encoder(
+            points=points,
+            boxes=None,
+            masks=masks,
+        )
+        # Predict masks
+        batched_mode = points is not None and points[0].shape[0] > 1  # multi object prediction
+        high_res_features = [feat_level[img_idx].unsqueeze(0) for feat_level in features["high_res_feats"]]
+        pred_masks, pred_scores, _, _ = self.model.sam_mask_decoder(
+            image_embeddings=features["image_embed"][img_idx].unsqueeze(0),
+            image_pe=self.model.sam_prompt_encoder.get_dense_pe(),
+            sparse_prompt_embeddings=sparse_embeddings,
+            dense_prompt_embeddings=dense_embeddings,
+            multimask_output=multimask_output,
+            repeat_image=batched_mode,
+            high_res_features=high_res_features,
+        )
+        # (N, d, H, W) --> (N*d, H, W), (N, d) --> (N*d, )
+        # `d` could be 1 or 3 depends on `multimask_output`.
+        return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1)
+
 
 class SAM2VideoPredictor(SAM2Predictor):
     """
```
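Because `SAM2Predictor` overrides only `_inference_features()`, the shared `inference_features()` entry point above serves SAM2 as well; the cached features are a dict rather than a single tensor. An illustrative structure matching the keys in the docstring (tensor shapes here are assumptions, not values from the diff):

```python
import torch

# Illustrative SAM2 feature cache, matching the documented keys:
features = {
    "image_embed": torch.rand(1, 256, 64, 64),  # (B, C, H, W), shape assumed
    "high_res_feats": [
        torch.rand(1, 32, 256, 256),  # finer backbone level, shape assumed
        torch.rand(1, 64, 128, 128),  # coarser backbone level, shape assumed
    ],
}
```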
```diff
@@ -900,8 +954,8 @@ class SAM2VideoPredictor(SAM2Predictor):
             masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256.
 
         Returns:
-            pred_masks (
-            pred_scores (
+            pred_masks (torch.Tensor): The output masks in shape CxHxW, where C is the number of generated masks.
+            pred_scores (torch.Tensor): An array of length C containing quality scores predicted by the model for each mask.
         """
         # Override prompts if any stored in self.prompts
         bboxes = self.prompts.pop("bboxes", bboxes)
@@ -912,7 +966,9 @@ class SAM2VideoPredictor(SAM2Predictor):
         self.inference_state["im"] = im
         output_dict = self.inference_state["output_dict"]
         if len(output_dict["cond_frame_outputs"]) == 0:  # initialize prompts
-            points, labels, masks = self._prepare_prompts(
+            points, labels, masks = self._prepare_prompts(
+                im.shape[2:], self.batch[1][0].shape[:2], bboxes, points, labels, masks
+            )
             if points is not None:
                 for i in range(len(points)):
                     self.add_new_prompts(obj_id=i, points=points[[i]], labels=labels[[i]], frame_idx=frame)
@@ -966,7 +1022,7 @@ class SAM2VideoPredictor(SAM2Predictor):
         the masks do not overlap, which can be useful for certain applications.
 
         Args:
-            preds (
+            preds (Tuple[torch.Tensor, torch.Tensor]): The predicted masks and scores from the model.
             img (torch.Tensor): The processed image tensor.
             orig_imgs (List[np.ndarray]): The original images before processing.
```
ultralytics/models/yolo/model.py CHANGED

```diff
@@ -3,6 +3,8 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
+import torch
+
 from ultralytics.data.build import load_inference_source
 from ultralytics.engine.model import Model
 from ultralytics.models import yolo
@@ -315,7 +317,7 @@ class YOLOE(Model):
         assert isinstance(self.model, YOLOEModel)
         return self.model.get_vocab(names)
 
-    def set_classes(self, classes: List[str], embeddings) -> None:
+    def set_classes(self, classes: List[str], embeddings: Optional[torch.Tensor] = None) -> None:
         """
         Set the model's class names and embeddings for detection.
 
@@ -324,6 +326,8 @@ class YOLOE(Model):
             embeddings (torch.Tensor): Embeddings corresponding to the classes.
         """
         assert isinstance(self.model, YOLOEModel)
+        if embeddings is None:
+            embeddings = self.get_text_pe(classes)  # generate text embeddings if not provided
         self.model.set_classes(classes, embeddings)
         # Verify no background class is present
         assert " " not in classes
```
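`embeddings` is now optional: when omitted, `set_classes()` derives text prompt embeddings itself via `get_text_pe()`, which the new code calls internally. A short sketch (the checkpoint name is illustrative):

```python
from ultralytics import YOLOE

model = YOLOE("yoloe-11s-seg.pt")  # illustrative checkpoint name

# Previously required: precompute embeddings and pass them explicitly
embeddings = model.get_text_pe(["person", "bus"])
model.set_classes(["person", "bus"], embeddings)

# Now equivalent: embeddings default to None and are generated internally
model.set_classes(["person", "bus"])
```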
ultralytics/utils/checks.py CHANGED

```diff
@@ -694,7 +694,7 @@ def collect_system_info():
         "GPU count": torch.cuda.device_count() if cuda else None,
         "CUDA": torch.version.cuda if cuda else None,
     }
-    LOGGER.info("\n" + "\n".join(f"{k:<
+    LOGGER.info("\n" + "\n".join(f"{k:<23}{v}" for k, v in info_dict.items()) + "\n")
 
     package_info = {}
     for r in parse_requirements(package="ultralytics"):
@@ -705,7 +705,7 @@ def collect_system_info():
             current = "(not installed)"
             is_met = "❌ "
         package_info[r.name] = f"{is_met}{current}{r.specifier}"
-        LOGGER.info(f"{r.name:<
+        LOGGER.info(f"{r.name:<23}{package_info[r.name]}")
 
     info_dict["Package Info"] = package_info
```
ultralytics/utils/downloads.py CHANGED

```diff
@@ -501,7 +501,9 @@ def download(
     """
     dir = Path(dir)
     dir.mkdir(parents=True, exist_ok=True)  # make directory
+    urls = [url] if isinstance(url, (str, Path)) else url
     if threads > 1:
+        LOGGER.info(f"Downloading {len(urls)} file(s) with {threads} threads to {dir}...")
         with ThreadPool(threads) as pool:
             pool.map(
                 lambda x: safe_download(
@@ -512,12 +514,12 @@ def download(
                     curl=curl,
                     retry=retry,
                     exist_ok=exist_ok,
-                    progress=
+                    progress=True,
                 ),
-                zip(
+                zip(urls, repeat(dir)),
             )
         pool.close()
         pool.join()
     else:
-        for u in
+        for u in urls:
             safe_download(url=u, dir=dir, unzip=unzip, delete=delete, curl=curl, retry=retry, exist_ok=exist_ok)
```
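With the new `urls` normalization, both the threaded and serial branches iterate the same list, so a bare string URL behaves consistently regardless of `threads`. A minimal sketch:

```python
from ultralytics.utils.downloads import download

# A single URL string is normalized to a one-element list before either branch
# runs, so it is safe even with threads > 1:
download(
    "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt",
    dir="weights",
    threads=2,  # also logs the file count and thread count before starting
)
```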
{dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/WHEEL
File without changes

{dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/entry_points.txt
File without changes

{dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/licenses/LICENSE
File without changes

{dgenerate_ultralytics_headless-8.3.178.dist-info → dgenerate_ultralytics_headless-8.3.180.dist-info}/top_level.txt
File without changes