dgenerate-ultralytics-headless 8.3.135__py3-none-any.whl → 8.3.137__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dgenerate-ultralytics-headless
- Version: 8.3.135
+ Version: 8.3.137
  Summary: Automatically built Ultralytics package with python-opencv-headless dependency instead of python-opencv
  Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -46,7 +46,6 @@ Requires-Dist: tqdm>=4.64.0
  Requires-Dist: psutil
  Requires-Dist: py-cpuinfo
  Requires-Dist: pandas>=1.1.4
- Requires-Dist: seaborn>=0.11.0
  Requires-Dist: ultralytics-thop>=2.0.0
  Provides-Extra: dev
  Requires-Dist: ipython; extra == "dev"
@@ -1,17 +1,17 @@
- dgenerate_ultralytics_headless-8.3.135.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
+ dgenerate_ultralytics_headless-8.3.137.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
  tests/__init__.py,sha256=xnMhv3O_DF1YrW4zk__ZywQzAaoTDjPKPoiI1Ktss1w,670
  tests/conftest.py,sha256=rsIAipRKfrVNoTaJ1LdpYue8AbcJ_fr3d3WIlM_6uXY,2982
  tests/test_cli.py,sha256=vXUC_EK0fa87JRhHsCOZf7AJQ5_Jm1sL8u-yhmsaQh0,5851
- tests/test_cuda.py,sha256=eKwaqLxWTRRYNROnkH24Ch-HmxTRKQLSIxbMYFYq_p0,8123
+ tests/test_cuda.py,sha256=L_2xp2TH-pInsdI8UrbZ5onRtHQGdUVoPXnyX6Ot4_U,7950
  tests/test_engine.py,sha256=aGqZ8P7QO5C_nOa1b4FOyk92Ysdk5WiP-ST310Vyxys,4962
- tests/test_exports.py,sha256=UeeBloqYYGZNh520R3CR80XBxA9XFrNmbK9An6V6C4w,9838
+ tests/test_exports.py,sha256=dhZn86LdbapW15RthQF870LGxDjC1MUZhlGdBgPmgIQ,9716
  tests/test_integrations.py,sha256=dQteeRsRVuT_p5-T88-7jqT65Zm9iAXkyKg-KQ1_TQ8,6341
  tests/test_python.py,sha256=KWsncKpeDdRmjRftmJpsMl7bBLI3TG_I7Lb4kuemZzQ,25618
  tests/test_solutions.py,sha256=IFlqyOUCvGbLe_YZqWmNCe_afg4as0p-SfAv3j7VURI,6205
- ultralytics/__init__.py,sha256=7IMXy8Z7sekeQRLOVZyuYbA-1kse0gieArFyUxQ9dyE,730
+ ultralytics/__init__.py,sha256=8hzZtbr1IMQwOTdqbcNED-RHZiqww--zXivCgQOzujQ,730
  ultralytics/assets/bus.jpg,sha256=wCAZxJecGR63Od3ZRERe9Aja1Weayrb9Ug751DS_vGM,137419
  ultralytics/assets/zidane.jpg,sha256=Ftc4aeMmen1O0A3o6GCDO9FlfBslLpTAw0gnetx7bts,50427
- ultralytics/cfg/__init__.py,sha256=p1dKUDoVsnjJG8Qj5Q-eukb0WH2IoWV3BcJpEmKu2tE,39487
+ ultralytics/cfg/__init__.py,sha256=h0UVCvX6DIpoR4_pthpZD_Ihq7eCaS8HbXsPOm82G0E,39540
  ultralytics/cfg/default.yaml,sha256=oFG6llJO-Py5H-cR9qs-7FieJamroDLwpbrkhmfROOM,8307
  ultralytics/cfg/datasets/Argoverse.yaml,sha256=_xlEDIJ9XkUo0v_iNL7FW079BoSeZtKSuLteKTtGbA8,3275
  ultralytics/cfg/datasets/DOTAv1.5.yaml,sha256=SHND_CFkojxw5iQD5Mcgju2kCZIl0gW2ajuzv1cqoL0,1224
@@ -119,7 +119,7 @@ ultralytics/data/scripts/get_coco.sh,sha256=UuJpJeo3qQpTHVINeOpmP0NYmg8PhEFE3A8J
  ultralytics/data/scripts/get_coco128.sh,sha256=qmRQl_hOKrsdHrTrnyQuFIH01oDz3lfaz138OgGfLt8,650
  ultralytics/data/scripts/get_imagenet.sh,sha256=hr42H16bM47iT27rgS7MpEo-GeOZAYUQXgr0B2cwn48,1705
  ultralytics/engine/__init__.py,sha256=lm6MckFYCPTbqIoX7w0s_daxdjNeBeKW6DXppv1-QUM,70
- ultralytics/engine/exporter.py,sha256=tXqZlcOZnDqtK7A0nwago7FfDAb3ftnYui-VeOFzVs0,70823
+ ultralytics/engine/exporter.py,sha256=JucFVR_RAfzrRWM9kJK6MHALEbdzrf93ReTnAhiRTBo,70823
  ultralytics/engine/model.py,sha256=fWhPNWUQzjjWfTEXzTaqSSearV4THRkEa_fl4dDvzWw,52930
  ultralytics/engine/predictor.py,sha256=AwKpOGY2G-thNNiRw4Kf_MBLamq5tbRhXLNSMRArqFo,21803
  ultralytics/engine/results.py,sha256=MhbyMCwgslmtV53fqii4UJUaLQ4gKTKdkXi7vvmJDAE,79628
@@ -186,17 +186,17 @@ ultralytics/models/yolo/segment/predict.py,sha256=mIC3aHI7Jg4dU1k2UZnjVj4unE-5TW
  ultralytics/models/yolo/segment/train.py,sha256=EIyIAjYp127Mb-DomyjPORaONu57OY_gOTK9p2MwW6E,5359
  ultralytics/models/yolo/segment/val.py,sha256=cXJM1JNuzDraU0SJQRIdzNxabd0bfcxiRE8wozHZChY,18415
  ultralytics/models/yolo/world/__init__.py,sha256=nlh8I6t8hMGz_vZg8QSlsUW1R-2eKvn9CGUoPPQEGhA,131
- ultralytics/models/yolo/world/train.py,sha256=HUJ0XiJIGx_FA9kqNYnSFsaKWMiZUDxgkpfGoBH6UNc,4896
- ultralytics/models/yolo/world/train_world.py,sha256=DSa-t9jDbtwF43SJlvtESh1Ux7M77zo9f945eR2D-5w,8363
+ ultralytics/models/yolo/world/train.py,sha256=4e54RghcrpdtpxG3n2Nicwo-tcj-wI4nLcUo8_4cf30,6898
+ ultralytics/models/yolo/world/train_world.py,sha256=fFhhI-toaEy1_-XcPM1_mF395WRQ26gZ4UxqyUAZmWw,8461
  ultralytics/models/yolo/yoloe/__init__.py,sha256=6SLytdJtwu37qewf7CobG7C7Wl1m-xtNdvCXEasfPDE,760
  ultralytics/models/yolo/yoloe/predict.py,sha256=N0oYcr_mdw8wyUAWprAwJhrA0r23BaTeYXEjw2e8_mI,6993
- ultralytics/models/yolo/yoloe/train.py,sha256=St3zw_XWRol9pODWU4lvKlJnWYr1lmWQNuhLFwWMge4,12989
+ ultralytics/models/yolo/yoloe/train.py,sha256=xRPDJ3nUWxtqjESfmUtsZslVhpgzrZRw8z_QU5hV6nc,11710
  ultralytics/models/yolo/yoloe/train_seg.py,sha256=BYFBd04k5WQaJPcFbCvVIbEf2IOQyW8_sGeoVT_74j0,4632
  ultralytics/models/yolo/yoloe/val.py,sha256=oA8cVT3pBXF6aPZy7ITq0mDcktRuIgks8tTtqMRISyY,8431
  ultralytics/nn/__init__.py,sha256=rjociYD9lo_K-d-1s6TbdWklPLjTcEHk7OIlRDJstIE,615
  ultralytics/nn/autobackend.py,sha256=X2cxCytBu9fmniy8uJ5aZb28IukQ-uxV1INXeS1lclA,39368
- ultralytics/nn/tasks.py,sha256=o7QZvlZyvmECxkITJjtDCPf-hAxXcZOLXP7PKtegOPQ,63594
- ultralytics/nn/text_model.py,sha256=8_7SRejKZA4Pi-ha0gjcWrQDDCDMBhtwlg8pPMWgjDE,13145
+ ultralytics/nn/tasks.py,sha256=iJWpwRr4yZg1dTT-9jXuzIqkdFmbZm1b7hejnO-CiZk,64337
+ ultralytics/nn/text_model.py,sha256=wr5yPRbMqtSr2N5Rzdd0vuv9PcQe8qw4uO596ZHZVGU,13236
  ultralytics/nn/modules/__init__.py,sha256=dXLtIk9rt944WfsTdpgEdWOg3HQEHdwQztuZ6WNJygs,3144
  ultralytics/nn/modules/activation.py,sha256=PvXZkA9AzEntR575JkFORdmtcRwATyy0lje-uHA5_8w,2210
  ultralytics/nn/modules/block.py,sha256=yd6Ao9T2UJNAWc8oB1-CSxyF6-exqbFcN3hTWUZNU3M,66701
@@ -238,7 +238,7 @@ ultralytics/utils/__init__.py,sha256=vac0M-Hx55QXl6Vod3QPjnLBlt87Hwxu1784RXPmeQA
  ultralytics/utils/autobatch.py,sha256=kg05q2qKg74y_Uq2vvr01i3KhLfpVR7sT0IXBt3_kyI,4921
  ultralytics/utils/autodevice.py,sha256=OKZfTbswg6SlsYGCGMqROkA-451CXGG47oeyC5Q1kFM,7232
  ultralytics/utils/benchmarks.py,sha256=lDNNnLeLUzmqKrqrqlCOiau-q7A-gcLooZP2dbxCu-U,30214
- ultralytics/utils/checks.py,sha256=L5G8CiQo8v2842KLGOaLG5y_AYRoa5gxCdtTt48LnS0,33129
+ ultralytics/utils/checks.py,sha256=TGhnnNVT3NEBhSeckWIe1rGlXUyYI3xhFqK6CR0oBiE,33192
  ultralytics/utils/dist.py,sha256=aytW0JEkcA5ZTZucV92ot7Bn-apiej8aLk3QNWicjAc,4103
  ultralytics/utils/downloads.py,sha256=Rn8xDwn2bzgBqiYz3Xn0rm3MWjk4T-QUd2Ajlu1EpQ4,22312
  ultralytics/utils/errors.py,sha256=vY9h2evFSrHnZdHJVVrmm8Zzw4qVDLyo9DeYW5g0dFk,1573
@@ -246,10 +246,10 @@ ultralytics/utils/export.py,sha256=XInnl9AQeik7EuR1492nzDvgDqaV43FlnM5CLamrgd4,8
  ultralytics/utils/files.py,sha256=0K4O1cgqRiXaDw7EQK13TqA5SME_RrvfDVQSPetNr5w,8042
  ultralytics/utils/instance.py,sha256=UOEsXR9V-bXNRk6BTonASBEgeMqvzzAk4S7VdXZJUAM,18090
  ultralytics/utils/loss.py,sha256=Woc_rj7ptCyezHdylEygXMeSEgivYu_B9jJHD4UwxWE,37607
- ultralytics/utils/metrics.py,sha256=pWNq-66VqkMjj05Gqkm8ddoElDK72q_U9cl8y-aEN6k,53963
+ ultralytics/utils/metrics.py,sha256=n8guPEADBMRNpeXNShEX-fxVv9xck8S4QaOIiaW_kl0,56037
  ultralytics/utils/ops.py,sha256=YFwPrKlPcgEmgAWqnJVR0Ccx5NQgp5e3P-YYHwVSP0k,34779
  ultralytics/utils/patches.py,sha256=_dhIU_eDklQE-aWIjpyjPHl_wOwZoGuIUQnXgdSwk_A,5020
- ultralytics/utils/plotting.py,sha256=m9Hsbt6U073jAiztX6clpd9KzznW62oHxCWlBcm0T-s,46920
+ ultralytics/utils/plotting.py,sha256=GKic2OMavjJPT3pOPdU0UcvQTrG1LVt0vHJM-Zuy9Bs,47217
  ultralytics/utils/tal.py,sha256=P5nPoR9qNnFuDIda0fsn8WP6m1V8r7EbvXUuhNRFFTA,20805
  ultralytics/utils/torch_utils.py,sha256=2SJxxg8Qr0YqOoQ-8qAYn6VrzZdQMObqiw3CJZ-rAY0,39611
  ultralytics/utils/triton.py,sha256=xK9Db_ZUVDnIK1u76S2G-6ulIBsLfj9HN_YOaSrnMuU,5304
@@ -265,8 +265,8 @@ ultralytics/utils/callbacks/neptune.py,sha256=yYUgEgSv6L39sSev6vjwhAWU3DlPDsbSDV
  ultralytics/utils/callbacks/raytune.py,sha256=A8amUGpux7dYES-L1iSeMoMXBySGWCD1aUqT7vcG-pU,1284
  ultralytics/utils/callbacks/tensorboard.py,sha256=jgYnym3cUQFAgN1GzTyO7l3jINtfAh8zhrllDvnLuVQ,5339
  ultralytics/utils/callbacks/wb.py,sha256=iDRFXI4IIDm8R5OI89DMTmjs8aHLo1HRCLkOFKdaMG4,7507
- dgenerate_ultralytics_headless-8.3.135.dist-info/METADATA,sha256=8_HSDModHJ24S-bmagyx903_49yFcEJGC63r5nON6g4,38327
- dgenerate_ultralytics_headless-8.3.135.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
- dgenerate_ultralytics_headless-8.3.135.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
- dgenerate_ultralytics_headless-8.3.135.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
- dgenerate_ultralytics_headless-8.3.135.dist-info/RECORD,,
+ dgenerate_ultralytics_headless-8.3.137.dist-info/METADATA,sha256=8ui4ivOJaSEgzcD9bZTlWkJ3-Q_44TdABJCrfpEeLRM,38296
+ dgenerate_ultralytics_headless-8.3.137.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+ dgenerate_ultralytics_headless-8.3.137.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
+ dgenerate_ultralytics_headless-8.3.137.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
+ dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD,,
tests/test_cuda.py CHANGED
@@ -41,7 +41,7 @@ def test_amp():


  @pytest.mark.slow
- # @pytest.mark.skipif(IS_JETSON, reason="Temporary disable ONNX for Jetson")
+ @pytest.mark.skipif(IS_JETSON, reason="Temporary disable ONNX for Jetson")
  @pytest.mark.skipif(not DEVICES, reason="No CUDA devices available")
  @pytest.mark.parametrize(
  "task, dynamic, int8, half, batch, simplify, nms",
@@ -50,12 +50,7 @@ def test_amp():
  for task, dynamic, int8, half, batch, simplify, nms in product(
  TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False]
  )
- if not (
- (int8 and half)
- or (task == "classify" and nms)
- or (task == "obb" and nms and (not TORCH_1_13 or IS_JETSON)) # obb nms fails on NVIDIA Jetson
- or (simplify and dynamic) # onnxslim is slow when dynamic=True
- )
+ if not ((int8 and half) or (task == "classify" and nms) or (task == "obb" and nms and not TORCH_1_13))
  ],
  )
  def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
tests/test_exports.py CHANGED
@@ -83,12 +83,7 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):
  for task, dynamic, int8, half, batch, simplify, nms in product(
  TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False]
  )
- if not (
- (int8 and half)
- or (task == "classify" and nms)
- or (task == "obb" and nms and not TORCH_1_13)
- or (simplify and dynamic) # onnxslim is slow when dynamic=True
- )
+ if not ((int8 and half) or (task == "classify" and nms) or (task == "obb" and nms and not TORCH_1_13))
  ],
  )
  def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- __version__ = "8.3.135"
+ __version__ = "8.3.137"

  import os

ultralytics/cfg/__init__.py CHANGED
@@ -7,8 +7,6 @@ from pathlib import Path
  from types import SimpleNamespace
  from typing import Any, Dict, List, Union

- import cv2
-
  from ultralytics import __version__
  from ultralytics.utils import (
  ASSETS,
@@ -707,6 +705,8 @@ def handle_yolo_solutions(args: List[str]) -> None:
  ]
  )
  else:
+ import cv2 # Only needed for cap and vw functionality
+
  from ultralytics import solutions

  solution = getattr(solutions, SOLUTION_MAP[solution_name])(is_cli=True, **overrides) # class i.e ObjectCounter
@@ -919,7 +919,7 @@ def entrypoint(debug: str = "") -> None:
  if task not in TASKS:
  if task == "track":
  LOGGER.warning(
- "invalid 'task=track', setting 'task=detect' and 'mode=track'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}."
+ f"invalid 'task=track', setting 'task=detect' and 'mode=track'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}."
  )
  task, mode = "detect", "track"
  else:
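The `import cv2` move above is a startup-time optimization: OpenCV is now loaded only inside the solutions branch that actually opens a video capture/writer, so `import ultralytics` stays light. A minimal sketch of the same deferred-import pattern (a standalone illustration, not the package's code):

    # lazy_cv2_demo.py -- illustrative only
    def read_image(path: str):
        """OpenCV is imported only when this code path runs, not at module import time."""
        import cv2  # deferred heavy dependency

        return cv2.imread(path)  # returns None if the file does not exist

    if __name__ == "__main__":
        # Importing this module never touches cv2; calling read_image() does.
        img = read_image("bus.jpg")
        print("loaded" if img is not None else "not found")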
ultralytics/engine/exporter.py CHANGED
@@ -557,7 +557,7 @@ class Exporter:
  """YOLO ONNX export."""
  requirements = ["onnx>=1.12.0,<1.18.0"]
  if self.args.simplify:
- requirements += ["onnxslim>=0.1.46", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
+ requirements += ["onnxslim>=0.1.53", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
  check_requirements(requirements)
  import onnx # noqa

@@ -928,7 +928,7 @@ class Exporter:
  "ai-edge-litert>=1.2.0", # required by 'onnx2tf' package
  "onnx>=1.12.0,<1.18.0",
  "onnx2tf>=1.26.3",
- "onnxslim>=0.1.46",
+ "onnxslim>=0.1.53",
  "onnxruntime-gpu" if cuda else "onnxruntime",
  "protobuf>=5",
  ),
ultralytics/models/yolo/world/train.py CHANGED
@@ -1,11 +1,14 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

  import itertools
+ from pathlib import Path
+
+ import torch

  from ultralytics.data import build_yolo_dataset
- from ultralytics.models import yolo
+ from ultralytics.models.yolo.detect import DetectionTrainer
  from ultralytics.nn.tasks import WorldModel
- from ultralytics.utils import DEFAULT_CFG, RANK, checks
+ from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
  from ultralytics.utils.torch_utils import de_parallel


@@ -15,13 +18,9 @@ def on_pretrain_routine_end(trainer):
  # Set class names for evaluation
  names = [name.split("/")[0] for name in list(trainer.test_loader.dataset.data["names"].values())]
  de_parallel(trainer.ema.ema).set_classes(names, cache_clip_model=False)
- device = next(trainer.model.parameters()).device
- trainer.text_model, _ = trainer.clip.load("ViT-B/32", device=device)
- for p in trainer.text_model.parameters():
- p.requires_grad_(False)


- class WorldTrainer(yolo.detect.DetectionTrainer):
+ class WorldTrainer(DetectionTrainer):
  """
  A class to fine-tune a world model on a close-set dataset.

@@ -54,14 +53,7 @@ class WorldTrainer(yolo.detect.DetectionTrainer):
  if overrides is None:
  overrides = {}
  super().__init__(cfg, overrides, _callbacks)
-
- # Import and assign clip
- try:
- import clip
- except ImportError:
- checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
- import clip
- self.clip = clip
+ self.text_embeddings = None

  def get_model(self, cfg=None, weights=None, verbose=True):
  """
@@ -102,18 +94,72 @@ class WorldTrainer(yolo.detect.DetectionTrainer):
  (Dataset): YOLO dataset configured for training or validation.
  """
  gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
- return build_yolo_dataset(
+ dataset = build_yolo_dataset(
  self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
  )
+ if mode == "train":
+ self.set_text_embeddings([dataset], batch) # cache text embeddings to accelerate training
+ return dataset
+
+ def set_text_embeddings(self, datasets, batch):
+ """
+ Set text embeddings for datasets to accelerate training by caching category names.
+
+ This method collects unique category names from all datasets, then generates and caches text embeddings
+ for these categories to improve training efficiency.
+
+ Args:
+ datasets (List[Dataset]): List of datasets from which to extract category names.
+ batch (int | None): Batch size used for processing.
+
+ Notes:
+ This method collects category names from datasets that have the 'category_names' attribute,
+ then uses the first dataset's image path to determine where to cache the generated text embeddings.
+ """
+ text_embeddings = {}
+ for dataset in datasets:
+ if not hasattr(dataset, "category_names"):
+ continue
+ text_embeddings.update(
+ self.generate_text_embeddings(
+ list(dataset.category_names), batch, cache_dir=Path(dataset.img_path).parent
+ )
+ )
+ self.text_embeddings = text_embeddings
+
+ def generate_text_embeddings(self, texts, batch, cache_dir):
+ """
+ Generate text embeddings for a list of text samples.
+
+ Args:
+ texts (List[str]): List of text samples to encode.
+ batch (int): Batch size for processing.
+ cache_dir (Path): Directory to save/load cached embeddings.
+
+ Returns:
+ (dict): Dictionary mapping text samples to their embeddings.
+ """
+ model = "clip:ViT-B/32"
+ cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
+ if cache_path.exists():
+ LOGGER.info(f"Reading existed cache from '{cache_path}'")
+ txt_map = torch.load(cache_path)
+ if sorted(txt_map.keys()) == sorted(texts):
+ return txt_map
+ LOGGER.info(f"Caching text embeddings to '{cache_path}'")
+ assert self.model is not None
+ txt_feats = self.model.get_text_pe(texts, batch, cache_clip_model=False)
+ txt_map = dict(zip(texts, txt_feats.squeeze(0)))
+ torch.save(txt_map, cache_path)
+ return txt_map

  def preprocess_batch(self, batch):
  """Preprocess a batch of images and text for YOLOWorld training."""
- batch = super().preprocess_batch(batch)
+ batch = DetectionTrainer.preprocess_batch(self, batch)

  # Add text features
  texts = list(itertools.chain(*batch["texts"]))
- text_token = self.clip.tokenize(texts).to(batch["img"].device)
- txt_feats = self.text_model.encode_text(text_token).to(dtype=batch["img"].dtype) # torch.float32
+ txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device)
  txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
  batch["txt_feats"] = txt_feats.reshape(len(batch["texts"]), -1, txt_feats.shape[-1])
  return batch
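The new `generate_text_embeddings` above computes CLIP text features once per dataset directory, saves them to a `.pt` file, and reuses the file only when its keys exactly match the requested class names. A self-contained sketch of that cache-or-compute pattern (the `encode` callable is a stand-in for the real text encoder):

    from pathlib import Path

    import torch

    def cached_text_embeddings(texts, cache_path: Path, encode):
        """Return {text: embedding}, recomputing when the cache does not cover exactly these texts."""
        if cache_path.exists():
            cached = torch.load(cache_path)
            if sorted(cached.keys()) == sorted(texts):
                return cached  # cache is valid for this class list
        feats = encode(texts)  # stand-in for a CLIP/MobileCLIP text encoder, one row per text
        mapping = dict(zip(texts, feats))
        torch.save(mapping, cache_path)
        return mapping

    emb = cached_text_embeddings(["person", "bus"], Path("text_embeddings_demo.pt"), lambda t: torch.randn(len(t), 512))
    print(len(emb), emb["person"].shape)  # 2 torch.Size([512])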
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -100,6 +100,7 @@ class WorldTrainerFromScratch(WorldTrainer):
  else build_grounding(self.args, im_path["img_path"], im_path["json_file"], batch, stride=gs)
  for im_path in img_path
  ]
+ self.set_text_embeddings(datasets, batch) # cache text embeddings to accelerate training
  return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]

  def get_dataset(self):
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -2,7 +2,6 @@

  import itertools
  from copy import copy, deepcopy
- from pathlib import Path

  import torch

@@ -157,40 +156,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
  Returns:
  (YOLOConcatDataset | Dataset): The constructed dataset for training or validation.
  """
- datasets = WorldTrainerFromScratch.build_dataset(self, img_path, mode, batch)
- if mode == "train":
- self.set_text_embeddings(
- datasets.datasets if hasattr(datasets, "datasets") else [datasets], batch
- ) # cache text embeddings to accelerate training
- return datasets
-
- def set_text_embeddings(self, datasets, batch):
- """
- Set text embeddings for datasets to accelerate training by caching category names.
-
- This method collects unique category names from all datasets, then generates and caches text embeddings
- for these categories to improve training efficiency.
-
- Args:
- datasets (List[Dataset]): List of datasets from which to extract category names.
- batch (int | None): Batch size used for processing.
-
- Notes:
- This method collects category names from datasets that have the 'category_names' attribute,
- then uses the first dataset's image path to determine where to cache the generated text embeddings.
- """
- # TODO: open up an interface to determine whether to do cache
- category_names = set()
- for dataset in datasets:
- if not hasattr(dataset, "category_names"):
- continue
- category_names |= dataset.category_names
-
- # TODO: enable to update the path or use a more general way to get the path
- img_path = datasets[0].img_path
- self.text_embeddings = self.generate_text_embeddings(
- category_names, batch, cache_path=Path(img_path).parent / "text_embeddings.pt"
- )
+ return WorldTrainerFromScratch.build_dataset(self, img_path, mode, batch)

  def preprocess_batch(self, batch):
  """Process batch for training, moving text features to the appropriate device."""
@@ -202,23 +168,28 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
  batch["txt_feats"] = txt_feats
  return batch

- def generate_text_embeddings(self, texts, batch, cache_path="embeddings.pt"):
+ def generate_text_embeddings(self, texts, batch, cache_dir):
  """
  Generate text embeddings for a list of text samples.

  Args:
  texts (List[str]): List of text samples to encode.
  batch (int): Batch size for processing.
- cache_path (str | Path): Path to save/load cached embeddings.
+ cache_dir (Path): Directory to save/load cached embeddings.

  Returns:
  (dict): Dictionary mapping text samples to their embeddings.
  """
+ model = "mobileclip:blt"
+ cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
  if cache_path.exists():
  LOGGER.info(f"Reading existed cache from '{cache_path}'")
- return torch.load(cache_path)
+ txt_map = torch.load(cache_path)
+ if sorted(txt_map.keys()) == sorted(texts):
+ return txt_map
+ LOGGER.info(f"Caching text embeddings to '{cache_path}'")
  assert self.model is not None
- txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True)
+ txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
  txt_map = dict(zip(texts, txt_feats.squeeze(0)))
  torch.save(txt_map, cache_path)
  return txt_map
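Both trainers now name the cache file after the text-model identifier ("clip:ViT-B/32" for YOLO-World, "mobileclip:blt" for YOLOE), so embeddings from different encoders never collide in the same directory. A small sketch mirroring the path expression used in the hunks above:

    from pathlib import Path

    def embedding_cache_path(cache_dir: Path, model: str) -> Path:
        """Map a text-model id to a per-model cache file, as in the diffs above."""
        return cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"

    print(embedding_cache_path(Path("coco"), "mobileclip:blt"))  # coco/text_embeddings_mobileclip_blt.pt
    print(embedding_cache_path(Path("coco"), "clip:ViT-B/32"))   # coco/text_embeddings_clip_ViT-B_32.pt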
ultralytics/nn/tasks.py CHANGED
@@ -146,6 +146,8 @@ class BaseModel(torch.nn.Module):
  (torch.Tensor): The last output of the model.
  """
  y, dt, embeddings = [], [], [] # outputs
+ embed = frozenset(embed) if embed is not None else {-1}
+ max_idx = max(embed)
  for m in self.model:
  if m.f != -1: # if not from previous layer
  x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
@@ -155,9 +157,9 @@
  y.append(x if m.i in self.save else None) # save output
  if visualize:
  feature_visualization(x, m.type, m.i, save_dir=visualize)
- if embed and m.i in embed:
+ if m.i in embed:
  embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
- if m.i == max(embed):
+ if m.i == max_idx:
  return torch.unbind(torch.cat(embeddings, 1), dim=0)
  return x

@@ -677,6 +679,8 @@ class RTDETRDetectionModel(DetectionModel):
  (torch.Tensor): Model's output tensor.
  """
  y, dt, embeddings = [], [], [] # outputs
+ embed = frozenset(embed) if embed is not None else {-1}
+ max_idx = max(embed)
  for m in self.model[:-1]: # except the head part
  if m.f != -1: # if not from previous layer
  x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
@@ -686,9 +690,9 @@
  y.append(x if m.i in self.save else None) # save output
  if visualize:
  feature_visualization(x, m.type, m.i, save_dir=visualize)
- if embed and m.i in embed:
+ if m.i in embed:
  embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
- if m.i == max(embed):
+ if m.i == max_idx:
  return torch.unbind(torch.cat(embeddings, 1), dim=0)
  head = self.model[-1]
  x = head([y[j] for j in head.f], batch) # head inference
@@ -721,24 +725,33 @@ class WorldModel(DetectionModel):
  batch (int): Batch size for processing text tokens.
  cache_clip_model (bool): Whether to cache the CLIP model.
  """
- try:
- import clip
- except ImportError:
- check_requirements("git+https://github.com/ultralytics/CLIP.git")
- import clip
-
- if (
- not getattr(self, "clip_model", None) and cache_clip_model
- ): # for backwards compatibility of models lacking clip_model attribute
- self.clip_model = clip.load("ViT-B/32")[0]
- model = self.clip_model if cache_clip_model else clip.load("ViT-B/32")[0]
- device = next(model.parameters()).device
- text_token = clip.tokenize(text).to(device)
+ self.txt_feats = self.get_text_pe(text, batch=batch, cache_clip_model=cache_clip_model)
+ self.model[-1].nc = len(text)
+
+ @smart_inference_mode()
+ def get_text_pe(self, text, batch=80, cache_clip_model=True):
+ """
+ Set classes in advance so that model could do offline-inference without clip model.
+
+ Args:
+ text (List[str]): List of class names.
+ batch (int): Batch size for processing text tokens.
+ cache_clip_model (bool): Whether to cache the CLIP model.
+
+ Returns:
+ (torch.Tensor): Text positional embeddings.
+ """
+ from ultralytics.nn.text_model import build_text_model
+
+ device = next(self.model.parameters()).device
+ if not getattr(self, "clip_model", None) and cache_clip_model:
+ # For backwards compatibility of models lacking clip_model attribute
+ self.clip_model = build_text_model("clip:ViT-B/32", device=device)
+ model = self.clip_model if cache_clip_model else build_text_model("clip:ViT-B/32", device=device)
+ text_token = model.tokenize(text)
  txt_feats = [model.encode_text(token).detach() for token in text_token.split(batch)]
  txt_feats = txt_feats[0] if len(txt_feats) == 1 else torch.cat(txt_feats, dim=0)
- txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
- self.txt_feats = txt_feats.reshape(-1, len(text), txt_feats.shape[-1])
- self.model[-1].nc = len(text)
+ return txt_feats.reshape(-1, len(text), txt_feats.shape[-1])

  def predict(self, x, profile=False, visualize=False, txt_feats=None, augment=False, embed=None):
  """
@@ -760,6 +773,8 @@ class WorldModel(DetectionModel):
  txt_feats = txt_feats.expand(x.shape[0], -1, -1)
  ori_txt_feats = txt_feats.clone()
  y, dt, embeddings = [], [], [] # outputs
+ embed = frozenset(embed) if embed is not None else {-1}
+ max_idx = max(embed)
  for m in self.model: # except the head part
  if m.f != -1: # if not from previous layer
  x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
@@ -777,9 +792,9 @@
  y.append(x if m.i in self.save else None) # save output
  if visualize:
  feature_visualization(x, m.type, m.i, save_dir=visualize)
- if embed and m.i in embed:
+ if m.i in embed:
  embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
- if m.i == max(embed):
+ if m.i == max_idx:
  return torch.unbind(torch.cat(embeddings, 1), dim=0)
  return x

@@ -976,6 +991,8 @@ class YOLOEModel(DetectionModel):
  """
  y, dt, embeddings = [], [], [] # outputs
  b = x.shape[0]
+ embed = frozenset(embed) if embed is not None else {-1}
+ max_idx = max(embed)
  for m in self.model: # except the head part
  if m.f != -1: # if not from previous layer
  x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
@@ -997,9 +1014,9 @@
  y.append(x if m.i in self.save else None) # save output
  if visualize:
  feature_visualization(x, m.type, m.i, save_dir=visualize)
- if embed and m.i in embed:
+ if m.i in embed:
  embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
- if m.i == max(embed):
+ if m.i == max_idx:
  return torch.unbind(torch.cat(embeddings, 1), dim=0)
  return x

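Each of the prediction loops above now normalizes `embed` to a frozenset up front (defaulting to `{-1}` when no embedding layers are requested) and hoists `max(embed)` out of the per-layer loop, so membership checks are O(1) and the early-exit index is computed once. A simplified, self-contained sketch of that control flow, with plain integers standing in for layer outputs:

    def collect_embeddings(layer_outputs, embed=None):
        """Sketch of the loop: frozenset for O(1) lookups, max() computed once rather than per layer."""
        embed = frozenset(embed) if embed is not None else {-1}
        max_idx = max(embed)
        embeddings = []
        for i, x in enumerate(layer_outputs):
            if i in embed:
                embeddings.append(x)
            if i == max_idx:
                return embeddings  # early exit once the deepest requested layer is reached
        return layer_outputs[-1]  # no embed layers requested: behave like a normal forward pass

    print(collect_embeddings(list(range(10)), embed=[2, 5]))  # [2, 5]
    print(collect_embeddings(list(range(10))))  # 9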
ultralytics/nn/text_model.py CHANGED
@@ -324,6 +324,7 @@ class MobileCLIPTS(TextModel):
  >>> features.shape
  torch.Size([2, 512]) # Actual dimension depends on model size
  """
+ # NOTE: no need to do normalization here as it's embedded in the torchscript model
  return self.encoder(texts)


ultralytics/utils/checks.py CHANGED
@@ -80,6 +80,7 @@ def parse_requirements(file_path=ROOT.parent / "requirements.txt", package=""):
  return requirements


+ @functools.lru_cache
  def parse_version(version="0.0.0") -> tuple:
  """
  Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version.
@@ -164,6 +165,7 @@ def check_imgsz(imgsz, stride=32, min_dim=1, max_dim=2, floor=0):
  return sz


+ @functools.lru_cache
  def check_version(
  current: str = "0.0.0",
  required: str = "0.0.0",
@@ -580,6 +582,7 @@ def check_is_path_safe(basedir, path):
  return path_resolved.exists() and path_resolved.parts[: len(base_dir_resolved.parts)] == base_dir_resolved.parts


+ @functools.lru_cache
  def check_imshow(warn=False):
  """
  Check if environment supports image displays.
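`parse_version`, `check_version`, and `check_imshow` are now memoized with `functools.lru_cache`, which pays off because the same version strings are checked repeatedly during export and requirement validation. A minimal sketch of the effect (the parsing logic here is a simplified stand-in, not the package's exact implementation):

    import functools
    import re

    @functools.lru_cache
    def parse_version(version: str = "0.0.0") -> tuple:
        """Memoized: each distinct version string is parsed only once."""
        return tuple(int(x) for x in re.findall(r"\d+", version)[:3])

    for _ in range(1000):
        parse_version("2.1.0+cu121")  # only the first call does any parsing work
    print(parse_version.cache_info())  # e.g. CacheInfo(hits=999, misses=1, maxsize=128, currsize=1)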
ultralytics/utils/metrics.py CHANGED
@@ -409,7 +409,7 @@ class ConfusionMatrix:
  @plt_settings()
  def plot(self, normalize=True, save_dir="", names=(), on_plot=None):
  """
- Plot the confusion matrix using seaborn and save it to a file.
+ Plot the confusion matrix using matplotlib and save it to a file.

  Args:
  normalize (bool): Whether to normalize the confusion matrix.
@@ -418,34 +418,63 @@
  on_plot (func): An optional callback to pass plots path and data when they are rendered.
  """
  import matplotlib.pyplot as plt # scope for faster 'import ultralytics'
- import seaborn

  array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1e-9) if normalize else 1) # normalize columns
  array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)

- fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
- nc, nn = self.nc, len(names) # number of classes, names
- seaborn.set_theme(font_scale=1.0 if nc < 50 else 0.8) # for label size
- labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels
- ticklabels = (list(names) + ["background"]) if labels else "auto"
+ names = list(names)
+ fig, ax = plt.subplots(1, 1, figsize=(12, 9))
+ if self.nc >= 100: # downsample for large class count
+ k = max(2, self.nc // 60) # step size for downsampling, always > 1
+ keep_idx = slice(None, None, k) # create slice instead of array
+ names = names[keep_idx] # slice class names
+ array = array[keep_idx, :][:, keep_idx] # slice matrix rows and cols
+ n = (self.nc + k - 1) // k # number of retained classes
+ nc = nn = n if self.task == "classify" else n + 1 # adjust for background if needed
+ else:
+ nc = nn = self.nc if self.task == "classify" else self.nc + 1
+ ticklabels = (names + ["background"]) if (0 < nn < 99) and (nn == nc) else "auto"
+ xy_ticks = np.arange(len(ticklabels))
+ tick_fontsize = max(6, 15 - 0.1 * nc) # Minimum size is 6
+ label_fontsize = max(6, 12 - 0.1 * nc)
+ title_fontsize = max(6, 12 - 0.1 * nc)
+ btm = max(0.1, 0.25 - 0.001 * nc) # Minimum value is 0.1
  with warnings.catch_warnings():
  warnings.simplefilter("ignore") # suppress empty matrix RuntimeWarning: All-NaN slice encountered
- seaborn.heatmap(
- array,
- ax=ax,
- annot=nc < 30,
- annot_kws={"size": 8},
- cmap="Blues",
- fmt=".2f" if normalize else ".0f",
- square=True,
- vmin=0.0,
- xticklabels=ticklabels,
- yticklabels=ticklabels,
- ).set_facecolor((1, 1, 1))
+ im = ax.imshow(array, cmap="Blues", vmin=0.0, interpolation="none")
+ ax.xaxis.set_label_position("bottom")
+ if nc < 30: # Add score for each cell of confusion matrix
+ for i, row in enumerate(array[:nc]):
+ for j, val in enumerate(row[:nc]):
+ val = array[i, j]
+ if np.isnan(val):
+ continue
+ ax.text(
+ j,
+ i,
+ f"{val:.2f}" if normalize else f"{int(val)}",
+ ha="center",
+ va="center",
+ fontsize=10,
+ color="white" if val > (0.7 if normalize else 2) else "black",
+ )
+ cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.05)
  title = "Confusion Matrix" + " Normalized" * normalize
- ax.set_xlabel("True")
- ax.set_ylabel("Predicted")
- ax.set_title(title)
+ ax.set_xlabel("True", fontsize=label_fontsize, labelpad=10)
+ ax.set_ylabel("Predicted", fontsize=label_fontsize, labelpad=10)
+ ax.set_title(title, fontsize=title_fontsize, pad=20)
+ ax.set_xticks(xy_ticks)
+ ax.set_yticks(xy_ticks)
+ ax.tick_params(axis="x", bottom=True, top=False, labelbottom=True, labeltop=False)
+ ax.tick_params(axis="y", left=True, right=False, labelleft=True, labelright=False)
+ if ticklabels != "auto":
+ ax.set_xticklabels(ticklabels, fontsize=tick_fontsize, rotation=90, ha="center")
+ ax.set_yticklabels(ticklabels, fontsize=tick_fontsize)
+ for s in ["left", "right", "bottom", "top", "outline"]:
+ if s != "outline":
+ ax.spines[s].set_visible(False) # Confusion matrix plot don't have outline
+ cbar.ax.spines[s].set_visible(False)
+ fig.subplots_adjust(left=0, right=0.84, top=0.94, bottom=btm) # Adjust layout to ensure equal margins
  plot_fname = Path(save_dir) / f"{title.lower().replace(' ', '_')}.png"
  fig.savefig(plot_fname, dpi=250)
  plt.close(fig)
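`ConfusionMatrix.plot` no longer depends on seaborn: the heatmap is drawn with `imshow`, cell values are written with `ax.text`, and a colorbar replaces seaborn's styling. A stripped-down, matplotlib-only sketch of the same idea (random data and hypothetical class names, not the package's plotting code):

    import matplotlib
    matplotlib.use("Agg")  # headless backend, matching the opencv-headless use case
    import matplotlib.pyplot as plt
    import numpy as np

    array = np.random.rand(4, 4)
    names = ["person", "bus", "car", "background"]  # hypothetical tick labels

    fig, ax = plt.subplots(figsize=(6, 5))
    im = ax.imshow(array, cmap="Blues", vmin=0.0, interpolation="none")
    for i in range(array.shape[0]):  # annotate each cell, as the new plot() does when nc < 30
        for j in range(array.shape[1]):
            ax.text(j, i, f"{array[i, j]:.2f}", ha="center", va="center",
                    color="white" if array[i, j] > 0.7 else "black")
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.05)
    ax.set_xticks(range(len(names)))
    ax.set_yticks(range(len(names)))
    ax.set_xticklabels(names, rotation=90)
    ax.set_yticklabels(names)
    ax.set_xlabel("True")
    ax.set_ylabel("Predicted")
    fig.savefig("confusion_matrix_demo.png", dpi=150)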
ultralytics/utils/plotting.py CHANGED
@@ -537,9 +537,9 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
  """
  import matplotlib.pyplot as plt # scope for faster 'import ultralytics'
  import pandas
- import seaborn
+ from matplotlib.colors import LinearSegmentedColormap

- # Filter matplotlib>=3.7.2 warning and Seaborn use_inf and is_categorical FutureWarnings
+ # Filter matplotlib>=3.7.2 warning
  warnings.filterwarnings("ignore", category=UserWarning, message="The figure layout has changed to tight")
  warnings.filterwarnings("ignore", category=FutureWarning)

@@ -549,12 +549,17 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
  boxes = boxes[:1000000] # limit to 1M boxes
  x = pandas.DataFrame(boxes, columns=["x", "y", "width", "height"])

- # Seaborn correlogram
- seaborn.pairplot(x, corner=True, diag_kind="auto", kind="hist", diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
- plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200)
- plt.close()
+ try: # Seaborn correlogram
+ import seaborn
+
+ seaborn.pairplot(x, corner=True, diag_kind="auto", kind="hist", diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
+ plt.savefig(save_dir / "labels_correlogram.jpg", dpi=200)
+ plt.close()
+ except ImportError:
+ pass # Skip if seaborn is not installed

  # Matplotlib labels
+ subplot_3_4_color = LinearSegmentedColormap.from_list("white_blue", ["white", "blue"])
  ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
  y = ax[0].hist(cls, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
  for i in range(nc):
@@ -565,18 +570,19 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
  ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10)
  else:
  ax[0].set_xlabel("classes")
- seaborn.histplot(x, x="x", y="y", ax=ax[2], bins=50, pmax=0.9)
- seaborn.histplot(x, x="width", y="height", ax=ax[3], bins=50, pmax=0.9)
-
- # Rectangles
- boxes[:, 0:2] = 0.5 # center
- boxes = ops.xywh2xyxy(boxes) * 1000
+ boxes = np.column_stack([0.5 - boxes[:, 2:4] / 2, 0.5 + boxes[:, 2:4] / 2]) * 1000
  img = Image.fromarray(np.ones((1000, 1000, 3), dtype=np.uint8) * 255)
  for cls, box in zip(cls[:500], boxes[:500]):
  ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot
  ax[1].imshow(img)
  ax[1].axis("off")

+ ax[2].hist2d(x["x"], x["y"], bins=50, cmap=subplot_3_4_color)
+ ax[2].set_xlabel("x")
+ ax[2].set_ylabel("y")
+ ax[3].hist2d(x["width"], x["height"], bins=50, cmap=subplot_3_4_color)
+ ax[3].set_xlabel("width")
+ ax[3].set_ylabel("height")
  for a in [0, 1, 2, 3]:
  for s in ["top", "right", "left", "bottom"]:
  ax[a].spines[s].set_visible(False)
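Consistent with dropping `seaborn>=0.11.0` from the METADATA requirements, `plot_labels` now treats seaborn as optional: the correlogram is drawn only when seaborn happens to be installed, and the x/y and width/height panels use `Axes.hist2d` with a white-to-blue colormap instead of `seaborn.histplot`. A short sketch of that optional-dependency pattern (random data, illustrative only):

    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas
    from matplotlib.colors import LinearSegmentedColormap

    x = pandas.DataFrame(np.random.rand(1000, 4), columns=["x", "y", "width", "height"])

    try:  # optional extra: only drawn when seaborn is importable
        import seaborn

        seaborn.pairplot(x, corner=True, kind="hist", diag_kws=dict(bins=50))
        plt.savefig("labels_correlogram_demo.jpg", dpi=100)
        plt.close()
    except ImportError:
        pass  # skip silently, matching the new plot_labels behavior

    # Core panels need only matplotlib
    cmap = LinearSegmentedColormap.from_list("white_blue", ["white", "blue"])
    fig, ax = plt.subplots()
    ax.hist2d(x["x"], x["y"], bins=50, cmap=cmap)
    fig.savefig("xy_hist2d_demo.png", dpi=100)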