dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
  2. dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
  3. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -6
  5. tests/conftest.py +15 -39
  6. tests/test_cli.py +17 -17
  7. tests/test_cuda.py +17 -8
  8. tests/test_engine.py +36 -10
  9. tests/test_exports.py +98 -37
  10. tests/test_integrations.py +12 -15
  11. tests/test_python.py +126 -82
  12. tests/test_solutions.py +319 -135
  13. ultralytics/__init__.py +27 -9
  14. ultralytics/cfg/__init__.py +83 -87
  15. ultralytics/cfg/datasets/Argoverse.yaml +4 -4
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
  17. ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
  18. ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
  19. ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
  20. ultralytics/cfg/datasets/ImageNet.yaml +3 -3
  21. ultralytics/cfg/datasets/Objects365.yaml +24 -20
  22. ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
  23. ultralytics/cfg/datasets/VOC.yaml +10 -13
  24. ultralytics/cfg/datasets/VisDrone.yaml +43 -33
  25. ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
  26. ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
  27. ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
  28. ultralytics/cfg/datasets/coco-pose.yaml +26 -4
  29. ultralytics/cfg/datasets/coco.yaml +4 -4
  30. ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
  31. ultralytics/cfg/datasets/coco128.yaml +2 -2
  32. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  33. ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
  34. ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
  35. ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
  36. ultralytics/cfg/datasets/coco8.yaml +2 -2
  37. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  38. ultralytics/cfg/datasets/crack-seg.yaml +5 -5
  39. ultralytics/cfg/datasets/dog-pose.yaml +32 -4
  40. ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
  41. ultralytics/cfg/datasets/dota8.yaml +2 -2
  42. ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
  43. ultralytics/cfg/datasets/lvis.yaml +9 -9
  44. ultralytics/cfg/datasets/medical-pills.yaml +4 -5
  45. ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
  46. ultralytics/cfg/datasets/package-seg.yaml +5 -5
  47. ultralytics/cfg/datasets/signature.yaml +4 -4
  48. ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
  49. ultralytics/cfg/datasets/xView.yaml +5 -5
  50. ultralytics/cfg/default.yaml +96 -93
  51. ultralytics/cfg/trackers/botsort.yaml +16 -17
  52. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  53. ultralytics/data/__init__.py +4 -4
  54. ultralytics/data/annotator.py +12 -12
  55. ultralytics/data/augment.py +531 -564
  56. ultralytics/data/base.py +76 -81
  57. ultralytics/data/build.py +206 -42
  58. ultralytics/data/converter.py +179 -78
  59. ultralytics/data/dataset.py +121 -121
  60. ultralytics/data/loaders.py +114 -91
  61. ultralytics/data/split.py +28 -15
  62. ultralytics/data/split_dota.py +67 -48
  63. ultralytics/data/utils.py +110 -89
  64. ultralytics/engine/exporter.py +422 -460
  65. ultralytics/engine/model.py +224 -252
  66. ultralytics/engine/predictor.py +94 -89
  67. ultralytics/engine/results.py +345 -595
  68. ultralytics/engine/trainer.py +231 -134
  69. ultralytics/engine/tuner.py +279 -73
  70. ultralytics/engine/validator.py +53 -46
  71. ultralytics/hub/__init__.py +26 -28
  72. ultralytics/hub/auth.py +30 -16
  73. ultralytics/hub/google/__init__.py +34 -36
  74. ultralytics/hub/session.py +53 -77
  75. ultralytics/hub/utils.py +23 -109
  76. ultralytics/models/__init__.py +1 -1
  77. ultralytics/models/fastsam/__init__.py +1 -1
  78. ultralytics/models/fastsam/model.py +36 -18
  79. ultralytics/models/fastsam/predict.py +33 -44
  80. ultralytics/models/fastsam/utils.py +4 -5
  81. ultralytics/models/fastsam/val.py +12 -14
  82. ultralytics/models/nas/__init__.py +1 -1
  83. ultralytics/models/nas/model.py +16 -20
  84. ultralytics/models/nas/predict.py +12 -14
  85. ultralytics/models/nas/val.py +4 -5
  86. ultralytics/models/rtdetr/__init__.py +1 -1
  87. ultralytics/models/rtdetr/model.py +9 -9
  88. ultralytics/models/rtdetr/predict.py +22 -17
  89. ultralytics/models/rtdetr/train.py +20 -16
  90. ultralytics/models/rtdetr/val.py +79 -59
  91. ultralytics/models/sam/__init__.py +8 -2
  92. ultralytics/models/sam/amg.py +53 -38
  93. ultralytics/models/sam/build.py +29 -31
  94. ultralytics/models/sam/model.py +33 -38
  95. ultralytics/models/sam/modules/blocks.py +159 -182
  96. ultralytics/models/sam/modules/decoders.py +38 -47
  97. ultralytics/models/sam/modules/encoders.py +114 -133
  98. ultralytics/models/sam/modules/memory_attention.py +38 -31
  99. ultralytics/models/sam/modules/sam.py +114 -93
  100. ultralytics/models/sam/modules/tiny_encoder.py +268 -291
  101. ultralytics/models/sam/modules/transformer.py +59 -66
  102. ultralytics/models/sam/modules/utils.py +55 -72
  103. ultralytics/models/sam/predict.py +745 -341
  104. ultralytics/models/utils/loss.py +118 -107
  105. ultralytics/models/utils/ops.py +118 -71
  106. ultralytics/models/yolo/__init__.py +1 -1
  107. ultralytics/models/yolo/classify/predict.py +28 -26
  108. ultralytics/models/yolo/classify/train.py +50 -81
  109. ultralytics/models/yolo/classify/val.py +68 -61
  110. ultralytics/models/yolo/detect/predict.py +12 -15
  111. ultralytics/models/yolo/detect/train.py +56 -46
  112. ultralytics/models/yolo/detect/val.py +279 -223
  113. ultralytics/models/yolo/model.py +167 -86
  114. ultralytics/models/yolo/obb/predict.py +7 -11
  115. ultralytics/models/yolo/obb/train.py +23 -25
  116. ultralytics/models/yolo/obb/val.py +107 -99
  117. ultralytics/models/yolo/pose/__init__.py +1 -1
  118. ultralytics/models/yolo/pose/predict.py +12 -14
  119. ultralytics/models/yolo/pose/train.py +31 -69
  120. ultralytics/models/yolo/pose/val.py +119 -254
  121. ultralytics/models/yolo/segment/predict.py +21 -25
  122. ultralytics/models/yolo/segment/train.py +12 -66
  123. ultralytics/models/yolo/segment/val.py +126 -305
  124. ultralytics/models/yolo/world/train.py +53 -45
  125. ultralytics/models/yolo/world/train_world.py +51 -32
  126. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  127. ultralytics/models/yolo/yoloe/predict.py +30 -37
  128. ultralytics/models/yolo/yoloe/train.py +89 -71
  129. ultralytics/models/yolo/yoloe/train_seg.py +15 -17
  130. ultralytics/models/yolo/yoloe/val.py +56 -41
  131. ultralytics/nn/__init__.py +9 -11
  132. ultralytics/nn/autobackend.py +179 -107
  133. ultralytics/nn/modules/__init__.py +67 -67
  134. ultralytics/nn/modules/activation.py +8 -7
  135. ultralytics/nn/modules/block.py +302 -323
  136. ultralytics/nn/modules/conv.py +61 -104
  137. ultralytics/nn/modules/head.py +488 -186
  138. ultralytics/nn/modules/transformer.py +183 -123
  139. ultralytics/nn/modules/utils.py +15 -20
  140. ultralytics/nn/tasks.py +327 -203
  141. ultralytics/nn/text_model.py +81 -65
  142. ultralytics/py.typed +1 -0
  143. ultralytics/solutions/__init__.py +12 -12
  144. ultralytics/solutions/ai_gym.py +19 -27
  145. ultralytics/solutions/analytics.py +36 -26
  146. ultralytics/solutions/config.py +29 -28
  147. ultralytics/solutions/distance_calculation.py +23 -24
  148. ultralytics/solutions/heatmap.py +17 -19
  149. ultralytics/solutions/instance_segmentation.py +21 -19
  150. ultralytics/solutions/object_blurrer.py +16 -17
  151. ultralytics/solutions/object_counter.py +48 -53
  152. ultralytics/solutions/object_cropper.py +22 -16
  153. ultralytics/solutions/parking_management.py +61 -58
  154. ultralytics/solutions/queue_management.py +19 -19
  155. ultralytics/solutions/region_counter.py +63 -50
  156. ultralytics/solutions/security_alarm.py +22 -25
  157. ultralytics/solutions/similarity_search.py +107 -60
  158. ultralytics/solutions/solutions.py +343 -262
  159. ultralytics/solutions/speed_estimation.py +35 -31
  160. ultralytics/solutions/streamlit_inference.py +104 -40
  161. ultralytics/solutions/templates/similarity-search.html +31 -24
  162. ultralytics/solutions/trackzone.py +24 -24
  163. ultralytics/solutions/vision_eye.py +11 -12
  164. ultralytics/trackers/__init__.py +1 -1
  165. ultralytics/trackers/basetrack.py +18 -27
  166. ultralytics/trackers/bot_sort.py +48 -39
  167. ultralytics/trackers/byte_tracker.py +94 -94
  168. ultralytics/trackers/track.py +7 -16
  169. ultralytics/trackers/utils/gmc.py +37 -69
  170. ultralytics/trackers/utils/kalman_filter.py +68 -76
  171. ultralytics/trackers/utils/matching.py +13 -17
  172. ultralytics/utils/__init__.py +251 -275
  173. ultralytics/utils/autobatch.py +19 -7
  174. ultralytics/utils/autodevice.py +68 -38
  175. ultralytics/utils/benchmarks.py +169 -130
  176. ultralytics/utils/callbacks/base.py +12 -13
  177. ultralytics/utils/callbacks/clearml.py +14 -15
  178. ultralytics/utils/callbacks/comet.py +139 -66
  179. ultralytics/utils/callbacks/dvc.py +19 -27
  180. ultralytics/utils/callbacks/hub.py +8 -6
  181. ultralytics/utils/callbacks/mlflow.py +6 -10
  182. ultralytics/utils/callbacks/neptune.py +11 -19
  183. ultralytics/utils/callbacks/platform.py +73 -0
  184. ultralytics/utils/callbacks/raytune.py +3 -4
  185. ultralytics/utils/callbacks/tensorboard.py +9 -12
  186. ultralytics/utils/callbacks/wb.py +33 -30
  187. ultralytics/utils/checks.py +163 -114
  188. ultralytics/utils/cpu.py +89 -0
  189. ultralytics/utils/dist.py +24 -20
  190. ultralytics/utils/downloads.py +176 -146
  191. ultralytics/utils/errors.py +11 -13
  192. ultralytics/utils/events.py +113 -0
  193. ultralytics/utils/export/__init__.py +7 -0
  194. ultralytics/utils/{export.py → export/engine.py} +81 -63
  195. ultralytics/utils/export/imx.py +294 -0
  196. ultralytics/utils/export/tensorflow.py +217 -0
  197. ultralytics/utils/files.py +33 -36
  198. ultralytics/utils/git.py +137 -0
  199. ultralytics/utils/instance.py +105 -120
  200. ultralytics/utils/logger.py +404 -0
  201. ultralytics/utils/loss.py +99 -61
  202. ultralytics/utils/metrics.py +649 -478
  203. ultralytics/utils/nms.py +337 -0
  204. ultralytics/utils/ops.py +263 -451
  205. ultralytics/utils/patches.py +70 -31
  206. ultralytics/utils/plotting.py +253 -223
  207. ultralytics/utils/tal.py +48 -61
  208. ultralytics/utils/torch_utils.py +244 -251
  209. ultralytics/utils/tqdm.py +438 -0
  210. ultralytics/utils/triton.py +22 -23
  211. ultralytics/utils/tuner.py +11 -10
  212. dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
  213. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
  214. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
  215. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
ultralytics/nn/text_model.py CHANGED
@@ -1,10 +1,13 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+ from __future__ import annotations
+
  from abc import abstractmethod
  from pathlib import Path

  import torch
  import torch.nn as nn
+ from PIL import Image

  from ultralytics.utils import checks
  from ultralytics.utils.torch_utils import smart_inference_mode
@@ -17,15 +20,14 @@ except ImportError:


  class TextModel(nn.Module):
- """
- Abstract base class for text encoding models.
+ """Abstract base class for text encoding models.

  This class defines the interface for text encoding models used in vision-language tasks. Subclasses must implement
- the tokenize and encode_text methods.
+ the tokenize and encode_text methods to provide text tokenization and encoding functionality.

  Methods:
- tokenize: Convert input texts to tokens.
- encode_text: Encode tokenized texts into feature vectors.
+ tokenize: Convert input texts to tokens for model processing.
+ encode_text: Encode tokenized texts into normalized feature vectors.
  """

  def __init__(self):
@@ -33,22 +35,21 @@ class TextModel(nn.Module):
  super().__init__()

  @abstractmethod
- def tokenize(texts):
+ def tokenize(self, texts):
  """Convert input texts to tokens for model processing."""
  pass

  @abstractmethod
- def encode_text(texts, dtype):
+ def encode_text(self, texts, dtype):
  """Encode tokenized texts into normalized feature vectors."""
  pass


  class CLIP(TextModel):
- """
- Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.
+ """Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.

- This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors
+ This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors that
- that are aligned with corresponding image features in a shared embedding space.
+ are aligned with corresponding image features in a shared embedding space.

  Attributes:
  model (clip.model.CLIP): The loaded CLIP model.
@@ -59,7 +60,6 @@ class CLIP(TextModel):
  encode_text: Encode tokenized texts into normalized feature vectors.

  Examples:
- >>> from ultralytics.models.sam import CLIP
  >>> import torch
  >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  >>> clip_model = CLIP(size="ViT-B/32", device=device)
@@ -68,12 +68,11 @@ class CLIP(TextModel):
  >>> print(text_features.shape)
  """

- def __init__(self, size, device):
- """
- Initialize the CLIP text encoder.
+ def __init__(self, size: str, device: torch.device) -> None:
+ """Initialize the CLIP text encoder.

- This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads
- a pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
+ This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads a
+ pre-trained CLIP model of the specified size and prepares it for text encoding tasks.

  Args:
  size (str): Model size identifier (e.g., 'ViT-B/32').
@@ -81,22 +80,20 @@ class CLIP(TextModel):

  Examples:
  >>> import torch
- >>> from ultralytics.models.sam.modules.clip import CLIP
  >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
  >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
  """
  super().__init__()
- self.model = clip.load(size, device=device)[0]
+ self.model, self.image_preprocess = clip.load(size, device=device)
  self.to(device)
  self.device = device
  self.eval()

- def tokenize(self, texts):
- """
- Convert input texts to CLIP tokens.
+ def tokenize(self, texts: str | list[str]) -> torch.Tensor:
+ """Convert input texts to CLIP tokens.

  Args:
- texts (str | List[str]): Input text or list of texts to tokenize.
+ texts (str | list[str]): Input text or list of texts to tokenize.

  Returns:
  (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
@@ -109,16 +106,15 @@ class CLIP(TextModel):
  return clip.tokenize(texts).to(self.device)

  @smart_inference_mode()
- def encode_text(self, texts, dtype=torch.float32):
- """
- Encode tokenized texts into normalized feature vectors.
+ def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+ """Encode tokenized texts into normalized feature vectors.

  This method processes tokenized text inputs through the CLIP model to generate feature vectors, which are then
  normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.

  Args:
  texts (torch.Tensor): Tokenized text inputs, typically created using the tokenize() method.
- dtype (torch.dtype, optional): Data type for output features. Default is torch.float32.
+ dtype (torch.dtype, optional): Data type for output features.

  Returns:
  (torch.Tensor): Normalized text feature vectors with unit length (L2 norm = 1).
@@ -134,13 +130,43 @@ class CLIP(TextModel):
  txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
  return txt_feats

+ @smart_inference_mode()
+ def encode_image(self, image: Image.Image | torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+ """Encode preprocessed images into normalized feature vectors.
+
+ This method processes preprocessed image inputs through the CLIP model to generate feature vectors, which are
+ then normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
+
+ Args:
+ image (PIL.Image | torch.Tensor): Preprocessed image input. If a PIL Image is provided, it will be converted
+ to a tensor using the model's image preprocessing function.
+ dtype (torch.dtype, optional): Data type for output features.
+
+ Returns:
+ (torch.Tensor): Normalized image feature vectors with unit length (L2 norm = 1).
+
+ Examples:
+ >>> from ultralytics.nn.text_model import CLIP
+ >>> from PIL import Image
+ >>> clip_model = CLIP("ViT-B/32", device="cuda")
+ >>> image = Image.open("path/to/image.jpg")
+ >>> image_tensor = clip_model.image_preprocess(image).unsqueeze(0).to("cuda")
+ >>> features = clip_model.encode_image(image_tensor)
+ >>> features.shape
+ torch.Size([1, 512])
+ """
+ if isinstance(image, Image.Image):
+ image = self.image_preprocess(image).unsqueeze(0).to(self.device)
+ img_feats = self.model.encode_image(image).to(dtype)
+ img_feats = img_feats / img_feats.norm(p=2, dim=-1, keepdim=True)
+ return img_feats
+

  class MobileCLIP(TextModel):
- """
- Implement Apple's MobileCLIP text encoder for efficient text encoding.
+ """Implement Apple's MobileCLIP text encoder for efficient text encoding.

  This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
- capabilities for vision-language tasks.
+ capabilities for vision-language tasks with reduced computational requirements compared to standard CLIP models.

  Attributes:
  model (mobileclip.model.MobileCLIP): The loaded MobileCLIP model.
@@ -161,9 +187,8 @@ class MobileCLIP(TextModel):

  config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}

- def __init__(self, size, device):
- """
- Initialize the MobileCLIP text encoder.
+ def __init__(self, size: str, device: torch.device) -> None:
+ """Initialize the MobileCLIP text encoder.

  This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.

@@ -172,7 +197,6 @@ class MobileCLIP(TextModel):
  device (torch.device): Device to load the model on.

  Examples:
- >>> from ultralytics.nn.modules import MobileCLIP
  >>> import torch
  >>> model = MobileCLIP("s0", device=torch.device("cpu"))
  >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
@@ -203,9 +227,8 @@ class MobileCLIP(TextModel):
  self.device = device
  self.eval()

- def tokenize(self, texts):
- """
- Convert input texts to MobileCLIP tokens.
+ def tokenize(self, texts: list[str]) -> torch.Tensor:
+ """Convert input texts to MobileCLIP tokens.

  Args:
  texts (list[str]): List of text strings to tokenize.
@@ -220,9 +243,8 @@ class MobileCLIP(TextModel):
  return self.tokenizer(texts).to(self.device)

  @smart_inference_mode()
- def encode_text(self, texts, dtype=torch.float32):
- """
- Encode tokenized texts into normalized feature vectors.
+ def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+ """Encode tokenized texts into normalized feature vectors.

  Args:
  texts (torch.Tensor): Tokenized text inputs.
@@ -244,14 +266,13 @@ class MobileCLIP(TextModel):


  class MobileCLIPTS(TextModel):
- """
- Load a TorchScript traced version of MobileCLIP.
+ """Load a TorchScript traced version of MobileCLIP.

- This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
- capabilities for vision-language tasks.
+ This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format, providing
+ efficient text encoding capabilities for vision-language tasks with optimized inference performance.

  Attributes:
- encoder (mobileclip.model.MobileCLIP): The loaded MobileCLIP text encoder.
+ encoder (torch.jit.ScriptModule): The loaded TorchScript MobileCLIP text encoder.
  tokenizer (callable): Tokenizer function for processing text inputs.
  device (torch.device): Device where the model is loaded.

@@ -261,24 +282,22 @@ class MobileCLIPTS(TextModel):

  Examples:
  >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- >>> text_encoder = MobileCLIP(device=device)
+ >>> text_encoder = MobileCLIPTS(device=device)
  >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
  >>> features = text_encoder.encode_text(tokens)
  """

- def __init__(self, device):
- """
- Initialize the MobileCLIP text encoder.
+ def __init__(self, device: torch.device):
+ """Initialize the MobileCLIP TorchScript text encoder.

- This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+ This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format for efficient
+ text encoding with optimized inference performance.

  Args:
  device (torch.device): Device to load the model on.

  Examples:
- >>> from ultralytics.nn.modules import MobileCLIP
- >>> import torch
- >>> model = MobileCLIP(device=torch.device("cpu"))
+ >>> model = MobileCLIPTS(device=torch.device("cpu"))
  >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
  >>> features = model.encode_text(tokens)
  """
@@ -289,9 +308,8 @@ class MobileCLIPTS(TextModel):
  self.tokenizer = clip.clip.tokenize
  self.device = device

- def tokenize(self, texts):
- """
- Convert input texts to MobileCLIP tokens.
+ def tokenize(self, texts: list[str]) -> torch.Tensor:
+ """Convert input texts to MobileCLIP tokens.

  Args:
  texts (list[str]): List of text strings to tokenize.
@@ -300,15 +318,14 @@ class MobileCLIPTS(TextModel):
  (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).

  Examples:
- >>> model = MobileCLIP("cpu")
+ >>> model = MobileCLIPTS("cpu")
  >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
  """
  return self.tokenizer(texts).to(self.device)

  @smart_inference_mode()
- def encode_text(self, texts, dtype=torch.float32):
- """
- Encode tokenized texts into normalized feature vectors.
+ def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+ """Encode tokenized texts into normalized feature vectors.

  Args:
  texts (torch.Tensor): Tokenized text inputs.
@@ -318,19 +335,18 @@ class MobileCLIPTS(TextModel):
  (torch.Tensor): Normalized text feature vectors with L2 normalization applied.

  Examples:
- >>> model = MobileCLIP(device="cpu")
+ >>> model = MobileCLIPTS(device="cpu")
  >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
  >>> features = model.encode_text(tokens)
  >>> features.shape
  torch.Size([2, 512]) # Actual dimension depends on model size
  """
  # NOTE: no need to do normalization here as it's embedded in the torchscript model
- return self.encoder(texts)
+ return self.encoder(texts).to(dtype)


- def build_text_model(variant, device=None):
- """
- Build a text encoding model based on the specified variant.
+ def build_text_model(variant: str, device: torch.device = None) -> TextModel:
+ """Build a text encoding model based on the specified variant.

  Args:
  variant (str): Model variant in format "base:size" (e.g., "clip:ViT-B/32" or "mobileclip:s0").
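The hunk above gives CLIP a public image branch: clip.load() now also keeps the preprocessing transform as image_preprocess, and encode_image() mirrors encode_text() by returning L2-normalized features. A minimal sketch of combining the two encoders for text-image similarity, assuming the optional clip dependency is installed; the image path is a placeholder:

    import torch
    from PIL import Image

    from ultralytics.nn.text_model import build_text_model

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    clip_model = build_text_model("clip:ViT-B/32", device=device)  # CLIP wrapper from the diff above

    # Text side: tokenize, then encode into unit-length feature vectors
    tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
    text_feats = clip_model.encode_text(tokens)

    # Image side: a PIL Image is preprocessed internally by encode_image
    image_feats = clip_model.encode_image(Image.open("path/to/image.jpg"))

    # Both outputs are L2-normalized, so a matrix product gives cosine similarities
    similarity = image_feats @ text_feats.T
    print(similarity)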
ultralytics/py.typed ADDED
@@ -0,0 +1 @@
+ partial
ultralytics/solutions/__init__.py CHANGED
@@ -19,23 +19,23 @@ from .trackzone import TrackZone
  from .vision_eye import VisionEye

  __all__ = (
- "ObjectCounter",
- "ObjectCropper",
- "ObjectBlurrer",
  "AIGym",
- "RegionCounter",
- "SecurityAlarm",
+ "Analytics",
+ "DistanceCalculation",
  "Heatmap",
+ "Inference",
  "InstanceSegmentation",
- "VisionEye",
- "SpeedEstimator",
- "DistanceCalculation",
- "QueueManager",
+ "ObjectBlurrer",
+ "ObjectCounter",
+ "ObjectCropper",
  "ParkingManagement",
  "ParkingPtsSelection",
- "Analytics",
- "Inference",
- "TrackZone",
+ "QueueManager",
+ "RegionCounter",
  "SearchApp",
+ "SecurityAlarm",
+ "SpeedEstimator",
+ "TrackZone",
+ "VisionEye",
  "VisualAISearch",
  )
ultralytics/solutions/ai_gym.py CHANGED
@@ -1,25 +1,25 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

  from collections import defaultdict
+ from typing import Any

  from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults


  class AIGym(BaseSolution):
- """
- A class to manage gym steps of people in a real-time video stream based on their poses.
+ """A class to manage gym steps of people in a real-time video stream based on their poses.

  This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
  repetitions of exercises based on predefined angle thresholds for up and down positions.

  Attributes:
- states (Dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
+ states (dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
  up_angle (float): Angle threshold for considering the 'up' position of an exercise.
  down_angle (float): Angle threshold for considering the 'down' position of an exercise.
- kpts (List[int]): Indices of keypoints used for angle calculation.
+ kpts (list[int]): Indices of keypoints used for angle calculation.

  Methods:
- process: Processes a frame to detect poses, calculate angles, and count repetitions.
+ process: Process a frame to detect poses, calculate angles, and count repetitions.

  Examples:
  >>> gym = AIGym(model="yolo11n-pose.pt")
@@ -30,13 +30,12 @@ class AIGym(BaseSolution):
  >>> cv2.waitKey(0)
  """

- def __init__(self, **kwargs):
- """
- Initialize AIGym for workout monitoring using pose estimation and predefined angles.
+ def __init__(self, **kwargs: Any) -> None:
+ """Initialize AIGym for workout monitoring using pose estimation and predefined angles.

  Args:
- **kwargs (Any): Keyword arguments passed to the parent class constructor.
- model (str): Model name or path, defaults to "yolo11n-pose.pt".
+ **kwargs (Any): Keyword arguments passed to the parent class constructor including:
+ - model (str): Model name or path, defaults to "yolo11n-pose.pt".
  """
  kwargs["model"] = kwargs.get("model", "yolo11n-pose.pt")
  super().__init__(**kwargs)
@@ -47,23 +46,19 @@ class AIGym(BaseSolution):
  self.down_angle = float(self.CFG["down_angle"]) # Pose down predefined angle to consider down pose
  self.kpts = self.CFG["kpts"] # User selected kpts of workouts storage for further usage

- def process(self, im0):
- """
- Monitor workouts using Ultralytics YOLO Pose Model.
+ def process(self, im0) -> SolutionResults:
+ """Monitor workouts using Ultralytics YOLO Pose Model.

- This function processes an input image to track and analyze human poses for workout monitoring. It uses
- the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
- angle thresholds.
+ This function processes an input image to track and analyze human poses for workout monitoring. It uses the YOLO
+ Pose model to detect keypoints, estimate angles, and count repetitions based on predefined angle thresholds.

  Args:
  im0 (np.ndarray): Input image for processing.

  Returns:
- (SolutionResults): Contains processed image `plot_im`,
- 'workout_count' (list of completed reps),
- 'workout_stage' (list of current stages),
- 'workout_angle' (list of angles), and
- 'total_tracks' (total number of tracked individuals).
+ (SolutionResults): Contains processed image `plot_im`, 'workout_count' (list of completed reps),
+ 'workout_stage' (list of current stages), 'workout_angle' (list of angles), and 'total_tracks' (total
+ number of tracked individuals).

  Examples:
  >>> gym = AIGym()
@@ -74,15 +69,12 @@ class AIGym(BaseSolution):
  annotator = SolutionAnnotator(im0, line_width=self.line_width) # Initialize annotator

  self.extract_tracks(im0) # Extract tracks (bounding boxes, classes, and masks)
- tracks = self.tracks[0]

- if tracks.boxes.id is not None:
- track_ids = tracks.boxes.id.cpu().tolist()
- kpt_data = tracks.keypoints.data.cpu() # Avoid repeated .cpu() calls
+ if len(self.boxes):
+ kpt_data = self.tracks.keypoints.data

  for i, k in enumerate(kpt_data):
- track_id = int(track_ids[i]) # get track id
- state = self.states[track_id] # get state details
+ state = self.states[self.track_ids[i]] # get state details
  # Get keypoints and estimate the angle
  state["angle"] = annotator.estimate_pose_angle(*[k[int(idx)] for idx in self.kpts])
  annotator.draw_specific_kpts(k, self.kpts, radius=self.line_width * 3)
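AIGym.process now reads boxes, track IDs, and keypoints straight from the attributes populated by extract_tracks instead of indexing self.tracks[0] and calling .cpu() itself. A hedged usage sketch assembled from the docstring examples above; the video path and the quit-key loop are illustrative only:

    import cv2

    from ultralytics import solutions

    gym = solutions.AIGym(model="yolo11n-pose.pt")  # pose model is also the solution default

    cap = cv2.VideoCapture("workout.mp4")  # placeholder video path
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        results = gym.process(frame)  # SolutionResults: plot_im, workout_count, workout_stage, workout_angle
        cv2.imshow("AIGym", results.plot_im)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()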
ultralytics/solutions/analytics.py CHANGED
@@ -1,6 +1,9 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+ from __future__ import annotations
+
  from itertools import cycle
+ from typing import Any

  import cv2
  import numpy as np
@@ -9,11 +12,10 @@ from ultralytics.solutions.solutions import BaseSolution, SolutionResults # Imp


  class Analytics(BaseSolution):
- """
- A class for creating and updating various types of charts for visual analytics.
+ """A class for creating and updating various types of charts for visual analytics.

- This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts
- based on object detection and tracking data.
+ This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts based on
+ object detection and tracking data.

  Attributes:
  type (str): The type of analytics chart to generate ('line', 'bar', 'pie', or 'area').
@@ -26,12 +28,12 @@ class Analytics(BaseSolution):
  fontsize (int): Font size for text display.
  color_cycle (cycle): Cyclic iterator for chart colors.
  total_counts (int): Total count of detected objects (used for line charts).
- clswise_count (Dict[str, int]): Dictionary for class-wise object counts.
+ clswise_count (dict[str, int]): Dictionary for class-wise object counts.
  fig (Figure): Matplotlib figure object for the chart.
  ax (Axes): Matplotlib axes object for the chart.
  canvas (FigureCanvasAgg): Canvas for rendering the chart.
  lines (dict): Dictionary to store line objects for area charts.
- color_mapping (Dict[str, str]): Dictionary mapping class labels to colors for consistent visualization.
+ color_mapping (dict[str, str]): Dictionary mapping class labels to colors for consistent visualization.

  Methods:
  process: Process image data and update the chart.
@@ -44,7 +46,7 @@ class Analytics(BaseSolution):
  >>> cv2.imshow("Analytics", results.plot_im)
  """

- def __init__(self, **kwargs):
+ def __init__(self, **kwargs: Any) -> None:
  """Initialize Analytics class with various chart types for visual data representation."""
  super().__init__(**kwargs)

@@ -67,6 +69,8 @@ class Analytics(BaseSolution):

  self.total_counts = 0 # count variable for storing total counts i.e. for line
  self.clswise_count = {} # dictionary for class-wise counts
+ self.update_every = kwargs.get("update_every", 30) # Only update graph every 30 frames by default
+ self.last_plot_im = None # Cache of the last rendered chart

  # Ensure line and area chart
  if self.type in {"line", "area"}:
@@ -86,9 +90,8 @@ class Analytics(BaseSolution):
  if self.type == "pie": # Ensure pie chart is circular
  self.ax.axis("equal")

- def process(self, im0, frame_number):
- """
- Process image data and run object tracking to update analytics charts.
+ def process(self, im0: np.ndarray, frame_number: int) -> SolutionResults:
+ """Process image data and run object tracking to update analytics charts.

  Args:
  im0 (np.ndarray): Input image for processing.
@@ -110,29 +113,35 @@ class Analytics(BaseSolution):
  if self.type == "line":
  for _ in self.boxes:
  self.total_counts += 1
- plot_im = self.update_graph(frame_number=frame_number)
+ update_required = frame_number % self.update_every == 0 or self.last_plot_im is None
+ if update_required:
+ self.last_plot_im = self.update_graph(frame_number=frame_number)
+ plot_im = self.last_plot_im
  self.total_counts = 0
  elif self.type in {"pie", "bar", "area"}:
- self.clswise_count = {}
- for cls in self.clss:
- if self.names[int(cls)] in self.clswise_count:
- self.clswise_count[self.names[int(cls)]] += 1
- else:
- self.clswise_count[self.names[int(cls)]] = 1
- plot_im = self.update_graph(frame_number=frame_number, count_dict=self.clswise_count, plot=self.type)
+ from collections import Counter
+
+ self.clswise_count = Counter(self.names[int(cls)] for cls in self.clss)
+ update_required = frame_number % self.update_every == 0 or self.last_plot_im is None
+ if update_required:
+ self.last_plot_im = self.update_graph(
+ frame_number=frame_number, count_dict=self.clswise_count, plot=self.type
+ )
+ plot_im = self.last_plot_im
  else:
  raise ModuleNotFoundError(f"{self.type} chart is not supported ❌")

  # return output dictionary with summary for more usage
  return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)

- def update_graph(self, frame_number, count_dict=None, plot="line"):
- """
- Update the graph with new data for single or multiple classes.
+ def update_graph(
+ self, frame_number: int, count_dict: dict[str, int] | None = None, plot: str = "line"
+ ) -> np.ndarray:
+ """Update the graph with new data for single or multiple classes.

  Args:
  frame_number (int): The current frame number.
- count_dict (Dict[str, int] | None): Dictionary with class names as keys and counts as values for multiple
+ count_dict (dict[str, int], optional): Dictionary with class names as keys and counts as values for multiple
  classes. If None, updates a single line graph.
  plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'.

@@ -184,7 +193,7 @@ class Analytics(BaseSolution):
  self.ax.clear()
  for key, y_data in y_data_dict.items():
  color = next(color_cycle)
- self.ax.fill_between(x_data, y_data, color=color, alpha=0.7)
+ self.ax.fill_between(x_data, y_data, color=color, alpha=0.55)
  self.ax.plot(
  x_data,
  y_data,
@@ -194,7 +203,7 @@ class Analytics(BaseSolution):
  markersize=self.line_width * 5,
  label=f"{key} Data Points",
  )
- if plot == "bar":
+ elif plot == "bar":
  self.ax.clear() # clear bar data
  for label in labels: # Map labels to colors
  if label not in self.color_mapping:
@@ -214,12 +223,12 @@ class Analytics(BaseSolution):
  for bar, label in zip(bars, labels):
  bar.set_label(label) # Assign label to each bar
  self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color)
- if plot == "pie":
+ elif plot == "pie":
  total = sum(counts)
  percentages = [size / total * 100 for size in counts]
- start_angle = 90
  self.ax.clear()

+ start_angle = 90
  # Create pie chart and create legend labels with percentages
  wedges, _ = self.ax.pie(
  counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None
@@ -232,6 +241,7 @@ class Analytics(BaseSolution):

  # Common plot settings
  self.ax.set_facecolor("#f0f0f0") # Set to light gray or any other color you like
+ self.ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.5) # Display grid for more data insights
  self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize)
  self.ax.set_xlabel(self.x_label, color=self.fg_color, fontsize=self.fontsize - 3)
  self.ax.set_ylabel(self.y_label, color=self.fg_color, fontsize=self.fontsize - 3)
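The new update_every and last_plot_im attributes throttle chart rendering: the Matplotlib figure is redrawn only every update_every frames (30 by default) and the cached image is reused in between, while class-wise counts now come from collections.Counter. A rough usage sketch; update_every and the process(frame, frame_number) signature come from the diff above, whereas the analytics_type keyword and the video path are assumptions for illustration:

    import cv2

    from ultralytics import solutions

    analytics = solutions.Analytics(
        analytics_type="line",  # assumed keyword for selecting the chart type
        model="yolo11n.pt",
        update_every=15,  # redraw the chart every 15 frames instead of the default 30
    )

    cap = cv2.VideoCapture("traffic.mp4")  # placeholder video path
    frame_number = 0
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        results = analytics.process(frame, frame_number)  # SolutionResults with plot_im and classwise_count
        cv2.imshow("Analytics", results.plot_im)
        frame_number += 1
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()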