dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
  2. dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
  3. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -6
  5. tests/conftest.py +15 -39
  6. tests/test_cli.py +17 -17
  7. tests/test_cuda.py +17 -8
  8. tests/test_engine.py +36 -10
  9. tests/test_exports.py +98 -37
  10. tests/test_integrations.py +12 -15
  11. tests/test_python.py +126 -82
  12. tests/test_solutions.py +319 -135
  13. ultralytics/__init__.py +27 -9
  14. ultralytics/cfg/__init__.py +83 -87
  15. ultralytics/cfg/datasets/Argoverse.yaml +4 -4
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
  17. ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
  18. ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
  19. ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
  20. ultralytics/cfg/datasets/ImageNet.yaml +3 -3
  21. ultralytics/cfg/datasets/Objects365.yaml +24 -20
  22. ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
  23. ultralytics/cfg/datasets/VOC.yaml +10 -13
  24. ultralytics/cfg/datasets/VisDrone.yaml +43 -33
  25. ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
  26. ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
  27. ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
  28. ultralytics/cfg/datasets/coco-pose.yaml +26 -4
  29. ultralytics/cfg/datasets/coco.yaml +4 -4
  30. ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
  31. ultralytics/cfg/datasets/coco128.yaml +2 -2
  32. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  33. ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
  34. ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
  35. ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
  36. ultralytics/cfg/datasets/coco8.yaml +2 -2
  37. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  38. ultralytics/cfg/datasets/crack-seg.yaml +5 -5
  39. ultralytics/cfg/datasets/dog-pose.yaml +32 -4
  40. ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
  41. ultralytics/cfg/datasets/dota8.yaml +2 -2
  42. ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
  43. ultralytics/cfg/datasets/lvis.yaml +9 -9
  44. ultralytics/cfg/datasets/medical-pills.yaml +4 -5
  45. ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
  46. ultralytics/cfg/datasets/package-seg.yaml +5 -5
  47. ultralytics/cfg/datasets/signature.yaml +4 -4
  48. ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
  49. ultralytics/cfg/datasets/xView.yaml +5 -5
  50. ultralytics/cfg/default.yaml +96 -93
  51. ultralytics/cfg/trackers/botsort.yaml +16 -17
  52. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  53. ultralytics/data/__init__.py +4 -4
  54. ultralytics/data/annotator.py +12 -12
  55. ultralytics/data/augment.py +531 -564
  56. ultralytics/data/base.py +76 -81
  57. ultralytics/data/build.py +206 -42
  58. ultralytics/data/converter.py +179 -78
  59. ultralytics/data/dataset.py +121 -121
  60. ultralytics/data/loaders.py +114 -91
  61. ultralytics/data/split.py +28 -15
  62. ultralytics/data/split_dota.py +67 -48
  63. ultralytics/data/utils.py +110 -89
  64. ultralytics/engine/exporter.py +422 -460
  65. ultralytics/engine/model.py +224 -252
  66. ultralytics/engine/predictor.py +94 -89
  67. ultralytics/engine/results.py +345 -595
  68. ultralytics/engine/trainer.py +231 -134
  69. ultralytics/engine/tuner.py +279 -73
  70. ultralytics/engine/validator.py +53 -46
  71. ultralytics/hub/__init__.py +26 -28
  72. ultralytics/hub/auth.py +30 -16
  73. ultralytics/hub/google/__init__.py +34 -36
  74. ultralytics/hub/session.py +53 -77
  75. ultralytics/hub/utils.py +23 -109
  76. ultralytics/models/__init__.py +1 -1
  77. ultralytics/models/fastsam/__init__.py +1 -1
  78. ultralytics/models/fastsam/model.py +36 -18
  79. ultralytics/models/fastsam/predict.py +33 -44
  80. ultralytics/models/fastsam/utils.py +4 -5
  81. ultralytics/models/fastsam/val.py +12 -14
  82. ultralytics/models/nas/__init__.py +1 -1
  83. ultralytics/models/nas/model.py +16 -20
  84. ultralytics/models/nas/predict.py +12 -14
  85. ultralytics/models/nas/val.py +4 -5
  86. ultralytics/models/rtdetr/__init__.py +1 -1
  87. ultralytics/models/rtdetr/model.py +9 -9
  88. ultralytics/models/rtdetr/predict.py +22 -17
  89. ultralytics/models/rtdetr/train.py +20 -16
  90. ultralytics/models/rtdetr/val.py +79 -59
  91. ultralytics/models/sam/__init__.py +8 -2
  92. ultralytics/models/sam/amg.py +53 -38
  93. ultralytics/models/sam/build.py +29 -31
  94. ultralytics/models/sam/model.py +33 -38
  95. ultralytics/models/sam/modules/blocks.py +159 -182
  96. ultralytics/models/sam/modules/decoders.py +38 -47
  97. ultralytics/models/sam/modules/encoders.py +114 -133
  98. ultralytics/models/sam/modules/memory_attention.py +38 -31
  99. ultralytics/models/sam/modules/sam.py +114 -93
  100. ultralytics/models/sam/modules/tiny_encoder.py +268 -291
  101. ultralytics/models/sam/modules/transformer.py +59 -66
  102. ultralytics/models/sam/modules/utils.py +55 -72
  103. ultralytics/models/sam/predict.py +745 -341
  104. ultralytics/models/utils/loss.py +118 -107
  105. ultralytics/models/utils/ops.py +118 -71
  106. ultralytics/models/yolo/__init__.py +1 -1
  107. ultralytics/models/yolo/classify/predict.py +28 -26
  108. ultralytics/models/yolo/classify/train.py +50 -81
  109. ultralytics/models/yolo/classify/val.py +68 -61
  110. ultralytics/models/yolo/detect/predict.py +12 -15
  111. ultralytics/models/yolo/detect/train.py +56 -46
  112. ultralytics/models/yolo/detect/val.py +279 -223
  113. ultralytics/models/yolo/model.py +167 -86
  114. ultralytics/models/yolo/obb/predict.py +7 -11
  115. ultralytics/models/yolo/obb/train.py +23 -25
  116. ultralytics/models/yolo/obb/val.py +107 -99
  117. ultralytics/models/yolo/pose/__init__.py +1 -1
  118. ultralytics/models/yolo/pose/predict.py +12 -14
  119. ultralytics/models/yolo/pose/train.py +31 -69
  120. ultralytics/models/yolo/pose/val.py +119 -254
  121. ultralytics/models/yolo/segment/predict.py +21 -25
  122. ultralytics/models/yolo/segment/train.py +12 -66
  123. ultralytics/models/yolo/segment/val.py +126 -305
  124. ultralytics/models/yolo/world/train.py +53 -45
  125. ultralytics/models/yolo/world/train_world.py +51 -32
  126. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  127. ultralytics/models/yolo/yoloe/predict.py +30 -37
  128. ultralytics/models/yolo/yoloe/train.py +89 -71
  129. ultralytics/models/yolo/yoloe/train_seg.py +15 -17
  130. ultralytics/models/yolo/yoloe/val.py +56 -41
  131. ultralytics/nn/__init__.py +9 -11
  132. ultralytics/nn/autobackend.py +179 -107
  133. ultralytics/nn/modules/__init__.py +67 -67
  134. ultralytics/nn/modules/activation.py +8 -7
  135. ultralytics/nn/modules/block.py +302 -323
  136. ultralytics/nn/modules/conv.py +61 -104
  137. ultralytics/nn/modules/head.py +488 -186
  138. ultralytics/nn/modules/transformer.py +183 -123
  139. ultralytics/nn/modules/utils.py +15 -20
  140. ultralytics/nn/tasks.py +327 -203
  141. ultralytics/nn/text_model.py +81 -65
  142. ultralytics/py.typed +1 -0
  143. ultralytics/solutions/__init__.py +12 -12
  144. ultralytics/solutions/ai_gym.py +19 -27
  145. ultralytics/solutions/analytics.py +36 -26
  146. ultralytics/solutions/config.py +29 -28
  147. ultralytics/solutions/distance_calculation.py +23 -24
  148. ultralytics/solutions/heatmap.py +17 -19
  149. ultralytics/solutions/instance_segmentation.py +21 -19
  150. ultralytics/solutions/object_blurrer.py +16 -17
  151. ultralytics/solutions/object_counter.py +48 -53
  152. ultralytics/solutions/object_cropper.py +22 -16
  153. ultralytics/solutions/parking_management.py +61 -58
  154. ultralytics/solutions/queue_management.py +19 -19
  155. ultralytics/solutions/region_counter.py +63 -50
  156. ultralytics/solutions/security_alarm.py +22 -25
  157. ultralytics/solutions/similarity_search.py +107 -60
  158. ultralytics/solutions/solutions.py +343 -262
  159. ultralytics/solutions/speed_estimation.py +35 -31
  160. ultralytics/solutions/streamlit_inference.py +104 -40
  161. ultralytics/solutions/templates/similarity-search.html +31 -24
  162. ultralytics/solutions/trackzone.py +24 -24
  163. ultralytics/solutions/vision_eye.py +11 -12
  164. ultralytics/trackers/__init__.py +1 -1
  165. ultralytics/trackers/basetrack.py +18 -27
  166. ultralytics/trackers/bot_sort.py +48 -39
  167. ultralytics/trackers/byte_tracker.py +94 -94
  168. ultralytics/trackers/track.py +7 -16
  169. ultralytics/trackers/utils/gmc.py +37 -69
  170. ultralytics/trackers/utils/kalman_filter.py +68 -76
  171. ultralytics/trackers/utils/matching.py +13 -17
  172. ultralytics/utils/__init__.py +251 -275
  173. ultralytics/utils/autobatch.py +19 -7
  174. ultralytics/utils/autodevice.py +68 -38
  175. ultralytics/utils/benchmarks.py +169 -130
  176. ultralytics/utils/callbacks/base.py +12 -13
  177. ultralytics/utils/callbacks/clearml.py +14 -15
  178. ultralytics/utils/callbacks/comet.py +139 -66
  179. ultralytics/utils/callbacks/dvc.py +19 -27
  180. ultralytics/utils/callbacks/hub.py +8 -6
  181. ultralytics/utils/callbacks/mlflow.py +6 -10
  182. ultralytics/utils/callbacks/neptune.py +11 -19
  183. ultralytics/utils/callbacks/platform.py +73 -0
  184. ultralytics/utils/callbacks/raytune.py +3 -4
  185. ultralytics/utils/callbacks/tensorboard.py +9 -12
  186. ultralytics/utils/callbacks/wb.py +33 -30
  187. ultralytics/utils/checks.py +163 -114
  188. ultralytics/utils/cpu.py +89 -0
  189. ultralytics/utils/dist.py +24 -20
  190. ultralytics/utils/downloads.py +176 -146
  191. ultralytics/utils/errors.py +11 -13
  192. ultralytics/utils/events.py +113 -0
  193. ultralytics/utils/export/__init__.py +7 -0
  194. ultralytics/utils/{export.py → export/engine.py} +81 -63
  195. ultralytics/utils/export/imx.py +294 -0
  196. ultralytics/utils/export/tensorflow.py +217 -0
  197. ultralytics/utils/files.py +33 -36
  198. ultralytics/utils/git.py +137 -0
  199. ultralytics/utils/instance.py +105 -120
  200. ultralytics/utils/logger.py +404 -0
  201. ultralytics/utils/loss.py +99 -61
  202. ultralytics/utils/metrics.py +649 -478
  203. ultralytics/utils/nms.py +337 -0
  204. ultralytics/utils/ops.py +263 -451
  205. ultralytics/utils/patches.py +70 -31
  206. ultralytics/utils/plotting.py +253 -223
  207. ultralytics/utils/tal.py +48 -61
  208. ultralytics/utils/torch_utils.py +244 -251
  209. ultralytics/utils/tqdm.py +438 -0
  210. ultralytics/utils/triton.py +22 -23
  211. ultralytics/utils/tuner.py +11 -10
  212. dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
  213. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
  214. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
  215. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
ultralytics/solutions/queue_management.py

@@ -1,29 +1,30 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from typing import Any
+
 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
 from ultralytics.utils.plotting import colors
 
 
 class QueueManager(BaseSolution):
-    """
-    Manages queue counting in real-time video streams based on object tracks.
+    """Manages queue counting in real-time video streams based on object tracks.
 
-    This class extends BaseSolution to provide functionality for tracking and counting objects within a specified
-    region in video frames.
+    This class extends BaseSolution to provide functionality for tracking and counting objects within a specified region
+    in video frames.
 
     Attributes:
         counts (int): The current count of objects in the queue.
-        rect_color (Tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
+        rect_color (tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
         region_length (int): The number of points defining the queue region.
-        track_line (List[Tuple[int, int]]): List of track line coordinates.
-        track_history (Dict[int, List[Tuple[int, int]]]): Dictionary storing tracking history for each object.
+        track_line (list[tuple[int, int]]): List of track line coordinates.
+        track_history (dict[int, list[tuple[int, int]]]): Dictionary storing tracking history for each object.
 
     Methods:
-        initialize_region: Initializes the queue region.
-        process: Processes a single frame for queue management.
-        extract_tracks: Extracts object tracks from the current frame.
-        store_tracking_history: Stores the tracking history for an object.
-        display_output: Displays the processed output.
+        initialize_region: Initialize the queue region.
+        process: Process a single frame for queue management.
+        extract_tracks: Extract object tracks from the current frame.
+        store_tracking_history: Store the tracking history for an object.
+        display_output: Display the processed output.
 
     Examples:
         >>> cap = cv2.VideoCapture("path/to/video.mp4")
@@ -35,20 +36,19 @@ class QueueManager(BaseSolution):
         >>> results = queue_manager.process(im0)
     """
 
-    def __init__(self, **kwargs):
-        """Initializes the QueueManager with parameters for tracking and counting objects in a video stream."""
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the QueueManager with parameters for tracking and counting objects in a video stream."""
         super().__init__(**kwargs)
         self.initialize_region()
         self.counts = 0  # Queue counts information
         self.rect_color = (255, 255, 255)  # Rectangle color for visualization
         self.region_length = len(self.region)  # Store region length for further usage
 
-    def process(self, im0):
-        """
-        Process queue management for a single frame of video.
+    def process(self, im0) -> SolutionResults:
+        """Process queue management for a single frame of video.
 
         Args:
-            im0 (numpy.ndarray): Input image for processing, typically a frame from a video stream.
+            im0 (np.ndarray): Input image for processing, typically a frame from a video stream.
 
         Returns:
             (SolutionResults): Contains processed image `im0`, 'queue_count' (int, number of objects in the queue) and
@@ -81,7 +81,7 @@ class QueueManager(BaseSolution):
 
         # Display queue counts
         annotator.queue_counts_display(
-            f"Queue Counts : {str(self.counts)}",
+            f"Queue Counts : {self.counts}",
             points=self.region,
             region_color=self.rect_color,
             txt_color=(104, 31, 17),
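
The queue-management changes are cosmetic: built-in generics (`tuple`, `list`, `dict`) in the docstrings, an `Any`-typed `**kwargs`, an explicit `SolutionResults` return annotation, and a redundant `str()` dropped from an f-string. A minimal usage sketch assembled from the docstring example, assuming the standard `BaseSolution` kwargs `model` and `region` (the region points and video path are illustrative):

import cv2

from ultralytics import solutions

cap = cv2.VideoCapture("path/to/video.mp4")
queue_region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]  # illustrative polygon
queue_manager = solutions.QueueManager(model="yolo11n.pt", region=queue_region)

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    results = queue_manager.process(im0)  # returns SolutionResults as of 8.3.224
    print(results.queue_count)  # objects currently inside the region

cap.release()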
ultralytics/solutions/region_counter.py

@@ -1,5 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
+from typing import Any
+
 import numpy as np
 
 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
@@ -7,8 +11,7 @@ from ultralytics.utils.plotting import colors
 
 
 class RegionCounter(BaseSolution):
-    """
-    A class for real-time counting of objects within user-defined regions in a video stream.
+    """A class for real-time counting of objects within user-defined regions in a video stream.
 
     This class inherits from `BaseSolution` and provides functionality to define polygonal regions in a video frame,
     track objects, and count those objects that pass through each defined region. Useful for applications requiring
@@ -17,38 +20,54 @@ class RegionCounter(BaseSolution):
     Attributes:
         region_template (dict): Template for creating new counting regions with default attributes including name,
             polygon coordinates, and display colors.
-        counting_regions (list): List storing all defined regions, where each entry is based on `region_template`
-            and includes specific region settings like name, coordinates, and color.
+        counting_regions (list): List storing all defined regions, where each entry is based on `region_template` and
+            includes specific region settings like name, coordinates, and color.
         region_counts (dict): Dictionary storing the count of objects for each named region.
 
     Methods:
-        add_region: Adds a new counting region with specified attributes.
-        process: Processes video frames to count objects in each region.
+        add_region: Add a new counting region with specified attributes.
+        process: Process video frames to count objects in each region.
+        initialize_regions: Initialize zones to count the objects in each one. Zones could be multiple as well.
+
+    Examples:
+        Initialize a RegionCounter and add a counting region
+        >>> counter = RegionCounter()
+        >>> counter.add_region("Zone1", [(100, 100), (200, 100), (200, 200), (100, 200)], (255, 0, 0), (255, 255, 255))
+        >>> results = counter.process(frame)
+        >>> print(f"Total tracks: {results.total_tracks}")
     """
 
-    def __init__(self, **kwargs):
-        """Initializes the RegionCounter class for real-time counting in different regions of video streams."""
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the RegionCounter for real-time object counting in user-defined regions."""
         super().__init__(**kwargs)
         self.region_template = {
             "name": "Default Region",
             "polygon": None,
             "counts": 0,
-            "dragging": False,
             "region_color": (255, 255, 255),
             "text_color": (0, 0, 0),
         }
         self.region_counts = {}
         self.counting_regions = []
+        self.initialize_regions()
 
-    def add_region(self, name, polygon_points, region_color, text_color):
-        """
-        Add a new region to the counting list based on the provided template with specific attributes.
+    def add_region(
+        self,
+        name: str,
+        polygon_points: list[tuple],
+        region_color: tuple[int, int, int],
+        text_color: tuple[int, int, int],
+    ) -> dict[str, Any]:
+        """Add a new region to the counting list based on the provided template with specific attributes.
 
         Args:
            name (str): Name assigned to the new region.
-            polygon_points (List[Tuple]): List of (x, y) coordinates defining the region's polygon.
-            region_color (tuple): BGR color for region visualization.
-            text_color (tuple): BGR color for the text within the region.
+            polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon.
+            region_color (tuple[int, int, int]): BGR color for region visualization.
+            text_color (tuple[int, int, int]): BGR color for the text within the region.
+
+        Returns:
+            (dict[str, any]): Returns a dictionary including the region information i.e. name, region_color etc.
         """
         region = self.region_template.copy()
         region.update(
@@ -60,58 +79,52 @@ class RegionCounter(BaseSolution):
             }
         )
         self.counting_regions.append(region)
+        return region
 
-    def process(self, im0):
-        """
-        Process the input frame to detect and count objects within each defined region.
+    def initialize_regions(self):
+        """Initialize regions only once."""
+        if self.region is None:
+            self.initialize_region()
+        if not isinstance(self.region, dict):  # Ensure self.region is initialized and structured as a dictionary
+            self.region = {"Region#01": self.region}
+        for i, (name, pts) in enumerate(self.region.items()):
+            region = self.add_region(name, pts, colors(i, True), (255, 255, 255))
+            region["prepared_polygon"] = self.prep(region["polygon"])
+
+    def process(self, im0: np.ndarray) -> SolutionResults:
+        """Process the input frame to detect and count objects within each defined region.
 
         Args:
             im0 (np.ndarray): Input image frame where objects and regions are annotated.
 
         Returns:
-            (SolutionResults): Contains processed image `plot_im`, 'total_tracks' (int, total number of tracked objects),
-            and 'region_counts' (dict, counts of objects per region).
+            (SolutionResults): Contains processed image `plot_im`, 'total_tracks' (int, total number of tracked
+                objects), and 'region_counts' (dict, counts of objects per region).
         """
         self.extract_tracks(im0)
         annotator = SolutionAnnotator(im0, line_width=self.line_width)
 
-        # Ensure self.region is initialized and structured as a dictionary
-        if not isinstance(self.region, dict):
-            self.region = {"Region#01": self.region or self.initialize_region()}
-
-        # Draw only valid regions
-        for idx, (region_name, reg_pts) in enumerate(self.region.items(), start=1):
-            color = colors(idx, True)
-            annotator.draw_region(reg_pts, color, self.line_width * 2)
-            self.add_region(region_name, reg_pts, color, annotator.get_txt_color())
-
-        # Prepare regions for containment check (only process valid ones)
-        for region in self.counting_regions:
-            if "prepared_polygon" not in region:
-                region["prepared_polygon"] = self.prep(region["polygon"])
-
-        # Convert bounding boxes to NumPy array for center points
-        boxes_np = np.array([((box[0] + box[2]) / 2, (box[1] + box[3]) / 2) for box in self.boxes], dtype=np.float32)
-        points = [self.Point(pt) for pt in boxes_np]  # Convert centers to Point objects
-
-        # Process bounding boxes & check containment
-        if points:
-            for point, cls, track_id, box, conf in zip(points, self.clss, self.track_ids, self.boxes, self.confs):
-                annotator.box_label(box, label=self.adjust_box_label(cls, conf, track_id), color=colors(track_id, True))
-
-                for region in self.counting_regions:
-                    if region["prepared_polygon"].contains(point):
-                        region["counts"] += 1
-                        self.region_counts[region["name"]] = region["counts"]
+        for box, cls, track_id, conf in zip(self.boxes, self.clss, self.track_ids, self.confs):
+            annotator.box_label(box, label=self.adjust_box_label(cls, conf, track_id), color=colors(track_id, True))
+            center = self.Point(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
+            for region in self.counting_regions:
+                if region["prepared_polygon"].contains(center):
+                    region["counts"] += 1
+                    self.region_counts[region["name"]] = region["counts"]
 
         # Display region counts
         for region in self.counting_regions:
-            annotator.text_label(
-                region["polygon"].bounds,
+            poly = region["polygon"]
+            pts = list(map(tuple, np.array(poly.exterior.coords, dtype=np.int32)))
+            (x1, y1), (x2, y2) = [(int(poly.centroid.x), int(poly.centroid.y))] * 2
+            annotator.draw_region(pts, region["region_color"], self.line_width * 2)
+            annotator.adaptive_label(
+                [x1, y1, x2, y2],
                 label=str(region["counts"]),
                 color=region["region_color"],
                 txt_color=region["text_color"],
                 margin=self.line_width * 4,
+                shape="rect",
             )
             region["counts"] = 0  # Reset for next frame
         plot_im = annotator.result()
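
Unlike the queue manager, this is a behavioral change, not just a typing cleanup: region setup moves out of the per-frame `process` loop into the new `initialize_regions` method called once from `__init__`, `add_region` now returns the region dict so its prepared polygon can be cached immediately, the unused `dragging` key is gone, and counts are labeled with `adaptive_label` at each polygon's centroid rather than `text_label` on its bounds. A sketch of counting across two named zones, assuming `region` accepts a dict of named point lists as `initialize_regions` implies (a bare point list becomes "Region#01"; names and coordinates are illustrative):

import cv2

from ultralytics import solutions

regions = {
    "Entrance": [(50, 50), (250, 50), (250, 250), (50, 250)],
    "Checkout": [(300, 50), (500, 50), (500, 250), (300, 250)],
}
counter = solutions.RegionCounter(model="yolo11n.pt", region=regions)

cap = cv2.VideoCapture("path/to/video.mp4")
while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    results = counter.process(im0)
    print(results.region_counts)  # per-region totals, e.g. {"Entrance": 3, "Checkout": 1}
cap.release()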
ultralytics/solutions/security_alarm.py

@@ -1,13 +1,14 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from typing import Any
+
 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
 from ultralytics.utils import LOGGER
 from ultralytics.utils.plotting import colors
 
 
 class SecurityAlarm(BaseSolution):
-    """
-    A class to manage security alarm functionalities for real-time monitoring.
+    """A class to manage security alarm functionalities for real-time monitoring.
 
     This class extends the BaseSolution class and provides features to monitor objects in a frame, send email
     notifications when specific thresholds are exceeded for total detections, and annotate the output frame for
@@ -32,9 +33,8 @@ class SecurityAlarm(BaseSolution):
         >>> results = security.process(frame)
     """
 
-    def __init__(self, **kwargs):
-        """
-        Initialize the SecurityAlarm class with parameters for real-time object monitoring.
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the SecurityAlarm class with parameters for real-time object monitoring.
 
         Args:
             **kwargs (Any): Additional keyword arguments passed to the parent class.
@@ -46,17 +46,16 @@ class SecurityAlarm(BaseSolution):
         self.to_email = ""
         self.from_email = ""
 
-    def authenticate(self, from_email, password, to_email):
-        """
-        Authenticate the email server for sending alert notifications.
+    def authenticate(self, from_email: str, password: str, to_email: str) -> None:
+        """Authenticate the email server for sending alert notifications.
+
+        This method initializes a secure connection with the SMTP server and logs in using the provided credentials.
 
         Args:
             from_email (str): Sender's email address.
             password (str): Password for the sender's email account.
             to_email (str): Recipient's email address.
 
-        This method initializes a secure connection with the SMTP server and logs in using the provided credentials.
-
         Examples:
             >>> alarm = SecurityAlarm()
             >>> alarm.authenticate("sender@example.com", "password123", "recipient@example.com")
@@ -69,17 +68,16 @@ class SecurityAlarm(BaseSolution):
         self.to_email = to_email
         self.from_email = from_email
 
-    def send_email(self, im0, records=5):
-        """
-        Send an email notification with an image attachment indicating the number of objects detected.
-
-        Args:
-            im0 (numpy.ndarray): The input image or frame to be attached to the email.
-            records (int): The number of detected objects to be included in the email message.
+    def send_email(self, im0, records: int = 5) -> None:
+        """Send an email notification with an image attachment indicating the number of objects detected.
 
         This method encodes the input image, composes the email message with details about the detection, and sends it
         to the specified recipient.
 
+        Args:
+            im0 (np.ndarray): The input image or frame to be attached to the email.
+            records (int, optional): The number of detected objects to be included in the email message.
+
         Examples:
             >>> alarm = SecurityAlarm()
             >>> frame = cv2.imread("path/to/image.jpg")
@@ -114,21 +112,20 @@ class SecurityAlarm(BaseSolution):
         except Exception as e:
             LOGGER.error(f"Failed to send email: {e}")
 
-    def process(self, im0):
-        """
-        Monitor the frame, process object detections, and trigger alerts if thresholds are exceeded.
+    def process(self, im0) -> SolutionResults:
+        """Monitor the frame, process object detections, and trigger alerts if thresholds are exceeded.
+
+        This method processes the input frame, extracts detections, annotates the frame with bounding boxes, and sends
+        an email notification if the number of detected objects surpasses the specified threshold and an alert has not
+        already been sent.
 
         Args:
-            im0 (numpy.ndarray): The input image or frame to be processed and annotated.
+            im0 (np.ndarray): The input image or frame to be processed and annotated.
 
         Returns:
             (SolutionResults): Contains processed image `plot_im`, 'total_tracks' (total number of tracked objects) and
                 'email_sent' (whether an email alert was triggered).
 
-        This method processes the input frame, extracts detections, annotates the frame with bounding boxes, and sends
-        an email notification if the number of detected objects surpasses the specified threshold and an alert has not
-        already been sent.
-
         Examples:
             >>> alarm = SecurityAlarm()
             >>> frame = cv2.imread("path/to/image.jpg")
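
The SecurityAlarm edits are documentation-order and typing fixes: the prose summary moves above the Args block in each docstring, parameters gain annotations, and `numpy.ndarray` becomes `np.ndarray`. A minimal wiring sketch follows; treating `records` as a constructor kwarg for the alert threshold is an assumption inferred from the `send_email(records=...)` signature, and the SMTP credentials are placeholders (most providers require an app-specific password):

import cv2

from ultralytics import solutions

alarm = solutions.SecurityAlarm(model="yolo11n.pt", records=5)  # records threshold is assumed
alarm.authenticate("sender@example.com", "app-password", "recipient@example.com")

cap = cv2.VideoCapture("path/to/video.mp4")
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    results = alarm.process(frame)  # results.email_sent reports whether an alert fired
cap.release()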
ultralytics/solutions/similarity_search.py

@@ -1,87 +1,104 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import os
 from pathlib import Path
+from typing import Any
 
 import numpy as np
-import torch
 from PIL import Image
 
 from ultralytics.data.utils import IMG_FORMATS
-from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER, TORCH_VERSION
 from ultralytics.utils.checks import check_requirements
-from ultralytics.utils.torch_utils import select_device
+from ultralytics.utils.torch_utils import TORCH_2_4, select_device
 
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # Avoid OpenMP conflict on some systems
 
 
-class VisualAISearch(BaseSolution):
-    """
-    VisualAISearch leverages OpenCLIP to generate high-quality image and text embeddings, aligning them in a shared
-    semantic space. It then uses FAISS to perform fast and scalable similarity-based retrieval, allowing users to search
-    large collections of images using natural language queries with high accuracy and speed.
+class VisualAISearch:
+    """A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
+    FAISS for fast similarity-based retrieval.
+
+    This class aligns image and text embeddings in a shared semantic space, enabling users to search large collections
+    of images using natural language queries with high accuracy and speed.
 
     Attributes:
         data (str): Directory containing images.
         device (str): Computation device, e.g., 'cpu' or 'cuda'.
+        faiss_index (str): Path to the FAISS index file.
+        data_path_npy (str): Path to the numpy file storing image paths.
+        data_dir (Path): Path object for the data directory.
+        model: Loaded CLIP model.
+        index: FAISS index for similarity search.
+        image_paths (list[str]): List of image file paths.
+
+    Methods:
+        extract_image_feature: Extract CLIP embedding from an image.
+        extract_text_feature: Extract CLIP embedding from text.
+        load_or_build_index: Load existing FAISS index or build new one.
+        search: Perform semantic search for similar images.
+
+    Examples:
+        Initialize and search for images
+        >>> searcher = VisualAISearch(data="path/to/images", device="cuda")
+        >>> results = searcher.search("a cat sitting on a chair", k=10)
     """
 
-    def __init__(self, **kwargs):
-        """Initializes the VisualAISearch class with the FAISS index file and CLIP model."""
-        super().__init__(**kwargs)
-        check_requirements(["git+https://github.com/ultralytics/CLIP.git", "faiss-cpu"])
-        import clip
-        import faiss
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the VisualAISearch class with FAISS index and CLIP model."""
+        assert TORCH_2_4, f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})"
+        from ultralytics.nn.text_model import build_text_model
 
-        self.faiss = faiss
-        self.clip = clip
+        check_requirements("faiss-cpu")
 
+        self.faiss = __import__("faiss")
         self.faiss_index = "faiss.index"
         self.data_path_npy = "paths.npy"
-        self.model_name = "ViT-B/32"
-        self.data_dir = Path(self.CFG["data"])
-        self.device = select_device(self.CFG["device"])
+        self.data_dir = Path(kwargs.get("data", "images"))
+        self.device = select_device(kwargs.get("device", "cpu"))
 
         if not self.data_dir.exists():
             from ultralytics.utils import ASSETS_URL
 
-            self.LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
+            LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
             from ultralytics.utils.downloads import safe_download
 
             safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
             self.data_dir = Path("images")
 
-        self.model, self.preprocess = clip.load(self.model_name, device=self.device)
+        self.model = build_text_model("clip:ViT-B/32", device=self.device)
 
         self.index = None
         self.image_paths = []
 
         self.load_or_build_index()
 
-    def extract_image_feature(self, path):
-        """Extract CLIP image embedding."""
-        image = Image.open(path)
-        tensor = self.preprocess(image).unsqueeze(0).to(self.device)
-        with torch.no_grad():
-            return self.model.encode_image(tensor).cpu().numpy()
-
-    def extract_text_feature(self, text):
-        """Extract CLIP text embedding."""
-        tokens = self.clip.tokenize([text]).to(self.device)
-        with torch.no_grad():
-            return self.model.encode_text(tokens).cpu().numpy()
-
-    def load_or_build_index(self):
-        """Loads FAISS index or builds a new one from image features."""
+    def extract_image_feature(self, path: Path) -> np.ndarray:
+        """Extract CLIP image embedding from the given image path."""
+        return self.model.encode_image(Image.open(path)).detach().cpu().numpy()
+
+    def extract_text_feature(self, text: str) -> np.ndarray:
+        """Extract CLIP text embedding from the given text query."""
+        return self.model.encode_text(self.model.tokenize([text])).detach().cpu().numpy()
+
+    def load_or_build_index(self) -> None:
+        """Load existing FAISS index or build a new one from image features.
+
+        Checks if FAISS index and image paths exist on disk. If found, loads them directly. Otherwise, builds a new
+        index by extracting features from all images in the data directory, normalizes the features, and saves both the
+        index and image paths for future use.
+        """
         # Check if the FAISS index and corresponding image paths already exist
         if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
-            self.LOGGER.info("Loading existing FAISS index...")
+            LOGGER.info("Loading existing FAISS index...")
            self.index = self.faiss.read_index(self.faiss_index)  # Load the FAISS index from disk
            self.image_paths = np.load(self.data_path_npy)  # Load the saved image path list
            return  # Exit the function as the index is successfully loaded
 
         # If the index doesn't exist, start building it from scratch
-        self.LOGGER.info("Building FAISS index from images...")
+        LOGGER.info("Building FAISS index from images...")
         vectors = []  # List to store feature vectors of images
 
         # Iterate over all image files in the data directory
@@ -94,7 +111,7 @@ class VisualAISearch(BaseSolution):
                 vectors.append(self.extract_image_feature(file))
                 self.image_paths.append(file.name)  # Store the corresponding image name
             except Exception as e:
-                self.LOGGER.warning(f"Skipping {file.name}: {e}")
+                LOGGER.warning(f"Skipping {file.name}: {e}")
 
         # If no vectors were successfully created, raise an error
         if not vectors:
@@ -108,10 +125,24 @@ class VisualAISearch(BaseSolution):
         self.faiss.write_index(self.index, self.faiss_index)  # Save the newly built FAISS index to disk
         np.save(self.data_path_npy, np.array(self.image_paths))  # Save the list of image paths to disk
 
-        self.LOGGER.info(f"Indexed {len(self.image_paths)} images.")
+        LOGGER.info(f"Indexed {len(self.image_paths)} images.")
+
+    def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> list[str]:
+        """Return top-k semantically similar images to the given query.
 
-    def search(self, query, k=30, similarity_thresh=0.1):
-        """Returns top-k semantically similar images to the given query."""
+        Args:
+            query (str): Natural language text query to search for.
+            k (int, optional): Maximum number of results to return.
+            similarity_thresh (float, optional): Minimum similarity threshold for filtering results.
+
+        Returns:
+            (list[str]): List of image filenames ranked by similarity score.
+
+        Examples:
+            Search for images matching a query
+            >>> searcher = VisualAISearch(data="images")
+            >>> results = searcher.search("red car", k=5, similarity_thresh=0.2)
+        """
         text_feat = self.extract_text_feature(query).astype("float32")
         self.faiss.normalize_L2(text_feat)
@@ -121,31 +152,47 @@ class VisualAISearch(BaseSolution):
         ]
         results.sort(key=lambda x: x[1], reverse=True)
 
-        self.LOGGER.info("\nRanked Results:")
+        LOGGER.info("\nRanked Results:")
         for name, score in results:
-            self.LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
+            LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
 
         return [r[0] for r in results]
 
-    def __call__(self, query):
-        """Direct call for search function."""
+    def __call__(self, query: str) -> list[str]:
+        """Direct call interface for the search function."""
         return self.search(query)
 
 
 class SearchApp:
-    """
-    A Flask-based web interface powers the semantic image search experience, enabling users to input natural language
-    queries and instantly view the most relevant images retrieved from the indexed database—all through a clean,
-    responsive, and easily customizable frontend.
+    """A Flask-based web interface for semantic image search with natural language queries.
 
-    Args:
-        data (str): Path to images to index and search.
-        device (str): Device to run inference on (e.g. 'cpu', 'cuda').
+    This class provides a clean, responsive frontend that enables users to input natural language queries and instantly
+    view the most relevant images retrieved from the indexed database.
+
+    Attributes:
+        render_template: Flask template rendering function.
+        request: Flask request object.
+        searcher (VisualAISearch): Instance of the VisualAISearch class.
+        app (Flask): Flask application instance.
+
+    Methods:
+        index: Process user queries and display search results.
+        run: Start the Flask web application.
+
+    Examples:
+        Start a search application
+        >>> app = SearchApp(data="path/to/images", device="cuda")
+        >>> app.run(debug=True)
     """
 
-    def __init__(self, data="images", device=None):
-        """Initialization of the VisualAISearch class for performing semantic image search."""
-        check_requirements("flask")
+    def __init__(self, data: str = "images", device: str | None = None) -> None:
+        """Initialize the SearchApp with VisualAISearch backend.
+
+        Args:
+            data (str, optional): Path to directory containing images to index and search.
+            device (str, optional): Device to run inference on (e.g. 'cpu', 'cuda').
+        """
+        check_requirements("flask>=3.0.1")
         from flask import Flask, render_template, request
 
         self.render_template = render_template
@@ -159,14 +206,14 @@ class SearchApp:
         )
         self.app.add_url_rule("/", view_func=self.index, methods=["GET", "POST"])
 
-    def index(self):
-        """Function to process the user query and display output."""
+    def index(self) -> str:
+        """Process user query and display search results in the web interface."""
         results = []
         if self.request.method == "POST":
            query = self.request.form.get("query", "").strip()
            results = self.searcher(query)
         return self.render_template("similarity-search.html", results=results)
 
-    def run(self, debug=False):
-        """Runs the Flask web app."""
+    def run(self, debug: bool = False) -> None:
+        """Start the Flask web application server."""
         self.app.run(debug=debug)
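
This is the largest refactor of the four: `VisualAISearch` no longer subclasses `BaseSolution`, drops the `git+https://github.com/ultralytics/CLIP.git` requirement in favor of the bundled `ultralytics.nn.text_model.build_text_model("clip:ViT-B/32")`, asserts torch>=2.4, and reads `data` and `device` straight from `**kwargs` instead of `self.CFG`. A usage sketch under those assumptions; paths are illustrative, the classes are imported from the module shown in this diff, and faiss-cpu is required (plus flask>=3.0.1 for the web app):

from ultralytics.solutions.similarity_search import SearchApp, VisualAISearch

# Headless search: builds (or reloads) faiss.index and paths.npy in the working directory.
searcher = VisualAISearch(data="path/to/images", device="cpu")
hits = searcher.search("a red car parked outside", k=5, similarity_thresh=0.1)
print(hits)  # image filenames ranked by CLIP similarity

# Or serve the Flask frontend on top of the same index:
app = SearchApp(data="path/to/images", device="cpu")
app.run(debug=True)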