dgenerate-ultralytics-headless 8.3.159__py3-none-any.whl → 8.3.161__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/RECORD +62 -62
  3. tests/test_python.py +2 -1
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +0 -2
  6. ultralytics/cfg/datasets/Argoverse.yaml +1 -1
  7. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  8. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  9. ultralytics/cfg/datasets/GlobalWheat2020.yaml +1 -1
  10. ultralytics/cfg/datasets/HomeObjects-3K.yaml +1 -1
  11. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  12. ultralytics/cfg/datasets/Objects365.yaml +1 -1
  13. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  14. ultralytics/cfg/datasets/VOC.yaml +1 -1
  15. ultralytics/cfg/datasets/VisDrone.yaml +6 -3
  16. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  17. ultralytics/cfg/datasets/brain-tumor.yaml +1 -1
  18. ultralytics/cfg/datasets/carparts-seg.yaml +1 -1
  19. ultralytics/cfg/datasets/coco-pose.yaml +1 -1
  20. ultralytics/cfg/datasets/coco.yaml +1 -1
  21. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  22. ultralytics/cfg/datasets/coco128.yaml +1 -1
  23. ultralytics/cfg/datasets/coco8-grayscale.yaml +1 -1
  24. ultralytics/cfg/datasets/coco8-multispectral.yaml +1 -1
  25. ultralytics/cfg/datasets/coco8-pose.yaml +1 -1
  26. ultralytics/cfg/datasets/coco8-seg.yaml +1 -1
  27. ultralytics/cfg/datasets/coco8.yaml +1 -1
  28. ultralytics/cfg/datasets/crack-seg.yaml +1 -1
  29. ultralytics/cfg/datasets/dog-pose.yaml +1 -1
  30. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  31. ultralytics/cfg/datasets/dota8.yaml +1 -1
  32. ultralytics/cfg/datasets/hand-keypoints.yaml +1 -1
  33. ultralytics/cfg/datasets/lvis.yaml +1 -1
  34. ultralytics/cfg/datasets/medical-pills.yaml +1 -1
  35. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  36. ultralytics/cfg/datasets/package-seg.yaml +1 -1
  37. ultralytics/cfg/datasets/signature.yaml +1 -1
  38. ultralytics/cfg/datasets/tiger-pose.yaml +1 -1
  39. ultralytics/cfg/datasets/xView.yaml +1 -1
  40. ultralytics/data/augment.py +8 -8
  41. ultralytics/data/converter.py +3 -5
  42. ultralytics/data/dataset.py +1 -1
  43. ultralytics/data/split.py +1 -1
  44. ultralytics/engine/exporter.py +11 -2
  45. ultralytics/engine/model.py +2 -0
  46. ultralytics/engine/results.py +1 -6
  47. ultralytics/models/yolo/model.py +25 -24
  48. ultralytics/models/yolo/world/train.py +1 -1
  49. ultralytics/models/yolo/world/train_world.py +6 -6
  50. ultralytics/models/yolo/yoloe/train.py +1 -1
  51. ultralytics/nn/autobackend.py +7 -1
  52. ultralytics/solutions/heatmap.py +1 -1
  53. ultralytics/solutions/object_counter.py +9 -9
  54. ultralytics/solutions/similarity_search.py +11 -12
  55. ultralytics/solutions/solutions.py +55 -56
  56. ultralytics/utils/__init__.py +1 -4
  57. ultralytics/utils/instance.py +2 -0
  58. ultralytics/utils/metrics.py +24 -36
  59. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/WHEEL +0 -0
  60. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/entry_points.txt +0 -0
  61. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/licenses/LICENSE +0 -0
  62. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/top_level.txt +0 -0
ultralytics/cfg/datasets/tiger-pose.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── tiger-pose ← downloads here (75.3 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/tiger-pose # dataset root dir
+path: tiger-pose # dataset root dir
 train: train # train images (relative to 'path') 210 images
 val: val # val images (relative to 'path') 53 images
 
ultralytics/cfg/datasets/xView.yaml CHANGED
@@ -10,7 +10,7 @@
 # └── xView ← downloads here (20.7 GB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/xView # dataset root dir
+path: xView # dataset root dir
 train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
 val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
 
ultralytics/data/augment.py CHANGED
@@ -251,8 +251,7 @@ class Compose:
             >>> multiple_transforms = compose[0:2]  # Returns a Compose object with RandomFlip and RandomPerspective
         """
         assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
-        index = [index] if isinstance(index, int) else index
-        return Compose([self.transforms[i] for i in index])
+        return Compose([self.transforms[i] for i in index]) if isinstance(index, list) else self.transforms[index]
 
     def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:
         """
@@ -1560,14 +1559,15 @@ class RandomFlip:
         h = 1 if instances.normalized else h
         w = 1 if instances.normalized else w
 
-        # Flip up-down
+        # WARNING: two separate if and calls to random.random() intentional for reproducibility with older versions
         if self.direction == "vertical" and random.random() < self.p:
             img = np.flipud(img)
             instances.flipud(h)
+            if self.flip_idx is not None and instances.keypoints is not None:
+                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         if self.direction == "horizontal" and random.random() < self.p:
             img = np.fliplr(img)
             instances.fliplr(w)
-            # For keypoints
             if self.flip_idx is not None and instances.keypoints is not None:
                 instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         labels["img"] = np.ascontiguousarray(img)
@@ -2533,9 +2533,9 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
     flip_idx = dataset.data.get("flip_idx", [])  # for keypoints augmentation
     if dataset.use_keypoints:
         kpt_shape = dataset.data.get("kpt_shape", None)
-        if len(flip_idx) == 0 and hyp.fliplr > 0.0:
-            hyp.fliplr = 0.0
-            LOGGER.warning("No 'flip_idx' array defined in data.yaml, setting augmentation 'fliplr=0.0'")
+        if len(flip_idx) == 0 and (hyp.fliplr > 0.0 or hyp.flipud > 0.0):
+            hyp.fliplr = hyp.flipud = 0.0  # both fliplr and flipud require flip_idx
+            LOGGER.warning("No 'flip_idx' array defined in data.yaml, disabling 'fliplr' and 'flipud' augmentations.")
         elif flip_idx and (len(flip_idx) != kpt_shape[0]):
             raise ValueError(f"data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}")
 
@@ -2546,7 +2546,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
             CutMix(dataset, pre_transform=pre_transform, p=hyp.cutmix),
             Albumentations(p=1.0),
             RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
-            RandomFlip(direction="vertical", p=hyp.flipud),
+            RandomFlip(direction="vertical", p=hyp.flipud, flip_idx=flip_idx),
             RandomFlip(direction="horizontal", p=hyp.fliplr, flip_idx=flip_idx),
         ]
     )  # transforms
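The net effect of these three hunks: vertical flips now remap symmetric keypoint pairs via flip_idx just like horizontal flips, and pose datasets without flip_idx get both flip augmentations disabled instead of only fliplr. A hedged construction sketch (the 5-point flip_idx is illustrative, not from a real dataset):

from ultralytics.data.augment import RandomFlip

# flip_idx[i] is the index of keypoint i's mirror partner (e.g. left eye <-> right eye)
flip_idx = [1, 0, 2, 4, 3]
flip_ud = RandomFlip(direction="vertical", p=0.5, flip_idx=flip_idx)  # now keypoint-aware
flip_lr = RandomFlip(direction="horizontal", p=0.5, flip_idx=flip_idx)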
ultralytics/data/converter.py CHANGED
@@ -248,12 +248,10 @@ def convert_coco(
        >>> from ultralytics.data.converter import convert_coco
 
        Convert COCO annotations to YOLO format
-       >>> convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
+       >>> convert_coco("coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
 
        Convert LVIS annotations to YOLO format
-       >>> convert_coco(
-       ...     "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
-       ... )
+       >>> convert_coco("lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
    """
    # Create dataset directory
    save_dir = increment_path(save_dir)  # increment if save directory already exists
@@ -724,7 +722,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
        >>> convert_to_multispectral("path/to/image.jpg", n_channels=10)
 
        Convert a dataset
-       >>> convert_to_multispectral("../datasets/coco8", n_channels=10)
+       >>> convert_to_multispectral("coco8", n_channels=10)
    """
    from scipy.interpolate import interp1d
 
ultralytics/data/dataset.py CHANGED
@@ -482,7 +482,7 @@ class GroundingDataset(YOLODataset):
            a warning is logged and verification is skipped.
        """
        expected_counts = {
-           "final_mixed_train_no_coco_segm": 3662344,
+           "final_mixed_train_no_coco_segm": 3662412,
            "final_mixed_train_no_coco": 3681235,
            "final_flickr_separateGT_train_segm": 638214,
            "final_flickr_separateGT_train": 640704,
ultralytics/data/split.py CHANGED
@@ -135,4 +135,4 @@ def autosplit(
 
 
 if __name__ == "__main__":
-    split_classify_dataset("../datasets/caltech101")
+    split_classify_dataset("caltech101")
ultralytics/engine/exporter.py CHANGED
@@ -706,7 +706,16 @@
     def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
         """Export YOLO model to PaddlePaddle format."""
         assert not IS_JETSON, "Jetson Paddle exports not supported yet"
-        check_requirements(("paddlepaddle-gpu" if torch.cuda.is_available() else "paddlepaddle>=3.0.0", "x2paddle"))
+        check_requirements(
+            (
+                "paddlepaddle-gpu"
+                if torch.cuda.is_available()
+                else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
+                if ARM64
+                else "paddlepaddle>=3.0.0",
+                "x2paddle",
+            )
+        )
         import x2paddle  # noqa
         from x2paddle.convert import pytorch2paddle  # noqa
 
@@ -1495,7 +1504,7 @@ class NMSModel(torch.nn.Module):
         scores, classes = scores.max(dim=-1)
         self.args.max_det = min(pred.shape[1], self.args.max_det)  # in case num_anchors < max_det
         # (N, max_det, 4 coords + 1 class score + 1 class label + extra_shape).
-        out = torch.zeros(bs, self.args.max_det, boxes.shape[-1] + 2 + extra_shape, **kwargs)
+        out = torch.zeros(pred.shape[0], self.args.max_det, boxes.shape[-1] + 2 + extra_shape, **kwargs)
         for i in range(bs):
             box, cls, score, extra = boxes[i], classes[i], scores[i], extras[i]
             mask = score > self.args.conf
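Both this exporter hunk and the matching autobackend.py hunk below use the same chained conditional; spelled out, it selects one of three pins (assuming the ARM64 constant from ultralytics.utils):

import torch
from ultralytics.utils import ARM64

if torch.cuda.is_available():
    pkg = "paddlepaddle-gpu"      # CUDA available: GPU wheel
elif ARM64:
    pkg = "paddlepaddle==3.0.0"   # ARM64: exact pin to 3.0.0
else:
    pkg = "paddlepaddle>=3.0.0"   # default: 3.0.0 or newer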
ultralytics/engine/model.py CHANGED
@@ -777,6 +777,8 @@ class Model(torch.nn.Module):
 
         checks.check_pip_update_available()
 
+        if isinstance(kwargs.get("pretrained", None), (str, Path)):
+            self.load(kwargs["pretrained"])  # load pretrained weights if provided
         overrides = YAML.load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
         custom = {
             # NOTE: handle the case when 'cfg' includes 'data'.
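The new check means train() accepts a weights path through the pretrained argument and loads it before training starts. A minimal sketch (model and dataset names are standard Ultralytics examples):

from ultralytics import YOLO

model = YOLO("yolo11n.yaml")  # architecture only, random weights
model.train(data="coco8.yaml", epochs=1, pretrained="yolo11n.pt")  # weights loaded via the new isinstance check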
ultralytics/engine/results.py CHANGED
@@ -16,7 +16,6 @@ import torch
 from ultralytics.data.augment import LetterBox
 from ultralytics.utils import LOGGER, DataExportMixin, SimpleClass, ops
 from ultralytics.utils.plotting import Annotator, colors, save_one_box
-from ultralytics.utils.torch_utils import smart_inference_mode
 
 
 class BaseTensor(SimpleClass):
@@ -801,7 +800,7 @@ class Results(SimpleClass, DataExportMixin):
             decimals (int): Number of decimal places to round the output values to.
 
         Returns:
-            (List[Dict]): A list of dictionaries, each containing summarized information for a single detection
+            (List[Dict[str, Any]]): A list of dictionaries, each containing summarized information for a single detection
                 or classification result. The structure of each dictionary varies based on the task type
                 (classification or detection) and available information (boxes, masks, keypoints).
 
@@ -1204,7 +1203,6 @@ class Keypoints(BaseTensor):
         >>> keypoints_cpu = keypoints.cpu()  # Move keypoints to CPU
     """
 
-    @smart_inference_mode()  # avoid keypoints < conf in-place error
     def __init__(self, keypoints: Union[torch.Tensor, np.ndarray], orig_shape: Tuple[int, int]) -> None:
         """
         Initialize the Keypoints object with detection keypoints and original image dimensions.
@@ -1225,9 +1223,6 @@
         """
         if keypoints.ndim == 2:
             keypoints = keypoints[None, :]
-        if keypoints.shape[2] == 3:  # x, y, conf
-            mask = keypoints[..., 2] < 0.5  # points with conf < 0.5 (not visible)
-            keypoints[..., :2][mask] = 0
         super().__init__(keypoints, orig_shape)
         self.has_visible = self.data.shape[-1] == 3
 
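Since Keypoints.__init__ no longer zeroes points with confidence below 0.5 (and the smart_inference_mode workaround for that in-place write is gone), callers that relied on the implicit masking can filter explicitly. A hedged sketch (the 0.5 threshold mirrors the removed code):

from ultralytics import YOLO

kpts = YOLO("yolo11n-pose.pt")("bus.jpg")[0].keypoints
xy = kpts.xy.clone()
if kpts.has_visible:
    xy[kpts.conf < 0.5] = 0  # reproduce the old zeroing explicitly, outside the Keypoints object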
ultralytics/models/yolo/model.py CHANGED
@@ -406,18 +406,18 @@
                 f"Expected equal number of bounding boxes and classes, but got {len(visual_prompts['bboxes'])} and "
                 f"{len(visual_prompts['cls'])} respectively"
             )
-            self.predictor = (predictor or self._smart_load("predictor"))(
-                overrides={
-                    "task": self.model.task,
-                    "mode": "predict",
-                    "save": False,
-                    "verbose": refer_image is None,
-                    "batch": 1,
-                },
-                _callbacks=self.callbacks,
-            )
+            if not isinstance(self.predictor, yolo.yoloe.YOLOEVPDetectPredictor):
+                self.predictor = (predictor or yolo.yoloe.YOLOEVPDetectPredictor)(
+                    overrides={
+                        "task": self.model.task,
+                        "mode": "predict",
+                        "save": False,
+                        "verbose": refer_image is None,
+                        "batch": 1,
+                    },
+                    _callbacks=self.callbacks,
+                )
 
-        if len(visual_prompts):
             num_cls = (
                 max(len(set(c)) for c in visual_prompts["cls"])
                 if isinstance(source, list) and refer_image is None  # means multiple images
@@ -426,18 +426,19 @@
             self.model.model[-1].nc = num_cls
             self.model.names = [f"object{i}" for i in range(num_cls)]
             self.predictor.set_prompts(visual_prompts.copy())
-
-        self.predictor.setup_model(model=self.model)
-
-        if refer_image is None and source is not None:
-            dataset = load_inference_source(source)
-            if dataset.mode in {"video", "stream"}:
-                # NOTE: set the first frame as refer image for videos/streams inference
-                refer_image = next(iter(dataset))[1][0]
-        if refer_image is not None and len(visual_prompts):
-            vpe = self.predictor.get_vpe(refer_image)
-            self.model.set_classes(self.model.names, vpe)
-            self.task = "segment" if isinstance(self.predictor, yolo.segment.SegmentationPredictor) else "detect"
-            self.predictor = None  # reset predictor
+            self.predictor.setup_model(model=self.model)
+
+            if refer_image is None and source is not None:
+                dataset = load_inference_source(source)
+                if dataset.mode in {"video", "stream"}:
+                    # NOTE: set the first frame as refer image for videos/streams inference
+                    refer_image = next(iter(dataset))[1][0]
+            if refer_image is not None:
+                vpe = self.predictor.get_vpe(refer_image)
+                self.model.set_classes(self.model.names, vpe)
+                self.task = "segment" if isinstance(self.predictor, yolo.segment.SegmentationPredictor) else "detect"
+                self.predictor = None  # reset predictor
+        elif isinstance(self.predictor, yolo.yoloe.YOLOEVPDetectPredictor):
+            self.predictor = None  # reset predictor if no visual prompts
 
         return super().predict(source, stream, **kwargs)
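Taken together, the two hunks keep a YOLOEVPDetectPredictor alive across repeated visual-prompt calls and only reset it when prompts are absent. A hedged usage sketch (checkpoint name and box coordinates are illustrative):

import numpy as np
from ultralytics import YOLOE

model = YOLOE("yoloe-11s-seg.pt")  # illustrative checkpoint name
prompts = {"bboxes": np.array([[100, 200, 300, 400]]), "cls": np.array([0])}
results = model.predict("bus.jpg", visual_prompts=prompts)
results = model.predict("zidane.jpg", visual_prompts=prompts)  # reuses the same VP predictor now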
ultralytics/models/yolo/world/train.py CHANGED
@@ -158,7 +158,7 @@ class WorldTrainer(DetectionTrainer):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = self.model.get_text_pe(texts, batch, cache_clip_model=False)
+        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
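Under multi-GPU (DDP) training, self.model is wrapped and custom methods like get_text_pe live on the inner .module; de_parallel unwraps it. A sketch of the idea (the real helper lives in ultralytics.utils.torch_utils); the same fix is applied in yoloe/train.py below:

import torch.nn as nn

def de_parallel(model: nn.Module) -> nn.Module:
    """Return the underlying module if model is wrapped by DataParallel/DistributedDataParallel."""
    return model.module if isinstance(model, (nn.DataParallel, nn.parallel.DistributedDataParallel)) else model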
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -35,12 +35,12 @@ class WorldTrainerFromScratch(WorldTrainer):
         ...     yolo_data=["Objects365.yaml"],
         ...     grounding_data=[
         ...         dict(
-        ...             img_path="../datasets/flickr30k/images",
-        ...             json_file="../datasets/flickr30k/final_flickr_separateGT_train.json",
+        ...             img_path="flickr30k/images",
+        ...             json_file="flickr30k/final_flickr_separateGT_train.json",
         ...         ),
         ...         dict(
-        ...             img_path="../datasets/GQA/images",
-        ...             json_file="../datasets/GQA/final_mixed_train_no_coco.json",
+        ...             img_path="GQA/images",
+        ...             json_file="GQA/final_mixed_train_no_coco.json",
         ...         ),
         ...     ],
         ... ),
@@ -70,8 +70,8 @@ class WorldTrainerFromScratch(WorldTrainer):
         ...     yolo_data=["Objects365.yaml"],
         ...     grounding_data=[
         ...         dict(
-        ...             img_path="../datasets/flickr30k/images",
-        ...             json_file="../datasets/flickr30k/final_flickr_separateGT_train.json",
+        ...             img_path="flickr30k/images",
+        ...             json_file="flickr30k/final_flickr_separateGT_train.json",
         ...         ),
         ...     ],
         ... ),
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -222,7 +222,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
+        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
ultralytics/nn/autobackend.py CHANGED
@@ -487,7 +487,13 @@ class AutoBackend(nn.Module):
         # PaddlePaddle
         elif paddle:
             LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
-            check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle>=3.0.0")
+            check_requirements(
+                "paddlepaddle-gpu"
+                if torch.cuda.is_available()
+                else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
+                if ARM64
+                else "paddlepaddle>=3.0.0"
+            )
             import paddle.inference as pdi  # noqa
 
             w = Path(w)
ultralytics/solutions/heatmap.py CHANGED
@@ -124,6 +124,6 @@ class Heatmap(ObjectCounter):
             plot_im=plot_im,
             in_count=self.in_count,
             out_count=self.out_count,
-            classwise_count=dict(self.classwise_counts),
+            classwise_count=dict(self.classwise_count),
             total_tracks=len(self.track_ids),
         )
ultralytics/solutions/object_counter.py CHANGED
@@ -43,7 +43,7 @@ class ObjectCounter(BaseSolution):
         self.in_count = 0  # Counter for objects moving inward
         self.out_count = 0  # Counter for objects moving outward
         self.counted_ids = []  # List of IDs of objects that have been counted
-        self.classwise_counts = defaultdict(lambda: {"IN": 0, "OUT": 0})  # Dictionary for counts, categorized by class
+        self.classwise_count = defaultdict(lambda: {"IN": 0, "OUT": 0})  # Dictionary for counts, categorized by class
         self.region_initialized = False  # Flag indicating whether the region has been initialized
 
         self.show_in = self.CFG["show_in"]
@@ -85,17 +85,17 @@
                 # Vertical region: Compare x-coordinates to determine direction
                 if current_centroid[0] > prev_position[0]:  # Moving right
                     self.in_count += 1
-                    self.classwise_counts[self.names[cls]]["IN"] += 1
+                    self.classwise_count[self.names[cls]]["IN"] += 1
                 else:  # Moving left
                     self.out_count += 1
-                    self.classwise_counts[self.names[cls]]["OUT"] += 1
+                    self.classwise_count[self.names[cls]]["OUT"] += 1
             # Horizontal region: Compare y-coordinates to determine direction
             elif current_centroid[1] > prev_position[1]:  # Moving downward
                 self.in_count += 1
-                self.classwise_counts[self.names[cls]]["IN"] += 1
+                self.classwise_count[self.names[cls]]["IN"] += 1
             else:  # Moving upward
                 self.out_count += 1
-                self.classwise_counts[self.names[cls]]["OUT"] += 1
+                self.classwise_count[self.names[cls]]["OUT"] += 1
             self.counted_ids.append(track_id)
 
         elif len(self.region) > 2:  # Polygonal region
@@ -111,10 +111,10 @@
                     and current_centroid[1] > prev_position[1]
                 ):  # Moving right or downward
                     self.in_count += 1
-                    self.classwise_counts[self.names[cls]]["IN"] += 1
+                    self.classwise_count[self.names[cls]]["IN"] += 1
                 else:  # Moving left or upward
                     self.out_count += 1
-                    self.classwise_counts[self.names[cls]]["OUT"] += 1
+                    self.classwise_count[self.names[cls]]["OUT"] += 1
                 self.counted_ids.append(track_id)
 
     def display_counts(self, plot_im) -> None:
@@ -132,7 +132,7 @@
         labels_dict = {
             str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
             f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
-            for key, value in self.classwise_counts.items()
+            for key, value in self.classwise_count.items()
             if value["IN"] != 0 or value["OUT"] != 0 and (self.show_in or self.show_out)
         }
         if labels_dict:
@@ -190,6 +190,6 @@
             plot_im=plot_im,
             in_count=self.in_count,
             out_count=self.out_count,
-            classwise_count=dict(self.classwise_counts),
+            classwise_count=dict(self.classwise_count),
             total_tracks=len(self.track_ids),
         )
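Note the attribute rename is user-visible: code that read counter.classwise_counts must switch to counter.classwise_count (the SolutionResults kwarg classwise_count is unchanged). A short sketch:

from ultralytics import solutions

counter = solutions.ObjectCounter(model="yolo11n.pt", region=[(20, 400), (1080, 400)])
# ... after processing frames:
print(counter.classwise_count)  # was counter.classwise_counts in 8.3.159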
ultralytics/solutions/similarity_search.py CHANGED
@@ -9,14 +9,14 @@ from PIL import Image
 
 from ultralytics.data.utils import IMG_FORMATS
 from ultralytics.nn.text_model import build_text_model
-from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.torch_utils import select_device
 
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # Avoid OpenMP conflict on some systems
 
 
-class VisualAISearch(BaseSolution):
+class VisualAISearch:
     """
     A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
     FAISS for fast similarity-based retrieval.
@@ -48,19 +48,18 @@ class VisualAISearch(BaseSolution):
 
     def __init__(self, **kwargs: Any) -> None:
         """Initialize the VisualAISearch class with FAISS index and CLIP model."""
-        super().__init__(**kwargs)
         check_requirements("faiss-cpu")
 
         self.faiss = __import__("faiss")
         self.faiss_index = "faiss.index"
         self.data_path_npy = "paths.npy"
-        self.data_dir = Path(self.CFG["data"])
-        self.device = select_device(self.CFG["device"])
+        self.data_dir = Path(kwargs.get("data", "images"))
+        self.device = select_device(kwargs.get("device", "cpu"))
 
         if not self.data_dir.exists():
             from ultralytics.utils import ASSETS_URL
 
-            self.LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
+            LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
             from ultralytics.utils.downloads import safe_download
 
             safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
@@ -91,13 +90,13 @@ class VisualAISearch(BaseSolution):
         """
         # Check if the FAISS index and corresponding image paths already exist
         if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
-            self.LOGGER.info("Loading existing FAISS index...")
+            LOGGER.info("Loading existing FAISS index...")
             self.index = self.faiss.read_index(self.faiss_index)  # Load the FAISS index from disk
             self.image_paths = np.load(self.data_path_npy)  # Load the saved image path list
             return  # Exit the function as the index is successfully loaded
 
         # If the index doesn't exist, start building it from scratch
-        self.LOGGER.info("Building FAISS index from images...")
+        LOGGER.info("Building FAISS index from images...")
         vectors = []  # List to store feature vectors of images
 
         # Iterate over all image files in the data directory
@@ -110,7 +109,7 @@
                 vectors.append(self.extract_image_feature(file))
                 self.image_paths.append(file.name)  # Store the corresponding image name
             except Exception as e:
-                self.LOGGER.warning(f"Skipping {file.name}: {e}")
+                LOGGER.warning(f"Skipping {file.name}: {e}")
 
         # If no vectors were successfully created, raise an error
         if not vectors:
@@ -124,7 +123,7 @@
         self.faiss.write_index(self.index, self.faiss_index)  # Save the newly built FAISS index to disk
         np.save(self.data_path_npy, np.array(self.image_paths))  # Save the list of image paths to disk
 
-        self.LOGGER.info(f"Indexed {len(self.image_paths)} images.")
+        LOGGER.info(f"Indexed {len(self.image_paths)} images.")
 
     def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> List[str]:
         """
@@ -152,9 +151,9 @@
         ]
         results.sort(key=lambda x: x[1], reverse=True)
 
-        self.LOGGER.info("\nRanked Results:")
+        LOGGER.info("\nRanked Results:")
         for name, score in results:
-            self.LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
+            LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
 
         return [r[0] for r in results]
 
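VisualAISearch is now a plain class: it no longer runs BaseSolution.__init__ (so no YOLO model load or shapely checks) and reads data/device straight from kwargs. A hedged sketch of the resulting usage (paths and query are illustrative):

from ultralytics.solutions.similarity_search import VisualAISearch

searcher = VisualAISearch(data="images", device="cpu")  # kwargs defaults match the new code
hits = searcher.search("a dog playing in a park", k=10)
print(hits[:3])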
ultralytics/solutions/solutions.py CHANGED
@@ -81,60 +81,59 @@ class BaseSolution:
         self.CFG = vars(SolutionConfig().update(**kwargs))
         self.LOGGER = LOGGER  # Store logger object to be used in multiple solution classes
 
-        if self.__class__.__name__ != "VisualAISearch":
-            check_requirements("shapely>=2.0.0")
-            from shapely.geometry import LineString, Point, Polygon
-            from shapely.prepared import prep
-
-            self.LineString = LineString
-            self.Polygon = Polygon
-            self.Point = Point
-            self.prep = prep
-            self.annotator = None  # Initialize annotator
-            self.tracks = None
-            self.track_data = None
-            self.boxes = []
-            self.clss = []
-            self.track_ids = []
-            self.track_line = None
-            self.masks = None
-            self.r_s = None
-            self.frame_no = -1  # Only for logging
-
-            self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
-            self.region = self.CFG["region"]  # Store region data for other classes usage
-            self.line_width = self.CFG["line_width"]
-
-            # Load Model and store additional information (classes, show_conf, show_label)
-            if self.CFG["model"] is None:
-                self.CFG["model"] = "yolo11n.pt"
-            self.model = YOLO(self.CFG["model"])
-            self.names = self.model.names
-            self.classes = self.CFG["classes"]
-            self.show_conf = self.CFG["show_conf"]
-            self.show_labels = self.CFG["show_labels"]
-            self.device = self.CFG["device"]
-
-            self.track_add_args = {  # Tracker additional arguments for advance configuration
-                k: self.CFG[k] for k in ["iou", "conf", "device", "max_det", "half", "tracker"]
-            }  # verbose must be passed to track method; setting it False in YOLO still logs the track information.
-
-            if is_cli and self.CFG["source"] is None:
-                d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
-                self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
-                from ultralytics.utils.downloads import safe_download
-
-                safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
-                self.CFG["source"] = d_s  # set default source
-
-            # Initialize environment and region setup
-            self.env_check = check_imshow(warn=True)
-            self.track_history = defaultdict(list)
-
-            self.profilers = (
-                ops.Profile(device=self.device),  # track
-                ops.Profile(device=self.device),  # solution
-            )
+        check_requirements("shapely>=2.0.0")
+        from shapely.geometry import LineString, Point, Polygon
+        from shapely.prepared import prep
+
+        self.LineString = LineString
+        self.Polygon = Polygon
+        self.Point = Point
+        self.prep = prep
+        self.annotator = None  # Initialize annotator
+        self.tracks = None
+        self.track_data = None
+        self.boxes = []
+        self.clss = []
+        self.track_ids = []
+        self.track_line = None
+        self.masks = None
+        self.r_s = None
+        self.frame_no = -1  # Only for logging
+
+        self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
+        self.region = self.CFG["region"]  # Store region data for other classes usage
+        self.line_width = self.CFG["line_width"]
+
+        # Load Model and store additional information (classes, show_conf, show_label)
+        if self.CFG["model"] is None:
+            self.CFG["model"] = "yolo11n.pt"
+        self.model = YOLO(self.CFG["model"])
+        self.names = self.model.names
+        self.classes = self.CFG["classes"]
+        self.show_conf = self.CFG["show_conf"]
+        self.show_labels = self.CFG["show_labels"]
+        self.device = self.CFG["device"]
+
+        self.track_add_args = {  # Tracker additional arguments for advance configuration
+            k: self.CFG[k] for k in ["iou", "conf", "device", "max_det", "half", "tracker"]
+        }  # verbose must be passed to track method; setting it False in YOLO still logs the track information.
+
+        if is_cli and self.CFG["source"] is None:
+            d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
+            self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
+            from ultralytics.utils.downloads import safe_download
+
+            safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
+            self.CFG["source"] = d_s  # set default source
+
+        # Initialize environment and region setup
+        self.env_check = check_imshow(warn=True)
+        self.track_history = defaultdict(list)
+
+        self.profilers = (
+            ops.Profile(device=self.device),  # track
+            ops.Profile(device=self.device),  # solution
+        )
 
     def adjust_box_label(self, cls: int, conf: float, track_id: Optional[int] = None) -> Optional[str]:
         """
@@ -808,10 +807,10 @@ class SolutionResults:
         filled_slots (int): The number of filled slots in a monitored area.
         email_sent (bool): A flag indicating whether an email notification was sent.
         total_tracks (int): The total number of tracked objects.
-        region_counts (Dict): The count of objects within a specific region.
+        region_counts (Dict[str, int]): The count of objects within a specific region.
         speed_dict (Dict[str, float]): A dictionary containing speed information for tracked objects.
         total_crop_objects (int): Total number of cropped objects using ObjectCropper class.
-        speed (Dict): Performance timing information for tracking and solution processing.
+        speed (Dict[str, float]): Performance timing information for tracking and solution processing.
     """
 
     def __init__(self, **kwargs):
ultralytics/utils/__init__.py CHANGED
@@ -255,11 +255,8 @@ class DataExportMixin:
         Notes:
             Requires `lxml` package to be installed.
         """
-        from ultralytics.utils.checks import check_requirements
-
-        check_requirements("lxml")
         df = self.to_df(normalize=normalize, decimals=decimals)
-        return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml()
+        return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml(parser="etree")
 
     def to_html(self, normalize=False, decimals=5, index=False):
         """
ultralytics/utils/instance.py CHANGED
@@ -406,6 +406,8 @@ class Instances:
                 | (self.keypoints[..., 1] < 0)
                 | (self.keypoints[..., 1] > h)
             ] = 0.0
+            self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w)
+            self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
 
     def remove_zero_area_boxes(self):
         """