PyPI - dgenerate-ultralytics-headless - Versions diffs - 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl - Mend

dgenerate-ultralytics-headless 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (148)

{dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/METADATA +1 -1
dgenerate_ultralytics_headless-8.3.144.dist-info/RECORD +272 -0
tests/conftest.py +7 -24
tests/test_cli.py +1 -1
tests/test_cuda.py +7 -2
tests/test_engine.py +7 -8
tests/test_exports.py +16 -16
tests/test_integrations.py +1 -1
tests/test_solutions.py +11 -11
ultralytics/__init__.py +1 -1
ultralytics/cfg/__init__.py +16 -13
ultralytics/data/annotator.py +6 -5
ultralytics/data/augment.py +127 -126
ultralytics/data/base.py +54 -51
ultralytics/data/build.py +47 -23
ultralytics/data/converter.py +47 -43
ultralytics/data/dataset.py +51 -50
ultralytics/data/loaders.py +77 -44
ultralytics/data/split.py +22 -9
ultralytics/data/split_dota.py +63 -39
ultralytics/data/utils.py +59 -39
ultralytics/engine/exporter.py +79 -27
ultralytics/engine/model.py +39 -39
ultralytics/engine/predictor.py +37 -28
ultralytics/engine/results.py +187 -157
ultralytics/engine/trainer.py +36 -19
ultralytics/engine/tuner.py +12 -9
ultralytics/engine/validator.py +7 -9
ultralytics/hub/__init__.py +11 -13
ultralytics/hub/auth.py +22 -2
ultralytics/hub/google/__init__.py +19 -19
ultralytics/hub/session.py +37 -51
ultralytics/hub/utils.py +19 -5
ultralytics/models/fastsam/model.py +30 -12
ultralytics/models/fastsam/predict.py +5 -6
ultralytics/models/fastsam/utils.py +3 -3
ultralytics/models/fastsam/val.py +10 -6
ultralytics/models/nas/model.py +9 -5
ultralytics/models/nas/predict.py +6 -6
ultralytics/models/nas/val.py +3 -3
ultralytics/models/rtdetr/model.py +7 -6
ultralytics/models/rtdetr/predict.py +14 -7
ultralytics/models/rtdetr/train.py +10 -4
ultralytics/models/rtdetr/val.py +36 -9
ultralytics/models/sam/amg.py +30 -12
ultralytics/models/sam/build.py +22 -22
ultralytics/models/sam/model.py +10 -9
ultralytics/models/sam/modules/blocks.py +76 -80
ultralytics/models/sam/modules/decoders.py +6 -8
ultralytics/models/sam/modules/encoders.py +23 -26
ultralytics/models/sam/modules/memory_attention.py +13 -1
ultralytics/models/sam/modules/sam.py +57 -26
ultralytics/models/sam/modules/tiny_encoder.py +232 -237
ultralytics/models/sam/modules/transformer.py +13 -13
ultralytics/models/sam/modules/utils.py +11 -19
ultralytics/models/sam/predict.py +114 -101
ultralytics/models/utils/loss.py +98 -77
ultralytics/models/utils/ops.py +116 -67
ultralytics/models/yolo/classify/predict.py +5 -5
ultralytics/models/yolo/classify/train.py +32 -28
ultralytics/models/yolo/classify/val.py +7 -8
ultralytics/models/yolo/detect/predict.py +1 -0
ultralytics/models/yolo/detect/train.py +15 -14
ultralytics/models/yolo/detect/val.py +37 -36
ultralytics/models/yolo/model.py +106 -23
ultralytics/models/yolo/obb/predict.py +3 -4
ultralytics/models/yolo/obb/train.py +14 -6
ultralytics/models/yolo/obb/val.py +29 -23
ultralytics/models/yolo/pose/predict.py +9 -8
ultralytics/models/yolo/pose/train.py +24 -16
ultralytics/models/yolo/pose/val.py +44 -26
ultralytics/models/yolo/segment/predict.py +5 -5
ultralytics/models/yolo/segment/train.py +11 -7
ultralytics/models/yolo/segment/val.py +2 -2
ultralytics/models/yolo/world/train.py +33 -23
ultralytics/models/yolo/world/train_world.py +11 -3
ultralytics/models/yolo/yoloe/predict.py +11 -11
ultralytics/models/yolo/yoloe/train.py +73 -21
ultralytics/models/yolo/yoloe/train_seg.py +10 -7
ultralytics/models/yolo/yoloe/val.py +42 -18
ultralytics/nn/autobackend.py +59 -15
ultralytics/nn/modules/__init__.py +4 -4
ultralytics/nn/modules/activation.py +4 -1
ultralytics/nn/modules/block.py +178 -111
ultralytics/nn/modules/conv.py +6 -5
ultralytics/nn/modules/head.py +469 -121
ultralytics/nn/modules/transformer.py +147 -58
ultralytics/nn/tasks.py +227 -20
ultralytics/nn/text_model.py +30 -33
ultralytics/solutions/ai_gym.py +1 -1
ultralytics/solutions/analytics.py +7 -4
ultralytics/solutions/config.py +10 -10
ultralytics/solutions/distance_calculation.py +11 -10
ultralytics/solutions/heatmap.py +1 -1
ultralytics/solutions/instance_segmentation.py +6 -3
ultralytics/solutions/object_blurrer.py +3 -3
ultralytics/solutions/object_counter.py +15 -7
ultralytics/solutions/object_cropper.py +3 -2
ultralytics/solutions/parking_management.py +29 -28
ultralytics/solutions/queue_management.py +6 -6
ultralytics/solutions/region_counter.py +10 -3
ultralytics/solutions/security_alarm.py +3 -3
ultralytics/solutions/similarity_search.py +85 -24
ultralytics/solutions/solutions.py +184 -75
ultralytics/solutions/speed_estimation.py +28 -22
ultralytics/solutions/streamlit_inference.py +17 -12
ultralytics/solutions/trackzone.py +4 -4
ultralytics/trackers/basetrack.py +16 -23
ultralytics/trackers/bot_sort.py +30 -20
ultralytics/trackers/byte_tracker.py +70 -64
ultralytics/trackers/track.py +4 -8
ultralytics/trackers/utils/gmc.py +31 -58
ultralytics/trackers/utils/kalman_filter.py +37 -37
ultralytics/trackers/utils/matching.py +1 -1
ultralytics/utils/__init__.py +105 -89
ultralytics/utils/autobatch.py +16 -3
ultralytics/utils/autodevice.py +54 -24
ultralytics/utils/benchmarks.py +42 -28
ultralytics/utils/callbacks/base.py +3 -3
ultralytics/utils/callbacks/clearml.py +9 -9
ultralytics/utils/callbacks/comet.py +67 -25
ultralytics/utils/callbacks/dvc.py +7 -10
ultralytics/utils/callbacks/mlflow.py +2 -5
ultralytics/utils/callbacks/neptune.py +7 -13
ultralytics/utils/callbacks/raytune.py +1 -1
ultralytics/utils/callbacks/tensorboard.py +5 -6
ultralytics/utils/callbacks/wb.py +14 -14
ultralytics/utils/checks.py +14 -13
ultralytics/utils/dist.py +5 -5
ultralytics/utils/downloads.py +94 -67
ultralytics/utils/errors.py +5 -5
ultralytics/utils/export.py +61 -47
ultralytics/utils/files.py +23 -22
ultralytics/utils/instance.py +48 -52
ultralytics/utils/loss.py +78 -40
ultralytics/utils/metrics.py +186 -130
ultralytics/utils/ops.py +186 -190
ultralytics/utils/patches.py +15 -17
ultralytics/utils/plotting.py +71 -27
ultralytics/utils/tal.py +21 -15
ultralytics/utils/torch_utils.py +53 -50
ultralytics/utils/triton.py +5 -4
ultralytics/utils/tuner.py +5 -5
dgenerate_ultralytics_headless-8.3.143.dist-info/RECORD +0 -272
{dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/WHEEL +0 -0
{dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/entry_points.txt +0 -0
{dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/licenses/LICENSE +0 -0
{dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/top_level.txt +0 -0

ultralytics/data/base.py CHANGED Viewed

@@ -7,7 +7,7 @@ import random
 from copy import deepcopy
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
-from typing import Optional
+from typing import Any, Dict, List, Optional, Tuple, Union
 import cv2
 import numpy as np
@@ -32,6 +32,7 @@ class BaseDataset(Dataset):
         single_cls (bool): Whether to treat all objects as a single class.
         prefix (str): Prefix to print in log messages.
         fraction (float): Fraction of dataset to utilize.
+        channels (int): Number of channels in the images (1 for grayscale, 3 for RGB).
         cv2_flag (int): OpenCV flag for reading images.
         im_files (List[str]): List of image file paths.
         labels (List[Dict]): List of label data dictionaries.
@@ -48,6 +49,8 @@ class BaseDataset(Dataset):
         npy_files (List[Path]): List of numpy file paths.
         cache (str): Cache images to RAM or disk during training.
         transforms (callable): Image transformation function.
+        batch_shapes (np.ndarray): Batch shapes for rectangular training.
+        batch (np.ndarray): Batch index of each image.
     Methods:
         get_img_files: Read image files from the specified path.
@@ -66,39 +69,39 @@ class BaseDataset(Dataset):
     def __init__(
         self,
-        img_path,
-        imgsz=640,
-        cache=False,
-        augment=True,
-        hyp=DEFAULT_CFG,
-        prefix="",
-        rect=False,
-        batch_size=16,
-        stride=32,
-        pad=0.5,
-        single_cls=False,
-        classes=None,
-        fraction=1.0,
-        channels=3,
+        img_path: Union[str, List[str]],
+        imgsz: int = 640,
+        cache: Union[bool, str] = False,
+        augment: bool = True,
+        hyp: Dict[str, Any] = DEFAULT_CFG,
+        prefix: str = "",
+        rect: bool = False,
+        batch_size: int = 16,
+        stride: int = 32,
+        pad: float = 0.5,
+        single_cls: bool = False,
+        classes: Optional[List[int]] = None,
+        fraction: float = 1.0,
+        channels: int = 3,
     ):
         """
         Initialize BaseDataset with given configuration and options.
         Args:
-            img_path (str): Path to the folder containing images.
-            imgsz (int, optional): Image size for resizing.
-            cache (bool | str, optional): Cache images to RAM or disk during training.
-            augment (bool, optional): If True, data augmentation is applied.
-            hyp (dict, optional): Hyperparameters to apply data augmentation.
-            prefix (str, optional): Prefix to print in log messages.
-            rect (bool, optional): If True, rectangular training is used.
-            batch_size (int, optional): Size of batches.
-            stride (int, optional): Stride used in the model.
-            pad (float, optional): Padding value.
-            single_cls (bool, optional): If True, single class training is used.
-            classes (list, optional): List of included classes.
-            fraction (float, optional): Fraction of dataset to utilize.
-            channels (int, optional): Number of channels in the images (1 for grayscale, 3 for RGB).
+            img_path (str | List[str]): Path to the folder containing images or list of image paths.
+            imgsz (int): Image size for resizing.
+            cache (bool | str): Cache images to RAM or disk during training.
+            augment (bool): If True, data augmentation is applied.
+            hyp (Dict[str, Any]): Hyperparameters to apply data augmentation.
+            prefix (str): Prefix to print in log messages.
+            rect (bool): If True, rectangular training is used.
+            batch_size (int): Size of batches.
+            stride (int): Stride used in the model.
+            pad (float): Padding value.
+            single_cls (bool): If True, single class training is used.
+            classes (List[int], optional): List of included classes.
+            fraction (float): Fraction of dataset to utilize.
+            channels (int): Number of channels in the images (1 for grayscale, 3 for RGB).
         """
         super().__init__()
         self.img_path = img_path
@@ -142,7 +145,7 @@ class BaseDataset(Dataset):
         # Transforms
         self.transforms = self.build_transforms(hyp=hyp)
-    def get_img_files(self, img_path):
+    def get_img_files(self, img_path: Union[str, List[str]]) -> List[str]:
         """
         Read image files from the specified path.
@@ -180,12 +183,12 @@ class BaseDataset(Dataset):
         check_file_speeds(im_files, prefix=self.prefix)  # check image read speeds
         return im_files
-    def update_labels(self, include_class: Optional[list]):
+    def update_labels(self, include_class: Optional[List[int]]) -> None:
         """
         Update labels to include only specified classes.
         Args:
-            include_class (list, optional): List of classes to include. If None, all classes are included.
+            include_class (List[int], optional): List of classes to include. If None, all classes are included.
         """
         include_class_array = np.array(include_class).reshape(1, -1)
         for i in range(len(self.labels)):
@@ -204,18 +207,18 @@ class BaseDataset(Dataset):
             if self.single_cls:
                 self.labels[i]["cls"][:, 0] = 0
-    def load_image(self, i, rect_mode=True):
+    def load_image(self, i: int, rect_mode: bool = True) -> Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]]:
         """
         Load an image from dataset index 'i'.
         Args:
             i (int): Index of the image to load.
-            rect_mode (bool, optional): Whether to use rectangular resizing.
+            rect_mode (bool): Whether to use rectangular resizing.
         Returns:
-            (np.ndarray): Loaded image as a NumPy array.
-            (Tuple[int, int]): Original image dimensions in (height, width) format.
-            (Tuple[int, int]): Resized image dimensions in (height, width) format.
+            im (np.ndarray): Loaded image as a NumPy array.
+            hw_original (Tuple[int, int]): Original image dimensions in (height, width) format.
+            hw_resized (Tuple[int, int]): Resized image dimensions in (height, width) format.
         Raises:
             FileNotFoundError: If the image file is not found.
@@ -258,7 +261,7 @@ class BaseDataset(Dataset):
         return self.ims[i], self.im_hw0[i], self.im_hw[i]
-    def cache_images(self):
+    def cache_images(self) -> None:
         """Cache images to memory or disk for faster training."""
         b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
         fcn, storage = (self.cache_images_to_disk, "Disk") if self.cache == "disk" else (self.load_image, "RAM")
@@ -274,18 +277,18 @@ class BaseDataset(Dataset):
                 pbar.desc = f"{self.prefix}Caching images ({b / gb:.1f}GB {storage})"
             pbar.close()
-    def cache_images_to_disk(self, i):
+    def cache_images_to_disk(self, i: int) -> None:
         """Save an image as an *.npy file for faster loading."""
         f = self.npy_files[i]
         if not f.exists():
             np.save(f.as_posix(), imread(self.im_files[i]), allow_pickle=False)
-    def check_cache_disk(self, safety_margin=0.5):
+    def check_cache_disk(self, safety_margin: float = 0.5) -> bool:
         """
         Check if there's enough disk space for caching images.
         Args:
-            safety_margin (float, optional): Safety margin factor for disk space calculation.
+            safety_margin (float): Safety margin factor for disk space calculation.
         Returns:
             (bool): True if there's enough disk space, False otherwise.
@@ -316,12 +319,12 @@ class BaseDataset(Dataset):
             return False
         return True
-    def check_cache_ram(self, safety_margin=0.5):
+    def check_cache_ram(self, safety_margin: float = 0.5) -> bool:
         """
         Check if there's enough RAM for caching images.
         Args:
-            safety_margin (float, optional): Safety margin factor for RAM calculation.
+            safety_margin (float): Safety margin factor for RAM calculation.
         Returns:
             (bool): True if there's enough RAM, False otherwise.
@@ -346,7 +349,7 @@ class BaseDataset(Dataset):
             return False
         return True
-    def set_rectangle(self):
+    def set_rectangle(self) -> None:
         """Set the shape of bounding boxes for YOLO detections as rectangles."""
         bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int)  # batch index
         nb = bi[-1] + 1  # number of batches
@@ -371,11 +374,11 @@ class BaseDataset(Dataset):
         self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
         self.batch = bi  # batch index of image
-    def __getitem__(self, index):
+    def __getitem__(self, index: int) -> Dict[str, Any]:
         """Return transformed label information for given index."""
         return self.transforms(self.get_image_and_label(index))
-    def get_image_and_label(self, index):
+    def get_image_and_label(self, index: int) -> Dict[str, Any]:
         """
         Get and return label information from the dataset.
@@ -383,7 +386,7 @@ class BaseDataset(Dataset):
             index (int): Index of the image to retrieve.
         Returns:
-            (dict): Label dictionary with image and metadata.
+            (Dict[str, Any]): Label dictionary with image and metadata.
         """
         label = deepcopy(self.labels[index])  # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
         label.pop("shape", None)  # shape is for rect, remove it
@@ -396,15 +399,15 @@ class BaseDataset(Dataset):
             label["rect_shape"] = self.batch_shapes[self.batch[index]]
         return self.update_labels_info(label)
-    def __len__(self):
+    def __len__(self) -> int:
         """Return the length of the labels list for the dataset."""
         return len(self.labels)
-    def update_labels_info(self, label):
+    def update_labels_info(self, label: Dict[str, Any]) -> Dict[str, Any]:
         """Custom your label format here."""
         return label
-    def build_transforms(self, hyp=None):
+    def build_transforms(self, hyp: Optional[Dict[str, Any]] = None):
         """
         Users can customize augmentations here.
@@ -418,7 +421,7 @@ class BaseDataset(Dataset):
         """
         raise NotImplementedError
-    def get_labels(self):
+    def get_labels(self) -> List[Dict[str, Any]]:
         """
         Users can customize their own format here.

ultralytics/data/build.py CHANGED Viewed

@@ -3,6 +3,7 @@
 import os
 import random
 from pathlib import Path
+from typing import Any, Iterator
 import numpy as np
 import torch
@@ -27,33 +28,40 @@ from ultralytics.utils.checks import check_file
 class InfiniteDataLoader(dataloader.DataLoader):
     """
-    Dataloader that reuses workers.
+    Dataloader that reuses workers for infinite iteration.
     This dataloader extends the PyTorch DataLoader to provide infinite recycling of workers, which improves efficiency
-    for training loops that need to iterate through the dataset multiple times.
+    for training loops that need to iterate through the dataset multiple times without recreating workers.
     Attributes:
         batch_sampler (_RepeatSampler): A sampler that repeats indefinitely.
         iterator (Iterator): The iterator from the parent DataLoader.
     Methods:
-        __len__: Returns the length of the batch sampler's sampler.
-        __iter__: Creates a sampler that repeats indefinitely.
-        __del__: Ensures workers are properly terminated.
-        reset: Resets the iterator, useful when modifying dataset settings during training.
+        __len__: Return the length of the batch sampler's sampler.
+        __iter__: Create a sampler that repeats indefinitely.
+        __del__: Ensure workers are properly terminated.
+        reset: Reset the iterator, useful when modifying dataset settings during training.
+    Examples:
+        Create an infinite dataloader for training
+        >>> dataset = YOLODataset(...)
+        >>> dataloader = InfiniteDataLoader(dataset, batch_size=16, shuffle=True)
+        >>> for batch in dataloader:  # Infinite iteration
+        >>>     train_step(batch)
     """
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args: Any, **kwargs: Any):
         """Initialize the InfiniteDataLoader with the same arguments as DataLoader."""
         super().__init__(*args, **kwargs)
         object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
         self.iterator = super().__iter__()
-    def __len__(self):
+    def __len__(self) -> int:
         """Return the length of the batch sampler's sampler."""
         return len(self.batch_sampler.sampler)
-    def __iter__(self):
+    def __iter__(self) -> Iterator:
         """Create an iterator that yields indefinitely from the underlying iterator."""
         for _ in range(len(self)):
             yield next(self.iterator)
@@ -77,26 +85,26 @@ class InfiniteDataLoader(dataloader.DataLoader):
 class _RepeatSampler:
     """
-    Sampler that repeats forever.
+    Sampler that repeats forever for infinite iteration.
     This sampler wraps another sampler and yields its contents indefinitely, allowing for infinite iteration
-    over a dataset.
+    over a dataset without recreating the sampler.
     Attributes:
         sampler (Dataset.sampler): The sampler to repeat.
     """
-    def __init__(self, sampler):
+    def __init__(self, sampler: Any):
         """Initialize the _RepeatSampler with a sampler to repeat indefinitely."""
         self.sampler = sampler
-    def __iter__(self):
+    def __iter__(self) -> Iterator:
         """Iterate over the sampler indefinitely, yielding its contents."""
         while True:
             yield from iter(self.sampler)
-def seed_worker(worker_id):  # noqa
+def seed_worker(worker_id: int):  # noqa
     """Set dataloader worker seed for reproducibility across worker processes."""
     worker_seed = torch.initial_seed() % 2**32
     np.random.seed(worker_seed)
@@ -146,7 +154,7 @@ def build_grounding(cfg, img_path, json_file, batch, mode="train", rect=False, s
     )
-def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
+def build_dataloader(dataset, batch: int, workers: int, shuffle: bool = True, rank: int = -1):
     """
     Create and return an InfiniteDataLoader or DataLoader for training or validation.
@@ -154,11 +162,16 @@ def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
         dataset (Dataset): Dataset to load data from.
         batch (int): Batch size for the dataloader.
         workers (int): Number of worker threads for loading data.
-        shuffle (bool): Whether to shuffle the dataset.
-        rank (int): Process rank in distributed training. -1 for single-GPU training.
+        shuffle (bool, optional): Whether to shuffle the dataset.
+        rank (int, optional): Process rank in distributed training. -1 for single-GPU training.
     Returns:
         (InfiniteDataLoader): A dataloader that can be used for training or validation.
+    Examples:
+        Create a dataloader for training
+        >>> dataset = YOLODataset(...)
+        >>> dataloader = build_dataloader(dataset, batch=16, workers=4, shuffle=True)
     """
     batch = min(batch, len(dataset))
     nd = torch.cuda.device_count()  # number of CUDA devices
@@ -184,18 +197,22 @@ def check_source(source):
     Check the type of input source and return corresponding flag values.
     Args:
-        source (str | int | Path | List | Tuple | np.ndarray | PIL.Image | torch.Tensor): The input source to check.
+        source (str | int | Path | list | tuple | np.ndarray | PIL.Image | torch.Tensor): The input source to check.
     Returns:
-        source (str | int | Path | List | Tuple | np.ndarray | PIL.Image | torch.Tensor): The processed source.
+        source (str | int | Path | list | tuple | np.ndarray | PIL.Image | torch.Tensor): The processed source.
         webcam (bool): Whether the source is a webcam.
         screenshot (bool): Whether the source is a screenshot.
         from_img (bool): Whether the source is an image or list of images.
         in_memory (bool): Whether the source is an in-memory object.
         tensor (bool): Whether the source is a torch.Tensor.
-    Raises:
-        TypeError: If the source type is unsupported.
+    Examples:
+        Check a file path source
+        >>> source, webcam, screenshot, from_img, in_memory, tensor = check_source("image.jpg")
+        Check a webcam source
+        >>> source, webcam, screenshot, from_img, in_memory, tensor = check_source(0)
     """
     webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
     if isinstance(source, (str, int, Path)):  # int for local usb camera
@@ -222,7 +239,7 @@ def check_source(source):
     return source, webcam, screenshot, from_img, in_memory, tensor
-def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False, channels=3):
+def load_inference_source(source=None, batch: int = 1, vid_stride: int = 1, buffer: bool = False, channels: int = 3):
     """
     Load an inference source for object detection and apply necessary transformations.
@@ -231,10 +248,17 @@ def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False, chan
         batch (int, optional): Batch size for dataloaders.
         vid_stride (int, optional): The frame interval for video sources.
         buffer (bool, optional): Whether stream frames will be buffered.
-        channels (int): The number of input channels for the model.
+        channels (int, optional): The number of input channels for the model.
     Returns:
         (Dataset): A dataset object for the specified input source with attached source_type attribute.
+    Examples:
+        Load an image source for inference
+        >>> dataset = load_inference_source("image.jpg", batch=1)
+        Load a video stream source
+        >>> dataset = load_inference_source("rtsp://example.com/stream", vid_stride=2)
     """
     source, stream, screenshot, from_img, in_memory, tensor = check_source(source)
     source_type = source.source_type if in_memory else SourceTypes(stream, screenshot, from_img, tensor)

ultralytics/data/converter.py CHANGED Viewed

@@ -6,6 +6,7 @@ import shutil
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
+from typing import List, Optional, Union
 import cv2
 import numpy as np
@@ -16,13 +17,13 @@ from ultralytics.utils.downloads import download, zip_directory
 from ultralytics.utils.files import increment_path
-def coco91_to_coco80_class():
+def coco91_to_coco80_class() -> List[int]:
     """
-    Converts 91-index COCO class IDs to 80-index COCO class IDs.
+    Convert 91-index COCO class IDs to 80-index COCO class IDs.
     Returns:
-        (list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
-            corresponding 91-index class ID.
+        (List[int]): A list of 91 class IDs where the index represents the 80-index class ID and the value
+            is the corresponding 91-index class ID.
     """
     return [
         0,
@@ -119,10 +120,15 @@ def coco91_to_coco80_class():
     ]
-def coco80_to_coco91_class():
+def coco80_to_coco91_class() -> List[int]:
     r"""
-    Converts 80-index (val2014) to 91-index (paper).
-    For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.
+    Convert 80-index (val2014) to 91-index (paper).
+    Returns:
+        (List[int]): A list of 80 class IDs where each value is the corresponding 91-index class ID.
+    References:
+        https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
     Examples:
         >>> import numpy as np
@@ -220,15 +226,15 @@ def coco80_to_coco91_class():
 def convert_coco(
-    labels_dir="../coco/annotations/",
-    save_dir="coco_converted/",
-    use_segments=False,
-    use_keypoints=False,
-    cls91to80=True,
-    lvis=False,
+    labels_dir: str = "../coco/annotations/",
+    save_dir: str = "coco_converted/",
+    use_segments: bool = False,
+    use_keypoints: bool = False,
+    cls91to80: bool = True,
+    lvis: bool = False,
 ):
     """
-    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
+    Convert COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
     Args:
         labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -246,15 +252,8 @@ def convert_coco(
         Convert LVIS annotations to YOLO format
         >>> convert_coco(
-        >>>    "../datasets/lvis/annotations/",
-        ...     use_segments=True,
-        ...     use_keypoints=False,
-        ...     cls91to80=False,
-        ...     lvis=True
+        ...     "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
         ... )
-    Output:
-        Generates output files in the specified output directory.
     """
     # Create dataset directory
     save_dir = increment_path(save_dir)  # increment if save directory already exists
@@ -347,12 +346,12 @@ def convert_coco(
     LOGGER.info(f"{'LVIS' if lvis else 'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")
-def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
+def convert_segment_masks_to_yolo_seg(masks_dir: str, output_dir: str, classes: int):
     """
-    Converts a dataset of segmentation mask images to the YOLO segmentation format.
+    Convert a dataset of segmentation mask images to the YOLO segmentation format.
-    This function takes the directory containing the binary format mask images and converts them into YOLO segmentation format.
-    The converted masks are saved in the specified output directory.
+    This function takes the directory containing the binary format mask images and converts them into YOLO segmentation
+    format. The converted masks are saved in the specified output directory.
     Args:
         masks_dir (str): The path to the directory where all mask images (png, jpg) are stored.
@@ -425,7 +424,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
 def convert_dota_to_yolo_obb(dota_root_path: str):
     """
-    Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.
+    Convert DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.
     The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the
     associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory.
@@ -479,8 +478,8 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
         "helipad": 17,
     }
-    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
-        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
+    def convert_label(image_name: str, image_width: int, image_height: int, orig_label_dir: Path, save_dir: Path):
+        """Convert a single image's DOTA annotation to YOLO OBB format and save it to a specified directory."""
         orig_label_path = orig_label_dir / f"{image_name}.txt"
         save_path = save_dir / f"{image_name}.txt"
@@ -516,7 +515,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
             convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)
-def min_index(arr1, arr2):
+def min_index(arr1: np.ndarray, arr2: np.ndarray):
     """
     Find a pair of indexes with the shortest distance between two arrays of 2D points.
@@ -525,15 +524,17 @@ def min_index(arr1, arr2):
         arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.
     Returns:
-        (tuple): A tuple containing the indexes of the points with the shortest distance in arr1 and arr2 respectively.
+        idx1 (int): Index of the point in arr1 with the shortest distance.
+        idx2 (int): Index of the point in arr2 with the shortest distance.
     """
     dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
     return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
-def merge_multi_segment(segments):
+def merge_multi_segment(segments: List[List]):
     """
     Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment.
     This function connects these coordinates with a thin line to merge all segments into one.
     Args:
@@ -581,17 +582,19 @@ def merge_multi_segment(segments):
     return s
-def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
+def yolo_bbox2segment(
+    im_dir: Union[str, Path], save_dir: Optional[Union[str, Path]] = None, sam_model: str = "sam_b.pt", device=None
+):
     """
-    Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
-    in YOLO format. Generates segmentation data using SAM auto-annotator as needed.
+    Convert existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB) in
+    YOLO format. Generate segmentation data using SAM auto-annotator as needed.
     Args:
         im_dir (str | Path): Path to image directory to convert.
-        save_dir (str | Path): Path to save the generated labels, labels will be saved
+        save_dir (str | Path, optional): Path to save the generated labels, labels will be saved
             into `labels-segment` in the same directory level of `im_dir` if save_dir is None.
         sam_model (str): Segmentation model to use for intermediate segmentation data.
-        device (int | str): The specific device to run SAM models.
+        device (int | str, optional): The specific device to run SAM models.
     Notes:
         The input directory structure assumed for dataset:
@@ -647,7 +650,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
 def create_synthetic_coco_dataset():
     """
-    Creates a synthetic COCO dataset with random images based on filenames from label lists.
+    Create a synthetic COCO dataset with random images based on filenames from label lists.
     This function downloads COCO labels, reads image filenames from label list files,
     creates synthetic images for train2017 and val2017 subsets, and organizes
@@ -664,8 +667,8 @@ def create_synthetic_coco_dataset():
         - Reads image filenames from train2017.txt and val2017.txt files.
     """
-    def create_synthetic_image(image_file):
-        """Generates synthetic images with random sizes and colors for dataset augmentation or testing purposes."""
+    def create_synthetic_image(image_file: Path):
+        """Generate synthetic images with random sizes and colors for dataset augmentation or testing purposes."""
         if not image_file.exists():
             size = (random.randint(480, 640), random.randint(480, 640))
             Image.new(
@@ -703,7 +706,7 @@ def create_synthetic_coco_dataset():
     LOGGER.info("Synthetic COCO dataset created successfully.")
-def convert_to_multispectral(path, n_channels=10, replace=False, zip=False):
+def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, replace: bool = False, zip: bool = False):
     """
     Convert RGB images to multispectral images by interpolating across wavelength bands.
@@ -717,9 +720,10 @@ def convert_to_multispectral(path, n_channels=10, replace=False, zip=False):
         zip (bool): Whether to zip the converted images into a zip file.
     Examples:
-        >>> # Convert a single image
+        Convert a single image
         >>> convert_to_multispectral("path/to/image.jpg", n_channels=10)
-        >>> # Convert a dataset
+        Convert a dataset
         >>> convert_to_multispectral("../datasets/coco8", n_channels=10)
     """
     from scipy.interpolate import interp1d

dgenerate-ultralytics-headless 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl

dgenerate-ultralytics-headless 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl