dgenerate-ultralytics-headless 8.3.194-py3-none-any.whl → 8.3.196-py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (107)
  1. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/METADATA +1 -2
  2. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/RECORD +107 -106
  3. tests/test_python.py +1 -1
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +9 -8
  6. ultralytics/cfg/default.yaml +1 -0
  7. ultralytics/data/annotator.py +1 -1
  8. ultralytics/data/augment.py +76 -76
  9. ultralytics/data/base.py +12 -12
  10. ultralytics/data/build.py +5 -1
  11. ultralytics/data/converter.py +4 -4
  12. ultralytics/data/dataset.py +7 -7
  13. ultralytics/data/loaders.py +15 -15
  14. ultralytics/data/split_dota.py +10 -10
  15. ultralytics/data/utils.py +12 -12
  16. ultralytics/engine/exporter.py +19 -31
  17. ultralytics/engine/model.py +13 -13
  18. ultralytics/engine/predictor.py +16 -14
  19. ultralytics/engine/results.py +21 -21
  20. ultralytics/engine/trainer.py +15 -4
  21. ultralytics/engine/validator.py +6 -2
  22. ultralytics/hub/google/__init__.py +2 -2
  23. ultralytics/hub/session.py +7 -7
  24. ultralytics/models/fastsam/model.py +5 -5
  25. ultralytics/models/fastsam/predict.py +11 -11
  26. ultralytics/models/nas/model.py +1 -1
  27. ultralytics/models/rtdetr/predict.py +2 -2
  28. ultralytics/models/rtdetr/val.py +4 -4
  29. ultralytics/models/sam/amg.py +6 -6
  30. ultralytics/models/sam/build.py +9 -9
  31. ultralytics/models/sam/model.py +7 -7
  32. ultralytics/models/sam/modules/blocks.py +6 -6
  33. ultralytics/models/sam/modules/decoders.py +1 -1
  34. ultralytics/models/sam/modules/encoders.py +27 -27
  35. ultralytics/models/sam/modules/sam.py +4 -4
  36. ultralytics/models/sam/modules/tiny_encoder.py +18 -18
  37. ultralytics/models/sam/modules/utils.py +8 -8
  38. ultralytics/models/sam/predict.py +63 -63
  39. ultralytics/models/utils/loss.py +22 -22
  40. ultralytics/models/utils/ops.py +8 -8
  41. ultralytics/models/yolo/classify/predict.py +2 -2
  42. ultralytics/models/yolo/classify/train.py +9 -19
  43. ultralytics/models/yolo/classify/val.py +4 -4
  44. ultralytics/models/yolo/detect/predict.py +3 -3
  45. ultralytics/models/yolo/detect/train.py +38 -12
  46. ultralytics/models/yolo/detect/val.py +38 -37
  47. ultralytics/models/yolo/model.py +6 -6
  48. ultralytics/models/yolo/obb/train.py +1 -10
  49. ultralytics/models/yolo/obb/val.py +13 -13
  50. ultralytics/models/yolo/pose/train.py +1 -9
  51. ultralytics/models/yolo/pose/val.py +12 -12
  52. ultralytics/models/yolo/segment/predict.py +4 -4
  53. ultralytics/models/yolo/segment/train.py +2 -10
  54. ultralytics/models/yolo/segment/val.py +15 -15
  55. ultralytics/models/yolo/world/train.py +13 -13
  56. ultralytics/models/yolo/world/train_world.py +3 -3
  57. ultralytics/models/yolo/yoloe/predict.py +4 -4
  58. ultralytics/models/yolo/yoloe/train.py +7 -16
  59. ultralytics/models/yolo/yoloe/val.py +0 -7
  60. ultralytics/nn/autobackend.py +2 -2
  61. ultralytics/nn/modules/block.py +6 -6
  62. ultralytics/nn/modules/conv.py +2 -2
  63. ultralytics/nn/modules/head.py +6 -5
  64. ultralytics/nn/tasks.py +17 -15
  65. ultralytics/nn/text_model.py +3 -3
  66. ultralytics/solutions/ai_gym.py +2 -2
  67. ultralytics/solutions/analytics.py +3 -3
  68. ultralytics/solutions/config.py +5 -5
  69. ultralytics/solutions/distance_calculation.py +2 -2
  70. ultralytics/solutions/heatmap.py +1 -1
  71. ultralytics/solutions/instance_segmentation.py +4 -4
  72. ultralytics/solutions/object_counter.py +4 -4
  73. ultralytics/solutions/parking_management.py +7 -7
  74. ultralytics/solutions/queue_management.py +3 -3
  75. ultralytics/solutions/region_counter.py +4 -4
  76. ultralytics/solutions/similarity_search.py +2 -2
  77. ultralytics/solutions/solutions.py +48 -48
  78. ultralytics/solutions/streamlit_inference.py +1 -1
  79. ultralytics/solutions/trackzone.py +4 -4
  80. ultralytics/solutions/vision_eye.py +1 -1
  81. ultralytics/trackers/byte_tracker.py +11 -11
  82. ultralytics/trackers/utils/gmc.py +3 -3
  83. ultralytics/trackers/utils/matching.py +5 -5
  84. ultralytics/utils/__init__.py +30 -19
  85. ultralytics/utils/autodevice.py +2 -2
  86. ultralytics/utils/benchmarks.py +10 -10
  87. ultralytics/utils/callbacks/clearml.py +1 -1
  88. ultralytics/utils/callbacks/comet.py +5 -5
  89. ultralytics/utils/callbacks/tensorboard.py +2 -2
  90. ultralytics/utils/checks.py +7 -5
  91. ultralytics/utils/cpu.py +90 -0
  92. ultralytics/utils/dist.py +1 -1
  93. ultralytics/utils/downloads.py +2 -2
  94. ultralytics/utils/export.py +5 -5
  95. ultralytics/utils/instance.py +2 -2
  96. ultralytics/utils/loss.py +14 -8
  97. ultralytics/utils/metrics.py +35 -35
  98. ultralytics/utils/nms.py +4 -4
  99. ultralytics/utils/ops.py +1 -1
  100. ultralytics/utils/patches.py +2 -2
  101. ultralytics/utils/plotting.py +10 -9
  102. ultralytics/utils/torch_utils.py +113 -15
  103. ultralytics/utils/triton.py +5 -5
  104. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/WHEEL +0 -0
  105. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/entry_points.txt +0 -0
  106. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/licenses/LICENSE +0 -0
  107. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/top_level.txt +0 -0
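Nearly every hunk below is a docstring type-annotation update: the typing-module generics `List`, `Dict`, and `Tuple` are replaced with the built-in generics (`list`, `dict`, `tuple`) standardized by PEP 585 and usable since Python 3.9. A minimal before/after sketch of the convention (illustrative function, not taken from the package):

    from typing import Dict, List


    def old_style(points: List[int]) -> Dict[str, int]:
        """Count points.

        Args:
            points (List[int]): Input values.  # pre-PEP 585 docstring style
        """
        return {"count": len(points)}


    def new_style(points: list[int]) -> dict[str, int]:
        """Count points.

        Args:
            points (list[int]): Input values.  # built-in generics, Python >= 3.9
        """
        return {"count": len(points)}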
ultralytics/models/sam/predict.py
@@ -51,7 +51,7 @@ class Predictor(BasePredictor):
  device (torch.device): The device (CPU or GPU) on which the model is loaded.
  im (torch.Tensor): The preprocessed input image.
  features (torch.Tensor): Extracted image features.
- prompts (Dict[str, Any]): Dictionary to store various types of prompts (e.g., bboxes, points, masks).
+ prompts (dict[str, Any]): Dictionary to store various types of prompts (e.g., bboxes, points, masks).
  segment_all (bool): Flag to indicate if full image segmentation should be performed.
  mean (torch.Tensor): Mean values for image normalization.
  std (torch.Tensor): Standard deviation values for image normalization.
@@ -116,7 +116,7 @@ class Predictor(BasePredictor):
  torch.Tensor and list of np.ndarray as input formats.

  Args:
- im (torch.Tensor | List[np.ndarray]): Input image(s) in BCHW tensor format or list of HWC numpy arrays.
+ im (torch.Tensor | list[np.ndarray]): Input image(s) in BCHW tensor format or list of HWC numpy arrays.

  Returns:
  (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype.
@@ -149,10 +149,10 @@ class Predictor(BasePredictor):
  Currently, batched inference is not supported; hence the list length should be 1.

  Args:
- im (List[np.ndarray]): List containing a single image in HWC numpy array format.
+ im (list[np.ndarray]): List containing a single image in HWC numpy array format.

  Returns:
- (List[np.ndarray]): List containing the transformed image.
+ (list[np.ndarray]): List containing the transformed image.

  Raises:
  AssertionError: If the input list contains more than one image.
@@ -177,9 +177,9 @@ class Predictor(BasePredictor):

  Args:
  im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
- bboxes (np.ndarray | List | None): Bounding boxes with shape (N, 4), in XYXY format.
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2), in pixels.
- labels (np.ndarray | List | None): Labels for point prompts, shape (N,). 1 = foreground, 0 = background.
+ bboxes (np.ndarray | list | None): Bounding boxes with shape (N, 4), in XYXY format.
+ points (np.ndarray | list | None): Points indicating object locations with shape (N, 2), in pixels.
+ labels (np.ndarray | list | None): Labels for point prompts, shape (N,). 1 = foreground, 0 = background.
  masks (np.ndarray | None): Low-resolution masks from previous predictions, shape (N, H, W). For SAM H=W=256.
  multimask_output (bool): Flag to return multiple masks. Helpful for ambiguous prompts.
  *args (Any): Additional positional arguments.
@@ -215,9 +215,9 @@ class Predictor(BasePredictor):

  Args:
  im (torch.Tensor): Preprocessed input image tensor with shape (N, C, H, W).
- bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
+ bboxes (np.ndarray | list | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | list | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | list | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
  masks (np.ndarray | None): Low-res masks from previous predictions with shape (N, H, W). For SAM, H=W=256.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.

@@ -250,10 +250,10 @@ class Predictor(BasePredictor):

  Args:
  features (torch.Tensor): Extracted image features with shape (B, C, H, W) from the SAM model image encoder.
- bboxes (np.ndarray | List[List[float]] | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List[List[float]] | None): Object location points with shape (N, 2), in pixels.
- labels (np.ndarray | List[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
- masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+ bboxes (np.ndarray | list[list[float]] | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | list[list[float]] | None): Object location points with shape (N, 2), in pixels.
+ labels (np.ndarray | list[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
+ masks (list[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.

  Returns:
@@ -282,12 +282,12 @@ class Predictor(BasePredictor):
  Prepare and transform the input prompts for processing based on the destination shape.

  Args:
- dst_shape (Tuple[int, int]): The target shape (height, width) for the prompts.
- src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
- bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
- masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array with shape (H, W).
+ dst_shape (tuple[int, int]): The target shape (height, width) for the prompts.
+ src_shape (tuple[int, int]): The source shape (height, width) of the input image.
+ bboxes (np.ndarray | list | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | list | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | list | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
+ masks (list[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array with shape (H, W).

  Returns:
  bboxes (torch.Tensor | None): Transformed bounding boxes.
@@ -351,7 +351,7 @@ class Predictor(BasePredictor):
  crop_n_layers (int): Number of layers for additional mask predictions on image crops.
  crop_overlap_ratio (float): Overlap between crops, scaled down in subsequent layers.
  crop_downscale_factor (int): Scaling factor for sampled points-per-side in each layer.
- point_grids (List[np.ndarray] | None): Custom grids for point sampling normalized to [0,1].
+ point_grids (list[np.ndarray] | None): Custom grids for point sampling normalized to [0,1].
  points_stride (int): Number of points to sample along each side of the image.
  points_batch_size (int): Batch size for the number of points processed simultaneously.
  conf_thres (float): Confidence threshold [0,1] for filtering based on mask quality prediction.
@@ -490,10 +490,10 @@ class Predictor(BasePredictor):
  - pred_scores (torch.Tensor): Confidence scores for each mask with shape (N, 1).
  - pred_bboxes (torch.Tensor, optional): Predicted bounding boxes if segment_all is True.
  img (torch.Tensor): The processed input image tensor with shape (C, H, W).
- orig_imgs (List[np.ndarray] | torch.Tensor): The original, unprocessed images.
+ orig_imgs (list[np.ndarray] | torch.Tensor): The original, unprocessed images.

  Returns:
- (List[Results]): List of Results objects containing detection masks, bounding boxes, and other
+ (list[Results]): List of Results objects containing detection masks, bounding boxes, and other
  metadata for each processed image.

  Examples:
@@ -623,7 +623,7 @@ class Predictor(BasePredictor):

  Returns:
  new_masks (torch.Tensor): Processed masks with small regions removed, shape (N, H, W).
- keep (List[int]): Indices of remaining masks after NMS, for filtering corresponding boxes.
+ keep (list[int]): Indices of remaining masks after NMS, for filtering corresponding boxes.

  Examples:
  >>> masks = torch.rand(5, 640, 640) > 0.5 # 5 random binary masks
@@ -673,13 +673,13 @@ class Predictor(BasePredictor):
  Perform prompts preprocessing and inference on provided image features using the SAM model.

  Args:
- features (torch.Tensor | Dict[str, Any]): Extracted image features from the SAM/SAM2 model image encoder.
- src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
- dst_shape (Tuple[int, int] | None): The target shape (height, width) for the prompts. If None, defaults to (imgsz, imgsz).
- bboxes (np.ndarray | List[List[float]] | None): Bounding boxes in xyxy format with shape (N, 4).
- points (np.ndarray | List[List[float]] | None): Points indicating object locations with shape (N, 2), in pixels.
- labels (np.ndarray | List[int] | None): Point prompt labels with shape (N, ).
- masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+ features (torch.Tensor | dict[str, Any]): Extracted image features from the SAM/SAM2 model image encoder.
+ src_shape (tuple[int, int]): The source shape (height, width) of the input image.
+ dst_shape (tuple[int, int] | None): The target shape (height, width) for the prompts. If None, defaults to (imgsz, imgsz).
+ bboxes (np.ndarray | list[list[float]] | None): Bounding boxes in xyxy format with shape (N, 4).
+ points (np.ndarray | list[list[float]] | None): Points indicating object locations with shape (N, 2), in pixels.
+ labels (np.ndarray | list[int] | None): Point prompt labels with shape (N, ).
+ masks (list[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.

  Returns:
@@ -688,7 +688,7 @@ class Predictor(BasePredictor):
  Each box is in xyxy format with additional columns for score and class.

  Notes:
- - The input features is a torch.Tensor of shape (B, C, H, W) if performing on SAM, or a Dict[str, Any] if performing on SAM2.
+ - The input features is a torch.Tensor of shape (B, C, H, W) if performing on SAM, or a dict[str, Any] if performing on SAM2.
  """
  dst_shape = dst_shape or (self.args.imgsz, self.args.imgsz)
  prompts = self._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
@@ -714,12 +714,12 @@ class SAM2Predictor(Predictor):
  prompt-based inference.

  Attributes:
- _bb_feat_sizes (List[tuple]): Feature sizes for different backbone levels.
+ _bb_feat_sizes (list[tuple]): Feature sizes for different backbone levels.
  model (torch.nn.Module): The loaded SAM2 model.
  device (torch.device): The device (CPU or GPU) on which the model is loaded.
  features (dict): Cached image features for efficient inference.
  segment_all (bool): Flag to indicate if all segments should be predicted.
- prompts (Dict[str, Any]): Dictionary to store various types of prompts for inference.
+ prompts (dict[str, Any]): Dictionary to store various types of prompts for inference.

  Methods:
  get_model: Retrieve and initialize the SAM2 model.
@@ -752,12 +752,12 @@ class SAM2Predictor(Predictor):
  Prepare and transform the input prompts for processing based on the destination shape.

  Args:
- dst_shape (Tuple[int, int]): The target shape (height, width) for the prompts.
- src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
- bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background.
- masks (List | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+ dst_shape (tuple[int, int]): The target shape (height, width) for the prompts.
+ src_shape (tuple[int, int]): The source shape (height, width) of the input image.
+ bboxes (np.ndarray | list | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | list | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | list | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background.
+ masks (list | np.ndarray | None): Masks for the objects, where each mask is a 2D array.

  Returns:
  points (torch.Tensor | None): Transformed points.
@@ -842,13 +842,13 @@ class SAM2Predictor(Predictor):
  Perform inference on image features using the SAM2 model.

  Args:
- features (torch.Tensor | Dict[str, Any]): Extracted image features with shape (B, C, H, W) from the SAM2 model image encoder, it
+ features (torch.Tensor | dict[str, Any]): Extracted image features with shape (B, C, H, W) from the SAM2 model image encoder, it
  could also be a dictionary including:
  - image_embed (torch.Tensor): Image embedding with shape (B, C, H, W).
- - high_res_feats (List[torch.Tensor]): List of high-resolution feature maps from the backbone, each with shape (B, C, H, W).
- points (np.ndarray | List[List[float]] | None): Object location points with shape (N, 2), in pixels.
- labels (np.ndarray | List[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
- masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+ - high_res_feats (list[torch.Tensor]): List of high-resolution feature maps from the backbone, each with shape (B, C, H, W).
+ points (np.ndarray | list[list[float]] | None): Object location points with shape (N, 2), in pixels.
+ labels (np.ndarray | list[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
+ masks (list[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
  img_idx (int): Index of the image in the batch to process.

@@ -962,9 +962,9 @@ class SAM2VideoPredictor(SAM2Predictor):

  Args:
  im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
- bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
- points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
- labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+ bboxes (np.ndarray | list, optional): Bounding boxes with shape (N, 4), in XYXY format.
+ points (np.ndarray | list, optional): Points indicating object locations with shape (N, 2), in pixels.
+ labels (np.ndarray | list, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
  masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256.

  Returns:
@@ -1036,9 +1036,9 @@ class SAM2VideoPredictor(SAM2Predictor):
  the masks do not overlap, which can be useful for certain applications.

  Args:
- preds (Tuple[torch.Tensor, torch.Tensor]): The predicted masks and scores from the model.
+ preds (tuple[torch.Tensor, torch.Tensor]): The predicted masks and scores from the model.
  img (torch.Tensor): The processed image tensor.
- orig_imgs (List[np.ndarray]): The original images before processing.
+ orig_imgs (list[np.ndarray]): The original images before processing.

  Returns:
  (list): The post-processed predictions.
@@ -1286,7 +1286,7 @@ class SAM2VideoPredictor(SAM2Predictor):
  Returns:
  vis_feats (torch.Tensor): The visual features extracted from the image.
  vis_pos_embed (torch.Tensor): The positional embeddings for the visual features.
- feat_sizes (List[tuple]): A list containing the sizes of the extracted features.
+ feat_sizes (list[tuple]): A list containing the sizes of the extracted features.

  Note:
  - If `batch` is greater than 1, the features are expanded to fit the batch size.
@@ -1442,11 +1442,11 @@ class SAM2VideoPredictor(SAM2Predictor):
  the current batch size.

  Args:
- out_maskmem_pos_enc (List[torch.Tensor] | None): The positional encoding for mask memory.
+ out_maskmem_pos_enc (list[torch.Tensor] | None): The positional encoding for mask memory.
  Should be a list of tensors or None.

  Returns:
- (List[torch.Tensor]): The positional encoding for mask memory, either cached or expanded.
+ (list[torch.Tensor]): The positional encoding for mask memory, either cached or expanded.

  Note:
  - The method assumes that `out_maskmem_pos_enc` is a list of tensors or None.
@@ -1730,10 +1730,10 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  specified overrides

  Args:
- cfg (Dict[str, Any]): Configuration dictionary containing default settings.
- overrides (Dict[str, Any] | None): Dictionary of values to override default configuration.
+ cfg (dict[str, Any]): Configuration dictionary containing default settings.
+ overrides (dict[str, Any] | None): Dictionary of values to override default configuration.
  max_obj_num (int): Maximum number of objects to track. Default is 3. this is set to keep fix feature size for the model.
- _callbacks (Dict[str, Any] | None): Dictionary of callback functions to customize behavior.
+ _callbacks (dict[str, Any] | None): Dictionary of callback functions to customize behavior.

  Examples:
  >>> predictor = SAM2DynamicInteractivePredictor(cfg=DEFAULT_CFG)
@@ -1778,11 +1778,11 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):

  Args:
  im (torch.Tensor | np.ndarray): The input image tensor or numpy array.
- bboxes (List[List[float]] | None): Optional list of bounding boxes to update the memory.
- masks (List[torch.Tensor | np.ndarray] | None): Optional masks to update the memory.
- points (List[List[float]] | None): Optional list of points to update the memory, each point is [x, y].
- labels (List[int] | None): Optional list of object IDs corresponding to the points (>0 for positive, 0 for negative).
- obj_ids (List[int] | None): Optional list of object IDs corresponding to the prompts.
+ bboxes (list[list[float]] | None): Optional list of bounding boxes to update the memory.
+ masks (list[torch.Tensor | np.ndarray] | None): Optional masks to update the memory.
+ points (list[list[float]] | None): Optional list of points to update the memory, each point is [x, y].
+ labels (list[int] | None): Optional list of object IDs corresponding to the points (>0 for positive, 0 for negative).
+ obj_ids (list[int] | None): Optional list of object IDs corresponding to the prompts.
  update_memory (bool): Flag to indicate whether to update the memory with new objects.

  Returns:
@@ -1855,7 +1855,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  Append the imgState to the memory_bank and update the memory for the model.

  Args:
- obj_ids (List[int]): List of object IDs corresponding to the prompts.
+ obj_ids (list[int]): List of object IDs corresponding to the prompts.
  points (torch.Tensor | None): Tensor of shape (B, N, 2) representing the input points for N objects.
  labels (torch.Tensor | None): Tensor of shape (B, N) representing the labels for the input points.
  masks (torch.Tensor | None): Optional tensor of shape (N, H, W) representing the input masks for N objects.
@@ -2009,7 +2009,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  mask (torch.Tensor | None): The mask input for the object with shape (H, W).

  Returns:
- current_out (Dict[str, Any]): A dictionary containing the current output with mask predictions and object pointers.
+ current_out (dict[str, Any]): A dictionary containing the current output with mask predictions and object pointers.
  Keys include 'point_inputs', 'mask_inputs', 'pred_masks', 'pred_masks_high_res', 'obj_ptr', 'object_score_logits'.
  """
  if mask is not None and self.model.use_mask_input_as_output_without_sam:
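The hunks above all touch the SAM/SAM2 predictors' prompt-handling docstrings (bboxes, points, labels, masks). For orientation, prompted inference through the public API looks roughly like this (standard Ultralytics SAM usage; the checkpoint and image names are illustrative):

    from ultralytics import SAM

    model = SAM("sam2.1_b.pt")  # illustrative checkpoint name
    # One box prompt plus one positive point (label 1 = foreground, 0 = background)
    results = model("image.jpg", bboxes=[100, 100, 400, 400], points=[[250, 250]], labels=[1])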
ultralytics/models/utils/loss.py
@@ -23,7 +23,7 @@ class DETRLoss(nn.Module):

  Attributes:
  nc (int): Number of classes.
- loss_gain (Dict[str, float]): Coefficients for different loss components.
+ loss_gain (dict[str, float]): Coefficients for different loss components.
  aux_loss (bool): Whether to compute auxiliary losses.
  use_fl (bool): Whether to use FocalLoss.
  use_vfl (bool): Whether to use VarifocalLoss.
@@ -55,7 +55,7 @@ class DETRLoss(nn.Module):

  Args:
  nc (int): Number of classes.
- loss_gain (Dict[str, float], optional): Coefficients for different loss components.
+ loss_gain (dict[str, float], optional): Coefficients for different loss components.
  aux_loss (bool): Whether to use auxiliary losses from each decoder layer.
  use_fl (bool): Whether to use FocalLoss.
  use_vfl (bool): Whether to use VarifocalLoss.
@@ -93,7 +93,7 @@ class DETRLoss(nn.Module):
  postfix (str, optional): String to append to the loss name for identification in multi-loss scenarios.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary containing classification loss value.
+ (dict[str, torch.Tensor]): Dictionary containing classification loss value.

  Notes:
  The function supports different classification loss types:
@@ -133,7 +133,7 @@ class DETRLoss(nn.Module):
  postfix (str, optional): String to append to the loss names for identification in multi-loss scenarios.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary containing:
+ (dict[str, torch.Tensor]): Dictionary containing:
  - loss_bbox{postfix}: L1 loss between predicted and ground truth boxes, scaled by the bbox loss gain.
  - loss_giou{postfix}: GIoU loss between predicted and ground truth boxes, scaled by the giou loss gain.

@@ -207,14 +207,14 @@ class DETRLoss(nn.Module):
  pred_scores (torch.Tensor): Predicted scores from auxiliary layers.
  gt_bboxes (torch.Tensor): Ground truth bounding boxes.
  gt_cls (torch.Tensor): Ground truth classes.
- gt_groups (List[int]): Number of ground truths per image.
- match_indices (List[Tuple], optional): Pre-computed matching indices.
+ gt_groups (list[int]): Number of ground truths per image.
+ match_indices (list[tuple], optional): Pre-computed matching indices.
  postfix (str, optional): String to append to loss names.
  masks (torch.Tensor, optional): Predicted masks if using segmentation.
  gt_mask (torch.Tensor, optional): Ground truth masks if using segmentation.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary of auxiliary losses.
+ (dict[str, torch.Tensor]): Dictionary of auxiliary losses.
  """
  # NOTE: loss class, bbox, giou, mask, dice
  loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
@@ -265,10 +265,10 @@ class DETRLoss(nn.Module):
  Extract batch indices, source indices, and destination indices from match indices.

  Args:
- match_indices (List[Tuple]): List of tuples containing matched indices.
+ match_indices (list[tuple]): List of tuples containing matched indices.

  Returns:
- batch_idx (Tuple[torch.Tensor, torch.Tensor]): Tuple containing (batch_idx, src_idx).
+ batch_idx (tuple[torch.Tensor, torch.Tensor]): Tuple containing (batch_idx, src_idx).
  dst_idx (torch.Tensor): Destination indices.
  """
  batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
@@ -285,7 +285,7 @@ class DETRLoss(nn.Module):
  Args:
  pred_bboxes (torch.Tensor): Predicted bounding boxes.
  gt_bboxes (torch.Tensor): Ground truth bounding boxes.
- match_indices (List[Tuple]): List of tuples containing matched indices.
+ match_indices (list[tuple]): List of tuples containing matched indices.

  Returns:
  pred_assigned (torch.Tensor): Assigned predicted bounding boxes.
@@ -325,14 +325,14 @@ class DETRLoss(nn.Module):
  pred_scores (torch.Tensor): Predicted class scores.
  gt_bboxes (torch.Tensor): Ground truth bounding boxes.
  gt_cls (torch.Tensor): Ground truth classes.
- gt_groups (List[int]): Number of ground truths per image.
+ gt_groups (list[int]): Number of ground truths per image.
  masks (torch.Tensor, optional): Predicted masks if using segmentation.
  gt_mask (torch.Tensor, optional): Ground truth masks if using segmentation.
  postfix (str, optional): String to append to loss names.
- match_indices (List[Tuple], optional): Pre-computed matching indices.
+ match_indices (list[tuple], optional): Pre-computed matching indices.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary of losses.
+ (dict[str, torch.Tensor]): Dictionary of losses.
  """
  if match_indices is None:
  match_indices = self.matcher(
@@ -370,12 +370,12 @@ class DETRLoss(nn.Module):
  Args:
  pred_bboxes (torch.Tensor): Predicted bounding boxes, shape (L, B, N, 4).
  pred_scores (torch.Tensor): Predicted class scores, shape (L, B, N, C).
- batch (Dict[str, Any]): Batch information containing cls, bboxes, and gt_groups.
+ batch (dict[str, Any]): Batch information containing cls, bboxes, and gt_groups.
  postfix (str, optional): Postfix for loss names.
  **kwargs (Any): Additional arguments, may include 'match_indices'.

  Returns:
- (Dict[str, torch.Tensor]): Computed losses, including main and auxiliary (if enabled).
+ (dict[str, torch.Tensor]): Computed losses, including main and auxiliary (if enabled).

  Notes:
  Uses last elements of pred_bboxes and pred_scores for main loss, and the rest for auxiliary losses if
@@ -419,14 +419,14 @@ class RTDETRDetectionLoss(DETRLoss):
  Forward pass to compute detection loss with optional denoising loss.

  Args:
- preds (Tuple[torch.Tensor, torch.Tensor]): Tuple containing predicted bounding boxes and scores.
- batch (Dict[str, Any]): Batch data containing ground truth information.
+ preds (tuple[torch.Tensor, torch.Tensor]): Tuple containing predicted bounding boxes and scores.
+ batch (dict[str, Any]): Batch data containing ground truth information.
  dn_bboxes (torch.Tensor, optional): Denoising bounding boxes.
  dn_scores (torch.Tensor, optional): Denoising scores.
- dn_meta (Dict[str, Any], optional): Metadata for denoising.
+ dn_meta (dict[str, Any], optional): Metadata for denoising.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary containing total loss and denoising loss if applicable.
+ (dict[str, torch.Tensor]): Dictionary containing total loss and denoising loss if applicable.
  """
  pred_bboxes, pred_scores = preds
  total_loss = super().forward(pred_bboxes, pred_scores, batch)
@@ -456,12 +456,12 @@ class RTDETRDetectionLoss(DETRLoss):
  Get match indices for denoising.

  Args:
- dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
+ dn_pos_idx (list[torch.Tensor]): List of tensors containing positive indices for denoising.
  dn_num_group (int): Number of denoising groups.
- gt_groups (List[int]): List of integers representing number of ground truths per image.
+ gt_groups (list[int]): List of integers representing number of ground truths per image.

  Returns:
- (List[Tuple[torch.Tensor, torch.Tensor]]): List of tuples containing matched indices for denoising.
+ (list[tuple[torch.Tensor, torch.Tensor]]): List of tuples containing matched indices for denoising.
  """
  dn_match_indices = []
  idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
ultralytics/models/utils/ops.py
@@ -22,7 +22,7 @@ class HungarianMatcher(nn.Module):
  used in end-to-end object detection models like DETR.

  Attributes:
- cost_gain (Dict[str, float]): Dictionary of cost coefficients for 'class', 'bbox', 'giou', 'mask', and 'dice'
+ cost_gain (dict[str, float]): Dictionary of cost coefficients for 'class', 'bbox', 'giou', 'mask', and 'dice'
  components.
  use_fl (bool): Whether to use Focal Loss for classification cost calculation.
  with_mask (bool): Whether the model makes mask predictions.
@@ -60,7 +60,7 @@ class HungarianMatcher(nn.Module):
  Initialize HungarianMatcher for optimal assignment of predicted and ground truth bounding boxes.

  Args:
- cost_gain (Dict[str, float], optional): Dictionary of cost coefficients for different matching cost
+ cost_gain (dict[str, float], optional): Dictionary of cost coefficients for different matching cost
  components. Should contain keys 'class', 'bbox', 'giou', 'mask', and 'dice'.
  use_fl (bool): Whether to use Focal Loss for classification cost calculation.
  with_mask (bool): Whether the model makes mask predictions.
@@ -100,12 +100,12 @@ class HungarianMatcher(nn.Module):
  num_classes).
  gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (num_gts, 4).
  gt_cls (torch.Tensor): Ground truth class labels with shape (num_gts,).
- gt_groups (List[int]): Number of ground truth boxes for each image in the batch.
+ gt_groups (list[int]): Number of ground truth boxes for each image in the batch.
  masks (torch.Tensor, optional): Predicted masks with shape (batch_size, num_queries, height, width).
- gt_mask (List[torch.Tensor], optional): Ground truth masks, each with shape (num_masks, Height, Width).
+ gt_mask (list[torch.Tensor], optional): Ground truth masks, each with shape (num_masks, Height, Width).

  Returns:
- (List[Tuple[torch.Tensor, torch.Tensor]]): A list of size batch_size, each element is a tuple
+ (list[tuple[torch.Tensor, torch.Tensor]]): A list of size batch_size, each element is a tuple
  (index_i, index_j), where index_i is the tensor of indices of the selected predictions (in order)
  and index_j is the tensor of indices of the corresponding selected ground truth targets (in order).
  For each batch element, it holds: len(index_i) = len(index_j) = min(num_queries, num_target_boxes).
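The forward method documented above solves a one-to-one assignment problem over a cost matrix; a toy illustration of that matching step using SciPy's linear_sum_assignment (which Hungarian matchers of this kind typically delegate to; not Ultralytics code):

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    # Rows = predicted queries, columns = ground-truth boxes; entries are matching
    # costs (in the matcher, a weighted sum of class, bbox L1, and GIoU costs
    # combined via the cost_gain coefficients).
    cost = np.array([[0.9, 0.1, 0.5],
                     [0.4, 0.8, 0.2]])
    row_ind, col_ind = linear_sum_assignment(cost)  # optimal one-to-one assignment
    print([(int(r), int(c)) for r, c in zip(row_ind, col_ind)])  # [(0, 1), (1, 2)]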
@@ -205,8 +205,8 @@ def get_cdn_group(
  bounding boxes and class labels. It generates both positive and negative samples to improve model robustness.

  Args:
- batch (Dict[str, Any]): Batch dictionary containing 'gt_cls' (torch.Tensor with shape (num_gts,)),
- 'gt_bboxes' (torch.Tensor with shape (num_gts, 4)), and 'gt_groups' (List[int]) indicating number of
+ batch (dict[str, Any]): Batch dictionary containing 'gt_cls' (torch.Tensor with shape (num_gts,)),
+ 'gt_bboxes' (torch.Tensor with shape (num_gts, 4)), and 'gt_groups' (list[int]) indicating number of
  ground truths per image.
  num_classes (int): Total number of object classes.
  num_queries (int): Number of object queries.
@@ -220,7 +220,7 @@ def get_cdn_group(
  padding_cls (torch.Tensor | None): Modified class embeddings for denoising with shape (bs, num_dn, embed_dim).
  padding_bbox (torch.Tensor | None): Modified bounding boxes for denoising with shape (bs, num_dn, 4).
  attn_mask (torch.Tensor | None): Attention mask for denoising with shape (tgt_size, tgt_size).
- dn_meta (Dict[str, Any] | None): Meta information dictionary containing denoising parameters.
+ dn_meta (dict[str, Any] | None): Meta information dictionary containing denoising parameters.

  Examples:
  Generate denoising group for training
ultralytics/models/yolo/classify/predict.py
@@ -78,10 +78,10 @@ class ClassificationPredictor(BasePredictor):
  Args:
  preds (torch.Tensor): Raw predictions from the model.
  img (torch.Tensor): Input images after preprocessing.
- orig_imgs (List[np.ndarray] | torch.Tensor): Original images before preprocessing.
+ orig_imgs (list[np.ndarray] | torch.Tensor): Original images before preprocessing.

  Returns:
- (List[Results]): List of Results objects containing classification results for each image.
+ (list[Results]): List of Results objects containing classification results for each image.
  """
  if not isinstance(orig_imgs, list): # Input images are a torch.Tensor, not a list
  orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
ultralytics/models/yolo/classify/train.py
@@ -25,8 +25,8 @@ class ClassificationTrainer(BaseTrainer):

  Attributes:
  model (ClassificationModel): The classification model to be trained.
- data (Dict[str, Any]): Dictionary containing dataset information including class names and number of classes.
- loss_names (List[str]): Names of the loss functions used during training.
+ data (dict[str, Any]): Dictionary containing dataset information including class names and number of classes.
+ loss_names (list[str]): Names of the loss functions used during training.
  validator (ClassificationValidator): Validator instance for model evaluation.

  Methods:
@@ -55,20 +55,10 @@ class ClassificationTrainer(BaseTrainer):
  """
  Initialize a ClassificationTrainer object.

- This constructor sets up a trainer for image classification tasks, configuring the task type and default
- image size if not specified.
-
  Args:
- cfg (Dict[str, Any], optional): Default configuration dictionary containing training parameters.
- overrides (Dict[str, Any], optional): Dictionary of parameter overrides for the default configuration.
- _callbacks (List[Any], optional): List of callback functions to be executed during training.
-
- Examples:
- Create a trainer with custom configuration
- >>> from ultralytics.models.yolo.classify import ClassificationTrainer
- >>> args = dict(model="yolo11n-cls.pt", data="imagenet10", epochs=3)
- >>> trainer = ClassificationTrainer(overrides=args)
- >>> trainer.train()
+ cfg (dict[str, Any], optional): Default configuration dictionary containing training parameters.
+ overrides (dict[str, Any], optional): Dictionary of parameter overrides for the default configuration.
+ _callbacks (list[Any], optional): List of callback functions to be executed during training.
  """
  if overrides is None:
  overrides = {}
@@ -155,7 +145,7 @@ class ClassificationTrainer(BaseTrainer):
  with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
  dataset = self.build_dataset(dataset_path, mode)

- loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank)
+ loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
  # Attach inference transforms
  if mode != "train":
  if is_parallel(self.model):
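The one behavioral change visible in these hunks is the new `drop_last=self.args.compile` argument above: when `compile` is enabled, the final incomplete batch is dropped so every batch keeps a fixed size, presumably to avoid shape-change recompilations under `torch.compile`. A minimal sketch of the effect with a plain PyTorch DataLoader (hypothetical tensor sizes, not the Ultralytics `build_dataloader` wrapper):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.randn(10, 3, 224, 224))  # 10 samples, illustrative shapes

    # batch_size=4 without drop_last yields batches of 4, 4, 2; the trailing
    # odd-sized batch changes the input shape, which can trigger a recompilation
    # when the model is wrapped with torch.compile.
    print([b[0].shape[0] for b in DataLoader(dataset, batch_size=4)])  # [4, 4, 2]
    print([b[0].shape[0] for b in DataLoader(dataset, batch_size=4, drop_last=True)])  # [4, 4]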
@@ -196,8 +186,8 @@ class ClassificationTrainer(BaseTrainer):
  prefix (str, optional): Prefix to prepend to loss names.

  Returns:
- keys (List[str]): List of loss keys if loss_items is None.
- loss_dict (Dict[str, float]): Dictionary of loss items if loss_items is provided.
+ keys (list[str]): List of loss keys if loss_items is None.
+ loss_dict (dict[str, float]): Dictionary of loss items if loss_items is provided.
  """
  keys = [f"{prefix}/{x}" for x in self.loss_names]
  if loss_items is None:
@@ -227,7 +217,7 @@ class ClassificationTrainer(BaseTrainer):
  Plot training samples with their annotations.

  Args:
- batch (Dict[str, torch.Tensor]): Batch containing images and class labels.
+ batch (dict[str, torch.Tensor]): Batch containing images and class labels.
  ni (int): Number of iterations.
  """
  batch["batch_idx"] = torch.arange(len(batch["img"])) # add batch index for plotting
ultralytics/models/yolo/classify/val.py
@@ -22,8 +22,8 @@ class ClassificationValidator(BaseValidator):
  confusion matrix generation, and visualization of results.

  Attributes:
- targets (List[torch.Tensor]): Ground truth class labels.
- pred (List[torch.Tensor]): Model predictions.
+ targets (list[torch.Tensor]): Ground truth class labels.
+ pred (list[torch.Tensor]): Model predictions.
  metrics (ClassifyMetrics): Object to calculate and store classification metrics.
  names (dict): Mapping of class indices to class names.
  nc (int): Number of classes.
@@ -170,7 +170,7 @@ class ClassificationValidator(BaseValidator):
  Plot validation image samples with their ground truth labels.

  Args:
- batch (Dict[str, Any]): Dictionary containing batch data with 'img' (images) and 'cls' (class labels).
+ batch (dict[str, Any]): Dictionary containing batch data with 'img' (images) and 'cls' (class labels).
  ni (int): Batch index used for naming the output file.

  Examples:
@@ -191,7 +191,7 @@ class ClassificationValidator(BaseValidator):
  Plot images with their predicted class labels and save the visualization.

  Args:
- batch (Dict[str, Any]): Batch data containing images and other information.
+ batch (dict[str, Any]): Batch data containing images and other information.
  preds (torch.Tensor): Model predictions with shape (batch_size, num_classes).
  ni (int): Batch index used for naming the output file.

ultralytics/models/yolo/detect/predict.py
@@ -96,12 +96,12 @@ class DetectionPredictor(BasePredictor):
  Construct a list of Results objects from model predictions.

  Args:
- preds (List[torch.Tensor]): List of predicted bounding boxes and scores for each image.
+ preds (list[torch.Tensor]): List of predicted bounding boxes and scores for each image.
  img (torch.Tensor): Batch of preprocessed images used for inference.
- orig_imgs (List[np.ndarray]): List of original images before preprocessing.
+ orig_imgs (list[np.ndarray]): List of original images before preprocessing.

  Returns:
- (List[Results]): List of Results objects containing detection information for each image.
+ (list[Results]): List of Results objects containing detection information for each image.
  """
  return [
  self.construct_result(pred, img, orig_img, img_path)
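The `construct_results` docstring above describes the `list[Results]` returned by a standard detection predict call; typical downstream usage looks like this (standard Ultralytics API, illustrative file names):

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    results = model.predict("image.jpg")  # -> list[Results], one per image
    for r in results:
        print(r.boxes.xyxy, r.boxes.conf, r.boxes.cls)  # boxes, scores, class ids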