dgenerate-ultralytics-headless 8.3.194__py3-none-any.whl → 8.3.196__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/METADATA +1 -2
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/RECORD +107 -106
- tests/test_python.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +9 -8
- ultralytics/cfg/default.yaml +1 -0
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +76 -76
- ultralytics/data/base.py +12 -12
- ultralytics/data/build.py +5 -1
- ultralytics/data/converter.py +4 -4
- ultralytics/data/dataset.py +7 -7
- ultralytics/data/loaders.py +15 -15
- ultralytics/data/split_dota.py +10 -10
- ultralytics/data/utils.py +12 -12
- ultralytics/engine/exporter.py +19 -31
- ultralytics/engine/model.py +13 -13
- ultralytics/engine/predictor.py +16 -14
- ultralytics/engine/results.py +21 -21
- ultralytics/engine/trainer.py +15 -4
- ultralytics/engine/validator.py +6 -2
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +7 -7
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +11 -11
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +2 -2
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/amg.py +6 -6
- ultralytics/models/sam/build.py +9 -9
- ultralytics/models/sam/model.py +7 -7
- ultralytics/models/sam/modules/blocks.py +6 -6
- ultralytics/models/sam/modules/decoders.py +1 -1
- ultralytics/models/sam/modules/encoders.py +27 -27
- ultralytics/models/sam/modules/sam.py +4 -4
- ultralytics/models/sam/modules/tiny_encoder.py +18 -18
- ultralytics/models/sam/modules/utils.py +8 -8
- ultralytics/models/sam/predict.py +63 -63
- ultralytics/models/utils/loss.py +22 -22
- ultralytics/models/utils/ops.py +8 -8
- ultralytics/models/yolo/classify/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +9 -19
- ultralytics/models/yolo/classify/val.py +4 -4
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +38 -12
- ultralytics/models/yolo/detect/val.py +38 -37
- ultralytics/models/yolo/model.py +6 -6
- ultralytics/models/yolo/obb/train.py +1 -10
- ultralytics/models/yolo/obb/val.py +13 -13
- ultralytics/models/yolo/pose/train.py +1 -9
- ultralytics/models/yolo/pose/val.py +12 -12
- ultralytics/models/yolo/segment/predict.py +4 -4
- ultralytics/models/yolo/segment/train.py +2 -10
- ultralytics/models/yolo/segment/val.py +15 -15
- ultralytics/models/yolo/world/train.py +13 -13
- ultralytics/models/yolo/world/train_world.py +3 -3
- ultralytics/models/yolo/yoloe/predict.py +4 -4
- ultralytics/models/yolo/yoloe/train.py +7 -16
- ultralytics/models/yolo/yoloe/val.py +0 -7
- ultralytics/nn/autobackend.py +2 -2
- ultralytics/nn/modules/block.py +6 -6
- ultralytics/nn/modules/conv.py +2 -2
- ultralytics/nn/modules/head.py +6 -5
- ultralytics/nn/tasks.py +17 -15
- ultralytics/nn/text_model.py +3 -3
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +3 -3
- ultralytics/solutions/config.py +5 -5
- ultralytics/solutions/distance_calculation.py +2 -2
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +4 -4
- ultralytics/solutions/object_counter.py +4 -4
- ultralytics/solutions/parking_management.py +7 -7
- ultralytics/solutions/queue_management.py +3 -3
- ultralytics/solutions/region_counter.py +4 -4
- ultralytics/solutions/similarity_search.py +2 -2
- ultralytics/solutions/solutions.py +48 -48
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/trackers/byte_tracker.py +11 -11
- ultralytics/trackers/utils/gmc.py +3 -3
- ultralytics/trackers/utils/matching.py +5 -5
- ultralytics/utils/__init__.py +30 -19
- ultralytics/utils/autodevice.py +2 -2
- ultralytics/utils/benchmarks.py +10 -10
- ultralytics/utils/callbacks/clearml.py +1 -1
- ultralytics/utils/callbacks/comet.py +5 -5
- ultralytics/utils/callbacks/tensorboard.py +2 -2
- ultralytics/utils/checks.py +7 -5
- ultralytics/utils/cpu.py +90 -0
- ultralytics/utils/dist.py +1 -1
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +5 -5
- ultralytics/utils/instance.py +2 -2
- ultralytics/utils/loss.py +14 -8
- ultralytics/utils/metrics.py +35 -35
- ultralytics/utils/nms.py +4 -4
- ultralytics/utils/ops.py +1 -1
- ultralytics/utils/patches.py +2 -2
- ultralytics/utils/plotting.py +10 -9
- ultralytics/utils/torch_utils.py +113 -15
- ultralytics/utils/triton.py +5 -5
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/top_level.txt +0 -0
ultralytics/engine/results.py
CHANGED
@@ -30,7 +30,7 @@ class BaseTensor(SimpleClass):
 
     Attributes:
         data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
-        orig_shape (
+        orig_shape (tuple[int, int]): Original shape of the image, typically in the format (height, width).
 
     Methods:
         cpu: Return a copy of the tensor stored in CPU memory.
@@ -54,7 +54,7 @@ class BaseTensor(SimpleClass):
 
     Args:
         data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
-        orig_shape (
+        orig_shape (tuple[int, int]): Original shape of the image in (height, width) format.
 
     Examples:
         >>> import torch
@@ -72,7 +72,7 @@ class BaseTensor(SimpleClass):
     Return the shape of the underlying data tensor.
 
     Returns:
-        (
+        (tuple[int, ...]): The shape of the data tensor.
 
     Examples:
         >>> data = torch.rand(100, 4)
@@ -174,7 +174,7 @@ class BaseTensor(SimpleClass):
     Return a new BaseTensor instance containing the specified indexed elements of the data tensor.
 
     Args:
-        idx (int |
+        idx (int | list[int] | torch.Tensor): Index or indices to select from the data tensor.
 
     Returns:
         (BaseTensor): A new BaseTensor instance containing the indexed data.
@@ -199,7 +199,7 @@ class Results(SimpleClass, DataExportMixin):
 
     Attributes:
         orig_img (np.ndarray): The original image as a numpy array.
-        orig_shape (
+        orig_shape (tuple[int, int]): Original image shape in (height, width) format.
         boxes (Boxes | None): Detected bounding boxes.
         masks (Masks | None): Segmentation masks.
         probs (Probs | None): Classification probabilities.
@@ -261,7 +261,7 @@ class Results(SimpleClass, DataExportMixin):
         probs (torch.Tensor | None): A 1D tensor of probabilities of each class for classification task.
         keypoints (torch.Tensor | None): A 2D tensor of keypoint coordinates for each detection.
         obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection.
-        speed (
+        speed (dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
 
     Examples:
         >>> results = model("path/to/image.jpg")
@@ -799,7 +799,7 @@ class Results(SimpleClass, DataExportMixin):
         decimals (int): Number of decimal places to round the output values to.
 
     Returns:
-        (
+        (list[dict[str, Any]]): A list of dictionaries, each containing summarized information for a single detection
             or classification result. The structure of each dictionary varies based on the task type
             (classification or detection) and available information (boxes, masks, keypoints).
 
@@ -862,7 +862,7 @@ class Boxes(BaseTensor):
 
     Attributes:
         data (torch.Tensor | np.ndarray): The raw tensor containing detection boxes and associated data.
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image dimensions (height, width).
         is_track (bool): Indicates whether tracking IDs are included in the box data.
         xyxy (torch.Tensor | np.ndarray): Boxes in [x1, y1, x2, y2] format.
         conf (torch.Tensor | np.ndarray): Confidence scores for each box.
@@ -901,11 +901,11 @@ class Boxes(BaseTensor):
         boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape
             (num_boxes, 6) or (num_boxes, 7). Columns should contain
             [x1, y1, x2, y2, confidence, class, (optional) track_id].
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image shape as (height, width). Used for normalization.
 
     Attributes:
         data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image size, used for normalization.
         is_track (bool): Indicates whether tracking IDs are included in the box data.
 
     Examples:
@@ -1081,8 +1081,8 @@ class Masks(BaseTensor):
     Attributes:
         data (torch.Tensor | np.ndarray): The raw tensor or array containing mask data.
         orig_shape (tuple): Original image shape in (height, width) format.
-        xy (
-        xyn (
+        xy (list[np.ndarray]): A list of segments in pixel coordinates.
+        xyn (list[np.ndarray]): A list of normalized segments.
 
     Methods:
         cpu: Return a copy of the Masks object with the mask tensor on CPU memory.
@@ -1127,7 +1127,7 @@ class Masks(BaseTensor):
         are normalized relative to the original image shape.
 
     Returns:
-        (
+        (list[np.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates
             of a single segmentation mask. Each array has shape (N, 2), where N is the number of points in the
             mask contour.
 
@@ -1152,7 +1152,7 @@ class Masks(BaseTensor):
         Masks object. The coordinates are scaled to match the original image dimensions.
 
     Returns:
-        (
+        (list[np.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel
             coordinates for a single segmentation mask. Each array has shape (N, 2), where N is the
             number of points in the segment.
 
@@ -1179,7 +1179,7 @@ class Keypoints(BaseTensor):
 
     Attributes:
         data (torch.Tensor): The raw tensor containing keypoint data.
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image dimensions (height, width).
         has_visible (bool): Indicates whether visibility information is available for keypoints.
         xy (torch.Tensor): Keypoint coordinates in [x, y] format.
         xyn (torch.Tensor): Normalized keypoint coordinates in [x, y] format, relative to orig_shape.
@@ -1213,7 +1213,7 @@ class Keypoints(BaseTensor):
         keypoints (torch.Tensor): A tensor containing keypoint data. Shape can be either:
             - (num_objects, num_keypoints, 2) for x, y coordinates only
            - (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image dimensions (height, width).
 
     Examples:
         >>> kpts = torch.rand(1, 17, 3)  # 1 object, 17 keypoints (COCO format), x,y,conf
@@ -1301,7 +1301,7 @@ class Probs(BaseTensor):
         data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
         orig_shape (tuple | None): The original image shape as (height, width). Not used in this class.
         top1 (int): Index of the class with the highest probability.
-        top5 (
+        top5 (list[int]): Indices of the top 5 classes by probability.
         top1conf (torch.Tensor | np.ndarray): Confidence score of the top 1 class.
         top5conf (torch.Tensor | np.ndarray): Confidence scores of the top 5 classes.
 
@@ -1339,7 +1339,7 @@ class Probs(BaseTensor):
     Attributes:
         data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
         top1 (int): Index of the top 1 class.
-        top5 (
+        top5 (list[int]): Indices of the top 5 classes.
         top1conf (torch.Tensor | np.ndarray): Confidence of the top 1 class.
         top5conf (torch.Tensor | np.ndarray): Confidences of the top 5 classes.
 
@@ -1379,7 +1379,7 @@ class Probs(BaseTensor):
     Return the indices of the top 5 class probabilities.
 
     Returns:
-        (
+        (list[int]): A list containing the indices of the top 5 class probabilities, sorted in descending order.
 
     Examples:
         >>> probs = Probs(torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]))
@@ -1476,11 +1476,11 @@ class OBB(BaseTensor):
         boxes (torch.Tensor | np.ndarray): A tensor or numpy array containing the detection boxes,
             with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
             If present, the third last column contains track IDs, and the fifth column contains rotation.
-        orig_shape (
+        orig_shape (tuple[int, int]): Original image size, in the format (height, width).
 
     Attributes:
         data (torch.Tensor | np.ndarray): The raw OBB tensor.
-        orig_shape (
+        orig_shape (tuple[int, int]): The original image shape.
         is_track (bool): Whether the boxes include tracking IDs.
 
     Raises:
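All of the results.py edits above are docstring type-annotation fixes, so the runtime Results API is unchanged. As a quick orientation to the attributes being documented, here is a minimal usage sketch (not part of the diff; the weight file name is illustrative):

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")  # any detection weights; name is illustrative
    results = model("path/to/image.jpg")
    for r in results:
        print(r.orig_shape)  # tuple[int, int] as (height, width)
        print(r.speed)  # dict of preprocess/inference/postprocess times in ms/image
        print(r.boxes.xyxy)  # boxes in [x1, y1, x2, y2] format
        print(r.summary(decimals=3))  # list[dict[str, Any]], one entry per result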
ultralytics/engine/trainer.py
CHANGED
@@ -46,6 +46,7 @@ from ultralytics.utils.torch_utils import (
     TORCH_2_4,
     EarlyStopping,
     ModelEMA,
+    attempt_compile,
     autocast,
     convert_optimizer_state_dict_to_fp16,
     init_seeds,
@@ -54,6 +55,7 @@ from ultralytics.utils.torch_utils import (
     strip_optimizer,
     torch_distributed_zero_first,
     unset_deterministic,
+    unwrap_model,
 )
 
 
@@ -256,6 +258,14 @@ class BaseTrainer:
         self.model = self.model.to(self.device)
         self.set_model_attributes()
 
+        # Initialize loss criterion before compilation for torch.compile compatibility
+        if hasattr(self.model, "init_criterion"):
+            self.model.criterion = self.model.init_criterion()
+
+        # Compile model
+        if self.args.compile:
+            self.model = attempt_compile(self.model, device=self.device)
+
         # Freeze layers
         freeze_list = (
             self.args.freeze
@@ -404,6 +414,7 @@ class BaseTrainer:
                 # Forward
                 with autocast(self.amp):
                     batch = self.preprocess_batch(batch)
+                    metadata = {k: batch.pop(k, None) for k in ["im_file", "ori_shape", "resized_shape"]}
                     loss, self.loss_items = self.model(batch)
                     self.loss = loss.sum()
                     if RANK != -1:
@@ -445,6 +456,7 @@ class BaseTrainer:
                 )
                 self.run_callbacks("on_batch_end")
                 if self.args.plots and ni in self.plot_idx:
+                    batch = {**batch, **metadata}
                     self.plot_training_samples(batch, ni)
 
                 self.run_callbacks("on_train_batch_end")
@@ -565,7 +577,7 @@ class BaseTrainer:
             "epoch": self.epoch,
             "best_fitness": self.best_fitness,
             "model": None,  # resume and final checkpoints derive from EMA
-            "ema": deepcopy(self.ema.ema).half(),
+            "ema": deepcopy(unwrap_model(self.ema.ema)).half(),
             "updates": self.ema.updates,
             "optimizer": convert_optimizer_state_dict_to_fp16(deepcopy(self.optimizer.state_dict())),
             "train_args": vars(self.args),  # save as dict
@@ -592,8 +604,6 @@ class BaseTrainer:
             self.best.write_bytes(serialized_ckpt)  # save best.pt
         if (self.save_period > 0) and (self.epoch % self.save_period == 0):
             (self.wdir / f"epoch{self.epoch}.pt").write_bytes(serialized_ckpt)  # save epoch, i.e. 'epoch3.pt'
-        # if self.args.close_mosaic and self.epoch == (self.epochs - self.args.close_mosaic - 1):
-        #     (self.wdir / "last_mosaic.pt").write_bytes(serialized_ckpt)  # save mosaic checkpoint
 
     def get_dataset(self):
         """
@@ -667,7 +677,7 @@ class BaseTrainer:
 
     def validate(self):
        """
-        Run validation on
+        Run validation on val set using self.validator.
 
        Returns:
            metrics (dict): Dictionary of validation metrics.
@@ -755,6 +765,7 @@ class BaseTrainer:
                     strip_optimizer(f, updates={k: ckpt[k]} if k in ckpt else None)
                     LOGGER.info(f"\nValidating {f}...")
                     self.validator.args.plots = self.args.plots
+                    self.validator.args.compile = False  # disable final val compile as too slow
                     self.metrics = self.validator(model=f)
                     self.metrics.pop("fitness", None)
                     self.run_callbacks("on_fit_epoch_end")
ultralytics/engine/validator.py
CHANGED
@@ -36,7 +36,7 @@ from ultralytics.nn.autobackend import AutoBackend
 from ultralytics.utils import LOGGER, TQDM, callbacks, colorstr, emojis
 from ultralytics.utils.checks import check_imgsz
 from ultralytics.utils.ops import Profile
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import attempt_compile, select_device, smart_inference_mode, unwrap_model
 
 
 class BaseValidator:
@@ -148,6 +148,8 @@ class BaseValidator:
             # Force FP16 val during training
             self.args.half = self.device.type != "cpu" and trainer.amp
             model = trainer.ema.ema or trainer.model
+            if trainer.args.compile and hasattr(model, "_orig_mod"):
+                model = model._orig_mod  # validate non-compiled original model to avoid issues
             model = model.half() if self.args.half else model.float()
             self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device)
             self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
@@ -186,6 +188,8 @@ class BaseValidator:
             self.dataloader = self.dataloader or self.get_dataloader(self.data.get(self.args.split), self.args.batch)
 
             model.eval()
+            if self.args.compile:
+                model = attempt_compile(model, device=self.device)
             model.warmup(imgsz=(1 if pt else self.args.batch, self.data["channels"], imgsz, imgsz))  # warmup
 
         self.run_callbacks("on_val_start")
@@ -196,7 +200,7 @@ class BaseValidator:
             Profile(device=self.device),
         )
         bar = TQDM(self.dataloader, desc=self.get_desc(), total=len(self.dataloader))
-        self.init_metrics(
+        self.init_metrics(unwrap_model(model))
         self.jdict = []  # empty before each val
         for batch_i, batch in enumerate(bar):
             self.run_callbacks("on_val_batch_start")
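Both files above lean on the new unwrap_model helper (and, during training-time validation, on torch.compile's _orig_mod attribute) to get back to the original nn.Module before saving weights or collecting metrics. The helper itself is not shown in this diff; a plausible one-line equivalent, offered only as a sketch of the assumed behavior, would be:

    import torch.nn as nn

    def unwrap_model(model: nn.Module) -> nn.Module:
        """Return the underlying module if `model` was wrapped by torch.compile, else the model itself."""
        return getattr(model, "_orig_mod", model)  # assumption: mirrors the helper added in torch_utils.py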
ultralytics/hub/google/__init__.py
CHANGED
@@ -15,7 +15,7 @@ class GCPRegions:
     geographical location, tier classification, and network latency.
 
     Attributes:
-        regions (
+        regions (dict[str, tuple[int, str, str]]): A dictionary of GCP regions with their tier, city, and country.
 
     Methods:
         tier1: Returns a list of tier 1 GCP regions.
@@ -136,7 +136,7 @@ class GCPRegions:
         attempts (int, optional): Number of ping attempts per region.
 
     Returns:
-        (
+        (list[tuple[str, float, float, float, float]]): List of tuples containing region information and
             latency statistics. Each tuple contains (region, mean_latency, std_dev, min_latency, max_latency).
 
     Examples:
ultralytics/hub/session.py
CHANGED
@@ -28,13 +28,13 @@ class HUBTrainingSession:
     Attributes:
         model_id (str): Identifier for the YOLO model being trained.
         model_url (str): URL for the model in Ultralytics HUB.
-        rate_limits (
-        timers (
-        metrics_queue (
-        metrics_upload_failed_queue (
+        rate_limits (dict[str, int]): Rate limits for different API calls in seconds.
+        timers (dict[str, Any]): Timers for rate limiting.
+        metrics_queue (dict[str, Any]): Queue for the model's metrics.
+        metrics_upload_failed_queue (dict[str, Any]): Queue for metrics that failed to upload.
         model (Any): Model data fetched from Ultralytics HUB.
         model_file (str): Path to the model file.
-        train_args (
+        train_args (dict[str, Any]): Arguments for training the model.
         client (Any): Client for interacting with Ultralytics HUB.
         filename (str): Filename of the model.
 
@@ -98,7 +98,7 @@ class HUBTrainingSession:
 
     Args:
         identifier (str): Model identifier used to initialize the HUB training session.
-        args (
+        args (dict[str, Any], optional): Arguments for creating a new model if identifier is not a HUB model URL.
 
     Returns:
         session (HUBTrainingSession | None): An authenticated session or None if creation fails.
@@ -144,7 +144,7 @@ class HUBTrainingSession:
     Initialize a HUB training session with the specified model arguments.
 
     Args:
-        model_args (
+        model_args (dict[str, Any]): Arguments for creating the model, including batch size, epochs, image size,
             etc.
 
     Returns:
ultralytics/models/fastsam/model.py
CHANGED
@@ -63,14 +63,14 @@ class FastSAM(Model):
         source (str | PIL.Image | np.ndarray): Input source for prediction, can be a file path, URL, PIL image,
             or numpy array.
         stream (bool): Whether to enable real-time streaming mode for video inputs.
-        bboxes (
-        points (
-        labels (
-        texts (
+        bboxes (list, optional): Bounding box coordinates for prompted segmentation in format [[x1, y1, x2, y2]].
+        points (list, optional): Point coordinates for prompted segmentation in format [[x, y]].
+        labels (list, optional): Class labels for prompted segmentation.
+        texts (list, optional): Text prompts for segmentation guidance.
         **kwargs (Any): Additional keyword arguments passed to the predictor.
 
     Returns:
-        (
+        (list): List of Results objects containing the prediction results.
     """
     prompts = dict(bboxes=bboxes, points=points, labels=labels, texts=texts)
     return super().predict(source, stream, prompts=prompts, **kwargs)
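The updated FastSAM.predict docstring spells out the prompt types it forwards to the predictor. A short usage sketch of that documented interface (weight name and prompt values are illustrative only):

    from ultralytics import FastSAM

    model = FastSAM("FastSAM-s.pt")
    # Any subset of box, point/label, or text prompts may be supplied
    results = model.predict(
        "path/to/image.jpg",
        bboxes=[[100, 120, 400, 360]],  # [[x1, y1, x2, y2]] in pixels
        texts="a photo of a dog",
    )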
ultralytics/models/fastsam/predict.py
CHANGED
@@ -52,12 +52,12 @@ class FastSAMPredictor(SegmentationPredictor):
     Apply postprocessing to FastSAM predictions and handle prompts.
 
     Args:
-        preds (
+        preds (list[torch.Tensor]): Raw predictions from the model.
         img (torch.Tensor): Input image tensor that was fed to the model.
-        orig_imgs (
+        orig_imgs (list[np.ndarray]): Original images before preprocessing.
 
     Returns:
-        (
+        (list[Results]): Processed results with prompts applied.
     """
     bboxes = self.prompts.pop("bboxes", None)
     points = self.prompts.pop("points", None)
@@ -80,14 +80,14 @@ class FastSAMPredictor(SegmentationPredictor):
     Perform image segmentation inference based on cues like bounding boxes, points, and text prompts.
 
     Args:
-        results (Results |
-        bboxes (np.ndarray |
-        points (np.ndarray |
-        labels (np.ndarray |
-        texts (str |
+        results (Results | list[Results]): Original inference results from FastSAM models without any prompts.
+        bboxes (np.ndarray | list, optional): Bounding boxes with shape (N, 4), in XYXY format.
+        points (np.ndarray | list, optional): Points indicating object locations with shape (N, 2), in pixels.
+        labels (np.ndarray | list, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+        texts (str | list[str], optional): Textual prompts, a list containing string objects.
 
     Returns:
-        (
+        (list[Results]): Output results filtered and determined by the provided prompts.
     """
     if bboxes is None and points is None and texts is None:
         return results
@@ -154,8 +154,8 @@ class FastSAMPredictor(SegmentationPredictor):
     Perform CLIP inference to calculate similarity between images and text prompts.
 
     Args:
-        images (
-        texts (
+        images (list[PIL.Image]): List of source images, each should be PIL.Image with RGB channel order.
+        texts (list[str]): List of prompt texts, each should be a string object.
 
     Returns:
         (torch.Tensor): Similarity matrix between given images and texts with shape (M, N).
ultralytics/models/nas/model.py
CHANGED
@@ -91,7 +91,7 @@ class NAS(Model):
         verbose (bool): Controls verbosity.
 
     Returns:
-        (
+        (dict[str, Any]): Model information dictionary.
     """
     return model_info(self.model, detailed=detailed, verbose=verbose, imgsz=640)
 
ultralytics/models/rtdetr/predict.py
CHANGED
@@ -47,7 +47,7 @@ class RTDETRPredictor(BasePredictor):
         orig_imgs (list | torch.Tensor): Original, unprocessed images.
 
     Returns:
-        results (
+        results (list[Results]): A list of Results objects containing the post-processed bounding boxes,
             confidence scores, and class labels.
     """
     if not isinstance(preds, (list, tuple)):  # list for PyTorch inference but list[0] Tensor for export inference
@@ -82,7 +82,7 @@ class RTDETRPredictor(BasePredictor):
         (640) and scale_filled.
 
     Args:
-        im (
+        im (list[np.ndarray] | torch.Tensor): Input images of shape (N, 3, H, W) for tensor,
             [(H, W, 3) x N] for list.
 
     Returns:
ultralytics/models/rtdetr/val.py
CHANGED
@@ -163,11 +163,11 @@ class RTDETRValidator(DetectionValidator):
     Apply Non-maximum suppression to prediction outputs.
 
     Args:
-        preds (torch.Tensor |
+        preds (torch.Tensor | list | tuple): Raw predictions from the model. If tensor, should have shape
             (batch_size, num_predictions, num_classes + 4) where last dimension contains bbox coords and class scores.
 
     Returns:
-        (
+        (list[dict[str, torch.Tensor]]): List of dictionaries for each image, each containing:
             - 'bboxes': Tensor of shape (N, 4) with bounding box coordinates
             - 'conf': Tensor of shape (N,) with confidence scores
             - 'cls': Tensor of shape (N,) with class indices
@@ -194,9 +194,9 @@ class RTDETRValidator(DetectionValidator):
     Serialize YOLO predictions to COCO json format.
 
     Args:
-        predn (
+        predn (dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys
             with bounding box coordinates, confidence scores, and class predictions.
-        pbatch (
+        pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
     """
     path = Path(pbatch["im_file"])
     stem = path.stem
ultralytics/models/sam/amg.py
CHANGED
@@ -19,8 +19,8 @@ def is_box_near_crop_edge(
 
     Args:
         boxes (torch.Tensor): Bounding boxes in XYXY format.
-        crop_box (
-        orig_box (
+        crop_box (list[int]): Crop box coordinates in [x0, y0, x1, y1] format.
+        orig_box (list[int]): Original image box coordinates in [x0, y0, x1, y1] format.
         atol (float, optional): Absolute tolerance for edge proximity detection.
 
     Returns:
@@ -53,7 +53,7 @@ def batch_iterator(batch_size: int, *args) -> Generator[list[Any]]:
         *args (Any): Variable length input iterables to batch. All iterables must have the same length.
 
     Yields:
-        (
+        (list[Any]): A list of batched elements from each input iterable.
 
     Examples:
         >>> data = [1, 2, 3, 4, 5]
@@ -121,13 +121,13 @@ def generate_crop_boxes(
     Generate crop boxes of varying sizes for multiscale image processing, with layered overlapping regions.
 
     Args:
-        im_size (
+        im_size (tuple[int, ...]): Height and width of the input image.
         n_layers (int): Number of layers to generate crop boxes for.
         overlap_ratio (float): Ratio of overlap between adjacent crop boxes.
 
     Returns:
-        crop_boxes (
-        layer_idxs (
+        crop_boxes (list[list[int]]): List of crop boxes in [x0, y0, x1, y1] format.
+        layer_idxs (list[int]): List of layer indices corresponding to each crop box.
 
     Examples:
         >>> im_size = (800, 1200)  # Height, width
ultralytics/models/sam/build.py
CHANGED
@@ -130,10 +130,10 @@ def _build_sam(
     Build a Segment Anything Model (SAM) with specified encoder parameters.
 
     Args:
-        encoder_embed_dim (int |
-        encoder_depth (int |
-        encoder_num_heads (int |
-        encoder_global_attn_indexes (
+        encoder_embed_dim (int | list[int]): Embedding dimension for the encoder.
+        encoder_depth (int | list[int]): Depth of the encoder.
+        encoder_num_heads (int | list[int]): Number of attention heads in the encoder.
+        encoder_global_attn_indexes (list[int] | None): Indexes for global attention in the encoder.
         checkpoint (str | None, optional): Path to the model checkpoint file.
         mobile_sam (bool, optional): Whether to build a Mobile-SAM model.
 
@@ -228,12 +228,12 @@ def _build_sam2(
 
     Args:
         encoder_embed_dim (int, optional): Embedding dimension for the encoder.
-        encoder_stages (
+        encoder_stages (list[int], optional): Number of blocks in each stage of the encoder.
         encoder_num_heads (int, optional): Number of attention heads in the encoder.
-        encoder_global_att_blocks (
-        encoder_backbone_channel_list (
-        encoder_window_spatial_size (
-        encoder_window_spec (
+        encoder_global_att_blocks (list[int], optional): Indices of global attention blocks in the encoder.
+        encoder_backbone_channel_list (list[int], optional): Channel dimensions for each level of the encoder backbone.
+        encoder_window_spatial_size (list[int], optional): Spatial size of the window for position embeddings.
+        encoder_window_spec (list[int], optional): Window specifications for each stage of the encoder.
         checkpoint (str | None, optional): Path to the checkpoint file for loading pre-trained weights.
 
     Returns:
ultralytics/models/sam/model.py
CHANGED
@@ -91,9 +91,9 @@ class SAM(Model):
         source (str | PIL.Image | np.ndarray): Path to the image or video file, or a PIL.Image object, or
             a np.ndarray object.
         stream (bool): If True, enables real-time streaming.
-        bboxes (
-        points (
-        labels (
+        bboxes (list[list[float]] | None): List of bounding box coordinates for prompted segmentation.
+        points (list[list[float]] | None): List of points for prompted segmentation.
+        labels (list[int] | None): List of labels for prompted segmentation.
         **kwargs (Any): Additional keyword arguments for prediction.
 
     Returns:
@@ -121,9 +121,9 @@ class SAM(Model):
         source (str | PIL.Image | np.ndarray | None): Path to the image or video file, or a PIL.Image
             object, or a np.ndarray object.
         stream (bool): If True, enables real-time streaming.
-        bboxes (
-        points (
-        labels (
+        bboxes (list[list[float]] | None): List of bounding box coordinates for prompted segmentation.
+        points (list[list[float]] | None): List of points for prompted segmentation.
+        labels (list[int] | None): List of labels for prompted segmentation.
         **kwargs (Any): Additional keyword arguments to be passed to the predict method.
 
     Returns:
@@ -160,7 +160,7 @@ class SAM(Model):
     Provide a mapping from the 'segment' task to its corresponding 'Predictor'.
 
     Returns:
-        (
+        (dict[str, dict[str, Type[Predictor]]]): A dictionary mapping the 'segment' task to its corresponding
             Predictor class. For SAM2 models, it maps to SAM2Predictor, otherwise to the standard Predictor.
 
     Examples:
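As with FastSAM above, the SAM docstring fixes only document the existing prompt types rather than change behavior. A brief sketch of the documented call (weight name and prompt values are illustrative):

    from ultralytics import SAM

    model = SAM("sam2.1_b.pt")
    # Box and point prompts; labels mark points as foreground (1) or background (0)
    results = model("path/to/image.jpg", bboxes=[[100, 120, 400, 360]], points=[[250, 240]], labels=[1])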
ultralytics/models/sam/modules/blocks.py
CHANGED
@@ -593,7 +593,7 @@ class MultiScaleBlock(nn.Module):
     norm1 (nn.Module): First normalization layer.
     window_size (int): Size of the window for partitioning.
     pool (nn.Module | None): Pooling layer for query downsampling.
-    q_stride (
+    q_stride (tuple[int, int] | None): Stride for query pooling.
     attn (MultiScaleAttention): Multi-scale attention module.
     drop_path (nn.Module): Drop path layer for regularization.
     norm2 (nn.Module): Second normalization layer.
@@ -934,7 +934,7 @@ class Block(nn.Module):
     use_rel_pos (bool): If True, uses relative positional embeddings in attention.
     rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
     window_size (int): Size of attention window. If 0, uses global attention.
-    input_size (
+    input_size (tuple[int, int] | None): Input resolution for calculating relative positional parameter size.
 
     Examples:
         >>> block = Block(dim=256, num_heads=8, window_size=7)
@@ -1026,7 +1026,7 @@ class REAttention(nn.Module):
     qkv_bias (bool): If True, adds a learnable bias to query, key, value projections.
     use_rel_pos (bool): If True, uses relative positional encodings.
     rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
-    input_size (
+    input_size (tuple[int, int] | None): Input resolution for calculating relative positional parameter size.
         Required if use_rel_pos is True.
 
     Examples:
@@ -1106,9 +1106,9 @@ class PatchEmbed(nn.Module):
     image data into a suitable format for subsequent transformer blocks.
 
     Args:
-        kernel_size (
-        stride (
-        padding (
+        kernel_size (tuple[int, int]): Size of the convolutional kernel for patch extraction.
+        stride (tuple[int, int]): Stride of the convolutional operation.
+        padding (tuple[int, int]): Padding applied to the input before convolution.
         in_chans (int): Number of input image channels.
         embed_dim (int): Dimensionality of the output patch embeddings.
 
ultralytics/models/sam/modules/decoders.py
CHANGED
@@ -329,7 +329,7 @@ class SAM2MaskDecoder(nn.Module):
     dense_prompt_embeddings (torch.Tensor): Embeddings of the mask inputs with shape (B, C, H, W).
     multimask_output (bool): Whether to return multiple masks or a single mask.
     repeat_image (bool): Flag to repeat the image embeddings.
-    high_res_features (
+    high_res_features (list[torch.Tensor] | None, optional): Optional high-resolution features.
 
     Returns:
         masks (torch.Tensor): Batched predicted masks with shape (B, N, H, W).
|