dgenerate_ultralytics_headless-8.3.134-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- dgenerate_ultralytics_headless-8.3.134.dist-info/METADATA +400 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/RECORD +272 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/WHEEL +5 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/entry_points.txt +3 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/licenses/LICENSE +661 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/top_level.txt +1 -0
- tests/__init__.py +22 -0
- tests/conftest.py +83 -0
- tests/test_cli.py +138 -0
- tests/test_cuda.py +215 -0
- tests/test_engine.py +131 -0
- tests/test_exports.py +236 -0
- tests/test_integrations.py +154 -0
- tests/test_python.py +694 -0
- tests/test_solutions.py +187 -0
- ultralytics/__init__.py +30 -0
- ultralytics/assets/bus.jpg +0 -0
- ultralytics/assets/zidane.jpg +0 -0
- ultralytics/cfg/__init__.py +1023 -0
- ultralytics/cfg/datasets/Argoverse.yaml +77 -0
- ultralytics/cfg/datasets/DOTAv1.5.yaml +37 -0
- ultralytics/cfg/datasets/DOTAv1.yaml +36 -0
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +68 -0
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +33 -0
- ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
- ultralytics/cfg/datasets/Objects365.yaml +443 -0
- ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
- ultralytics/cfg/datasets/VOC.yaml +106 -0
- ultralytics/cfg/datasets/VisDrone.yaml +77 -0
- ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
- ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
- ultralytics/cfg/datasets/carparts-seg.yaml +44 -0
- ultralytics/cfg/datasets/coco-pose.yaml +42 -0
- ultralytics/cfg/datasets/coco.yaml +118 -0
- ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
- ultralytics/cfg/datasets/coco128.yaml +101 -0
- ultralytics/cfg/datasets/coco8-multispectral.yaml +104 -0
- ultralytics/cfg/datasets/coco8-pose.yaml +26 -0
- ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
- ultralytics/cfg/datasets/coco8.yaml +101 -0
- ultralytics/cfg/datasets/crack-seg.yaml +22 -0
- ultralytics/cfg/datasets/dog-pose.yaml +24 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +38 -0
- ultralytics/cfg/datasets/dota8.yaml +35 -0
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
- ultralytics/cfg/datasets/lvis.yaml +1240 -0
- ultralytics/cfg/datasets/medical-pills.yaml +22 -0
- ultralytics/cfg/datasets/open-images-v7.yaml +666 -0
- ultralytics/cfg/datasets/package-seg.yaml +22 -0
- ultralytics/cfg/datasets/signature.yaml +21 -0
- ultralytics/cfg/datasets/tiger-pose.yaml +25 -0
- ultralytics/cfg/datasets/xView.yaml +155 -0
- ultralytics/cfg/default.yaml +127 -0
- ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +17 -0
- ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
- ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
- ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
- ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
- ultralytics/cfg/models/11/yolo11.yaml +50 -0
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
- ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
- ultralytics/cfg/models/12/yolo12-cls.yaml +32 -0
- ultralytics/cfg/models/12/yolo12-obb.yaml +48 -0
- ultralytics/cfg/models/12/yolo12-pose.yaml +49 -0
- ultralytics/cfg/models/12/yolo12-seg.yaml +48 -0
- ultralytics/cfg/models/12/yolo12.yaml +48 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +53 -0
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +45 -0
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +45 -0
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +57 -0
- ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
- ultralytics/cfg/models/v3/yolov3-spp.yaml +49 -0
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +40 -0
- ultralytics/cfg/models/v3/yolov3.yaml +49 -0
- ultralytics/cfg/models/v5/yolov5-p6.yaml +62 -0
- ultralytics/cfg/models/v5/yolov5.yaml +51 -0
- ultralytics/cfg/models/v6/yolov6.yaml +56 -0
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +45 -0
- ultralytics/cfg/models/v8/yoloe-v8.yaml +45 -0
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +28 -0
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +28 -0
- ultralytics/cfg/models/v8/yolov8-cls.yaml +32 -0
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +58 -0
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +60 -0
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +50 -0
- ultralytics/cfg/models/v8/yolov8-obb.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-p2.yaml +57 -0
- ultralytics/cfg/models/v8/yolov8-p6.yaml +59 -0
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +60 -0
- ultralytics/cfg/models/v8/yolov8-pose.yaml +50 -0
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +59 -0
- ultralytics/cfg/models/v8/yolov8-seg.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-world.yaml +51 -0
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8.yaml +49 -0
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9c.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9e.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
- ultralytics/cfg/trackers/botsort.yaml +22 -0
- ultralytics/cfg/trackers/bytetrack.yaml +14 -0
- ultralytics/data/__init__.py +26 -0
- ultralytics/data/annotator.py +66 -0
- ultralytics/data/augment.py +2945 -0
- ultralytics/data/base.py +438 -0
- ultralytics/data/build.py +258 -0
- ultralytics/data/converter.py +754 -0
- ultralytics/data/dataset.py +834 -0
- ultralytics/data/loaders.py +676 -0
- ultralytics/data/scripts/download_weights.sh +18 -0
- ultralytics/data/scripts/get_coco.sh +61 -0
- ultralytics/data/scripts/get_coco128.sh +18 -0
- ultralytics/data/scripts/get_imagenet.sh +52 -0
- ultralytics/data/split.py +125 -0
- ultralytics/data/split_dota.py +325 -0
- ultralytics/data/utils.py +777 -0
- ultralytics/engine/__init__.py +1 -0
- ultralytics/engine/exporter.py +1519 -0
- ultralytics/engine/model.py +1156 -0
- ultralytics/engine/predictor.py +502 -0
- ultralytics/engine/results.py +1840 -0
- ultralytics/engine/trainer.py +853 -0
- ultralytics/engine/tuner.py +243 -0
- ultralytics/engine/validator.py +377 -0
- ultralytics/hub/__init__.py +168 -0
- ultralytics/hub/auth.py +137 -0
- ultralytics/hub/google/__init__.py +176 -0
- ultralytics/hub/session.py +446 -0
- ultralytics/hub/utils.py +248 -0
- ultralytics/models/__init__.py +9 -0
- ultralytics/models/fastsam/__init__.py +7 -0
- ultralytics/models/fastsam/model.py +61 -0
- ultralytics/models/fastsam/predict.py +181 -0
- ultralytics/models/fastsam/utils.py +24 -0
- ultralytics/models/fastsam/val.py +40 -0
- ultralytics/models/nas/__init__.py +7 -0
- ultralytics/models/nas/model.py +102 -0
- ultralytics/models/nas/predict.py +58 -0
- ultralytics/models/nas/val.py +39 -0
- ultralytics/models/rtdetr/__init__.py +7 -0
- ultralytics/models/rtdetr/model.py +63 -0
- ultralytics/models/rtdetr/predict.py +84 -0
- ultralytics/models/rtdetr/train.py +85 -0
- ultralytics/models/rtdetr/val.py +191 -0
- ultralytics/models/sam/__init__.py +6 -0
- ultralytics/models/sam/amg.py +260 -0
- ultralytics/models/sam/build.py +358 -0
- ultralytics/models/sam/model.py +170 -0
- ultralytics/models/sam/modules/__init__.py +1 -0
- ultralytics/models/sam/modules/blocks.py +1129 -0
- ultralytics/models/sam/modules/decoders.py +515 -0
- ultralytics/models/sam/modules/encoders.py +854 -0
- ultralytics/models/sam/modules/memory_attention.py +299 -0
- ultralytics/models/sam/modules/sam.py +1006 -0
- ultralytics/models/sam/modules/tiny_encoder.py +1002 -0
- ultralytics/models/sam/modules/transformer.py +351 -0
- ultralytics/models/sam/modules/utils.py +394 -0
- ultralytics/models/sam/predict.py +1605 -0
- ultralytics/models/utils/__init__.py +1 -0
- ultralytics/models/utils/loss.py +455 -0
- ultralytics/models/utils/ops.py +268 -0
- ultralytics/models/yolo/__init__.py +7 -0
- ultralytics/models/yolo/classify/__init__.py +7 -0
- ultralytics/models/yolo/classify/predict.py +88 -0
- ultralytics/models/yolo/classify/train.py +233 -0
- ultralytics/models/yolo/classify/val.py +215 -0
- ultralytics/models/yolo/detect/__init__.py +7 -0
- ultralytics/models/yolo/detect/predict.py +124 -0
- ultralytics/models/yolo/detect/train.py +217 -0
- ultralytics/models/yolo/detect/val.py +451 -0
- ultralytics/models/yolo/model.py +354 -0
- ultralytics/models/yolo/obb/__init__.py +7 -0
- ultralytics/models/yolo/obb/predict.py +66 -0
- ultralytics/models/yolo/obb/train.py +81 -0
- ultralytics/models/yolo/obb/val.py +283 -0
- ultralytics/models/yolo/pose/__init__.py +7 -0
- ultralytics/models/yolo/pose/predict.py +79 -0
- ultralytics/models/yolo/pose/train.py +154 -0
- ultralytics/models/yolo/pose/val.py +394 -0
- ultralytics/models/yolo/segment/__init__.py +7 -0
- ultralytics/models/yolo/segment/predict.py +113 -0
- ultralytics/models/yolo/segment/train.py +123 -0
- ultralytics/models/yolo/segment/val.py +428 -0
- ultralytics/models/yolo/world/__init__.py +5 -0
- ultralytics/models/yolo/world/train.py +119 -0
- ultralytics/models/yolo/world/train_world.py +176 -0
- ultralytics/models/yolo/yoloe/__init__.py +22 -0
- ultralytics/models/yolo/yoloe/predict.py +169 -0
- ultralytics/models/yolo/yoloe/train.py +298 -0
- ultralytics/models/yolo/yoloe/train_seg.py +124 -0
- ultralytics/models/yolo/yoloe/val.py +191 -0
- ultralytics/nn/__init__.py +29 -0
- ultralytics/nn/autobackend.py +842 -0
- ultralytics/nn/modules/__init__.py +182 -0
- ultralytics/nn/modules/activation.py +53 -0
- ultralytics/nn/modules/block.py +1966 -0
- ultralytics/nn/modules/conv.py +712 -0
- ultralytics/nn/modules/head.py +880 -0
- ultralytics/nn/modules/transformer.py +713 -0
- ultralytics/nn/modules/utils.py +164 -0
- ultralytics/nn/tasks.py +1627 -0
- ultralytics/nn/text_model.py +351 -0
- ultralytics/solutions/__init__.py +41 -0
- ultralytics/solutions/ai_gym.py +116 -0
- ultralytics/solutions/analytics.py +252 -0
- ultralytics/solutions/config.py +106 -0
- ultralytics/solutions/distance_calculation.py +124 -0
- ultralytics/solutions/heatmap.py +127 -0
- ultralytics/solutions/instance_segmentation.py +84 -0
- ultralytics/solutions/object_blurrer.py +90 -0
- ultralytics/solutions/object_counter.py +195 -0
- ultralytics/solutions/object_cropper.py +84 -0
- ultralytics/solutions/parking_management.py +273 -0
- ultralytics/solutions/queue_management.py +93 -0
- ultralytics/solutions/region_counter.py +120 -0
- ultralytics/solutions/security_alarm.py +154 -0
- ultralytics/solutions/similarity_search.py +172 -0
- ultralytics/solutions/solutions.py +724 -0
- ultralytics/solutions/speed_estimation.py +110 -0
- ultralytics/solutions/streamlit_inference.py +196 -0
- ultralytics/solutions/templates/similarity-search.html +160 -0
- ultralytics/solutions/trackzone.py +88 -0
- ultralytics/solutions/vision_eye.py +68 -0
- ultralytics/trackers/__init__.py +7 -0
- ultralytics/trackers/basetrack.py +124 -0
- ultralytics/trackers/bot_sort.py +260 -0
- ultralytics/trackers/byte_tracker.py +480 -0
- ultralytics/trackers/track.py +125 -0
- ultralytics/trackers/utils/__init__.py +1 -0
- ultralytics/trackers/utils/gmc.py +376 -0
- ultralytics/trackers/utils/kalman_filter.py +493 -0
- ultralytics/trackers/utils/matching.py +157 -0
- ultralytics/utils/__init__.py +1435 -0
- ultralytics/utils/autobatch.py +106 -0
- ultralytics/utils/autodevice.py +174 -0
- ultralytics/utils/benchmarks.py +695 -0
- ultralytics/utils/callbacks/__init__.py +5 -0
- ultralytics/utils/callbacks/base.py +234 -0
- ultralytics/utils/callbacks/clearml.py +153 -0
- ultralytics/utils/callbacks/comet.py +552 -0
- ultralytics/utils/callbacks/dvc.py +205 -0
- ultralytics/utils/callbacks/hub.py +108 -0
- ultralytics/utils/callbacks/mlflow.py +138 -0
- ultralytics/utils/callbacks/neptune.py +140 -0
- ultralytics/utils/callbacks/raytune.py +43 -0
- ultralytics/utils/callbacks/tensorboard.py +132 -0
- ultralytics/utils/callbacks/wb.py +185 -0
- ultralytics/utils/checks.py +897 -0
- ultralytics/utils/dist.py +119 -0
- ultralytics/utils/downloads.py +499 -0
- ultralytics/utils/errors.py +43 -0
- ultralytics/utils/export.py +219 -0
- ultralytics/utils/files.py +221 -0
- ultralytics/utils/instance.py +499 -0
- ultralytics/utils/loss.py +813 -0
- ultralytics/utils/metrics.py +1356 -0
- ultralytics/utils/ops.py +885 -0
- ultralytics/utils/patches.py +143 -0
- ultralytics/utils/plotting.py +1011 -0
- ultralytics/utils/tal.py +416 -0
- ultralytics/utils/torch_utils.py +990 -0
- ultralytics/utils/triton.py +116 -0
- ultralytics/utils/tuner.py +159 -0
ultralytics/models/utils/ops.py
@@ -0,0 +1,268 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.optimize import linear_sum_assignment

from ultralytics.utils.metrics import bbox_iou
from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh


class HungarianMatcher(nn.Module):
    """
    A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
    end-to-end fashion.

    HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
    function that considers classification scores, bounding box coordinates, and optionally, mask predictions.

    Attributes:
        cost_gain (dict): Dictionary of cost coefficients: 'class', 'bbox', 'giou', 'mask', and 'dice'.
        use_fl (bool): Indicates whether to use Focal Loss for the classification cost calculation.
        with_mask (bool): Indicates whether the model makes mask predictions.
        num_sample_points (int): The number of sample points used in mask cost calculation.
        alpha (float): The alpha factor in Focal Loss calculation.
        gamma (float): The gamma factor in Focal Loss calculation.

    Methods:
        forward: Computes the assignment between predictions and ground truths for a batch.
        _cost_mask: Computes the mask cost and dice cost if masks are predicted.
    """

    def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
        """
        Initialize a HungarianMatcher module for optimal assignment of predicted and ground truth bounding boxes.

        The HungarianMatcher uses a cost function that considers classification scores, bounding box coordinates,
        and optionally mask predictions to perform optimal bipartite matching between predictions and ground truths.

        Args:
            cost_gain (dict, optional): Dictionary of cost coefficients for different components of the matching cost.
                Should contain keys 'class', 'bbox', 'giou', 'mask', and 'dice'.
            use_fl (bool, optional): Whether to use Focal Loss for the classification cost calculation.
            with_mask (bool, optional): Whether the model makes mask predictions.
            num_sample_points (int, optional): Number of sample points used in mask cost calculation.
            alpha (float, optional): Alpha factor in Focal Loss calculation.
            gamma (float, optional): Gamma factor in Focal Loss calculation.
        """
        super().__init__()
        if cost_gain is None:
            cost_gain = {"class": 1, "bbox": 5, "giou": 2, "mask": 1, "dice": 1}
        self.cost_gain = cost_gain
        self.use_fl = use_fl
        self.with_mask = with_mask
        self.num_sample_points = num_sample_points
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
        """
        Forward pass for HungarianMatcher. Computes costs based on prediction and ground truth and finds the optimal
        matching between predictions and ground truth based on these costs.

        Args:
            pred_bboxes (torch.Tensor): Predicted bounding boxes with shape (batch_size, num_queries, 4).
            pred_scores (torch.Tensor): Predicted scores with shape (batch_size, num_queries, num_classes).
            gt_cls (torch.Tensor): Ground truth classes with shape (num_gts, ).
            gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (num_gts, 4).
            gt_groups (List[int]): List of length equal to batch size, containing the number of ground truths for
                each image.
            masks (torch.Tensor, optional): Predicted masks with shape (batch_size, num_queries, height, width).
            gt_mask (List[torch.Tensor], optional): List of ground truth masks, each with shape (num_masks, Height, Width).

        Returns:
            (List[Tuple[torch.Tensor, torch.Tensor]]): A list of size batch_size, each element is a tuple (index_i, index_j), where:
                - index_i is the tensor of indices of the selected predictions (in order)
                - index_j is the tensor of indices of the corresponding selected ground truth targets (in order)
                For each batch element, it holds:
                    len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        bs, nq, nc = pred_scores.shape

        if sum(gt_groups) == 0:
            return [(torch.tensor([], dtype=torch.long), torch.tensor([], dtype=torch.long)) for _ in range(bs)]

        # We flatten to compute the cost matrices in a batch
        # (batch_size * num_queries, num_classes)
        pred_scores = pred_scores.detach().view(-1, nc)
        pred_scores = F.sigmoid(pred_scores) if self.use_fl else F.softmax(pred_scores, dim=-1)
        # (batch_size * num_queries, 4)
        pred_bboxes = pred_bboxes.detach().view(-1, 4)

        # Compute the classification cost
        pred_scores = pred_scores[:, gt_cls]
        if self.use_fl:
            neg_cost_class = (1 - self.alpha) * (pred_scores**self.gamma) * (-(1 - pred_scores + 1e-8).log())
            pos_cost_class = self.alpha * ((1 - pred_scores) ** self.gamma) * (-(pred_scores + 1e-8).log())
            cost_class = pos_cost_class - neg_cost_class
        else:
            cost_class = -pred_scores

        # Compute the L1 cost between boxes
        cost_bbox = (pred_bboxes.unsqueeze(1) - gt_bboxes.unsqueeze(0)).abs().sum(-1)  # (bs*num_queries, num_gt)

        # Compute the GIoU cost between boxes, (bs*num_queries, num_gt)
        cost_giou = 1.0 - bbox_iou(pred_bboxes.unsqueeze(1), gt_bboxes.unsqueeze(0), xywh=True, GIoU=True).squeeze(-1)

        # Final cost matrix
        C = (
            self.cost_gain["class"] * cost_class
            + self.cost_gain["bbox"] * cost_bbox
            + self.cost_gain["giou"] * cost_giou
        )
        # Compute the mask cost and dice cost
        if self.with_mask:
            C += self._cost_mask(bs, gt_groups, masks, gt_mask)

        # Set invalid values (NaNs and infinities) to 0 (fixes ValueError: matrix contains invalid numeric entries)
        C[C.isnan() | C.isinf()] = 0.0

        C = C.view(bs, nq, -1).cpu()
        indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(gt_groups, -1))]
        gt_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)  # (idx for queries, idx for gt)
        return [
            (torch.tensor(i, dtype=torch.long), torch.tensor(j, dtype=torch.long) + gt_groups[k])
            for k, (i, j) in enumerate(indices)
        ]

    # This function is for future RT-DETR Segment models
    # def _cost_mask(self, bs, num_gts, masks=None, gt_mask=None):
    #     assert masks is not None and gt_mask is not None, 'Make sure the input has `mask` and `gt_mask`'
    #     # all masks share the same set of points for efficient matching
    #     sample_points = torch.rand([bs, 1, self.num_sample_points, 2])
    #     sample_points = 2.0 * sample_points - 1.0
    #
    #     out_mask = F.grid_sample(masks.detach(), sample_points, align_corners=False).squeeze(-2)
    #     out_mask = out_mask.flatten(0, 1)
    #
    #     tgt_mask = torch.cat(gt_mask).unsqueeze(1)
    #     sample_points = torch.cat([a.repeat(b, 1, 1, 1) for a, b in zip(sample_points, num_gts) if b > 0])
    #     tgt_mask = F.grid_sample(tgt_mask, sample_points, align_corners=False).squeeze([1, 2])
    #
    #     with torch.amp.autocast("cuda", enabled=False):
    #         # binary cross entropy cost
    #         pos_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.ones_like(out_mask), reduction='none')
    #         neg_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.zeros_like(out_mask), reduction='none')
    #         cost_mask = torch.matmul(pos_cost_mask, tgt_mask.T) + torch.matmul(neg_cost_mask, 1 - tgt_mask.T)
    #         cost_mask /= self.num_sample_points
    #
    #         # dice cost
    #         out_mask = F.sigmoid(out_mask)
    #         numerator = 2 * torch.matmul(out_mask, tgt_mask.T)
    #         denominator = out_mask.sum(-1, keepdim=True) + tgt_mask.sum(-1).unsqueeze(0)
    #         cost_dice = 1 - (numerator + 1) / (denominator + 1)
    #
    #         C = self.cost_gain['mask'] * cost_mask + self.cost_gain['dice'] * cost_dice
    #         return C


def get_cdn_group(
    batch, num_classes, num_queries, class_embed, num_dn=100, cls_noise_ratio=0.5, box_noise_scale=1.0, training=False
):
    """
    Get contrastive denoising training group with positive and negative samples from ground truths.

    Args:
        batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape (num_gts, )), 'gt_bboxes'
            (torch.Tensor with shape (num_gts, 4)), 'gt_groups' (List[int]) which is a list of batch size length
            indicating the number of gts of each image.
        num_classes (int): Number of classes.
        num_queries (int): Number of queries.
        class_embed (torch.Tensor): Embedding weights to map class labels to embedding space.
        num_dn (int, optional): Number of denoising queries.
        cls_noise_ratio (float, optional): Noise ratio for class labels.
        box_noise_scale (float, optional): Noise scale for bounding box coordinates.
        training (bool, optional): If it's in training mode.

    Returns:
        padding_cls (Optional[torch.Tensor]): The modified class embeddings for denoising.
        padding_bbox (Optional[torch.Tensor]): The modified bounding boxes for denoising.
        attn_mask (Optional[torch.Tensor]): The attention mask for denoising.
        dn_meta (Optional[Dict]): Meta information for denoising.
    """
    if (not training) or num_dn <= 0 or batch is None:
        return None, None, None, None
    gt_groups = batch["gt_groups"]
    total_num = sum(gt_groups)
    max_nums = max(gt_groups)
    if max_nums == 0:
        return None, None, None, None

    num_group = num_dn // max_nums
    num_group = 1 if num_group == 0 else num_group
    # Pad gt to max_num of a batch
    bs = len(gt_groups)
    gt_cls = batch["cls"]  # (bs*num, )
    gt_bbox = batch["bboxes"]  # bs*num, 4
    b_idx = batch["batch_idx"]

    # Each group has positive and negative queries.
    dn_cls = gt_cls.repeat(2 * num_group)  # (2*num_group*bs*num, )
    dn_bbox = gt_bbox.repeat(2 * num_group, 1)  # 2*num_group*bs*num, 4
    dn_b_idx = b_idx.repeat(2 * num_group).view(-1)  # (2*num_group*bs*num, )

    # Positive and negative mask
    # (bs*num*num_group, ), the second total_num*num_group part as negative samples
    neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num

    if cls_noise_ratio > 0:
        # Half of bbox prob
        mask = torch.rand(dn_cls.shape) < (cls_noise_ratio * 0.5)
        idx = torch.nonzero(mask).squeeze(-1)
        # Randomly put a new one here
        new_label = torch.randint_like(idx, 0, num_classes, dtype=dn_cls.dtype, device=dn_cls.device)
        dn_cls[idx] = new_label

    if box_noise_scale > 0:
        known_bbox = xywh2xyxy(dn_bbox)

        diff = (dn_bbox[..., 2:] * 0.5).repeat(1, 2) * box_noise_scale  # 2*num_group*bs*num, 4

        rand_sign = torch.randint_like(dn_bbox, 0, 2) * 2.0 - 1.0
        rand_part = torch.rand_like(dn_bbox)
        rand_part[neg_idx] += 1.0
        rand_part *= rand_sign
        known_bbox += rand_part * diff
        known_bbox.clip_(min=0.0, max=1.0)
        dn_bbox = xyxy2xywh(known_bbox)
        dn_bbox = torch.logit(dn_bbox, eps=1e-6)  # inverse sigmoid

    num_dn = int(max_nums * 2 * num_group)  # total denoising queries
    # class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)])
    dn_cls_embed = class_embed[dn_cls]  # bs*num * 2 * num_group, 256
    padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
    padding_bbox = torch.zeros(bs, num_dn, 4, device=gt_bbox.device)

    map_indices = torch.cat([torch.tensor(range(num), dtype=torch.long) for num in gt_groups])
    pos_idx = torch.stack([map_indices + max_nums * i for i in range(num_group)], dim=0)

    map_indices = torch.cat([map_indices + max_nums * i for i in range(2 * num_group)])
    padding_cls[(dn_b_idx, map_indices)] = dn_cls_embed
    padding_bbox[(dn_b_idx, map_indices)] = dn_bbox

    tgt_size = num_dn + num_queries
    attn_mask = torch.zeros([tgt_size, tgt_size], dtype=torch.bool)
    # Match query cannot see the reconstruct
    attn_mask[num_dn:, :num_dn] = True
    # Reconstruct cannot see each other
    for i in range(num_group):
        if i == 0:
            attn_mask[max_nums * 2 * i : max_nums * 2 * (i + 1), max_nums * 2 * (i + 1) : num_dn] = True
        if i == num_group - 1:
            attn_mask[max_nums * 2 * i : max_nums * 2 * (i + 1), : max_nums * i * 2] = True
        else:
            attn_mask[max_nums * 2 * i : max_nums * 2 * (i + 1), max_nums * 2 * (i + 1) : num_dn] = True
            attn_mask[max_nums * 2 * i : max_nums * 2 * (i + 1), : max_nums * 2 * i] = True
    dn_meta = {
        "dn_pos_idx": [p.reshape(-1) for p in pos_idx.cpu().split(list(gt_groups), dim=1)],
        "dn_num_group": num_group,
        "dn_num_split": [num_dn, num_queries],
    }

    return (
        padding_cls.to(class_embed.device),
        padding_bbox.to(class_embed.device),
        attn_mask.to(class_embed.device),
        dn_meta,
    )
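To make the inputs concrete, here is a minimal sketch (not part of the package diff) that exercises `HungarianMatcher` and `get_cdn_group` on random toy tensors; every shape, value, and name below is illustrative and simply follows the docstrings above.

```python
# Illustrative sketch only: toy inputs for HungarianMatcher and get_cdn_group.
import torch

from ultralytics.models.utils.ops import HungarianMatcher, get_cdn_group

matcher = HungarianMatcher()                # default cost gains: class=1, bbox=5, giou=2
bs, nq, nc = 2, 5, 3                        # batch size, queries, classes (toy values)
pred_bboxes = torch.rand(bs, nq, 4)         # normalized xywh boxes
pred_scores = torch.randn(bs, nq, nc)       # raw class logits (sigmoid applied internally when use_fl=True)
gt_bboxes = torch.rand(3, 4)                # 3 ground truths across the whole batch
gt_cls = torch.tensor([0, 2, 1])
gt_groups = [1, 2]                          # image 0 has 1 gt, image 1 has 2

indices = matcher(pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups)
for k, (i, j) in enumerate(indices):
    print(f"image {k}: queries {i.tolist()} -> gts {j.tolist()}")  # gt indices are batch-global

# Contrastive denoising group for RT-DETR-style training; dict keys follow the docstring.
batch = {"cls": gt_cls, "bboxes": gt_bboxes, "batch_idx": torch.tensor([0, 1, 1]), "gt_groups": gt_groups}
class_embed = torch.randn(nc, 256)          # (num_classes, embed_dim), illustrative
padding_cls, padding_bbox, attn_mask, dn_meta = get_cdn_group(
    batch, num_classes=nc, num_queries=300, class_embed=class_embed, num_dn=100, training=True
)
print(padding_cls.shape, padding_bbox.shape, attn_mask.shape, dn_meta["dn_num_group"])
```

The returned `attn_mask` blocks the matching queries from attending to the denoising queries and keeps denoising groups from seeing each other, exactly as the loop in `get_cdn_group` constructs it.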
ultralytics/models/yolo/__init__.py
@@ -0,0 +1,7 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from ultralytics.models.yolo import classify, detect, obb, pose, segment, world, yoloe

from .model import YOLO, YOLOE, YOLOWorld

__all__ = "classify", "segment", "detect", "pose", "obb", "world", "yoloe", "YOLO", "YOLOWorld", "YOLOE"
ultralytics/models/yolo/classify/__init__.py
@@ -0,0 +1,7 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from ultralytics.models.yolo.classify.predict import ClassificationPredictor
from ultralytics.models.yolo.classify.train import ClassificationTrainer
from ultralytics.models.yolo.classify.val import ClassificationValidator

__all__ = "ClassificationPredictor", "ClassificationTrainer", "ClassificationValidator"
ultralytics/models/yolo/classify/predict.py
@@ -0,0 +1,88 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

import cv2
import torch
from PIL import Image

from ultralytics.engine.predictor import BasePredictor
from ultralytics.engine.results import Results
from ultralytics.utils import DEFAULT_CFG, ops


class ClassificationPredictor(BasePredictor):
    """
    A class extending the BasePredictor class for prediction based on a classification model.

    This predictor handles the specific requirements of classification models, including preprocessing images
    and postprocessing predictions to generate classification results.

    Attributes:
        args (dict): Configuration arguments for the predictor.
        _legacy_transform_name (str): Name of the legacy transform class for backward compatibility.

    Methods:
        preprocess: Convert input images to model-compatible format.
        postprocess: Process model predictions into Results objects.

    Notes:
        - Torchvision classification models can also be passed to the 'model' argument, i.e. model='resnet18'.

    Examples:
        >>> from ultralytics.utils import ASSETS
        >>> from ultralytics.models.yolo.classify import ClassificationPredictor
        >>> args = dict(model="yolo11n-cls.pt", source=ASSETS)
        >>> predictor = ClassificationPredictor(overrides=args)
        >>> predictor.predict_cli()
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """
        Initialize the ClassificationPredictor with the specified configuration and set task to 'classify'.

        This constructor initializes a ClassificationPredictor instance, which extends BasePredictor for classification
        tasks. It ensures the task is set to 'classify' regardless of input configuration.

        Args:
            cfg (dict): Default configuration dictionary containing prediction settings. Defaults to DEFAULT_CFG.
            overrides (dict, optional): Configuration overrides that take precedence over cfg.
            _callbacks (list, optional): List of callback functions to be executed during prediction.
        """
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = "classify"
        self._legacy_transform_name = "ultralytics.yolo.data.augment.ToTensor"

    def preprocess(self, img):
        """Convert input images to model-compatible tensor format with appropriate normalization."""
        if not isinstance(img, torch.Tensor):
            is_legacy_transform = any(
                self._legacy_transform_name in str(transform) for transform in self.transforms.transforms
            )
            if is_legacy_transform:  # to handle legacy transforms
                img = torch.stack([self.transforms(im) for im in img], dim=0)
            else:
                img = torch.stack(
                    [self.transforms(Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))) for im in img], dim=0
                )
        img = (img if isinstance(img, torch.Tensor) else torch.from_numpy(img)).to(self.model.device)
        return img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32

    def postprocess(self, preds, img, orig_imgs):
        """
        Process predictions to return Results objects with classification probabilities.

        Args:
            preds (torch.Tensor): Raw predictions from the model.
            img (torch.Tensor): Input images after preprocessing.
            orig_imgs (List[np.ndarray] | torch.Tensor): Original images before preprocessing.

        Returns:
            (List[Results]): List of Results objects containing classification results for each image.
        """
        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        preds = preds[0] if isinstance(preds, (list, tuple)) else preds
        return [
            Results(orig_img, path=img_path, names=self.model.names, probs=pred)
            for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0])
        ]
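Alongside the `predict_cli()` example in the docstring, here is a hedged sketch of the same predictor driven through the public `YOLO` API; the weight and asset names are the ones the docstring and the package's assets directory already use.

```python
# Illustrative sketch only: ClassificationPredictor via the high-level YOLO API.
from ultralytics import YOLO
from ultralytics.utils import ASSETS

model = YOLO("yolo11n-cls.pt")               # classification weights; task resolves to "classify"
results = model.predict(ASSETS / "bus.jpg")  # preprocess() above handles BGR->RGB and tensor conversion
probs = results[0].probs                     # populated by postprocess() above
print(probs.top1, float(probs.top1conf))     # top-1 class index and its confidence
```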
ultralytics/models/yolo/classify/train.py
@@ -0,0 +1,233 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from copy import copy

import torch

from ultralytics.data import ClassificationDataset, build_dataloader
from ultralytics.engine.trainer import BaseTrainer
from ultralytics.models import yolo
from ultralytics.nn.tasks import ClassificationModel
from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
from ultralytics.utils.plotting import plot_images, plot_results
from ultralytics.utils.torch_utils import is_parallel, strip_optimizer, torch_distributed_zero_first


class ClassificationTrainer(BaseTrainer):
    """
    A class extending the BaseTrainer class for training based on a classification model.

    This trainer handles the training process for image classification tasks, supporting both YOLO classification models
    and torchvision models.

    Attributes:
        model (ClassificationModel): The classification model to be trained.
        data (dict): Dictionary containing dataset information including class names and number of classes.
        loss_names (List[str]): Names of the loss functions used during training.
        validator (ClassificationValidator): Validator instance for model evaluation.

    Methods:
        set_model_attributes: Set the model's class names from the loaded dataset.
        get_model: Return a modified PyTorch model configured for training.
        setup_model: Load, create or download model for classification.
        build_dataset: Create a ClassificationDataset instance.
        get_dataloader: Return PyTorch DataLoader with transforms for image preprocessing.
        preprocess_batch: Preprocess a batch of images and classes.
        progress_string: Return a formatted string showing training progress.
        get_validator: Return an instance of ClassificationValidator.
        label_loss_items: Return a loss dict with labelled training loss items.
        plot_metrics: Plot metrics from a CSV file.
        final_eval: Evaluate trained model and save validation results.
        plot_training_samples: Plot training samples with their annotations.

    Examples:
        >>> from ultralytics.models.yolo.classify import ClassificationTrainer
        >>> args = dict(model="yolo11n-cls.pt", data="imagenet10", epochs=3)
        >>> trainer = ClassificationTrainer(overrides=args)
        >>> trainer.train()
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """
        Initialize a ClassificationTrainer object.

        This constructor sets up a trainer for image classification tasks, configuring the task type and default
        image size if not specified.

        Args:
            cfg (dict, optional): Default configuration dictionary containing training parameters.
            overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
            _callbacks (list, optional): List of callback functions to be executed during training.

        Examples:
            >>> from ultralytics.models.yolo.classify import ClassificationTrainer
            >>> args = dict(model="yolo11n-cls.pt", data="imagenet10", epochs=3)
            >>> trainer = ClassificationTrainer(overrides=args)
            >>> trainer.train()
        """
        if overrides is None:
            overrides = {}
        overrides["task"] = "classify"
        if overrides.get("imgsz") is None:
            overrides["imgsz"] = 224
        super().__init__(cfg, overrides, _callbacks)

    def set_model_attributes(self):
        """Set the YOLO model's class names from the loaded dataset."""
        self.model.names = self.data["names"]

    def get_model(self, cfg=None, weights=None, verbose=True):
        """
        Return a modified PyTorch model configured for training YOLO.

        Args:
            cfg (Any): Model configuration.
            weights (Any): Pre-trained model weights.
            verbose (bool): Whether to display model information.

        Returns:
            (ClassificationModel): Configured PyTorch model for classification.
        """
        model = ClassificationModel(cfg, nc=self.data["nc"], ch=self.data["channels"], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)

        for m in model.modules():
            if not self.args.pretrained and hasattr(m, "reset_parameters"):
                m.reset_parameters()
            if isinstance(m, torch.nn.Dropout) and self.args.dropout:
                m.p = self.args.dropout  # set dropout
        for p in model.parameters():
            p.requires_grad = True  # for training
        return model

    def setup_model(self):
        """
        Load, create or download model for classification tasks.

        Returns:
            (Any): Model checkpoint if applicable, otherwise None.
        """
        import torchvision  # scope for faster 'import ultralytics'

        if str(self.model) in torchvision.models.__dict__:
            self.model = torchvision.models.__dict__[self.model](
                weights="IMAGENET1K_V1" if self.args.pretrained else None
            )
            ckpt = None
        else:
            ckpt = super().setup_model()
        ClassificationModel.reshape_outputs(self.model, self.data["nc"])
        return ckpt

    def build_dataset(self, img_path, mode="train", batch=None):
        """
        Create a ClassificationDataset instance given an image path and mode.

        Args:
            img_path (str): Path to the dataset images.
            mode (str): Dataset mode ('train', 'val', or 'test').
            batch (Any): Batch information (unused in this implementation).

        Returns:
            (ClassificationDataset): Dataset for the specified mode.
        """
        return ClassificationDataset(root=img_path, args=self.args, augment=mode == "train", prefix=mode)

    def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
        """
        Return PyTorch DataLoader with transforms to preprocess images.

        Args:
            dataset_path (str): Path to the dataset.
            batch_size (int): Number of images per batch.
            rank (int): Process rank for distributed training.
            mode (str): 'train', 'val', or 'test' mode.

        Returns:
            (torch.utils.data.DataLoader): DataLoader for the specified dataset and mode.
        """
        with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
            dataset = self.build_dataset(dataset_path, mode)

        loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank)
        # Attach inference transforms
        if mode != "train":
            if is_parallel(self.model):
                self.model.module.transforms = loader.dataset.torch_transforms
            else:
                self.model.transforms = loader.dataset.torch_transforms
        return loader

    def preprocess_batch(self, batch):
        """Preprocesses a batch of images and classes."""
        batch["img"] = batch["img"].to(self.device)
        batch["cls"] = batch["cls"].to(self.device)
        return batch

    def progress_string(self):
        """Returns a formatted string showing training progress."""
        return ("\n" + "%11s" * (4 + len(self.loss_names))) % (
            "Epoch",
            "GPU_mem",
            *self.loss_names,
            "Instances",
            "Size",
        )

    def get_validator(self):
        """Returns an instance of ClassificationValidator for validation."""
        self.loss_names = ["loss"]
        return yolo.classify.ClassificationValidator(
            self.test_loader, self.save_dir, args=copy(self.args), _callbacks=self.callbacks
        )

    def label_loss_items(self, loss_items=None, prefix="train"):
        """
        Return a loss dict with labelled training loss items tensor.

        Args:
            loss_items (torch.Tensor, optional): Loss tensor items.
            prefix (str): Prefix to prepend to loss names.

        Returns:
            (Dict[str, float] | List[str]): Dictionary of loss items or list of loss keys if loss_items is None.
        """
        keys = [f"{prefix}/{x}" for x in self.loss_names]
        if loss_items is None:
            return keys
        loss_items = [round(float(loss_items), 5)]
        return dict(zip(keys, loss_items))

    def plot_metrics(self):
        """Plot metrics from a CSV file."""
        plot_results(file=self.csv, classify=True, on_plot=self.on_plot)  # save results.png

    def final_eval(self):
        """Evaluate trained model and save validation results."""
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f"\nValidating {f}...")
                    self.validator.args.data = self.args.data
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=f)
                    self.metrics.pop("fitness", None)
                    self.run_callbacks("on_fit_epoch_end")

    def plot_training_samples(self, batch, ni):
        """
        Plot training samples with their annotations.

        Args:
            batch (Dict[str, torch.Tensor]): Batch containing images and class labels.
            ni (int): Number of iterations.
        """
        plot_images(
            images=batch["img"],
            batch_idx=torch.arange(len(batch["img"])),
            cls=batch["cls"].view(-1),  # warning: use .view(), not .squeeze() for Classify models
            fname=self.save_dir / f"train_batch{ni}.jpg",
            on_plot=self.on_plot,
        )
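As `setup_model` above shows, the trainer also accepts a bare torchvision model name. A hedged sketch under that assumption; "resnet18" and the "imagenet10" dataset are illustrative names taken from torchvision's model registry and the docstrings above.

```python
# Illustrative sketch only: training a torchvision backbone with ClassificationTrainer.
from ultralytics.models.yolo.classify import ClassificationTrainer

# "resnet18" resolves through torchvision.models.__dict__ in setup_model();
# reshape_outputs() then adapts the classification head to the dataset's class count.
args = dict(model="resnet18", data="imagenet10", epochs=1, imgsz=224)
trainer = ClassificationTrainer(overrides=args)
trainer.train()
```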