dgenerate-ultralytics-headless 8.4.6__py3-none-any.whl → 8.4.8__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (37)
  1. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/METADATA +3 -3
  2. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/RECORD +37 -36
  3. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/WHEEL +1 -1
  4. tests/test_cli.py +10 -3
  5. tests/test_exports.py +64 -43
  6. tests/test_python.py +40 -11
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +5 -4
  9. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  10. ultralytics/cfg/default.yaml +2 -1
  11. ultralytics/data/augment.py +8 -0
  12. ultralytics/data/converter.py +32 -9
  13. ultralytics/data/utils.py +2 -2
  14. ultralytics/engine/exporter.py +10 -6
  15. ultralytics/engine/predictor.py +5 -0
  16. ultralytics/engine/trainer.py +6 -4
  17. ultralytics/engine/tuner.py +2 -2
  18. ultralytics/engine/validator.py +5 -0
  19. ultralytics/models/sam/predict.py +2 -2
  20. ultralytics/models/yolo/classify/train.py +14 -1
  21. ultralytics/models/yolo/detect/train.py +8 -4
  22. ultralytics/models/yolo/pose/train.py +2 -1
  23. ultralytics/models/yolo/world/train_world.py +21 -1
  24. ultralytics/models/yolo/yoloe/train.py +1 -2
  25. ultralytics/nn/autobackend.py +1 -1
  26. ultralytics/nn/modules/head.py +13 -2
  27. ultralytics/nn/tasks.py +18 -0
  28. ultralytics/solutions/security_alarm.py +1 -1
  29. ultralytics/utils/benchmarks.py +3 -9
  30. ultralytics/utils/callbacks/wb.py +6 -1
  31. ultralytics/utils/loss.py +18 -9
  32. ultralytics/utils/patches.py +42 -0
  33. ultralytics/utils/tal.py +15 -5
  34. ultralytics/utils/torch_utils.py +1 -1
  35. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/entry_points.txt +0 -0
  36. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
  37. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/top_level.txt +0 -0
ultralytics/cfg/datasets/coco12-formats.yaml ADDED
@@ -0,0 +1,101 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # COCO12-Formats dataset (12 images testing all supported image formats) by Ultralytics
+ # Documentation: https://docs.ultralytics.com/datasets/detect/coco12-formats/
+ # Example usage: yolo train data=coco12-formats.yaml
+ # parent
+ # ├── ultralytics
+ # └── datasets
+ #     └── coco12-formats ← downloads here (1 MB)
+
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+ path: coco12-formats # dataset root dir
+ train: images/train # train images (relative to 'path') 6 images
+ val: images/val # val images (relative to 'path') 6 images
+ test: # test images (optional)
+
+ # Classes
+ names:
+   0: person
+   1: bicycle
+   2: car
+   3: motorcycle
+   4: airplane
+   5: bus
+   6: train
+   7: truck
+   8: boat
+   9: traffic light
+   10: fire hydrant
+   11: stop sign
+   12: parking meter
+   13: bench
+   14: bird
+   15: cat
+   16: dog
+   17: horse
+   18: sheep
+   19: cow
+   20: elephant
+   21: bear
+   22: zebra
+   23: giraffe
+   24: backpack
+   25: umbrella
+   26: handbag
+   27: tie
+   28: suitcase
+   29: frisbee
+   30: skis
+   31: snowboard
+   32: sports ball
+   33: kite
+   34: baseball bat
+   35: baseball glove
+   36: skateboard
+   37: surfboard
+   38: tennis racket
+   39: bottle
+   40: wine glass
+   41: cup
+   42: fork
+   43: knife
+   44: spoon
+   45: bowl
+   46: banana
+   47: apple
+   48: sandwich
+   49: orange
+   50: broccoli
+   51: carrot
+   52: hot dog
+   53: pizza
+   54: donut
+   55: cake
+   56: chair
+   57: couch
+   58: potted plant
+   59: bed
+   60: dining table
+   61: toilet
+   62: tv
+   63: laptop
+   64: mouse
+   65: remote
+   66: keyboard
+   67: cell phone
+   68: microwave
+   69: oven
+   70: toaster
+   71: sink
+   72: refrigerator
+   73: book
+   74: clock
+   75: vase
+   76: scissors
+   77: teddy bear
+   78: hair drier
+   79: toothbrush
+
+ # Download script/URL (optional)
+ download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco12-formats.zip
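As a quick orientation for the new dataset file, a minimal training sketch (the model name and epoch count are illustrative; per the YAML above, the ~1 MB archive auto-downloads on first use):

```python
from ultralytics import YOLO

# Hypothetical smoke test of the new dataset; any detection checkpoint works.
model = YOLO("yolo11n.pt")
model.train(data="coco12-formats.yaml", epochs=1, imgsz=640)
```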
ultralytics/cfg/default.yaml CHANGED
@@ -36,7 +36,7 @@ amp: True # (bool) Automatic Mixed Precision (AMP) training; True runs AMP capab
  fraction: 1.0 # (float) fraction of training dataset to use (1.0 = all)
  profile: False # (bool) profile ONNX/TensorRT speeds during training for loggers
  freeze: # (int | list, optional) freeze first N layers (int) or specific layer indices (list)
- multi_scale: 0.0 # (float) multiscale training by varying image size
+ multi_scale: 0.0 # (float) multi-scale range as a fraction of imgsz; sizes are rounded to stride multiples
  compile: False # (bool | str) enable torch.compile() backend='inductor'; True="default", False=off, or "default|reduce-overhead|max-autotune-no-cudagraphs"

  # Segmentation
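The reworded comment reflects a semantics change: multi_scale is now a ± fraction of imgsz rather than a probability. A hedged sketch of a user-facing call under the new meaning (dataset and model names illustrative):

```python
from ultralytics import YOLO

# Assuming the new semantics: multi_scale=0.5 with imgsz=640 samples training
# sizes from roughly 320 to 960, each rounded down to a stride multiple.
YOLO("yolo11n.pt").train(data="coco8.yaml", imgsz=640, multi_scale=0.5, epochs=1)
```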
@@ -56,6 +56,7 @@ max_det: 300 # (int) maximum number of detections per image
  half: False # (bool) use half precision (FP16) if supported
  dnn: False # (bool) use OpenCV DNN for ONNX inference
  plots: True # (bool) save plots and images during train/val
+ end2end: # (bool, optional) whether to use the end2end head (YOLO26, YOLOv10) for predict/val/export
 
  # Predict settings -----------------------------------------------------------------------------------------------------
  source: # (str, optional) path/dir/URL/stream for images or videos; e.g. 'ultralytics/assets' or '0' for webcam
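The new end2end key is a tri-state (unset = keep the model's default) and is threaded through the predictor, validator, and exporter hunks below. A sketch of how it might be toggled from the Python API, assuming NMS-free-capable weights (the checkpoint name is illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolo26n.pt")  # hypothetical end2end-capable checkpoint
model.predict("bus.jpg", end2end=True, max_det=100, agnostic_nms=True)  # NMS-free head
model.val(data="coco8.yaml", end2end=False)  # force the classic NMS path instead
```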
ultralytics/data/augment.py CHANGED
@@ -2066,7 +2066,15 @@ class Format:
  if self.mask_overlap:
  sem_masks = cls_tensor[masks[0].long() - 1] # (H, W) from (1, H, W) instance indices
  else:
+ # Create sem_masks consistent with mask_overlap=True
  sem_masks = (masks * cls_tensor[:, None, None]).max(0).values # (H, W) from (N, H, W) binary
+ overlap = masks.sum(dim=0) > 1 # (H, W)
+ if overlap.any():
+ weights = masks.sum(axis=(1, 2))
+ weighted_masks = masks * weights[:, None, None] # (N, H, W)
+ weighted_masks[masks == 0] = weights.max() + 1 # handle background
+ smallest_idx = weighted_masks.argmin(dim=0) # (H, W)
+ sem_masks[overlap] = cls_tensor[smallest_idx[overlap]]
  else:
  masks = torch.zeros(
  1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
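The added branch resolves overlapping instances the same way mask_overlap=True does: the smallest instance wins the pixel. A toy reproduction of the rule (class ids chosen so a plain max() would give the wrong answer):

```python
import torch

masks = torch.tensor(
    [
        [[1, 1, 1], [1, 1, 1], [0, 0, 0]],  # large instance (area 6)
        [[0, 1, 0], [0, 1, 0], [0, 1, 0]],  # small instance (area 3)
    ],
    dtype=torch.float32,
)
cls_tensor = torch.tensor([7.0, 2.0])  # large = class 7, small = class 2

sem_masks = (masks * cls_tensor[:, None, None]).max(0).values  # naive max: class 7 wins overlaps
overlap = masks.sum(dim=0) > 1  # pixels covered by more than one instance
weights = masks.sum(dim=(1, 2))  # instance areas
weighted = masks * weights[:, None, None]
weighted[masks == 0] = weights.max() + 1  # background never wins the argmin
sem_masks[overlap] = cls_tensor[weighted.argmin(dim=0)[overlap]]
print(sem_masks)  # overlapping pixels (0,1) and (1,1) now take class 2, the smaller instance
```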
ultralytics/data/converter.py CHANGED
@@ -796,6 +796,17 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
  # Check if this is a classification dataset
  is_classification = dataset_record.get("task") == "classify"
  class_names = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
+ len(class_names)
+
+ # Validate required fields before downloading images
+ task = dataset_record.get("task", "detect")
+ if not is_classification:
+ if "train" not in splits:
+ raise ValueError(f"Dataset missing required 'train' split. Found splits: {sorted(splits)}")
+ if "val" not in splits and "test" not in splits:
+ raise ValueError(f"Dataset missing required 'val' split. Found splits: {sorted(splits)}")
+ if task == "pose" and "kpt_shape" not in dataset_record:
+ raise ValueError("Pose dataset missing required 'kpt_shape'. See https://docs.ultralytics.com/datasets/pose/")
 
  # Create base directories
  dataset_dir.mkdir(parents=True, exist_ok=True)
@@ -838,14 +849,19 @@
  if http_url := record.get("url"):
  if not image_path.exists():
  image_path.parent.mkdir(parents=True, exist_ok=True)
- try:
- async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
- response.raise_for_status()
- image_path.write_bytes(await response.read())
- return True
- except Exception as e:
- LOGGER.warning(f"Failed to download {http_url}: {e}")
- return False
+ # Retry with exponential backoff (3 attempts with 1s, 2s delays between them)
+ for attempt in range(3):
+ try:
+ async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
+ response.raise_for_status()
+ image_path.write_bytes(await response.read())
+ return True
+ except Exception as e:
+ if attempt < 2: # Don't sleep after last attempt
+ await asyncio.sleep(2**attempt) # 1s, 2s backoff
+ else:
+ LOGGER.warning(f"Failed to download {http_url} after 3 attempts: {e}")
+ return False
  return True
 
  # Process all images with async downloads (limit connections for small datasets)
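Extracted as a standalone coroutine for clarity, a sketch of the retry pattern this hunk adds (function name and attempt count mirror the diff; nothing here is part of the public API):

```python
import asyncio

import aiohttp


async def download_with_retry(session: aiohttp.ClientSession, url: str, attempts: int = 3) -> bytes | None:
    """Fetch url with 1s/2s exponential backoff between attempts, mirroring the hunk above."""
    for attempt in range(attempts):
        try:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                response.raise_for_status()
                return await response.read()
        except Exception as e:
            if attempt < attempts - 1:  # don't sleep after the final attempt
                await asyncio.sleep(2**attempt)  # 1s, then 2s
            else:
                print(f"Failed to download {url} after {attempts} attempts: {e}")
    return None
```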
@@ -861,9 +877,16 @@
  pbar.update(1)
  return result
 
- await asyncio.gather(*[tracked_process(record) for record in image_records])
+ results = await asyncio.gather(*[tracked_process(record) for record in image_records])
  pbar.close()
 
+ # Validate that images were downloaded successfully
+ success_count = sum(1 for r in results if r)
+ if success_count == 0:
+ raise RuntimeError(f"Failed to download any images from {ndjson_path}. Check network connection and URLs.")
+ if success_count < len(image_records):
+ LOGGER.warning(f"Downloaded {success_count}/{len(image_records)} images from {ndjson_path}")
+
  if is_classification:
  # Classification: return dataset directory (check_cls_dataset expects a directory path)
  return dataset_dir
ultralytics/data/utils.py CHANGED
@@ -37,8 +37,8 @@ from ultralytics.utils.downloads import download, safe_download, unzip_file
  from ultralytics.utils.ops import segments2boxes
 
  HELP_URL = "See https://docs.ultralytics.com/datasets for dataset formatting guidance."
- IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm", "heic"} # image suffixes
- VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # video suffixes
+ IMG_FORMATS = {"avif", "bmp", "dng", "heic", "jp2", "jpeg", "jpeg2000", "jpg", "mpo", "png", "tif", "tiff", "webp"}
+ VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # videos
  FORMATS_HELP_MSG = f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
 
 
ultralytics/engine/exporter.py CHANGED
@@ -404,6 +404,13 @@ class Exporter:
  if not hasattr(model, "names"):
  model.names = default_class_names()
  model.names = check_class_names(model.names)
+ if hasattr(model, "end2end"):
+ if self.args.end2end is not None:
+ model.end2end = self.args.end2end
+ if rknn or ncnn or executorch or paddle or imx:
+ # Disable end2end branch for certain export formats as they do not support topk
+ model.end2end = False
+ LOGGER.warning(f"{fmt.upper()} export does not support end2end models, disabling end2end branch.")
  if self.args.half and self.args.int8:
  LOGGER.warning("half=True and int8=True are mutually exclusive, setting half=False.")
  self.args.half = False
@@ -463,9 +470,6 @@
  )
  if tfjs and (ARM64 and LINUX):
  raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
- if ncnn and hasattr(model.model[-1], "one2one_cv2"):
- del model.model[-1].one2one_cv2 # Disable end2end branch for NCNN export as it does not support topk
- LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
  # Recommend OpenVINO if export and Intel CPU
  if SETTINGS.get("openvino_msg"):
  if is_intel():
@@ -509,6 +513,7 @@
  # Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
  anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
  m.max_det = min(self.args.max_det, anchors)
+ m.agnostic_nms = self.args.agnostic_nms
  m.xyxy = self.args.nms and not coreml
  m.shape = None # reset cached shape for new export input size
  if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models
@@ -549,6 +554,7 @@
  "names": model.names,
  "args": {k: v for k, v in self.args if k in fmt_keys},
  "channels": model.yaml.get("channels", 3),
+ "end2end": getattr(model, "end2end", False),
  } # model metadata
  if dla is not None:
  self.metadata["dla"] = dla # make sure `AutoBackend` uses correct dla device if it has one
@@ -556,8 +562,6 @@
  self.metadata["kpt_shape"] = model.model[-1].kpt_shape
  if hasattr(model, "kpt_names"):
  self.metadata["kpt_names"] = model.kpt_names
- if getattr(model.model[-1], "end2end", False):
- self.metadata["end2end"] = True
 
  LOGGER.info(
  f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
@@ -1045,7 +1049,7 @@
  "onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
  "ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""), # required by 'onnx2tf' package
  "onnx>=1.12.0,<2.0.0",
- "onnx2tf>=1.26.3",
+ "onnx2tf>=1.26.3,<1.29.0", # pin to avoid h5py build issues on aarch64
  "onnxslim>=0.1.71",
  "onnxruntime-gpu" if cuda else "onnxruntime",
  "protobuf>=5",
ultralytics/engine/predictor.py CHANGED
@@ -387,6 +387,11 @@ class BasePredictor:
  model (str | Path | torch.nn.Module, optional): Model to load or use.
  verbose (bool): Whether to print verbose output.
  """
+ if hasattr(model, "end2end"):
+ if self.args.end2end is not None:
+ model.end2end = self.args.end2end
+ if model.end2end:
+ model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
  self.model = AutoBackend(
  model=model or self.args.model,
  device=select_device(self.args.device, verbose=verbose),
ultralytics/engine/trainer.py CHANGED
@@ -948,7 +948,7 @@ class BaseTrainer:
  )
  nc = self.data.get("nc", 10) # number of classes
  lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
- name, lr, momentum = ("MuSGD", 0.01 if iterations > 10000 else lr_fit, 0.9)
+ name, lr, momentum = ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
  self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam
 
  use_muon = name == "MuSGD"
@@ -981,16 +981,18 @@
  "Request support for additional optimizers at https://github.com/ultralytics/ultralytics."
  )
 
+ num_params = [len(g[0]), len(g[1]), len(g[2])] # number of params in each group
  g[2] = {"params": g[2], **optim_args, "param_group": "bias"}
  g[0] = {"params": g[0], **optim_args, "weight_decay": decay, "param_group": "weight"}
  g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0, "param_group": "bn"}
- muon, sgd = (0.1, 1.0) if iterations > 10000 else (0.5, 0.5) # scale factor for MuSGD
+ muon, sgd = (0.2, 1.0)
  if use_muon:
+ num_params[0] = len(g[3]) # update number of params
  g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True, "param_group": "muon"}
  import re
 
  # higher lr for certain parameters in MuSGD when finetuning
- pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg|flow_model")
+ pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
  g_ = [] # new param groups
  for x in g:
  p = x.pop("params")
@@ -1002,6 +1004,6 @@
 
  LOGGER.info(
  f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
- f"{len(g[1]['params'])} weight(decay=0.0), {len(g[0]['params']) if len(g[0]) else len(g[3]['params'])} weight(decay={decay}), {len(g[2]['params'])} bias(decay=0.0)"
+ f"{num_params[1]} weight(decay=0.0), {num_params[0]} weight(decay={decay}), {num_params[2]} bias(decay=0.0)"
  )
  return optimizer
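The tightened finetuning pattern is easiest to read against a few concrete parameter names (the names below are illustrative of YOLO layer naming, not taken from the diff):

```python
import re

pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
for name in ("model.23.cv3.0.weight", "model.22.cv2.1.bias", "model.8.proto.semseg.weight"):
    print(name, bool(pattern.search(name)))
# model.23.cv3.0.weight True        (contains both "23" and "cv3")
# model.22.cv2.1.bias False
# model.8.proto.semseg.weight True  (matches the literal alternative)
```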
ultralytics/engine/tuner.py CHANGED
@@ -26,7 +26,7 @@ from datetime import datetime
  import numpy as np
  import torch
 
- from ultralytics.cfg import get_cfg, get_save_dir
+ from ultralytics.cfg import CFG_INT_KEYS, get_cfg, get_save_dir
  from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
  from ultralytics.utils.checks import check_requirements
  from ultralytics.utils.patches import torch_load
@@ -448,7 +448,7 @@ class Tuner:
  f"{self.prefix}Best fitness model is {best_save_dir}"
  )
  LOGGER.info("\n" + header)
- data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
+ data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])}
  YAML.save(
  self.tune_dir / "best_hyperparameters.yaml",
  data=data,
ultralytics/engine/validator.py CHANGED
@@ -156,6 +156,11 @@ class BaseValidator:
  if str(self.args.model).endswith(".yaml") and model is None:
  LOGGER.warning("validating an untrained model YAML will result in 0 mAP.")
  callbacks.add_integration_callbacks(self)
+ if hasattr(model, "end2end"):
+ if self.args.end2end is not None:
+ model.end2end = self.args.end2end
+ if model.end2end:
+ model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
  model = AutoBackend(
  model=model or self.args.model,
  device=select_device(self.args.device) if RANK == -1 else torch.device("cuda", RANK),
ultralytics/models/sam/predict.py CHANGED
@@ -2619,6 +2619,7 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
  if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
  orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
 
+ names = []
  if len(curr_obj_ids) == 0:
  pred_masks, pred_boxes = None, torch.zeros((0, 7), device=self.device)
  else:
@@ -2656,9 +2657,8 @@
  background_value=0,
  ).squeeze(1)
  ) > 0
+ names = self.model.names or dict(enumerate(str(i) for i in range(pred_boxes[:, 6].int().max())))
 
- # names = getattr(self.model, "names", [str(i) for i in range(pred_scores.shape[0])])
- names = dict(enumerate(str(i) for i in range(pred_boxes.shape[0])))
  results = []
  for masks, boxes, orig_img, img_path in zip([pred_masks], [pred_boxes], orig_imgs, self.batch[0]):
  results.append(Results(orig_img, path=img_path, names=names, masks=masks, boxes=boxes))
ultralytics/models/yolo/classify/train.py CHANGED
@@ -11,7 +11,7 @@ from ultralytics.data import ClassificationDataset, build_dataloader
  from ultralytics.engine.trainer import BaseTrainer
  from ultralytics.models import yolo
  from ultralytics.nn.tasks import ClassificationModel
- from ultralytics.utils import DEFAULT_CFG, RANK
+ from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
  from ultralytics.utils.plotting import plot_images
  from ultralytics.utils.torch_utils import is_parallel, torch_distributed_zero_first
 
@@ -138,6 +138,19 @@ class ClassificationTrainer(BaseTrainer):
  with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
  dataset = self.build_dataset(dataset_path, mode)
 
+ # Filter out samples with class indices >= nc (prevents CUDA assertion errors)
+ nc = self.data.get("nc", 0)
+ dataset_nc = len(dataset.base.classes)
+ if nc and dataset_nc > nc:
+ extra_classes = dataset.base.classes[nc:]
+ original_count = len(dataset.samples)
+ dataset.samples = [s for s in dataset.samples if s[1] < nc]
+ skipped = original_count - len(dataset.samples)
+ LOGGER.warning(
+ f"{mode} split has {dataset_nc} classes but model expects {nc}. "
+ f"Skipping {skipped} samples from extra classes: {extra_classes}"
+ )
+
  loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
  # Attach inference transforms
  if mode != "train":
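The filter keeps only samples whose class index is below the model's nc. A toy version (sample tuples follow the torchvision-style (path, class_index) convention that ClassificationDataset uses):

```python
nc = 3  # model expects 3 classes
samples = [("cat/1.jpg", 0), ("dog/2.jpg", 2), ("fox/3.jpg", 4)]  # (path, class_index)
kept = [s for s in samples if s[1] < nc]
print(kept)  # [('cat/1.jpg', 0), ('dog/2.jpg', 2)] -- the class-4 sample is skipped
```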
ultralytics/models/yolo/detect/train.py CHANGED
@@ -92,7 +92,7 @@ class DetectionTrainer(BaseTrainer):
  with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
  dataset = self.build_dataset(dataset_path, mode, batch_size)
  shuffle = mode == "train"
- if getattr(dataset, "rect", False) and shuffle:
+ if getattr(dataset, "rect", False) and shuffle and not np.all(dataset.batch_shapes == dataset.batch_shapes[0]):
  LOGGER.warning("'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
  shuffle = False
  return build_dataloader(
@@ -117,11 +117,13 @@
  if isinstance(v, torch.Tensor):
  batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
  batch["img"] = batch["img"].float() / 255
- multi_scale = self.args.multi_scale
- if random.random() < multi_scale:
+ if self.args.multi_scale > 0.0:
  imgs = batch["img"]
  sz = (
- random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1 + self.stride))
+ random.randrange(
+ int(self.args.imgsz * (1.0 - self.args.multi_scale)),
+ int(self.args.imgsz * (1.0 + self.args.multi_scale) + self.stride),
+ )
  // self.stride
  * self.stride
  ) # size
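The new size sampling in isolation, for imgsz=640, stride=32, multi_scale=0.5 (values illustrative):

```python
import random

imgsz, stride, multi_scale = 640, 32, 0.5
sz = (
    random.randrange(int(imgsz * (1.0 - multi_scale)), int(imgsz * (1.0 + multi_scale) + stride))
    // stride
    * stride
)
print(sz)  # a stride multiple between 320 and 960
```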
@@ -143,6 +145,8 @@
  self.model.nc = self.data["nc"] # attach number of classes to model
  self.model.names = self.data["names"] # attach class names to model
  self.model.args = self.args # attach hyperparameters to model
+ if getattr(self.model, "end2end"):
+ self.model.set_head_attr(max_det=self.args.max_det)
  # TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc
 
  def get_model(self, cfg: str | None = None, weights: str | None = None, verbose: bool = True):
ultralytics/models/yolo/pose/train.py CHANGED
@@ -9,6 +9,7 @@ from typing import Any
  from ultralytics.models import yolo
  from ultralytics.nn.tasks import PoseModel
  from ultralytics.utils import DEFAULT_CFG
+ from ultralytics.utils.torch_utils import unwrap_model
 
 
  class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -91,7 +92,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
  def get_validator(self):
  """Return an instance of the PoseValidator class for validation."""
  self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
- if getattr(self.model.model[-1], "flow_model", None) is not None:
+ if getattr(unwrap_model(self.model).model[-1], "flow_model", None) is not None:
  self.loss_names += ("rle_loss",)
  return yolo.pose.PoseValidator(
  self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -1,11 +1,14 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ from __future__ import annotations
+
  from pathlib import Path
 
  from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
  from ultralytics.data.utils import check_det_dataset
  from ultralytics.models.yolo.world import WorldTrainer
  from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
+ from ultralytics.utils.checks import check_file
  from ultralytics.utils.torch_utils import unwrap_model
 
 
@@ -100,6 +103,23 @@ class WorldTrainerFromScratch(WorldTrainer):
  self.set_text_embeddings(datasets, batch) # cache text embeddings to accelerate training
  return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]
 
+ @staticmethod
+ def check_data_config(data: dict | str | Path) -> dict:
+ """Check and load the data configuration from a YAML file or dictionary.
+
+ Args:
+ data (dict | str | Path): Data configuration as a dictionary or path to a YAML file.
+
+ Returns:
+ (dict): Data configuration dictionary loaded from YAML file or passed directly.
+ """
+ # If string, load from YAML file
+ if not isinstance(data, dict):
+ from ultralytics.utils import YAML
+
+ return YAML.load(check_file(data))
+ return data
+
  def get_dataset(self):
  """Get train and validation paths from data dictionary.
 
@@ -114,7 +134,7 @@
  AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
  """
  final_data = {}
- data_yaml = self.args.data
+ self.args.data = data_yaml = self.check_data_config(self.args.data)
  assert data_yaml.get("train", False), "train dataset not found" # object365.yaml
  assert data_yaml.get("val", False), "validation dataset not found" # lvis.yaml
  data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -196,7 +196,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
  Returns:
  (dict): Dictionary mapping text samples to their embeddings.
  """
- model = "mobileclip:blt"
+ model = unwrap_model(self.model).text_model
  cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
  if cache_path.exists():
  LOGGER.info(f"Reading existing cache from '{cache_path}'")
@@ -204,7 +204,6 @@
  if sorted(txt_map.keys()) == sorted(texts):
  return txt_map
  LOGGER.info(f"Caching text embeddings to '{cache_path}'")
- assert self.model is not None
  txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
  txt_map = dict(zip(texts, txt_feats.squeeze(0)))
  torch.save(txt_map, cache_path)
ultralytics/nn/autobackend.py CHANGED
@@ -648,7 +648,7 @@ class AutoBackend(nn.Module):
  for k, v in metadata.items():
  if k in {"stride", "batch", "channels"}:
  metadata[k] = int(v)
- elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args"} and isinstance(v, str):
+ elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args", "end2end"} and isinstance(v, str):
  metadata[k] = ast.literal_eval(v)
  stride = metadata["stride"]
  task = metadata["task"]
ultralytics/nn/modules/head.py CHANGED
@@ -69,6 +69,7 @@ class Detect(nn.Module):
  export = False # export mode
  format = None # export format
  max_det = 300 # max_det
+ agnostic_nms = False
  shape = None
  anchors = torch.empty(0) # init
  strides = torch.empty(0) # init
@@ -125,7 +126,12 @@
  @property
  def end2end(self):
  """Checks if the model has one2one for v5/v8/v9/11 backward compatibility."""
- return hasattr(self, "one2one")
+ return getattr(self, "_end2end", True) and hasattr(self, "one2one")
+
+ @end2end.setter
+ def end2end(self, value):
+ """Override the end-to-end detection mode."""
+ self._end2end = value
 
  def forward_head(
  self, x: list[torch.Tensor], box_head: torch.nn.Module = None, cls_head: torch.nn.Module = None
@@ -230,6 +236,11 @@
  # Use max_det directly during export for TensorRT compatibility (requires k to be constant),
  # otherwise use min(max_det, anchors) for safety with small inputs during Python inference
  k = max_det if self.export else min(max_det, anchors)
+ if self.agnostic_nms:
+ scores, labels = scores.max(dim=-1, keepdim=True)
+ scores, indices = scores.topk(k, dim=1)
+ labels = labels.gather(1, indices)
+ return scores, labels, indices
  ori_index = scores.max(dim=-1)[0].topk(k)[1].unsqueeze(-1)
  scores = scores.gather(dim=1, index=ori_index.repeat(1, 1, nc))
  scores, index = scores.flatten(1).topk(k)
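The class-agnostic branch in tensor terms: reduce over classes first, then take top-k anchors and gather their labels. A shape-checked toy run:

```python
import torch

scores = torch.rand(1, 8, 3)  # (batch, anchors, classes)
k = 4
conf, labels = scores.max(dim=-1, keepdim=True)  # best class per anchor -> (1, 8, 1)
conf, indices = conf.topk(k, dim=1)  # top-k anchors by confidence -> (1, 4, 1)
labels = labels.gather(1, indices)  # classes of the selected anchors -> (1, 4, 1)
print(conf.shape, labels.shape, indices.shape)
```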
@@ -1098,7 +1109,7 @@ class YOLOEDetect(Detect):
  boxes, scores, index = [], [], []
  bs = x[0].shape[0]
  cv2 = self.cv2 if not self.end2end else self.one2one_cv2
- cv3 = self.cv3 if not self.end2end else self.one2one_cv2
+ cv3 = self.cv3 if not self.end2end else self.one2one_cv3
  for i in range(self.nl):
  cls_feat = cv3[i](x[i])
  loc_feat = cv2[i](x[i])
ultralytics/nn/tasks.py CHANGED
@@ -425,6 +425,24 @@ class DetectionModel(BaseModel):
  """Return whether the model uses end-to-end NMS-free detection."""
  return getattr(self.model[-1], "end2end", False)
 
+ @end2end.setter
+ def end2end(self, value):
+ """Override the end-to-end detection mode."""
+ self.set_head_attr(end2end=value)
+
+ def set_head_attr(self, **kwargs):
+ """Set attributes of the model head (last layer).
+
+ Args:
+ **kwargs: Arbitrary keyword arguments representing attributes to set.
+ """
+ head = self.model[-1]
+ for k, v in kwargs.items():
+ if not hasattr(head, k):
+ LOGGER.warning(f"Head has no attribute '{k}'.")
+ continue
+ setattr(head, k, v)
+
  def _predict_augment(self, x):
  """Perform augmentations on input image x and return augmented inference and train outputs.
 
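Usage sketch for the new helper (checkpoint name illustrative; unknown attributes are skipped with a warning, per the code above):

```python
from ultralytics import YOLO

det_model = YOLO("yolo26n.pt").model  # underlying DetectionModel (hypothetical weights)
det_model.end2end = False  # property setter routes through set_head_attr(end2end=False)
det_model.set_head_attr(max_det=100, agnostic_nms=True)
det_model.set_head_attr(not_an_attr=1)  # logs "Head has no attribute 'not_an_attr'." and skips
```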
ultralytics/solutions/security_alarm.py CHANGED
@@ -62,7 +62,7 @@ class SecurityAlarm(BaseSolution):
  """
  import smtplib
 
- self.server = smtplib.SMTP("smtp.gmail.com: 587")
+ self.server = smtplib.SMTP("smtp.gmail.com", 587)
  self.server.starttls()
  self.server.login(from_email, password)
  self.to_email = to_email
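For reference, the corrected call matches the standard-library signature smtplib.SMTP(host, port); a minimal sketch with placeholder credentials:

```python
import smtplib

server = smtplib.SMTP("smtp.gmail.com", 587, timeout=30)  # host and port as separate arguments
server.starttls()  # upgrade the connection to TLS before authenticating
server.login("sender@example.com", "app-password")  # placeholder credentials
server.quit()
```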