dgenerate-ultralytics-headless 8.4.6__py3-none-any.whl → 8.4.8__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (37)
  1. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/METADATA +3 -3
  2. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/RECORD +37 -36
  3. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/WHEEL +1 -1
  4. tests/test_cli.py +10 -3
  5. tests/test_exports.py +64 -43
  6. tests/test_python.py +40 -11
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +5 -4
  9. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  10. ultralytics/cfg/default.yaml +2 -1
  11. ultralytics/data/augment.py +8 -0
  12. ultralytics/data/converter.py +32 -9
  13. ultralytics/data/utils.py +2 -2
  14. ultralytics/engine/exporter.py +10 -6
  15. ultralytics/engine/predictor.py +5 -0
  16. ultralytics/engine/trainer.py +6 -4
  17. ultralytics/engine/tuner.py +2 -2
  18. ultralytics/engine/validator.py +5 -0
  19. ultralytics/models/sam/predict.py +2 -2
  20. ultralytics/models/yolo/classify/train.py +14 -1
  21. ultralytics/models/yolo/detect/train.py +8 -4
  22. ultralytics/models/yolo/pose/train.py +2 -1
  23. ultralytics/models/yolo/world/train_world.py +21 -1
  24. ultralytics/models/yolo/yoloe/train.py +1 -2
  25. ultralytics/nn/autobackend.py +1 -1
  26. ultralytics/nn/modules/head.py +13 -2
  27. ultralytics/nn/tasks.py +18 -0
  28. ultralytics/solutions/security_alarm.py +1 -1
  29. ultralytics/utils/benchmarks.py +3 -9
  30. ultralytics/utils/callbacks/wb.py +6 -1
  31. ultralytics/utils/loss.py +18 -9
  32. ultralytics/utils/patches.py +42 -0
  33. ultralytics/utils/tal.py +15 -5
  34. ultralytics/utils/torch_utils.py +1 -1
  35. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/entry_points.txt +0 -0
  36. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
  37. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/top_level.txt +0 -0
ultralytics/cfg/datasets/coco12-formats.yaml ADDED
@@ -0,0 +1,101 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # COCO12-Formats dataset (12 images testing all supported image formats) by Ultralytics
+ # Documentation: https://docs.ultralytics.com/datasets/detect/coco12-formats/
+ # Example usage: yolo train data=coco12-formats.yaml
+ # parent
+ # ├── ultralytics
+ # └── datasets
+ #     └── coco12-formats ← downloads here (1 MB)
+
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+ path: coco12-formats # dataset root dir
+ train: images/train # train images (relative to 'path') 6 images
+ val: images/val # val images (relative to 'path') 6 images
+ test: # test images (optional)
+
+ # Classes
+ names:
+   0: person
+   1: bicycle
+   2: car
+   3: motorcycle
+   4: airplane
+   5: bus
+   6: train
+   7: truck
+   8: boat
+   9: traffic light
+   10: fire hydrant
+   11: stop sign
+   12: parking meter
+   13: bench
+   14: bird
+   15: cat
+   16: dog
+   17: horse
+   18: sheep
+   19: cow
+   20: elephant
+   21: bear
+   22: zebra
+   23: giraffe
+   24: backpack
+   25: umbrella
+   26: handbag
+   27: tie
+   28: suitcase
+   29: frisbee
+   30: skis
+   31: snowboard
+   32: sports ball
+   33: kite
+   34: baseball bat
+   35: baseball glove
+   36: skateboard
+   37: surfboard
+   38: tennis racket
+   39: bottle
+   40: wine glass
+   41: cup
+   42: fork
+   43: knife
+   44: spoon
+   45: bowl
+   46: banana
+   47: apple
+   48: sandwich
+   49: orange
+   50: broccoli
+   51: carrot
+   52: hot dog
+   53: pizza
+   54: donut
+   55: cake
+   56: chair
+   57: couch
+   58: potted plant
+   59: bed
+   60: dining table
+   61: toilet
+   62: tv
+   63: laptop
+   64: mouse
+   65: remote
+   66: keyboard
+   67: cell phone
+   68: microwave
+   69: oven
+   70: toaster
+   71: sink
+   72: refrigerator
+   73: book
+   74: clock
+   75: vase
+   76: scissors
+   77: teddy bear
+   78: hair drier
+   79: toothbrush
+
+ # Download script/URL (optional)
+ download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco12-formats.zip
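As a quick orientation for the new dataset file, a minimal training sketch (the model name and epoch count are illustrative; per the YAML above, the ~1 MB archive auto-downloads on first use):

```python
from ultralytics import YOLO

# Hypothetical smoke test of the new dataset; any detection checkpoint works.
model = YOLO("yolo11n.pt")
model.train(data="coco12-formats.yaml", epochs=1, imgsz=640)
```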
ultralytics/cfg/default.yaml CHANGED
@@ -36,7 +36,7 @@ amp: True # (bool) Automatic Mixed Precision (AMP) training; True runs AMP capab
  fraction: 1.0 # (float) fraction of training dataset to use (1.0 = all)
  profile: False # (bool) profile ONNX/TensorRT speeds during training for loggers
  freeze: # (int | list, optional) freeze first N layers (int) or specific layer indices (list)
- multi_scale: 0.0 # (float) multiscale training by varying image size
+ multi_scale: 0.0 # (float) multi-scale range as a fraction of imgsz; sizes are rounded to stride multiples
  compile: False # (bool | str) enable torch.compile() backend='inductor'; True="default", False=off, or "default|reduce-overhead|max-autotune-no-cudagraphs"

  # Segmentation
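The reworded comment reflects a semantics change: multi_scale is now a ± fraction of imgsz rather than a probability. A hedged sketch of a user-facing call under the new meaning (dataset and model names illustrative):

```python
from ultralytics import YOLO

# Assuming the new semantics: multi_scale=0.5 with imgsz=640 samples training
# sizes from roughly 320 to 960, each rounded down to a stride multiple.
YOLO("yolo11n.pt").train(data="coco8.yaml", imgsz=640, multi_scale=0.5, epochs=1)
```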
@@ -56,6 +56,7 @@ max_det: 300 # (int) maximum number of detections per image
  half: False # (bool) use half precision (FP16) if supported
  dnn: False # (bool) use OpenCV DNN for ONNX inference
  plots: True # (bool) save plots and images during train/val
+ end2end: # (bool, optional) whether to use the end2end head (YOLO26, YOLOv10) for predict/val/export
 
  # Predict settings -----------------------------------------------------------------------------------------------------
  source: # (str, optional) path/dir/URL/stream for images or videos; e.g. 'ultralytics/assets' or '0' for webcam
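The new end2end key is a tri-state (unset = keep the model's default) and is threaded through the predictor, validator, and exporter hunks below. A sketch of how it might be toggled from the Python API, assuming NMS-free-capable weights (the checkpoint name is illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolo26n.pt")  # hypothetical end2end-capable checkpoint
model.predict("bus.jpg", end2end=True, max_det=100, agnostic_nms=True)  # NMS-free head
model.val(data="coco8.yaml", end2end=False)  # force the classic NMS path instead
```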
ultralytics/data/augment.py CHANGED
@@ -2066,7 +2066,15 @@ class Format:
  if self.mask_overlap:
  sem_masks = cls_tensor[masks[0].long() - 1] # (H, W) from (1, H, W) instance indices
  else:
+ # Create sem_masks consistent with mask_overlap=True
  sem_masks = (masks * cls_tensor[:, None, None]).max(0).values # (H, W) from (N, H, W) binary
+ overlap = masks.sum(dim=0) > 1 # (H, W)
+ if overlap.any():
+ weights = masks.sum(axis=(1, 2))
+ weighted_masks = masks * weights[:, None, None] # (N, H, W)
+ weighted_masks[masks == 0] = weights.max() + 1 # handle background
+ smallest_idx = weighted_masks.argmin(dim=0) # (H, W)
+ sem_masks[overlap] = cls_tensor[smallest_idx[overlap]]
  else:
  masks = torch.zeros(
  1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
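The added branch resolves overlapping instances the same way mask_overlap=True does: the smallest instance wins the pixel. A toy reproduction of the rule (class ids chosen so a plain max() would give the wrong answer):

```python
import torch

masks = torch.tensor(
    [
        [[1, 1, 1], [1, 1, 1], [0, 0, 0]],  # large instance (area 6)
        [[0, 1, 0], [0, 1, 0], [0, 1, 0]],  # small instance (area 3)
    ],
    dtype=torch.float32,
)
cls_tensor = torch.tensor([7.0, 2.0])  # large = class 7, small = class 2

sem_masks = (masks * cls_tensor[:, None, None]).max(0).values  # naive max: class 7 wins overlaps
overlap = masks.sum(dim=0) > 1  # pixels covered by more than one instance
weights = masks.sum(dim=(1, 2))  # instance areas
weighted = masks * weights[:, None, None]
weighted[masks == 0] = weights.max() + 1  # background never wins the argmin
sem_masks[overlap] = cls_tensor[weighted.argmin(dim=0)[overlap]]
print(sem_masks)  # overlapping pixels (0,1) and (1,1) now take class 2, the smaller instance
```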
ultralytics/data/converter.py CHANGED
@@ -796,6 +796,17 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
  # Check if this is a classification dataset
  is_classification = dataset_record.get("task") == "classify"
  class_names = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
+ len(class_names)
+
+ # Validate required fields before downloading images
+ task = dataset_record.get("task", "detect")
+ if not is_classification:
+ if "train" not in splits:
+ raise ValueError(f"Dataset missing required 'train' split. Found splits: {sorted(splits)}")
+ if "val" not in splits and "test" not in splits:
+ raise ValueError(f"Dataset missing required 'val' split. Found splits: {sorted(splits)}")
+ if task == "pose" and "kpt_shape" not in dataset_record:
+ raise ValueError("Pose dataset missing required 'kpt_shape'. See https://docs.ultralytics.com/datasets/pose/")
 
  # Create base directories
  dataset_dir.mkdir(parents=True, exist_ok=True)
@@ -838,14 +849,19 @@
  if http_url := record.get("url"):
  if not image_path.exists():
  image_path.parent.mkdir(parents=True, exist_ok=True)
- try:
- async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
- response.raise_for_status()
- image_path.write_bytes(await response.read())
- return True
- except Exception as e:
- LOGGER.warning(f"Failed to download {http_url}: {e}")
- return False
+ # Retry with exponential backoff (3 attempts with 1s, 2s delays between them)
+ for attempt in range(3):
+ try:
+ async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
+ response.raise_for_status()
+ image_path.write_bytes(await response.read())
+ return True
+ except Exception as e:
+ if attempt < 2: # Don't sleep after last attempt
+ await asyncio.sleep(2**attempt) # 1s, 2s backoff
+ else:
+ LOGGER.warning(f"Failed to download {http_url} after 3 attempts: {e}")
+ return False
  return True
 
  # Process all images with async downloads (limit connections for small datasets)
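Extracted as a standalone coroutine for clarity, a sketch of the retry pattern this hunk adds (function name and attempt count mirror the diff; nothing here is part of the public API):

```python
import asyncio

import aiohttp


async def download_with_retry(session: aiohttp.ClientSession, url: str, attempts: int = 3) -> bytes | None:
    """Fetch url with 1s/2s exponential backoff between attempts, mirroring the hunk above."""
    for attempt in range(attempts):
        try:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                response.raise_for_status()
                return await response.read()
        except Exception as e:
            if attempt < attempts - 1:  # don't sleep after the final attempt
                await asyncio.sleep(2**attempt)  # 1s, then 2s
            else:
                print(f"Failed to download {url} after {attempts} attempts: {e}")
    return None
```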
@@ -861,9 +877,16 @@
  pbar.update(1)
  return result
 
- await asyncio.gather(*[tracked_process(record) for record in image_records])
+ results = await asyncio.gather(*[tracked_process(record) for record in image_records])
  pbar.close()
 
+ # Validate that images were downloaded successfully
+ success_count = sum(1 for r in results if r)
+ if success_count == 0:
+ raise RuntimeError(f"Failed to download any images from {ndjson_path}. Check network connection and URLs.")
+ if success_count < len(image_records):
+ LOGGER.warning(f"Downloaded {success_count}/{len(image_records)} images from {ndjson_path}")
+
  if is_classification:
  # Classification: return dataset directory (check_cls_dataset expects a directory path)
  return dataset_dir
ultralytics/data/utils.py CHANGED
@@ -37,8 +37,8 @@ from ultralytics.utils.downloads import download, safe_download, unzip_file
  from ultralytics.utils.ops import segments2boxes
 
  HELP_URL = "See https://docs.ultralytics.com/datasets for dataset formatting guidance."
- IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm", "heic"} # image suffixes
- VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # video suffixes
+ IMG_FORMATS = {"avif", "bmp", "dng", "heic", "jp2", "jpeg", "jpeg2000", "jpg", "mpo", "png", "tif", "tiff", "webp"}
+ VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # videos
  FORMATS_HELP_MSG = f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
 
 
ultralytics/engine/exporter.py CHANGED
@@ -404,6 +404,13 @@ class Exporter:
  if not hasattr(model, "names"):
  model.names = default_class_names()
  model.names = check_class_names(model.names)
+ if hasattr(model, "end2end"):
+ if self.args.end2end is not None:
+ model.end2end = self.args.end2end
+ if rknn or ncnn or executorch or paddle or imx:
+ # Disable end2end branch for certain export formats as they do not support topk
+ model.end2end = False
+ LOGGER.warning(f"{fmt.upper()} export does not support end2end models, disabling end2end branch.")
  if self.args.half and self.args.int8:
  LOGGER.warning("half=True and int8=True are mutually exclusive, setting half=False.")
  self.args.half = False
@@ -463,9 +470,6 @@
  )
  if tfjs and (ARM64 and LINUX):
  raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
- if ncnn and hasattr(model.model[-1], "one2one_cv2"):
- del model.model[-1].one2one_cv2 # Disable end2end branch for NCNN export as it does not support topk
- LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
  # Recommend OpenVINO if export and Intel CPU
  if SETTINGS.get("openvino_msg"):
  if is_intel():
@@ -509,6 +513,7 @@
  # Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
  anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
  m.max_det = min(self.args.max_det, anchors)
+ m.agnostic_nms = self.args.agnostic_nms
  m.xyxy = self.args.nms and not coreml
  m.shape = None # reset cached shape for new export input size
  if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models
@@ -549,6 +554,7 @@
  "names": model.names,
  "args": {k: v for k, v in self.args if k in fmt_keys},
  "channels": model.yaml.get("channels", 3),
+ "end2end": getattr(model, "end2end", False),
  } # model metadata
  if dla is not None:
  self.metadata["dla"] = dla # make sure `AutoBackend` uses correct dla device if it has one
@@ -556,8 +562,6 @@
  self.metadata["kpt_shape"] = model.model[-1].kpt_shape
  if hasattr(model, "kpt_names"):
  self.metadata["kpt_names"] = model.kpt_names
- if getattr(model.model[-1], "end2end", False):
- self.metadata["end2end"] = True
 
  LOGGER.info(
  f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
@@ -1045,7 +1049,7 @@
  "onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
  "ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""), # required by 'onnx2tf' package
  "onnx>=1.12.0,<2.0.0",
- "onnx2tf>=1.26.3",
+ "onnx2tf>=1.26.3,<1.29.0", # pin to avoid h5py build issues on aarch64
  "onnxslim>=0.1.71",
  "onnxruntime-gpu" if cuda else "onnxruntime",
  "protobuf>=5",
ultralytics/engine/predictor.py CHANGED
@@ -387,6 +387,11 @@ class BasePredictor:
  model (str | Path | torch.nn.Module, optional): Model to load or use.
  verbose (bool): Whether to print verbose output.
  """
+ if hasattr(model, "end2end"):
+ if self.args.end2end is not None:
+ model.end2end = self.args.end2end
+ if model.end2end:
+ model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
  self.model = AutoBackend(
  model=model or self.args.model,
  device=select_device(self.args.device, verbose=verbose),
ultralytics/engine/trainer.py CHANGED
@@ -948,7 +948,7 @@ class BaseTrainer:
  )
  nc = self.data.get("nc", 10) # number of classes
  lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
- name, lr, momentum = ("MuSGD", 0.01 if iterations > 10000 else lr_fit, 0.9)
+ name, lr, momentum = ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
  self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam
 
  use_muon = name == "MuSGD"
@@ -981,16 +981,18 @@
  "Request support for additional optimizers at https://github.com/ultralytics/ultralytics."
  )
 
+ num_params = [len(g[0]), len(g[1]), len(g[2])] # number of params in each group
  g[2] = {"params": g[2], **optim_args, "param_group": "bias"}
  g[0] = {"params": g[0], **optim_args, "weight_decay": decay, "param_group": "weight"}
  g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0, "param_group": "bn"}
- muon, sgd = (0.1, 1.0) if iterations > 10000 else (0.5, 0.5) # scale factor for MuSGD
+ muon, sgd = (0.2, 1.0)
  if use_muon:
+ num_params[0] = len(g[3]) # update number of params
  g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True, "param_group": "muon"}
  import re
 
  # higher lr for certain parameters in MuSGD when finetuning
- pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg|flow_model")
+ pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
  g_ = [] # new param groups
  for x in g:
  p = x.pop("params")
@@ -1002,6 +1004,6 @@
 
  LOGGER.info(
  f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
- f"{len(g[1]['params'])} weight(decay=0.0), {len(g[0]['params']) if len(g[0]) else len(g[3]['params'])} weight(decay={decay}), {len(g[2]['params'])} bias(decay=0.0)"
+ f"{num_params[1]} weight(decay=0.0), {num_params[0]} weight(decay={decay}), {num_params[2]} bias(decay=0.0)"
  )
  return optimizer
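The tightened finetuning pattern is easiest to read against a few concrete parameter names (the names below are illustrative of YOLO layer naming, not taken from the diff):

```python
import re

pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
for name in ("model.23.cv3.0.weight", "model.22.cv2.1.bias", "model.8.proto.semseg.weight"):
    print(name, bool(pattern.search(name)))
# model.23.cv3.0.weight True        (contains both "23" and "cv3")
# model.22.cv2.1.bias False
# model.8.proto.semseg.weight True  (matches the literal alternative)
```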
ultralytics/engine/tuner.py CHANGED
@@ -26,7 +26,7 @@ from datetime import datetime
  import numpy as np
  import torch
 
- from ultralytics.cfg import get_cfg, get_save_dir
+ from ultralytics.cfg import CFG_INT_KEYS, get_cfg, get_save_dir
  from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
  from ultralytics.utils.checks import check_requirements
  from ultralytics.utils.patches import torch_load
@@ -448,7 +448,7 @@ class Tuner:
  f"{self.prefix}Best fitness model is {best_save_dir}"
  )
  LOGGER.info("\n" + header)
- data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
+ data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])}
  YAML.save(
  self.tune_dir / "best_hyperparameters.yaml",
  data=data,
ultralytics/engine/validator.py CHANGED
@@ -156,6 +156,11 @@ class BaseValidator:
  if str(self.args.model).endswith(".yaml") and model is None:
  LOGGER.warning("validating an untrained model YAML will result in 0 mAP.")
  callbacks.add_integration_callbacks(self)
+ if hasattr(model, "end2end"):
+ if self.args.end2end is not None:
+ model.end2end = self.args.end2end
+ if model.end2end:
+ model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
  model = AutoBackend(
  model=model or self.args.model,
  device=select_device(self.args.device) if RANK == -1 else torch.device("cuda", RANK),
ultralytics/models/sam/predict.py CHANGED
@@ -2619,6 +2619,7 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
  if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
  orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
 
+ names = []
  if len(curr_obj_ids) == 0:
  pred_masks, pred_boxes = None, torch.zeros((0, 7), device=self.device)
  else:
@@ -2656,9 +2657,8 @@
  background_value=0,
  ).squeeze(1)
  ) > 0
+ names = self.model.names or dict(enumerate(str(i) for i in range(pred_boxes[:, 6].int().max())))
 
- # names = getattr(self.model, "names", [str(i) for i in range(pred_scores.shape[0])])
- names = dict(enumerate(str(i) for i in range(pred_boxes.shape[0])))
  results = []
  for masks, boxes, orig_img, img_path in zip([pred_masks], [pred_boxes], orig_imgs, self.batch[0]):
  results.append(Results(orig_img, path=img_path, names=names, masks=masks, boxes=boxes))
ultralytics/models/yolo/classify/train.py CHANGED
@@ -11,7 +11,7 @@ from ultralytics.data import ClassificationDataset, build_dataloader
  from ultralytics.engine.trainer import BaseTrainer
  from ultralytics.models import yolo
  from ultralytics.nn.tasks import ClassificationModel
- from ultralytics.utils import DEFAULT_CFG, RANK
+ from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
  from ultralytics.utils.plotting import plot_images
  from ultralytics.utils.torch_utils import is_parallel, torch_distributed_zero_first
 
@@ -138,6 +138,19 @@ class ClassificationTrainer(BaseTrainer):
  with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
  dataset = self.build_dataset(dataset_path, mode)
 
+ # Filter out samples with class indices >= nc (prevents CUDA assertion errors)
+ nc = self.data.get("nc", 0)
+ dataset_nc = len(dataset.base.classes)
+ if nc and dataset_nc > nc:
+ extra_classes = dataset.base.classes[nc:]
+ original_count = len(dataset.samples)
+ dataset.samples = [s for s in dataset.samples if s[1] < nc]
+ skipped = original_count - len(dataset.samples)
+ LOGGER.warning(
+ f"{mode} split has {dataset_nc} classes but model expects {nc}. "
+ f"Skipping {skipped} samples from extra classes: {extra_classes}"
+ )
+
  loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
  # Attach inference transforms
  if mode != "train":
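The filter keeps only samples whose class index is below the model's nc. A toy version (sample tuples follow the torchvision-style (path, class_index) convention that ClassificationDataset uses):

```python
nc = 3  # model expects 3 classes
samples = [("cat/1.jpg", 0), ("dog/2.jpg", 2), ("fox/3.jpg", 4)]  # (path, class_index)
kept = [s for s in samples if s[1] < nc]
print(kept)  # [('cat/1.jpg', 0), ('dog/2.jpg', 2)] -- the class-4 sample is skipped
```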
ultralytics/models/yolo/detect/train.py CHANGED
@@ -92,7 +92,7 @@ class DetectionTrainer(BaseTrainer):
  with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
  dataset = self.build_dataset(dataset_path, mode, batch_size)
  shuffle = mode == "train"
- if getattr(dataset, "rect", False) and shuffle:
+ if getattr(dataset, "rect", False) and shuffle and not np.all(dataset.batch_shapes == dataset.batch_shapes[0]):
  LOGGER.warning("'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
  shuffle = False
  return build_dataloader(
@@ -117,11 +117,13 @@
  if isinstance(v, torch.Tensor):
  batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
  batch["img"] = batch["img"].float() / 255
- multi_scale = self.args.multi_scale
- if random.random() < multi_scale:
+ if self.args.multi_scale > 0.0:
  imgs = batch["img"]
  sz = (
- random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1 + self.stride))
+ random.randrange(
+ int(self.args.imgsz * (1.0 - self.args.multi_scale)),
+ int(self.args.imgsz * (1.0 + self.args.multi_scale) + self.stride),
+ )
  // self.stride
  * self.stride
  ) # size
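The new size sampling in isolation, for imgsz=640, stride=32, multi_scale=0.5 (values illustrative):

```python
import random

imgsz, stride, multi_scale = 640, 32, 0.5
sz = (
    random.randrange(int(imgsz * (1.0 - multi_scale)), int(imgsz * (1.0 + multi_scale) + stride))
    // stride
    * stride
)
print(sz)  # a stride multiple between 320 and 960
```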
@@ -143,6 +145,8 @@
  self.model.nc = self.data["nc"] # attach number of classes to model
  self.model.names = self.data["names"] # attach class names to model
  self.model.args = self.args # attach hyperparameters to model
+ if getattr(self.model, "end2end"):
+ self.model.set_head_attr(max_det=self.args.max_det)
  # TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc
 
  def get_model(self, cfg: str | None = None, weights: str | None = None, verbose: bool = True):
ultralytics/models/yolo/pose/train.py CHANGED
@@ -9,6 +9,7 @@ from typing import Any
  from ultralytics.models import yolo
  from ultralytics.nn.tasks import PoseModel
  from ultralytics.utils import DEFAULT_CFG
+ from ultralytics.utils.torch_utils import unwrap_model
 
 
  class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -91,7 +92,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
  def get_validator(self):
  """Return an instance of the PoseValidator class for validation."""
  self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
- if getattr(self.model.model[-1], "flow_model", None) is not None:
+ if getattr(unwrap_model(self.model).model[-1], "flow_model", None) is not None:
  self.loss_names += ("rle_loss",)
  return yolo.pose.PoseValidator(
  self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -1,11 +1,14 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ from __future__ import annotations
+
  from pathlib import Path
 
  from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
  from ultralytics.data.utils import check_det_dataset
  from ultralytics.models.yolo.world import WorldTrainer
  from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
+ from ultralytics.utils.checks import check_file
  from ultralytics.utils.torch_utils import unwrap_model
 
 
@@ -100,6 +103,23 @@ class WorldTrainerFromScratch(WorldTrainer):
  self.set_text_embeddings(datasets, batch) # cache text embeddings to accelerate training
  return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]
 
+ @staticmethod
+ def check_data_config(data: dict | str | Path) -> dict:
+ """Check and load the data configuration from a YAML file or dictionary.
+
+ Args:
+ data (dict | str | Path): Data configuration as a dictionary or path to a YAML file.
+
+ Returns:
+ (dict): Data configuration dictionary loaded from YAML file or passed directly.
+ """
+ # If string, load from YAML file
+ if not isinstance(data, dict):
+ from ultralytics.utils import YAML
+
+ return YAML.load(check_file(data))
+ return data
+
  def get_dataset(self):
  """Get train and validation paths from data dictionary.
 
@@ -114,7 +134,7 @@
  AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
  """
  final_data = {}
- data_yaml = self.args.data
+ self.args.data = data_yaml = self.check_data_config(self.args.data)
  assert data_yaml.get("train", False), "train dataset not found" # object365.yaml
  assert data_yaml.get("val", False), "validation dataset not found" # lvis.yaml
  data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -196,7 +196,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
  Returns:
  (dict): Dictionary mapping text samples to their embeddings.
  """
- model = "mobileclip:blt"
+ model = unwrap_model(self.model).text_model
  cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
  if cache_path.exists():
  LOGGER.info(f"Reading existing cache from '{cache_path}'")
@@ -204,7 +204,6 @@
  if sorted(txt_map.keys()) == sorted(texts):
  return txt_map
  LOGGER.info(f"Caching text embeddings to '{cache_path}'")
- assert self.model is not None
  txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
  txt_map = dict(zip(texts, txt_feats.squeeze(0)))
  torch.save(txt_map, cache_path)
ultralytics/nn/autobackend.py CHANGED
@@ -648,7 +648,7 @@ class AutoBackend(nn.Module):
  for k, v in metadata.items():
  if k in {"stride", "batch", "channels"}:
  metadata[k] = int(v)
- elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args"} and isinstance(v, str):
+ elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args", "end2end"} and isinstance(v, str):
  metadata[k] = ast.literal_eval(v)
  stride = metadata["stride"]
  task = metadata["task"]
ultralytics/nn/modules/head.py CHANGED
@@ -69,6 +69,7 @@ class Detect(nn.Module):
  export = False # export mode
  format = None # export format
  max_det = 300 # max_det
+ agnostic_nms = False
  shape = None
  anchors = torch.empty(0) # init
  strides = torch.empty(0) # init
@@ -125,7 +126,12 @@
  @property
  def end2end(self):
  """Checks if the model has one2one for v5/v8/v9/11 backward compatibility."""
- return hasattr(self, "one2one")
+ return getattr(self, "_end2end", True) and hasattr(self, "one2one")
+
+ @end2end.setter
+ def end2end(self, value):
+ """Override the end-to-end detection mode."""
+ self._end2end = value
 
  def forward_head(
  self, x: list[torch.Tensor], box_head: torch.nn.Module = None, cls_head: torch.nn.Module = None
@@ -230,6 +236,11 @@
  # Use max_det directly during export for TensorRT compatibility (requires k to be constant),
  # otherwise use min(max_det, anchors) for safety with small inputs during Python inference
  k = max_det if self.export else min(max_det, anchors)
+ if self.agnostic_nms:
+ scores, labels = scores.max(dim=-1, keepdim=True)
+ scores, indices = scores.topk(k, dim=1)
+ labels = labels.gather(1, indices)
+ return scores, labels, indices
  ori_index = scores.max(dim=-1)[0].topk(k)[1].unsqueeze(-1)
  scores = scores.gather(dim=1, index=ori_index.repeat(1, 1, nc))
  scores, index = scores.flatten(1).topk(k)
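The class-agnostic branch in tensor terms: reduce over classes first, then take top-k anchors and gather their labels. A shape-checked toy run:

```python
import torch

scores = torch.rand(1, 8, 3)  # (batch, anchors, classes)
k = 4
conf, labels = scores.max(dim=-1, keepdim=True)  # best class per anchor -> (1, 8, 1)
conf, indices = conf.topk(k, dim=1)  # top-k anchors by confidence -> (1, 4, 1)
labels = labels.gather(1, indices)  # classes of the selected anchors -> (1, 4, 1)
print(conf.shape, labels.shape, indices.shape)
```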
@@ -1098,7 +1109,7 @@ class YOLOEDetect(Detect):
  boxes, scores, index = [], [], []
  bs = x[0].shape[0]
  cv2 = self.cv2 if not self.end2end else self.one2one_cv2
- cv3 = self.cv3 if not self.end2end else self.one2one_cv2
+ cv3 = self.cv3 if not self.end2end else self.one2one_cv3
  for i in range(self.nl):
  cls_feat = cv3[i](x[i])
  loc_feat = cv2[i](x[i])
ultralytics/nn/tasks.py CHANGED
@@ -425,6 +425,24 @@ class DetectionModel(BaseModel):
  """Return whether the model uses end-to-end NMS-free detection."""
  return getattr(self.model[-1], "end2end", False)
 
+ @end2end.setter
+ def end2end(self, value):
+ """Override the end-to-end detection mode."""
+ self.set_head_attr(end2end=value)
+
+ def set_head_attr(self, **kwargs):
+ """Set attributes of the model head (last layer).
+
+ Args:
+ **kwargs: Arbitrary keyword arguments representing attributes to set.
+ """
+ head = self.model[-1]
+ for k, v in kwargs.items():
+ if not hasattr(head, k):
+ LOGGER.warning(f"Head has no attribute '{k}'.")
+ continue
+ setattr(head, k, v)
+
  def _predict_augment(self, x):
  """Perform augmentations on input image x and return augmented inference and train outputs.
 
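Usage sketch for the new helper (checkpoint name illustrative; unknown attributes are skipped with a warning, per the code above):

```python
from ultralytics import YOLO

det_model = YOLO("yolo26n.pt").model  # underlying DetectionModel (hypothetical weights)
det_model.end2end = False  # property setter routes through set_head_attr(end2end=False)
det_model.set_head_attr(max_det=100, agnostic_nms=True)
det_model.set_head_attr(not_an_attr=1)  # logs "Head has no attribute 'not_an_attr'." and skips
```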
ultralytics/solutions/security_alarm.py CHANGED
@@ -62,7 +62,7 @@ class SecurityAlarm(BaseSolution):
  """
  import smtplib
 
- self.server = smtplib.SMTP("smtp.gmail.com: 587")
+ self.server = smtplib.SMTP("smtp.gmail.com", 587)
  self.server.starttls()
  self.server.login(from_email, password)
  self.to_email = to_email
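For reference, the corrected call matches the standard-library signature smtplib.SMTP(host, port); a minimal sketch with placeholder credentials:

```python
import smtplib

server = smtplib.SMTP("smtp.gmail.com", 587, timeout=30)  # host and port as separate arguments
server.starttls()  # upgrade the connection to TLS before authenticating
server.login("sender@example.com", "app-password")  # placeholder credentials
server.quit()
```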