dgenerate-ultralytics-headless 8.3.194-py3-none-any.whl → 8.3.196-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/METADATA +1 -2
  2. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/RECORD +107 -106
  3. tests/test_python.py +1 -1
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +9 -8
  6. ultralytics/cfg/default.yaml +1 -0
  7. ultralytics/data/annotator.py +1 -1
  8. ultralytics/data/augment.py +76 -76
  9. ultralytics/data/base.py +12 -12
  10. ultralytics/data/build.py +5 -1
  11. ultralytics/data/converter.py +4 -4
  12. ultralytics/data/dataset.py +7 -7
  13. ultralytics/data/loaders.py +15 -15
  14. ultralytics/data/split_dota.py +10 -10
  15. ultralytics/data/utils.py +12 -12
  16. ultralytics/engine/exporter.py +19 -31
  17. ultralytics/engine/model.py +13 -13
  18. ultralytics/engine/predictor.py +16 -14
  19. ultralytics/engine/results.py +21 -21
  20. ultralytics/engine/trainer.py +15 -4
  21. ultralytics/engine/validator.py +6 -2
  22. ultralytics/hub/google/__init__.py +2 -2
  23. ultralytics/hub/session.py +7 -7
  24. ultralytics/models/fastsam/model.py +5 -5
  25. ultralytics/models/fastsam/predict.py +11 -11
  26. ultralytics/models/nas/model.py +1 -1
  27. ultralytics/models/rtdetr/predict.py +2 -2
  28. ultralytics/models/rtdetr/val.py +4 -4
  29. ultralytics/models/sam/amg.py +6 -6
  30. ultralytics/models/sam/build.py +9 -9
  31. ultralytics/models/sam/model.py +7 -7
  32. ultralytics/models/sam/modules/blocks.py +6 -6
  33. ultralytics/models/sam/modules/decoders.py +1 -1
  34. ultralytics/models/sam/modules/encoders.py +27 -27
  35. ultralytics/models/sam/modules/sam.py +4 -4
  36. ultralytics/models/sam/modules/tiny_encoder.py +18 -18
  37. ultralytics/models/sam/modules/utils.py +8 -8
  38. ultralytics/models/sam/predict.py +63 -63
  39. ultralytics/models/utils/loss.py +22 -22
  40. ultralytics/models/utils/ops.py +8 -8
  41. ultralytics/models/yolo/classify/predict.py +2 -2
  42. ultralytics/models/yolo/classify/train.py +9 -19
  43. ultralytics/models/yolo/classify/val.py +4 -4
  44. ultralytics/models/yolo/detect/predict.py +3 -3
  45. ultralytics/models/yolo/detect/train.py +38 -12
  46. ultralytics/models/yolo/detect/val.py +38 -37
  47. ultralytics/models/yolo/model.py +6 -6
  48. ultralytics/models/yolo/obb/train.py +1 -10
  49. ultralytics/models/yolo/obb/val.py +13 -13
  50. ultralytics/models/yolo/pose/train.py +1 -9
  51. ultralytics/models/yolo/pose/val.py +12 -12
  52. ultralytics/models/yolo/segment/predict.py +4 -4
  53. ultralytics/models/yolo/segment/train.py +2 -10
  54. ultralytics/models/yolo/segment/val.py +15 -15
  55. ultralytics/models/yolo/world/train.py +13 -13
  56. ultralytics/models/yolo/world/train_world.py +3 -3
  57. ultralytics/models/yolo/yoloe/predict.py +4 -4
  58. ultralytics/models/yolo/yoloe/train.py +7 -16
  59. ultralytics/models/yolo/yoloe/val.py +0 -7
  60. ultralytics/nn/autobackend.py +2 -2
  61. ultralytics/nn/modules/block.py +6 -6
  62. ultralytics/nn/modules/conv.py +2 -2
  63. ultralytics/nn/modules/head.py +6 -5
  64. ultralytics/nn/tasks.py +17 -15
  65. ultralytics/nn/text_model.py +3 -3
  66. ultralytics/solutions/ai_gym.py +2 -2
  67. ultralytics/solutions/analytics.py +3 -3
  68. ultralytics/solutions/config.py +5 -5
  69. ultralytics/solutions/distance_calculation.py +2 -2
  70. ultralytics/solutions/heatmap.py +1 -1
  71. ultralytics/solutions/instance_segmentation.py +4 -4
  72. ultralytics/solutions/object_counter.py +4 -4
  73. ultralytics/solutions/parking_management.py +7 -7
  74. ultralytics/solutions/queue_management.py +3 -3
  75. ultralytics/solutions/region_counter.py +4 -4
  76. ultralytics/solutions/similarity_search.py +2 -2
  77. ultralytics/solutions/solutions.py +48 -48
  78. ultralytics/solutions/streamlit_inference.py +1 -1
  79. ultralytics/solutions/trackzone.py +4 -4
  80. ultralytics/solutions/vision_eye.py +1 -1
  81. ultralytics/trackers/byte_tracker.py +11 -11
  82. ultralytics/trackers/utils/gmc.py +3 -3
  83. ultralytics/trackers/utils/matching.py +5 -5
  84. ultralytics/utils/__init__.py +30 -19
  85. ultralytics/utils/autodevice.py +2 -2
  86. ultralytics/utils/benchmarks.py +10 -10
  87. ultralytics/utils/callbacks/clearml.py +1 -1
  88. ultralytics/utils/callbacks/comet.py +5 -5
  89. ultralytics/utils/callbacks/tensorboard.py +2 -2
  90. ultralytics/utils/checks.py +7 -5
  91. ultralytics/utils/cpu.py +90 -0
  92. ultralytics/utils/dist.py +1 -1
  93. ultralytics/utils/downloads.py +2 -2
  94. ultralytics/utils/export.py +5 -5
  95. ultralytics/utils/instance.py +2 -2
  96. ultralytics/utils/loss.py +14 -8
  97. ultralytics/utils/metrics.py +35 -35
  98. ultralytics/utils/nms.py +4 -4
  99. ultralytics/utils/ops.py +1 -1
  100. ultralytics/utils/patches.py +2 -2
  101. ultralytics/utils/plotting.py +10 -9
  102. ultralytics/utils/torch_utils.py +113 -15
  103. ultralytics/utils/triton.py +5 -5
  104. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/WHEEL +0 -0
  105. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/entry_points.txt +0 -0
  106. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/licenses/LICENSE +0 -0
  107. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/top_level.txt +0 -0
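Most of the churn in this release is typing modernization: `Dict`, `List`, and `Tuple` annotations from the `typing` module become the built-in `dict`, `list`, and `tuple` generics standardized by PEP 585 (Python 3.9+). A minimal before/after sketch of the pattern (function name and body are hypothetical, for illustration only):

    from typing import Any

    # Before: typing-module generics
    # def preprocess(batch: Dict[str, Any]) -> Tuple[int, int]: ...

    # After: built-in generics, PEP 585 (Python 3.9+)
    def preprocess(batch: dict[str, Any]) -> tuple[int, int]:
        """Illustrative stub: return a (height, width) pair from a batch dict."""
        return batch["img"].shape[-2], batch["img"].shape[-1]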
ultralytics/models/yolo/segment/val.py CHANGED
@@ -57,13 +57,13 @@ class SegmentationValidator(DetectionValidator):
         Preprocess batch of images for YOLO segmentation validation.

         Args:
-            batch (Dict[str, Any]): Batch containing images and annotations.
+            batch (dict[str, Any]): Batch containing images and annotations.

         Returns:
-            (Dict[str, Any]): Preprocessed batch.
+            (dict[str, Any]): Preprocessed batch.
         """
         batch = super().preprocess(batch)
-        batch["masks"] = batch["masks"].to(self.device, non_blocking=True).float()
+        batch["masks"] = batch["masks"].float()
         return batch

     def init_metrics(self, model: torch.nn.Module) -> None:
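The dropped `.to(self.device, non_blocking=True)` in `preprocess` above, taken together with the `preprocess`/`preprocess_batch` overrides removed from `yoloe/val.py` and `yoloe/train.py` further down and the `ultralytics/data/build.py` and `ultralytics/engine/predictor.py` entries in the file list, suggests device placement now happens once upstream rather than per-override. A minimal sketch of such a centralized helper, assuming that is the intent; the name `batch_to_device` is hypothetical, not this package's API:

    import torch

    def batch_to_device(batch: dict, device: torch.device) -> dict:
        """Move every tensor in a batch dict to `device` in one place (hypothetical helper)."""
        # non_blocking=True lets host-to-GPU copies overlap with compute, which
        # only pays off when the source tensors live in pinned (page-locked) memory.
        return {
            k: v.to(device, non_blocking=True) if isinstance(v, torch.Tensor) else v
            for k, v in batch.items()
        }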
@@ -100,10 +100,10 @@ class SegmentationValidator(DetectionValidator):
         Post-process YOLO predictions and return output detections with proto.

         Args:
-            preds (List[torch.Tensor]): Raw predictions from the model.
+            preds (list[torch.Tensor]): Raw predictions from the model.

         Returns:
-            List[Dict[str, torch.Tensor]]: Processed detection predictions with masks.
+            list[dict[str, torch.Tensor]]: Processed detection predictions with masks.
         """
         proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
         preds = super().postprocess(preds[0])
@@ -127,10 +127,10 @@ class SegmentationValidator(DetectionValidator):

         Args:
             si (int): Batch index.
-            batch (Dict[str, Any]): Batch data containing images and annotations.
+            batch (dict[str, Any]): Batch data containing images and annotations.

         Returns:
-            (Dict[str, Any]): Prepared batch with processed annotations.
+            (dict[str, Any]): Prepared batch with processed annotations.
         """
         prepared_batch = super()._prepare_batch(si, batch)
         nl = len(prepared_batch["cls"])
@@ -151,11 +151,11 @@ class SegmentationValidator(DetectionValidator):
         Compute correct prediction matrix for a batch based on bounding boxes and optional masks.

         Args:
-            preds (Dict[str, torch.Tensor]): Dictionary containing predictions with keys like 'cls' and 'masks'.
-            batch (Dict[str, Any]): Dictionary containing batch data with keys like 'cls' and 'masks'.
+            preds (dict[str, torch.Tensor]): Dictionary containing predictions with keys like 'cls' and 'masks'.
+            batch (dict[str, Any]): Dictionary containing batch data with keys like 'cls' and 'masks'.

         Returns:
-            (Dict[str, np.ndarray]): A dictionary containing correct prediction matrices including 'tp_m' for mask IoU.
+            (dict[str, np.ndarray]): A dictionary containing correct prediction matrices including 'tp_m' for mask IoU.

         Notes:
             - If `masks` is True, the function computes IoU between predicted and ground truth masks.
@@ -181,8 +181,8 @@ class SegmentationValidator(DetectionValidator):
         Plot batch predictions with masks and bounding boxes.

         Args:
-            batch (Dict[str, Any]): Batch containing images and annotations.
-            preds (List[Dict[str, torch.Tensor]]): List of predictions from the model.
+            batch (dict[str, Any]): Batch containing images and annotations.
+            preds (list[dict[str, torch.Tensor]]): List of predictions from the model.
             ni (int): Batch index.
         """
         for p in preds:
@@ -199,7 +199,7 @@ class SegmentationValidator(DetectionValidator):
         Args:
             predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
             save_conf (bool): Whether to save confidence scores.
-            shape (Tuple[int, int]): Shape of the original image.
+            shape (tuple[int, int]): Shape of the original image.
             file (Path): File path to save the detections.
         """
         from ultralytics.engine.results import Results
@@ -217,8 +217,8 @@ class SegmentationValidator(DetectionValidator):
         Save one JSON result for COCO evaluation.

         Args:
-            predn (Dict[str, torch.Tensor]): Predictions containing bboxes, masks, confidence scores, and classes.
-            pbatch (Dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
+            predn (dict[str, torch.Tensor]): Predictions containing bboxes, masks, confidence scores, and classes.
+            pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
         """
         from faster_coco_eval.core.mask import encode  # noqa

ultralytics/models/yolo/world/train.py CHANGED
@@ -12,7 +12,7 @@ from ultralytics.data import build_yolo_dataset
 from ultralytics.models.yolo.detect import DetectionTrainer
 from ultralytics.nn.tasks import WorldModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.torch_utils import de_parallel
+from ultralytics.utils.torch_utils import unwrap_model


 def on_pretrain_routine_end(trainer) -> None:
@@ -20,7 +20,7 @@ def on_pretrain_routine_end(trainer) -> None:
     if RANK in {-1, 0}:
         # Set class names for evaluation
         names = [name.split("/", 1)[0] for name in list(trainer.test_loader.dataset.data["names"].values())]
-        de_parallel(trainer.ema.ema).set_classes(names, cache_clip_model=False)
+        unwrap_model(trainer.ema.ema).set_classes(names, cache_clip_model=False)


 class WorldTrainer(DetectionTrainer):
@@ -32,10 +32,10 @@ class WorldTrainer(DetectionTrainer):
     accelerate training with multi-modal data.

     Attributes:
-        text_embeddings (Dict[str, torch.Tensor] | None): Cached text embeddings for category names to accelerate
+        text_embeddings (dict[str, torch.Tensor] | None): Cached text embeddings for category names to accelerate
             training.
         model (WorldModel): The YOLO World model being trained.
-        data (Dict[str, Any]): Dataset configuration containing class information.
+        data (dict[str, Any]): Dataset configuration containing class information.
         args (Any): Training arguments and configuration.

     Methods:
@@ -58,9 +58,9 @@ class WorldTrainer(DetectionTrainer):
         Initialize a WorldTrainer object with given arguments.

         Args:
-            cfg (Dict[str, Any]): Configuration for the trainer.
-            overrides (Dict[str, Any], optional): Configuration overrides.
-            _callbacks (List[Any], optional): List of callback functions.
+            cfg (dict[str, Any]): Configuration for the trainer.
+            overrides (dict[str, Any], optional): Configuration overrides.
+            _callbacks (list[Any], optional): List of callback functions.
         """
         if overrides is None:
             overrides = {}
@@ -72,7 +72,7 @@ class WorldTrainer(DetectionTrainer):
         Return WorldModel initialized with specified config and weights.

         Args:
-            cfg (Dict[str, Any] | str, optional): Model configuration.
+            cfg (dict[str, Any] | str, optional): Model configuration.
             weights (str, optional): Path to pretrained weights.
             verbose (bool): Whether to display model info.

@@ -105,7 +105,7 @@ class WorldTrainer(DetectionTrainer):
         Returns:
             (Any): YOLO dataset configured for training or validation.
         """
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         dataset = build_yolo_dataset(
             self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
         )
@@ -121,7 +121,7 @@ class WorldTrainer(DetectionTrainer):
         for these categories to improve training efficiency.

         Args:
-            datasets (List[Any]): List of datasets from which to extract category names.
+            datasets (list[Any]): List of datasets from which to extract category names.
             batch (int | None): Batch size used for processing.

         Notes:
@@ -144,12 +144,12 @@ class WorldTrainer(DetectionTrainer):
         Generate text embeddings for a list of text samples.

         Args:
-            texts (List[str]): List of text samples to encode.
+            texts (list[str]): List of text samples to encode.
             batch (int): Batch size for processing.
             cache_dir (Path): Directory to save/load cached embeddings.

         Returns:
-            (Dict[str, torch.Tensor]): Dictionary mapping text samples to their embeddings.
+            (dict[str, torch.Tensor]): Dictionary mapping text samples to their embeddings.
         """
         model = "clip:ViT-B/32"
         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
@@ -160,7 +160,7 @@ class WorldTrainer(DetectionTrainer):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, cache_clip_model=False)
+        txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
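This file group also renames `de_parallel` to `unwrap_model` (the rename recurs in `train_world.py` and `yoloe/train.py` below, backed by the `ultralytics/utils/torch_utils.py` changes). A plausible reading is that the helper now strips more than DataParallel/DistributedDataParallel wrappers, for example `torch.compile` ones; the sketch below is an assumption about its shape, not the package's actual implementation:

    import torch.nn as nn

    def unwrap_model(model: nn.Module) -> nn.Module:
        """Return the bare module beneath common training-time wrappers (assumed behavior)."""
        if isinstance(model, (nn.DataParallel, nn.parallel.DistributedDataParallel)):
            return model.module  # DP/DDP keep the real model under .module
        return getattr(model, "_orig_mod", model)  # torch.compile() wrapper, else unchanged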
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -6,7 +6,7 @@ from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_data
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
 from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
-from ultralytics.utils.torch_utils import de_parallel
+from ultralytics.utils.torch_utils import unwrap_model


 class WorldTrainerFromScratch(WorldTrainer):
@@ -94,14 +94,14 @@ class WorldTrainerFromScratch(WorldTrainer):
         standard YOLO datasets and grounding datasets with different formats.

         Args:
-            img_path (List[str] | str): Path to the folder containing images or list of paths.
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
             mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
             batch (int, optional): Size of batches, used for rectangular training/validation.

         Returns:
             (YOLOConcatDataset | Dataset): The constructed dataset for training or validation.
         """
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         if mode != "train":
             return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=False, stride=gs)
         datasets = [
ultralytics/models/yolo/yoloe/predict.py CHANGED
@@ -75,12 +75,12 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         else:
             # NOTE: only supports bboxes as prompts for now
             assert bboxes is not None, f"Expected bboxes, but got {bboxes}!"
-            # NOTE: needs List[np.ndarray]
+            # NOTE: needs list[np.ndarray]
             assert isinstance(bboxes, list) and all(isinstance(b, np.ndarray) for b in bboxes), (
-                f"Expected List[np.ndarray], but got {bboxes}!"
+                f"Expected list[np.ndarray], but got {bboxes}!"
             )
             assert isinstance(category, list) and all(isinstance(b, np.ndarray) for b in category), (
-                f"Expected List[np.ndarray], but got {category}!"
+                f"Expected list[np.ndarray], but got {category}!"
             )
             assert len(im) == len(category) == len(bboxes), (
                 f"Expected same length for all inputs, but got {len(im)}vs{len(category)}vs{len(bboxes)}!"
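The assertions above pin down the expected prompt format: per source image, one `np.ndarray` of xyxy boxes and one of class indices, with all three inputs the same length. An illustrative sketch (values invented for the example):

    import numpy as np

    # One entry per source image: xyxy boxes as an (N, 4) array, classes as an (N,) array
    bboxes = [np.array([[48, 60, 320, 400], [10, 12, 98, 110]], dtype=np.float32)]
    category = [np.array([0, 1])]
    # The predictor asserts len(im) == len(bboxes) == len(category)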
@@ -149,7 +149,7 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         Process the source to get the visual prompt embeddings (VPE).

         Args:
-            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | List | Tuple): The source
+            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | list | tuple): The source
                 of the image to make predictions on. Accepts various types including file paths, URLs, PIL
                 images, numpy arrays, and torch tensors.

ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -13,7 +13,7 @@ from ultralytics.data.augment import LoadVisualPrompt
 from ultralytics.models.yolo.detect import DetectionTrainer, DetectionValidator
 from ultralytics.nn.tasks import YOLOEModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.torch_utils import de_parallel
+from ultralytics.utils.torch_utils import unwrap_model

 from ..world.train_world import WorldTrainerFromScratch
 from .val import YOLOEDetectValidator
@@ -39,9 +39,6 @@ class YOLOETrainer(DetectionTrainer):
         """
         Initialize the YOLOE Trainer with specified configurations.

-        This method sets up the YOLOE trainer with the provided configuration and overrides, initializing
-        the training environment, model, and callbacks for YOLOE object detection training.
-
         Args:
             cfg (dict): Configuration dictionary with default training settings from DEFAULT_CFG.
             overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
@@ -102,7 +99,7 @@ class YOLOETrainer(DetectionTrainer):
         Returns:
             (Dataset): YOLO dataset configured for training or validation.
         """
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         return build_yolo_dataset(
             self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
         )
@@ -183,7 +180,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         standard YOLO datasets and grounding datasets with different formats.

         Args:
-            img_path (List[str] | str): Path to the folder containing images or list of paths.
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
             mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
             batch (int, optional): Size of batches, used for rectangular training/validation.

@@ -207,7 +204,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         Generate text embeddings for a list of text samples.

         Args:
-            texts (List[str]): List of text samples to encode.
+            texts (list[str]): List of text samples to encode.
             batch (int): Batch size for processing.
             cache_dir (Path): Directory to save/load cached embeddings.

@@ -223,7 +220,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
+        txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
@@ -262,7 +259,7 @@ class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
         in the parent directory of the first dataset's image path.

         Args:
-            datasets (List[Dataset]): List of datasets containing category names to process.
+            datasets (list[Dataset]): List of datasets containing category names to process.
             batch (int): Batch size for processing text embeddings.

         Notes:
@@ -290,7 +287,7 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
         Build YOLO Dataset for training or validation with visual prompts.

         Args:
-            img_path (List[str] | str): Path to the folder containing images or list of paths.
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
             mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
             batch (int, optional): Size of batches, used for rectangular training/validation.

@@ -313,9 +310,3 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
                 d.transforms.append(LoadVisualPrompt())
         else:
             self.train_loader.dataset.transforms.append(LoadVisualPrompt())
-
-    def preprocess_batch(self, batch):
-        """Preprocess a batch of images for YOLOE training, moving visual prompts to the appropriate device."""
-        batch = super().preprocess_batch(batch)
-        batch["visuals"] = batch["visuals"].to(self.device, non_blocking=True)
-        return batch
ultralytics/models/yolo/yoloe/val.py CHANGED
@@ -98,13 +98,6 @@ class YOLOEDetectValidator(DetectionValidator):
         visual_pe[cls_visual_num == 0] = 0
         return visual_pe.unsqueeze(0)

-    def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
-        """Preprocess batch data, ensuring visuals are on the same device as images."""
-        batch = super().preprocess(batch)
-        if "visuals" in batch:
-            batch["visuals"] = batch["visuals"].to(batch["img"].device, non_blocking=True)
-        return batch
-
     def get_vpe_dataloader(self, data: dict[str, Any]) -> torch.utils.data.DataLoader:
         """
         Create a dataloader for LVIS training visual prompt samples.
ultralytics/nn/autobackend.py CHANGED
@@ -624,7 +624,7 @@ class AutoBackend(nn.Module):
             **kwargs (Any): Additional keyword arguments for model configuration.

         Returns:
-            (torch.Tensor | List[torch.Tensor]): The raw output tensor(s) from the model.
+            (torch.Tensor | list[torch.Tensor]): The raw output tensor(s) from the model.
         """
         b, ch, h, w = im.shape  # batch, channel, height, width
         if self.fp16 and im.dtype != torch.float16:
@@ -860,7 +860,7 @@ class AutoBackend(nn.Module):
             p (str): Path to the model file.

         Returns:
-            (List[bool]): List of booleans indicating the model type.
+            (list[bool]): List of booleans indicating the model type.

         Examples:
             >>> model = AutoBackend(model="path/to/model.onnx")
ultralytics/nn/modules/block.py CHANGED
@@ -745,7 +745,7 @@ class ImagePoolingAttn(nn.Module):
         Forward pass of ImagePoolingAttn.

         Args:
-            x (List[torch.Tensor]): List of input feature maps.
+            x (list[torch.Tensor]): List of input feature maps.
             text (torch.Tensor): Text embeddings.

         Returns:
@@ -1032,7 +1032,7 @@ class CBLinear(nn.Module):

         Args:
             c1 (int): Input channels.
-            c2s (List[int]): List of output channel sizes.
+            c2s (list[int]): List of output channel sizes.
             k (int): Kernel size.
             s (int): Stride.
             p (int | None): Padding.
@@ -1055,7 +1055,7 @@ class CBFuse(nn.Module):
         Initialize CBFuse module.

         Args:
-            idx (List[int]): Indices for feature selection.
+            idx (list[int]): Indices for feature selection.
         """
         super().__init__()
         self.idx = idx
@@ -1065,7 +1065,7 @@ class CBFuse(nn.Module):
         Forward pass through CBFuse layer.

         Args:
-            xs (List[torch.Tensor]): List of input tensors.
+            xs (list[torch.Tensor]): List of input tensors.

         Returns:
             (torch.Tensor): Fused output tensor.
@@ -1676,7 +1676,7 @@ class TorchVision(nn.Module):
             x (torch.Tensor): Input tensor.

         Returns:
-            (torch.Tensor | List[torch.Tensor]): Output tensor or list of tensors.
+            (torch.Tensor | list[torch.Tensor]): Output tensor or list of tensors.
         """
         if self.split:
             y = [x]
@@ -1979,7 +1979,7 @@ class SAVPE(nn.Module):
         Initialize SAVPE module with channels, intermediate channels, and embedding dimension.

         Args:
-            ch (List[int]): List of input channel dimensions.
+            ch (list[int]): List of input channel dimensions.
             c3 (int): Intermediate channels.
             embed (int): Embedding dimension.
         """
ultralytics/nn/modules/conv.py CHANGED
@@ -675,7 +675,7 @@ class Concat(nn.Module):
         Concatenate input tensors along specified dimension.

         Args:
-            x (List[torch.Tensor]): List of input tensors.
+            x (list[torch.Tensor]): List of input tensors.

         Returns:
             (torch.Tensor): Concatenated tensor.
@@ -706,7 +706,7 @@ class Index(nn.Module):
         Select and return a particular index from input.

         Args:
-            x (List[torch.Tensor]): List of input tensors.
+            x (list[torch.Tensor]): List of input tensors.

         Returns:
             (torch.Tensor): Selected tensor.
ultralytics/nn/modules/head.py CHANGED
@@ -13,7 +13,7 @@ from torch.nn.init import constant_, xavier_uniform_

 from ultralytics.utils import NOT_MACOS14
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
-from ultralytics.utils.torch_utils import fuse_conv_and_bn, smart_inference_mode
+from ultralytics.utils.torch_utils import disable_dynamo, fuse_conv_and_bn, smart_inference_mode

 from .block import DFL, SAVPE, BNContrastiveHead, ContrastiveHead, Proto, Residual, SwiGLUFFN
 from .conv import Conv, DWConv
@@ -130,7 +130,7 @@ class Detect(nn.Module):
         Perform forward pass of the v10Detect module.

         Args:
-            x (List[torch.Tensor]): Input feature maps from different levels.
+            x (list[torch.Tensor]): Input feature maps from different levels.

         Returns:
             outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs.
@@ -149,12 +149,13 @@ class Detect(nn.Module):
             y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})

+    @disable_dynamo
     def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
         """
         Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.

         Args:
-            x (List[torch.Tensor]): List of feature maps from different detection layers.
+            x (list[torch.Tensor]): List of feature maps from different detection layers.

         Returns:
             (torch.Tensor): Concatenated tensor of decoded bounding boxes and class probabilities.
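The new `@disable_dynamo` decorator keeps TorchDynamo from tracing the box-decoding step in `Detect._inference`; the decorator itself is added in the `ultralytics/utils/torch_utils.py` diff (+113/-15 in the file list, contents not shown here). A minimal sketch of what such a guard typically looks like, assuming it simply defers to `torch._dynamo.disable` when present:

    import torch

    def disable_dynamo(func):
        """Exclude `func` from TorchDynamo compilation; no-op on builds without _dynamo."""
        if hasattr(torch, "_dynamo"):
            return torch._dynamo.disable(func)
        return func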
@@ -985,7 +986,7 @@ class RTDETRDecoder(nn.Module):
         Run the forward pass of the module, returning bounding box and classification scores for the input.

         Args:
-            x (List[torch.Tensor]): List of feature maps from the backbone.
+            x (list[torch.Tensor]): List of feature maps from the backbone.
             batch (dict, optional): Batch information for training.

         Returns:
@@ -1075,7 +1076,7 @@ class RTDETRDecoder(nn.Module):
         Process and return encoder inputs by getting projection features from input and concatenating them.

         Args:
-            x (List[torch.Tensor]): List of feature maps from the backbone.
+            x (list[torch.Tensor]): List of feature maps from the backbone.

         Returns:
             feats (torch.Tensor): Processed features.
ultralytics/nn/tasks.py CHANGED
@@ -69,7 +69,7 @@ from ultralytics.nn.modules import (
     YOLOESegment,
     v10Detect,
 )
-from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, YAML, colorstr, emojis
+from ultralytics.utils import DEFAULT_CFG_DICT, LOGGER, YAML, colorstr, emojis
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
 from ultralytics.utils.loss import (
     E2EDetectLoss,
@@ -329,12 +329,13 @@ class BaseModel(torch.nn.Module):

         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor | List[torch.Tensor], optional): Predictions.
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if getattr(self, "criterion", None) is None:
             self.criterion = self.init_criterion()

-        preds = self.forward(batch["img"]) if preds is None else preds
+        if preds is None:
+            preds = self.forward(batch["img"])
         return self.criterion(preds, batch)

     def init_criterion(self):
@@ -480,10 +481,10 @@ class DetectionModel(BaseModel):
         Clip YOLO augmented inference tails.

         Args:
-            y (List[torch.Tensor]): List of detection tensors.
+            y (list[torch.Tensor]): List of detection tensors.

         Returns:
-            (List[torch.Tensor]): Clipped detection tensors.
+            (list[torch.Tensor]): Clipped detection tensors.
         """
         nl = self.model[-1].nl  # number of detection layers (P3-P5)
         g = sum(4**x for x in range(nl))  # grid points
@@ -775,7 +776,8 @@ class RTDETRDetectionModel(DetectionModel):
             "gt_groups": gt_groups,
         }

-        preds = self.predict(img, batch=targets) if preds is None else preds
+        if preds is None:
+            preds = self.predict(img, batch=targets)
         dec_bboxes, dec_scores, enc_bboxes, enc_scores, dn_meta = preds if self.training else preds[1]
         if dn_meta is None:
             dn_bboxes, dn_scores = None, None
@@ -874,7 +876,7 @@ class WorldModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.

         Args:
-            text (List[str]): List of class names.
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.
         """
@@ -886,7 +888,7 @@ class WorldModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.

         Args:
-            text (List[str]): List of class names.
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.

@@ -956,7 +958,7 @@ class WorldModel(DetectionModel):

         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor | List[torch.Tensor], optional): Predictions.
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             self.criterion = self.init_criterion()
@@ -1012,7 +1014,7 @@ class YOLOEModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.

         Args:
-            text (List[str]): List of class names.
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.
             without_reprta (bool): Whether to return text embeddings cooperated with reprta module.
@@ -1060,7 +1062,7 @@ class YOLOEModel(DetectionModel):

         Args:
             vocab (nn.ModuleList): List of vocabulary items.
-            names (List[str]): List of class names.
+            names (list[str]): List of class names.
         """
         assert not self.training
         head = self.model[-1]
@@ -1114,7 +1116,7 @@ class YOLOEModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.

         Args:
-            names (List[str]): List of class names.
+            names (list[str]): List of class names.
             embeddings (torch.Tensor): Embeddings tensor.
         """
         assert not hasattr(self.model[-1], "lrpc"), (
@@ -1203,7 +1205,7 @@ class YOLOEModel(DetectionModel):

         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor | List[torch.Tensor], optional): Predictions.
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             from ultralytics.utils.loss import TVPDetectLoss
@@ -1251,7 +1253,7 @@ class YOLOESegModel(YOLOEModel, SegmentationModel):

         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor | List[torch.Tensor], optional): Predictions.
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             from ultralytics.utils.loss import TVPSegmentLoss
@@ -1502,7 +1504,7 @@ def load_checkpoint(weight, device=None, inplace=True, fuse=False):
     model = (ckpt.get("ema") or ckpt["model"]).float()  # FP32 model

     # Model compatibility updates
-    model.args = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS}  # attach args to model
+    model.args = args  # attach args to model
     model.pt_path = weight  # attach *.pt file path to model
     model.task = getattr(model, "task", guess_model_task(model))
     if not hasattr(model, "stride"):
ultralytics/nn/text_model.py CHANGED
@@ -97,7 +97,7 @@ class CLIP(TextModel):
         Convert input texts to CLIP tokens.

         Args:
-            texts (str | List[str]): Input text or list of texts to tokenize.
+            texts (str | list[str]): Input text or list of texts to tokenize.

         Returns:
             (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
@@ -240,7 +240,7 @@ class MobileCLIP(TextModel):
         Convert input texts to MobileCLIP tokens.

         Args:
-            texts (List[str]): List of text strings to tokenize.
+            texts (list[str]): List of text strings to tokenize.

         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
@@ -325,7 +325,7 @@ class MobileCLIPTS(TextModel):
         Convert input texts to MobileCLIP tokens.

         Args:
-            texts (List[str]): List of text strings to tokenize.
+            texts (list[str]): List of text strings to tokenize.

         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
ultralytics/solutions/ai_gym.py CHANGED
@@ -14,10 +14,10 @@ class AIGym(BaseSolution):
     repetitions of exercises based on predefined angle thresholds for up and down positions.

     Attributes:
-        states (Dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
+        states (dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
         up_angle (float): Angle threshold for considering the 'up' position of an exercise.
         down_angle (float): Angle threshold for considering the 'down' position of an exercise.
-        kpts (List[int]): Indices of keypoints used for angle calculation.
+        kpts (list[int]): Indices of keypoints used for angle calculation.

     Methods:
         process: Process a frame to detect poses, calculate angles, and count repetitions.