dgenerate-ultralytics-headless 8.3.160__py3-none-any.whl → 8.3.162__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/METADATA +9 -1
  2. {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/RECORD +67 -67
  3. tests/conftest.py +2 -2
  4. tests/test_python.py +4 -3
  5. ultralytics/__init__.py +1 -1
  6. ultralytics/cfg/datasets/Argoverse.yaml +1 -1
  7. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  8. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  9. ultralytics/cfg/datasets/GlobalWheat2020.yaml +1 -1
  10. ultralytics/cfg/datasets/HomeObjects-3K.yaml +1 -1
  11. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  12. ultralytics/cfg/datasets/Objects365.yaml +1 -1
  13. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  14. ultralytics/cfg/datasets/VOC.yaml +1 -1
  15. ultralytics/cfg/datasets/VisDrone.yaml +6 -3
  16. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  17. ultralytics/cfg/datasets/brain-tumor.yaml +1 -1
  18. ultralytics/cfg/datasets/carparts-seg.yaml +1 -1
  19. ultralytics/cfg/datasets/coco-pose.yaml +1 -1
  20. ultralytics/cfg/datasets/coco.yaml +1 -1
  21. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  22. ultralytics/cfg/datasets/coco128.yaml +1 -1
  23. ultralytics/cfg/datasets/coco8-grayscale.yaml +1 -1
  24. ultralytics/cfg/datasets/coco8-multispectral.yaml +1 -1
  25. ultralytics/cfg/datasets/coco8-pose.yaml +1 -1
  26. ultralytics/cfg/datasets/coco8-seg.yaml +1 -1
  27. ultralytics/cfg/datasets/coco8.yaml +1 -1
  28. ultralytics/cfg/datasets/crack-seg.yaml +1 -1
  29. ultralytics/cfg/datasets/dog-pose.yaml +1 -1
  30. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  31. ultralytics/cfg/datasets/dota8.yaml +1 -1
  32. ultralytics/cfg/datasets/hand-keypoints.yaml +1 -1
  33. ultralytics/cfg/datasets/lvis.yaml +1 -1
  34. ultralytics/cfg/datasets/medical-pills.yaml +1 -1
  35. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  36. ultralytics/cfg/datasets/package-seg.yaml +1 -1
  37. ultralytics/cfg/datasets/signature.yaml +1 -1
  38. ultralytics/cfg/datasets/tiger-pose.yaml +1 -1
  39. ultralytics/cfg/datasets/xView.yaml +1 -1
  40. ultralytics/data/augment.py +2 -0
  41. ultralytics/data/converter.py +5 -7
  42. ultralytics/data/dataset.py +1 -1
  43. ultralytics/data/split.py +1 -1
  44. ultralytics/data/split_dota.py +1 -1
  45. ultralytics/engine/exporter.py +15 -5
  46. ultralytics/engine/results.py +1 -1
  47. ultralytics/engine/tuner.py +2 -2
  48. ultralytics/models/nas/model.py +2 -1
  49. ultralytics/models/sam/modules/tiny_encoder.py +1 -1
  50. ultralytics/models/yolo/detect/val.py +1 -1
  51. ultralytics/models/yolo/world/train.py +1 -1
  52. ultralytics/models/yolo/world/train_world.py +17 -9
  53. ultralytics/models/yolo/yoloe/train.py +1 -1
  54. ultralytics/nn/autobackend.py +7 -1
  55. ultralytics/nn/tasks.py +4 -3
  56. ultralytics/solutions/similarity_search.py +11 -12
  57. ultralytics/solutions/solutions.py +53 -54
  58. ultralytics/utils/__init__.py +1 -2
  59. ultralytics/utils/checks.py +21 -0
  60. ultralytics/utils/metrics.py +10 -9
  61. ultralytics/utils/patches.py +1 -2
  62. ultralytics/utils/plotting.py +2 -2
  63. ultralytics/utils/torch_utils.py +2 -1
  64. {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/WHEEL +0 -0
  65. {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/entry_points.txt +0 -0
  66. {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/licenses/LICENSE +0 -0
  67. {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/top_level.txt +0 -0
ultralytics/cfg/datasets/hand-keypoints.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── hand-keypoints ← downloads here (369 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/hand-keypoints # dataset root dir
+path: hand-keypoints # dataset root dir
 train: train # train images (relative to 'path') 18776 images
 val: val # val images (relative to 'path') 7992 images
 
ultralytics/cfg/datasets/lvis.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── lvis ← downloads here (20.1 GB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/lvis # dataset root dir
+path: lvis # dataset root dir
 train: train.txt # train images (relative to 'path') 100170 images
 val: val.txt # val images (relative to 'path') 19809 images
 minival: minival.txt # minival images (relative to 'path') 5000 images
ultralytics/cfg/datasets/medical-pills.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── medical-pills ← downloads here (8.19 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/medical-pills # dataset root dir
+path: medical-pills # dataset root dir
 train: train/images # train images (relative to 'path') 92 images
 val: valid/images # val images (relative to 'path') 23 images
 test: # test images (relative to 'path')
ultralytics/cfg/datasets/open-images-v7.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── open-images-v7 ← downloads here (561 GB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/open-images-v7 # dataset root dir
+path: open-images-v7 # dataset root dir
 train: images/train # train images (relative to 'path') 1743042 images
 val: images/val # val images (relative to 'path') 41620 images
 test: # test images (optional)
ultralytics/cfg/datasets/package-seg.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── package-seg ← downloads here (102 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/package-seg # dataset root dir
+path: package-seg # dataset root dir
 train: train/images # train images (relative to 'path') 1920 images
 val: valid/images # val images (relative to 'path') 89 images
 test: test/images # test images (relative to 'path') 188 images
ultralytics/cfg/datasets/signature.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── signature ← downloads here (11.2 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/signature # dataset root dir
+path: signature # dataset root dir
 train: train/images # train images (relative to 'path') 143 images
 val: valid/images # val images (relative to 'path') 35 images
 
ultralytics/cfg/datasets/tiger-pose.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── tiger-pose ← downloads here (75.3 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/tiger-pose # dataset root dir
+path: tiger-pose # dataset root dir
 train: train # train images (relative to 'path') 210 images
 val: val # val images (relative to 'path') 53 images
 
ultralytics/cfg/datasets/xView.yaml CHANGED
@@ -10,7 +10,7 @@
 # └── xView ← downloads here (20.7 GB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/xView # dataset root dir
+path: xView # dataset root dir
 train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
 val: images/autosplit_val.txt # train images (relative to 'path') 10% of 847 train images
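Note: every `path:` change above (and the matching docstring changes below) drops the `../datasets/` prefix in favor of a bare dataset name. This relies on Ultralytics resolving relative dataset paths against its configured `DATASETS_DIR` setting. A minimal sketch of that resolution logic, with a hard-coded stand-in for the configured directory:

    from pathlib import Path

    DATASETS_DIR = Path.home() / "datasets"  # stand-in for the configured Ultralytics datasets directory

    def resolve_dataset_path(p: str) -> Path:
        """Pass absolute or existing paths through; anchor bare names under DATASETS_DIR."""
        path = Path(p)
        return path if path.is_absolute() or path.exists() else DATASETS_DIR / path

    print(resolve_dataset_path("hand-keypoints"))  # -> ~/datasets/hand-keypoints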
 
ultralytics/data/augment.py CHANGED
@@ -1805,6 +1805,8 @@ class CopyPaste(BaseMixTransform):
     def _transform(self, labels1, labels2={}):
         """Apply Copy-Paste augmentation to combine objects from another image into the current image."""
         im = labels1["img"]
+        if "mosaic_border" not in labels1:
+            im = im.copy()  # avoid modifying original non-mosaic image
         cls = labels1["cls"]
         h, w = im.shape[:2]
         instances = labels1.pop("instances")
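Note: the new `im.copy()` guard matters because non-mosaic images reach `_transform` by reference, so pasting objects into them would otherwise mutate the caller's array. A small NumPy demonstration of the aliasing being avoided (illustrative arrays only):

    import numpy as np

    frame = np.zeros((4, 4), dtype=np.uint8)
    alias = frame            # no copy: writes show through to the caller's array
    alias[0, 0] = 255
    assert frame[0, 0] == 255

    frame = np.zeros((4, 4), dtype=np.uint8)
    safe = frame.copy()      # copy: the caller's array stays untouched
    safe[0, 0] = 255
    assert frame[0, 0] == 0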
ultralytics/data/converter.py CHANGED
@@ -248,12 +248,10 @@ def convert_coco(
         >>> from ultralytics.data.converter import convert_coco
 
         Convert COCO annotations to YOLO format
-        >>> convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
+        >>> convert_coco("coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
 
         Convert LVIS annotations to YOLO format
-        >>> convert_coco(
-        ...     "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
-        ... )
+        >>> convert_coco("lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
     """
     # Create dataset directory
     save_dir = increment_path(save_dir)  # increment if save directory already exists
@@ -498,7 +496,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
                 formatted_coords = [f"{coord:.6g}" for coord in normalized_coords]
                 g.write(f"{class_idx} {' '.join(formatted_coords)}\n")
 
-    for phase in ["train", "val"]:
+    for phase in {"train", "val"}:
        image_dir = dota_root_path / "images" / phase
        orig_label_dir = dota_root_path / "labels" / f"{phase}_original"
        save_dir = dota_root_path / "labels" / phase
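Note: this and several later hunks swap list literals for set literals in `for` loops. Sets signal membership intent, but set iteration order is arbitrary (string hashing is randomized per interpreter run), so the pattern is only safe where the loop body is order-independent, as it is in these hunks:

    for phase in {"train", "val"}:  # order may differ between runs
        print(phase)

    for phase in ["train", "val"]:  # order is fixed, duplicates allowed
        print(phase)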
@@ -686,7 +684,7 @@ def create_synthetic_coco_dataset():
     # Create synthetic images
     shutil.rmtree(dir / "labels" / "test2017", ignore_errors=True)  # Remove test2017 directory as not needed
     with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
-        for subset in ["train2017", "val2017"]:
+        for subset in {"train2017", "val2017"}:
             subset_dir = dir / "images" / subset
             subset_dir.mkdir(parents=True, exist_ok=True)
 
@@ -724,7 +722,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
         >>> convert_to_multispectral("path/to/image.jpg", n_channels=10)
 
         Convert a dataset
-        >>> convert_to_multispectral("../datasets/coco8", n_channels=10)
+        >>> convert_to_multispectral("coco8", n_channels=10)
     """
     from scipy.interpolate import interp1d
 
ultralytics/data/dataset.py CHANGED
@@ -482,7 +482,7 @@ class GroundingDataset(YOLODataset):
             a warning is logged and verification is skipped.
         """
         expected_counts = {
-            "final_mixed_train_no_coco_segm": 3662344,
+            "final_mixed_train_no_coco_segm": 3662412,
             "final_mixed_train_no_coco": 3681235,
             "final_flickr_separateGT_train_segm": 638214,
             "final_flickr_separateGT_train": 640704,
ultralytics/data/split.py CHANGED
@@ -135,4 +135,4 @@ def autosplit(
 
 
 if __name__ == "__main__":
-    split_classify_dataset("../datasets/caltech101")
+    split_classify_dataset("caltech101")
ultralytics/data/split_dota.py CHANGED
@@ -295,7 +295,7 @@ def split_trainval(
     for r in rates:
         crop_sizes.append(int(crop_size / r))
         gaps.append(int(gap / r))
-    for split in ["train", "val"]:
+    for split in {"train", "val"}:
         split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)
 
 
ultralytics/engine/exporter.py CHANGED
@@ -100,6 +100,7 @@ from ultralytics.utils.checks import (
     check_is_path_safe,
     check_requirements,
     check_version,
+    is_intel,
     is_sudo_available,
 )
 from ultralytics.utils.downloads import attempt_download_asset, get_github_assets, safe_download
@@ -107,7 +108,7 @@ from ultralytics.utils.export import export_engine, export_onnx
 from ultralytics.utils.files import file_size, spaces_in_path
 from ultralytics.utils.ops import Profile, nms_rotated
 from ultralytics.utils.patches import arange_patch
-from ultralytics.utils.torch_utils import TORCH_1_13, get_cpu_info, get_latest_opset, select_device
+from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device
 
 
 def export_formats():
@@ -372,9 +373,9 @@
             raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
         # Recommend OpenVINO if export and Intel CPU
         if SETTINGS.get("openvino_msg"):
-            if "intel" in get_cpu_info().lower():
+            if is_intel():
                 LOGGER.info(
-                    "💡 ProTip: Export to OpenVINO format for best performance on Intel CPUs."
+                    "💡 ProTip: Export to OpenVINO format for best performance on Intel hardware."
                     " Learn more at https://docs.ultralytics.com/integrations/openvino/"
                 )
                 SETTINGS["openvino_msg"] = False
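Note: `is_intel()` is a new helper in `ultralytics/utils/checks.py` (the +21-line change in the file list above). A minimal sketch of such a check, assuming it inspects the CPU brand string the way the replaced inline test did; the real helper may also detect Intel GPUs, which would explain the message changing from "Intel CPUs" to "Intel hardware":

    from ultralytics.utils.torch_utils import get_cpu_info  # helper the old inline check used

    def is_intel_cpu() -> bool:
        """Hypothetical stand-in for checks.is_intel(): True if the CPU brand string mentions Intel."""
        return "intel" in get_cpu_info().lower()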
@@ -706,7 +707,16 @@
     def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
         """Export YOLO model to PaddlePaddle format."""
         assert not IS_JETSON, "Jetson Paddle exports not supported yet"
-        check_requirements(("paddlepaddle-gpu" if torch.cuda.is_available() else "paddlepaddle>=3.0.0", "x2paddle"))
+        check_requirements(
+            (
+                "paddlepaddle-gpu"
+                if torch.cuda.is_available()
+                else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
+                if ARM64
+                else "paddlepaddle>=3.0.0",
+                "x2paddle",
+            )
+        )
         import x2paddle  # noqa
         from x2paddle.convert import pytorch2paddle  # noqa
 
@@ -939,7 +949,7 @@
                 "tf_keras",  # required by 'onnx2tf' package
                 "sng4onnx>=1.0.1",  # required by 'onnx2tf' package
                 "onnx_graphsurgeon>=0.3.26",  # required by 'onnx2tf' package
-                "ai-edge-litert>=1.2.0",  # required by 'onnx2tf' package
+                "ai-edge-litert>=1.2.0,<1.4.0",  # required by 'onnx2tf' package
                 "onnx>=1.12.0,<1.18.0",
                 "onnx2tf>=1.26.3",
                 "onnxslim>=0.1.56",
ultralytics/engine/results.py CHANGED
@@ -800,7 +800,7 @@ class Results(SimpleClass, DataExportMixin):
             decimals (int): Number of decimal places to round the output values to.
 
         Returns:
-            (List[Dict]): A list of dictionaries, each containing summarized information for a single detection
+            (List[Dict[str, Any]]): A list of dictionaries, each containing summarized information for a single detection
                 or classification result. The structure of each dictionary varies based on the task type
                 (classification or detection) and available information (boxes, masks, keypoints).
 
ultralytics/engine/tuner.py CHANGED
@@ -21,10 +21,10 @@ import time
 from typing import Dict, List, Optional
 
 import numpy as np
-import torch
 
 from ultralytics.cfg import get_cfg, get_save_dir
 from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
+from ultralytics.utils.patches import torch_load
 from ultralytics.utils.plotting import plot_tune_results
 
 
@@ -198,7 +198,7 @@ class Tuner:
                 cmd = [*launch, "train", *(f"{k}={v}" for k, v in train_args.items())]
                 return_code = subprocess.run(cmd, check=True).returncode
                 ckpt_file = weights_dir / ("best.pt" if (weights_dir / "best.pt").exists() else "last.pt")
-                metrics = torch.load(ckpt_file)["train_metrics"]
+                metrics = torch_load(ckpt_file)["train_metrics"]
                 assert return_code == 0, "training failed"
 
             except Exception as e:
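Note: this release routes checkpoint reads through `torch_load` from `ultralytics.utils.patches` (also in `models/nas/model.py` and `nn/tasks.py` below). A sketch of what such a wrapper typically does, assuming its purpose is to keep full pickled checkpoints loadable now that newer PyTorch versions default `weights_only=True` in `torch.load`:

    import torch

    def torch_load(*args, **kwargs):
        """Sketch of a torch.load wrapper that keeps full pickled checkpoints loadable."""
        if "weights_only" not in kwargs:
            kwargs["weights_only"] = False  # assumption: the checkpoint source is trusted
        return torch.load(*args, **kwargs)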
ultralytics/models/nas/model.py CHANGED
@@ -8,6 +8,7 @@ import torch
 from ultralytics.engine.model import Model
 from ultralytics.utils import DEFAULT_CFG_DICT
 from ultralytics.utils.downloads import attempt_download_asset
+from ultralytics.utils.patches import torch_load
 from ultralytics.utils.torch_utils import model_info
 
 from .predict import NASPredictor
@@ -56,7 +57,7 @@ class NAS(Model):
 
         suffix = Path(weights).suffix
         if suffix == ".pt":
-            self.model = torch.load(attempt_download_asset(weights))
+            self.model = torch_load(attempt_download_asset(weights))
         elif suffix == "":
             self.model = super_gradients.training.models.get(weights, pretrained_weights="coco")
 
ultralytics/models/sam/modules/tiny_encoder.py CHANGED
@@ -931,7 +931,7 @@ class TinyViT(nn.Module):
             if layer.downsample is not None:
                 layer.downsample.apply(lambda x: _set_lr_scale(x, lr_scales[i - 1]))
         assert i == depth
-        for m in [self.norm_head, self.head]:
+        for m in {self.norm_head, self.head}:
             m.apply(lambda x: _set_lr_scale(x, lr_scales[-1]))
 
         for k, p in self.named_parameters():
ultralytics/models/yolo/detect/val.py CHANGED
@@ -71,7 +71,7 @@ class DetectionValidator(BaseValidator):
         """
         batch["img"] = batch["img"].to(self.device, non_blocking=True)
         batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
-        for k in ["batch_idx", "cls", "bboxes"]:
+        for k in {"batch_idx", "cls", "bboxes"}:
             batch[k] = batch[k].to(self.device)
 
         return batch
ultralytics/models/yolo/world/train.py CHANGED
@@ -153,7 +153,7 @@ class WorldTrainer(DetectionTrainer):
         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
         if cache_path.exists():
             LOGGER.info(f"Reading existed cache from '{cache_path}'")
-            txt_map = torch.load(cache_path)
+            txt_map = torch.load(cache_path, map_location=self.device)
             if sorted(txt_map.keys()) == sorted(texts):
                 return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -1,9 +1,11 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from pathlib import Path
+
 from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
-from ultralytics.utils import DEFAULT_CFG, LOGGER
+from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
 from ultralytics.utils.torch_utils import de_parallel
 
 
@@ -35,12 +37,12 @@ class WorldTrainerFromScratch(WorldTrainer):
         ...     yolo_data=["Objects365.yaml"],
         ...     grounding_data=[
         ...         dict(
-        ...             img_path="../datasets/flickr30k/images",
-        ...             json_file="../datasets/flickr30k/final_flickr_separateGT_train.json",
+        ...             img_path="flickr30k/images",
+        ...             json_file="flickr30k/final_flickr_separateGT_train.json",
         ...         ),
         ...         dict(
-        ...             img_path="../datasets/GQA/images",
-        ...             json_file="../datasets/GQA/final_mixed_train_no_coco.json",
+        ...             img_path="GQA/images",
+        ...             json_file="GQA/final_mixed_train_no_coco.json",
         ...         ),
         ...     ],
         ... ),
@@ -70,8 +72,8 @@ class WorldTrainerFromScratch(WorldTrainer):
         ...     yolo_data=["Objects365.yaml"],
         ...     grounding_data=[
         ...         dict(
-        ...             img_path="../datasets/flickr30k/images",
-        ...             json_file="../datasets/flickr30k/final_flickr_separateGT_train.json",
+        ...             img_path="flickr30k/images",
+        ...             json_file="flickr30k/final_flickr_separateGT_train.json",
         ...         ),
         ...     ],
         ... ),
@@ -136,7 +138,7 @@ class WorldTrainerFromScratch(WorldTrainer):
             if d.get("minival") is None:  # for lvis dataset
                 continue
             d["minival"] = str(d["path"] / d["minival"])
-        for s in ["train", "val"]:
+        for s in {"train", "val"}:
             final_data[s] = [d["train" if s == "train" else val_split] for d in data[s]]
             # save grounding data if there's one
             grounding_data = data_yaml[s].get("grounding_data")
@@ -145,8 +147,14 @@ class WorldTrainerFromScratch(WorldTrainer):
             grounding_data = grounding_data if isinstance(grounding_data, list) else [grounding_data]
             for g in grounding_data:
                 assert isinstance(g, dict), f"Grounding data should be provided in dict format, but got {type(g)}"
+                for k in {"img_path", "json_file"}:
+                    path = Path(g[k])
+                    if not path.exists() and not path.is_absolute():
+                        g[k] = str((DATASETS_DIR / g[k]).resolve())  # path relative to DATASETS_DIR
             final_data[s] += grounding_data
-        data["val"] = data["val"][0]  # assign the first val dataset as currently only one validation set is supported
+        # assign the first val dataset as currently only one validation set is supported
+        data["val"] = data["val"][0]
+        final_data["val"] = final_data["val"][0]
         # NOTE: to make training work properly, set `nc` and `names`
         final_data["nc"] = data["val"]["nc"]
         final_data["names"] = data["val"]["names"]
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -217,7 +217,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
         if cache_path.exists():
             LOGGER.info(f"Reading existed cache from '{cache_path}'")
-            txt_map = torch.load(cache_path)
+            txt_map = torch.load(cache_path, map_location=self.device)
             if sorted(txt_map.keys()) == sorted(texts):
                 return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
ultralytics/nn/autobackend.py CHANGED
@@ -487,7 +487,13 @@ class AutoBackend(nn.Module):
         # PaddlePaddle
         elif paddle:
             LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
-            check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle>=3.0.0")
+            check_requirements(
+                "paddlepaddle-gpu"
+                if torch.cuda.is_available()
+                else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
+                if ARM64
+                else "paddlepaddle>=3.0.0"
+            )
             import paddle.inference as pdi  # noqa
 
             w = Path(w)
ultralytics/nn/tasks.py CHANGED
@@ -80,6 +80,7 @@ from ultralytics.utils.loss import (
     v8SegmentationLoss,
 )
 from ultralytics.utils.ops import make_divisible
+from ultralytics.utils.patches import torch_load
 from ultralytics.utils.plotting import feature_visualization
 from ultralytics.utils.torch_utils import (
     fuse_conv_and_bn,
@@ -1441,9 +1442,9 @@ def torch_safe_load(weight, safe_only=False):
             safe_pickle.Unpickler = SafeUnpickler
             safe_pickle.load = lambda file_obj: SafeUnpickler(file_obj).load()
             with open(file, "rb") as f:
-                ckpt = torch.load(f, pickle_module=safe_pickle)
+                ckpt = torch_load(f, pickle_module=safe_pickle)
         else:
-            ckpt = torch.load(file, map_location="cpu")
+            ckpt = torch_load(file, map_location="cpu")
 
     except ModuleNotFoundError as e:  # e.name is missing module name
         if e.name == "models":
@@ -1469,7 +1470,7 @@ def torch_safe_load(weight, safe_only=False):
                 f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolo11n.pt'"
             )
         check_requirements(e.name)  # install missing module
-        ckpt = torch.load(file, map_location="cpu")
+        ckpt = torch_load(file, map_location="cpu")
 
     if not isinstance(ckpt, dict):
         # File is likely a YOLO instance saved with i.e. torch.save(model, "saved_model.pt")
ultralytics/solutions/similarity_search.py CHANGED
@@ -9,14 +9,14 @@ from PIL import Image
 
 from ultralytics.data.utils import IMG_FORMATS
 from ultralytics.nn.text_model import build_text_model
-from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.torch_utils import select_device
 
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # Avoid OpenMP conflict on some systems
 
 
-class VisualAISearch(BaseSolution):
+class VisualAISearch:
     """
     A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
     FAISS for fast similarity-based retrieval.
@@ -48,19 +48,18 @@ class VisualAISearch(BaseSolution):
 
     def __init__(self, **kwargs: Any) -> None:
         """Initialize the VisualAISearch class with FAISS index and CLIP model."""
-        super().__init__(**kwargs)
         check_requirements("faiss-cpu")
 
         self.faiss = __import__("faiss")
         self.faiss_index = "faiss.index"
         self.data_path_npy = "paths.npy"
-        self.data_dir = Path(self.CFG["data"])
-        self.device = select_device(self.CFG["device"])
+        self.data_dir = Path(kwargs.get("data", "images"))
+        self.device = select_device(kwargs.get("device", "cpu"))
 
         if not self.data_dir.exists():
             from ultralytics.utils import ASSETS_URL
 
-            self.LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
+            LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
             from ultralytics.utils.downloads import safe_download
 
             safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
@@ -91,13 +90,13 @@
         """
         # Check if the FAISS index and corresponding image paths already exist
         if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
-            self.LOGGER.info("Loading existing FAISS index...")
+            LOGGER.info("Loading existing FAISS index...")
             self.index = self.faiss.read_index(self.faiss_index)  # Load the FAISS index from disk
             self.image_paths = np.load(self.data_path_npy)  # Load the saved image path list
             return  # Exit the function as the index is successfully loaded
 
         # If the index doesn't exist, start building it from scratch
-        self.LOGGER.info("Building FAISS index from images...")
+        LOGGER.info("Building FAISS index from images...")
         vectors = []  # List to store feature vectors of images
 
         # Iterate over all image files in the data directory
@@ -110,7 +109,7 @@
                 vectors.append(self.extract_image_feature(file))
                 self.image_paths.append(file.name)  # Store the corresponding image name
             except Exception as e:
-                self.LOGGER.warning(f"Skipping {file.name}: {e}")
+                LOGGER.warning(f"Skipping {file.name}: {e}")
 
         # If no vectors were successfully created, raise an error
         if not vectors:
@@ -124,7 +123,7 @@
         self.faiss.write_index(self.index, self.faiss_index)  # Save the newly built FAISS index to disk
         np.save(self.data_path_npy, np.array(self.image_paths))  # Save the list of image paths to disk
 
-        self.LOGGER.info(f"Indexed {len(self.image_paths)} images.")
+        LOGGER.info(f"Indexed {len(self.image_paths)} images.")
 
     def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> List[str]:
         """
@@ -152,9 +151,9 @@
         ]
         results.sort(key=lambda x: x[1], reverse=True)
 
-        self.LOGGER.info("\nRanked Results:")
+        LOGGER.info("\nRanked Results:")
         for name, score in results:
-            self.LOGGER.info(f" - {name} | Similarity: {score:.4f}")
+            LOGGER.info(f" - {name} | Similarity: {score:.4f}")
 
         return [r[0] for r in results]
 
ultralytics/solutions/solutions.py CHANGED
@@ -81,60 +81,59 @@ class BaseSolution:
         self.CFG = vars(SolutionConfig().update(**kwargs))
         self.LOGGER = LOGGER  # Store logger object to be used in multiple solution classes
 
-        if self.__class__.__name__ != "VisualAISearch":
-            check_requirements("shapely>=2.0.0")
-            from shapely.geometry import LineString, Point, Polygon
-            from shapely.prepared import prep
-
-            self.LineString = LineString
-            self.Polygon = Polygon
-            self.Point = Point
-            self.prep = prep
-            self.annotator = None  # Initialize annotator
-            self.tracks = None
-            self.track_data = None
-            self.boxes = []
-            self.clss = []
-            self.track_ids = []
-            self.track_line = None
-            self.masks = None
-            self.r_s = None
-            self.frame_no = -1  # Only for logging
-
-            self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
-            self.region = self.CFG["region"]  # Store region data for other classes usage
-            self.line_width = self.CFG["line_width"]
-
-            # Load Model and store additional information (classes, show_conf, show_label)
-            if self.CFG["model"] is None:
-                self.CFG["model"] = "yolo11n.pt"
-            self.model = YOLO(self.CFG["model"])
-            self.names = self.model.names
-            self.classes = self.CFG["classes"]
-            self.show_conf = self.CFG["show_conf"]
-            self.show_labels = self.CFG["show_labels"]
-            self.device = self.CFG["device"]
-
-            self.track_add_args = {  # Tracker additional arguments for advance configuration
-                k: self.CFG[k] for k in ["iou", "conf", "device", "max_det", "half", "tracker"]
-            }  # verbose must be passed to track method; setting it False in YOLO still logs the track information.
-
-            if is_cli and self.CFG["source"] is None:
-                d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
-                self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
-                from ultralytics.utils.downloads import safe_download
-
-                safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
-                self.CFG["source"] = d_s  # set default source
-
-            # Initialize environment and region setup
-            self.env_check = check_imshow(warn=True)
-            self.track_history = defaultdict(list)
-
-            self.profilers = (
-                ops.Profile(device=self.device),  # track
-                ops.Profile(device=self.device),  # solution
-            )
+        check_requirements("shapely>=2.0.0")
+        from shapely.geometry import LineString, Point, Polygon
+        from shapely.prepared import prep
+
+        self.LineString = LineString
+        self.Polygon = Polygon
+        self.Point = Point
+        self.prep = prep
+        self.annotator = None  # Initialize annotator
+        self.tracks = None
+        self.track_data = None
+        self.boxes = []
+        self.clss = []
+        self.track_ids = []
+        self.track_line = None
+        self.masks = None
+        self.r_s = None
+        self.frame_no = -1  # Only for logging
+
+        self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
+        self.region = self.CFG["region"]  # Store region data for other classes usage
+        self.line_width = self.CFG["line_width"]
+
+        # Load Model and store additional information (classes, show_conf, show_label)
+        if self.CFG["model"] is None:
+            self.CFG["model"] = "yolo11n.pt"
+        self.model = YOLO(self.CFG["model"])
+        self.names = self.model.names
+        self.classes = self.CFG["classes"]
+        self.show_conf = self.CFG["show_conf"]
+        self.show_labels = self.CFG["show_labels"]
+        self.device = self.CFG["device"]
+
+        self.track_add_args = {  # Tracker additional arguments for advance configuration
+            k: self.CFG[k] for k in {"iou", "conf", "device", "max_det", "half", "tracker"}
+        }  # verbose must be passed to track method; setting it False in YOLO still logs the track information.
+
+        if is_cli and self.CFG["source"] is None:
+            d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
+            self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
+            from ultralytics.utils.downloads import safe_download
+
+            safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
+            self.CFG["source"] = d_s  # set default source
+
+        # Initialize environment and region setup
+        self.env_check = check_imshow(warn=True)
+        self.track_history = defaultdict(list)
+
+        self.profilers = (
+            ops.Profile(device=self.device),  # track
+            ops.Profile(device=self.device),  # solution
+        )
 
     def adjust_box_label(self, cls: int, conf: float, track_id: Optional[int] = None) -> Optional[str]:
         """