dgenerate-ultralytics-headless 8.3.160__py3-none-any.whl → 8.3.162__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/METADATA +9 -1
- {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/RECORD +67 -67
- tests/conftest.py +2 -2
- tests/test_python.py +4 -3
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/datasets/Argoverse.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +1 -1
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/Objects365.yaml +1 -1
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +1 -1
- ultralytics/cfg/datasets/VisDrone.yaml +6 -3
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/brain-tumor.yaml +1 -1
- ultralytics/cfg/datasets/carparts-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +1 -1
- ultralytics/cfg/datasets/coco.yaml +1 -1
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco128.yaml +1 -1
- ultralytics/cfg/datasets/coco8-grayscale.yaml +1 -1
- ultralytics/cfg/datasets/coco8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +1 -1
- ultralytics/cfg/datasets/coco8-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8.yaml +1 -1
- ultralytics/cfg/datasets/crack-seg.yaml +1 -1
- ultralytics/cfg/datasets/dog-pose.yaml +1 -1
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +1 -1
- ultralytics/cfg/datasets/hand-keypoints.yaml +1 -1
- ultralytics/cfg/datasets/lvis.yaml +1 -1
- ultralytics/cfg/datasets/medical-pills.yaml +1 -1
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/package-seg.yaml +1 -1
- ultralytics/cfg/datasets/signature.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +1 -1
- ultralytics/cfg/datasets/xView.yaml +1 -1
- ultralytics/data/augment.py +2 -0
- ultralytics/data/converter.py +5 -7
- ultralytics/data/dataset.py +1 -1
- ultralytics/data/split.py +1 -1
- ultralytics/data/split_dota.py +1 -1
- ultralytics/engine/exporter.py +15 -5
- ultralytics/engine/results.py +1 -1
- ultralytics/engine/tuner.py +2 -2
- ultralytics/models/nas/model.py +2 -1
- ultralytics/models/sam/modules/tiny_encoder.py +1 -1
- ultralytics/models/yolo/detect/val.py +1 -1
- ultralytics/models/yolo/world/train.py +1 -1
- ultralytics/models/yolo/world/train_world.py +17 -9
- ultralytics/models/yolo/yoloe/train.py +1 -1
- ultralytics/nn/autobackend.py +7 -1
- ultralytics/nn/tasks.py +4 -3
- ultralytics/solutions/similarity_search.py +11 -12
- ultralytics/solutions/solutions.py +53 -54
- ultralytics/utils/__init__.py +1 -2
- ultralytics/utils/checks.py +21 -0
- ultralytics/utils/metrics.py +10 -9
- ultralytics/utils/patches.py +1 -2
- ultralytics/utils/plotting.py +2 -2
- ultralytics/utils/torch_utils.py +2 -1
- {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.160.dist-info → dgenerate_ultralytics_headless-8.3.162.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,7 @@
|
|
9
9
|
# └── hand-keypoints ← downloads here (369 MB)
|
10
10
|
|
11
11
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
12
|
-
path:
|
12
|
+
path: hand-keypoints # dataset root dir
|
13
13
|
train: train # train images (relative to 'path') 18776 images
|
14
14
|
val: val # val images (relative to 'path') 7992 images
|
15
15
|
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# └── lvis ← downloads here (20.1 GB)
|
10
10
|
|
11
11
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
12
|
-
path:
|
12
|
+
path: lvis # dataset root dir
|
13
13
|
train: train.txt # train images (relative to 'path') 100170 images
|
14
14
|
val: val.txt # val images (relative to 'path') 19809 images
|
15
15
|
minival: minival.txt # minival images (relative to 'path') 5000 images
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# └── medical-pills ← downloads here (8.19 MB)
|
10
10
|
|
11
11
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
12
|
-
path:
|
12
|
+
path: medical-pills # dataset root dir
|
13
13
|
train: train/images # train images (relative to 'path') 92 images
|
14
14
|
val: valid/images # val images (relative to 'path') 23 images
|
15
15
|
test: # test images (relative to 'path')
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# └── open-images-v7 ← downloads here (561 GB)
|
10
10
|
|
11
11
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
12
|
-
path:
|
12
|
+
path: open-images-v7 # dataset root dir
|
13
13
|
train: images/train # train images (relative to 'path') 1743042 images
|
14
14
|
val: images/val # val images (relative to 'path') 41620 images
|
15
15
|
test: # test images (optional)
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# └── package-seg ← downloads here (102 MB)
|
10
10
|
|
11
11
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
12
|
-
path:
|
12
|
+
path: package-seg # dataset root dir
|
13
13
|
train: train/images # train images (relative to 'path') 1920 images
|
14
14
|
val: valid/images # val images (relative to 'path') 89 images
|
15
15
|
test: test/images # test images (relative to 'path') 188 images
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# └── signature ← downloads here (11.2 MB)
|
10
10
|
|
11
11
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
12
|
-
path:
|
12
|
+
path: signature # dataset root dir
|
13
13
|
train: train/images # train images (relative to 'path') 143 images
|
14
14
|
val: valid/images # val images (relative to 'path') 35 images
|
15
15
|
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# └── tiger-pose ← downloads here (75.3 MB)
|
10
10
|
|
11
11
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
12
|
-
path:
|
12
|
+
path: tiger-pose # dataset root dir
|
13
13
|
train: train # train images (relative to 'path') 210 images
|
14
14
|
val: val # val images (relative to 'path') 53 images
|
15
15
|
|
@@ -10,7 +10,7 @@
|
|
10
10
|
# └── xView ← downloads here (20.7 GB)
|
11
11
|
|
12
12
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
13
|
-
path:
|
13
|
+
path: xView # dataset root dir
|
14
14
|
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
|
15
15
|
val: images/autosplit_val.txt # train images (relative to 'path') 10% of 847 train images
|
16
16
|
|
ultralytics/data/augment.py
CHANGED
@@ -1805,6 +1805,8 @@ class CopyPaste(BaseMixTransform):
|
|
1805
1805
|
def _transform(self, labels1, labels2={}):
|
1806
1806
|
"""Apply Copy-Paste augmentation to combine objects from another image into the current image."""
|
1807
1807
|
im = labels1["img"]
|
1808
|
+
if "mosaic_border" not in labels1:
|
1809
|
+
im = im.copy() # avoid modifying original non-mosaic image
|
1808
1810
|
cls = labels1["cls"]
|
1809
1811
|
h, w = im.shape[:2]
|
1810
1812
|
instances = labels1.pop("instances")
|
ultralytics/data/converter.py
CHANGED
@@ -248,12 +248,10 @@ def convert_coco(
|
|
248
248
|
>>> from ultralytics.data.converter import convert_coco
|
249
249
|
|
250
250
|
Convert COCO annotations to YOLO format
|
251
|
-
>>> convert_coco("
|
251
|
+
>>> convert_coco("coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
|
252
252
|
|
253
253
|
Convert LVIS annotations to YOLO format
|
254
|
-
>>> convert_coco(
|
255
|
-
... "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
|
256
|
-
... )
|
254
|
+
>>> convert_coco("lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
|
257
255
|
"""
|
258
256
|
# Create dataset directory
|
259
257
|
save_dir = increment_path(save_dir) # increment if save directory already exists
|
@@ -498,7 +496,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
|
|
498
496
|
formatted_coords = [f"{coord:.6g}" for coord in normalized_coords]
|
499
497
|
g.write(f"{class_idx} {' '.join(formatted_coords)}\n")
|
500
498
|
|
501
|
-
for phase in
|
499
|
+
for phase in {"train", "val"}:
|
502
500
|
image_dir = dota_root_path / "images" / phase
|
503
501
|
orig_label_dir = dota_root_path / "labels" / f"{phase}_original"
|
504
502
|
save_dir = dota_root_path / "labels" / phase
|
@@ -686,7 +684,7 @@ def create_synthetic_coco_dataset():
|
|
686
684
|
# Create synthetic images
|
687
685
|
shutil.rmtree(dir / "labels" / "test2017", ignore_errors=True) # Remove test2017 directory as not needed
|
688
686
|
with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
|
689
|
-
for subset in
|
687
|
+
for subset in {"train2017", "val2017"}:
|
690
688
|
subset_dir = dir / "images" / subset
|
691
689
|
subset_dir.mkdir(parents=True, exist_ok=True)
|
692
690
|
|
@@ -724,7 +722,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
|
|
724
722
|
>>> convert_to_multispectral("path/to/image.jpg", n_channels=10)
|
725
723
|
|
726
724
|
Convert a dataset
|
727
|
-
>>> convert_to_multispectral("
|
725
|
+
>>> convert_to_multispectral("coco8", n_channels=10)
|
728
726
|
"""
|
729
727
|
from scipy.interpolate import interp1d
|
730
728
|
|
ultralytics/data/dataset.py
CHANGED
@@ -482,7 +482,7 @@ class GroundingDataset(YOLODataset):
|
|
482
482
|
a warning is logged and verification is skipped.
|
483
483
|
"""
|
484
484
|
expected_counts = {
|
485
|
-
"final_mixed_train_no_coco_segm":
|
485
|
+
"final_mixed_train_no_coco_segm": 3662412,
|
486
486
|
"final_mixed_train_no_coco": 3681235,
|
487
487
|
"final_flickr_separateGT_train_segm": 638214,
|
488
488
|
"final_flickr_separateGT_train": 640704,
|
ultralytics/data/split.py
CHANGED
ultralytics/data/split_dota.py
CHANGED
@@ -295,7 +295,7 @@ def split_trainval(
|
|
295
295
|
for r in rates:
|
296
296
|
crop_sizes.append(int(crop_size / r))
|
297
297
|
gaps.append(int(gap / r))
|
298
|
-
for split in
|
298
|
+
for split in {"train", "val"}:
|
299
299
|
split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)
|
300
300
|
|
301
301
|
|
ultralytics/engine/exporter.py
CHANGED
@@ -100,6 +100,7 @@ from ultralytics.utils.checks import (
|
|
100
100
|
check_is_path_safe,
|
101
101
|
check_requirements,
|
102
102
|
check_version,
|
103
|
+
is_intel,
|
103
104
|
is_sudo_available,
|
104
105
|
)
|
105
106
|
from ultralytics.utils.downloads import attempt_download_asset, get_github_assets, safe_download
|
@@ -107,7 +108,7 @@ from ultralytics.utils.export import export_engine, export_onnx
|
|
107
108
|
from ultralytics.utils.files import file_size, spaces_in_path
|
108
109
|
from ultralytics.utils.ops import Profile, nms_rotated
|
109
110
|
from ultralytics.utils.patches import arange_patch
|
110
|
-
from ultralytics.utils.torch_utils import TORCH_1_13,
|
111
|
+
from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device
|
111
112
|
|
112
113
|
|
113
114
|
def export_formats():
|
@@ -372,9 +373,9 @@ class Exporter:
|
|
372
373
|
raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
|
373
374
|
# Recommend OpenVINO if export and Intel CPU
|
374
375
|
if SETTINGS.get("openvino_msg"):
|
375
|
-
if
|
376
|
+
if is_intel():
|
376
377
|
LOGGER.info(
|
377
|
-
"💡 ProTip: Export to OpenVINO format for best performance on Intel
|
378
|
+
"💡 ProTip: Export to OpenVINO format for best performance on Intel hardware."
|
378
379
|
" Learn more at https://docs.ultralytics.com/integrations/openvino/"
|
379
380
|
)
|
380
381
|
SETTINGS["openvino_msg"] = False
|
@@ -706,7 +707,16 @@ class Exporter:
|
|
706
707
|
def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
|
707
708
|
"""Export YOLO model to PaddlePaddle format."""
|
708
709
|
assert not IS_JETSON, "Jetson Paddle exports not supported yet"
|
709
|
-
check_requirements(
|
710
|
+
check_requirements(
|
711
|
+
(
|
712
|
+
"paddlepaddle-gpu"
|
713
|
+
if torch.cuda.is_available()
|
714
|
+
else "paddlepaddle==3.0.0" # pin 3.0.0 for ARM64
|
715
|
+
if ARM64
|
716
|
+
else "paddlepaddle>=3.0.0",
|
717
|
+
"x2paddle",
|
718
|
+
)
|
719
|
+
)
|
710
720
|
import x2paddle # noqa
|
711
721
|
from x2paddle.convert import pytorch2paddle # noqa
|
712
722
|
|
@@ -939,7 +949,7 @@ class Exporter:
|
|
939
949
|
"tf_keras", # required by 'onnx2tf' package
|
940
950
|
"sng4onnx>=1.0.1", # required by 'onnx2tf' package
|
941
951
|
"onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
|
942
|
-
"ai-edge-litert>=1.2.0", # required by 'onnx2tf' package
|
952
|
+
"ai-edge-litert>=1.2.0,<1.4.0", # required by 'onnx2tf' package
|
943
953
|
"onnx>=1.12.0,<1.18.0",
|
944
954
|
"onnx2tf>=1.26.3",
|
945
955
|
"onnxslim>=0.1.56",
|
ultralytics/engine/results.py
CHANGED
@@ -800,7 +800,7 @@ class Results(SimpleClass, DataExportMixin):
|
|
800
800
|
decimals (int): Number of decimal places to round the output values to.
|
801
801
|
|
802
802
|
Returns:
|
803
|
-
(List[Dict]): A list of dictionaries, each containing summarized information for a single detection
|
803
|
+
(List[Dict[str, Any]]): A list of dictionaries, each containing summarized information for a single detection
|
804
804
|
or classification result. The structure of each dictionary varies based on the task type
|
805
805
|
(classification or detection) and available information (boxes, masks, keypoints).
|
806
806
|
|
ultralytics/engine/tuner.py
CHANGED
@@ -21,10 +21,10 @@ import time
|
|
21
21
|
from typing import Dict, List, Optional
|
22
22
|
|
23
23
|
import numpy as np
|
24
|
-
import torch
|
25
24
|
|
26
25
|
from ultralytics.cfg import get_cfg, get_save_dir
|
27
26
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
|
27
|
+
from ultralytics.utils.patches import torch_load
|
28
28
|
from ultralytics.utils.plotting import plot_tune_results
|
29
29
|
|
30
30
|
|
@@ -198,7 +198,7 @@ class Tuner:
|
|
198
198
|
cmd = [*launch, "train", *(f"{k}={v}" for k, v in train_args.items())]
|
199
199
|
return_code = subprocess.run(cmd, check=True).returncode
|
200
200
|
ckpt_file = weights_dir / ("best.pt" if (weights_dir / "best.pt").exists() else "last.pt")
|
201
|
-
metrics =
|
201
|
+
metrics = torch_load(ckpt_file)["train_metrics"]
|
202
202
|
assert return_code == 0, "training failed"
|
203
203
|
|
204
204
|
except Exception as e:
|
ultralytics/models/nas/model.py
CHANGED
@@ -8,6 +8,7 @@ import torch
|
|
8
8
|
from ultralytics.engine.model import Model
|
9
9
|
from ultralytics.utils import DEFAULT_CFG_DICT
|
10
10
|
from ultralytics.utils.downloads import attempt_download_asset
|
11
|
+
from ultralytics.utils.patches import torch_load
|
11
12
|
from ultralytics.utils.torch_utils import model_info
|
12
13
|
|
13
14
|
from .predict import NASPredictor
|
@@ -56,7 +57,7 @@ class NAS(Model):
|
|
56
57
|
|
57
58
|
suffix = Path(weights).suffix
|
58
59
|
if suffix == ".pt":
|
59
|
-
self.model =
|
60
|
+
self.model = torch_load(attempt_download_asset(weights))
|
60
61
|
elif suffix == "":
|
61
62
|
self.model = super_gradients.training.models.get(weights, pretrained_weights="coco")
|
62
63
|
|
@@ -931,7 +931,7 @@ class TinyViT(nn.Module):
|
|
931
931
|
if layer.downsample is not None:
|
932
932
|
layer.downsample.apply(lambda x: _set_lr_scale(x, lr_scales[i - 1]))
|
933
933
|
assert i == depth
|
934
|
-
for m in
|
934
|
+
for m in {self.norm_head, self.head}:
|
935
935
|
m.apply(lambda x: _set_lr_scale(x, lr_scales[-1]))
|
936
936
|
|
937
937
|
for k, p in self.named_parameters():
|
@@ -71,7 +71,7 @@ class DetectionValidator(BaseValidator):
|
|
71
71
|
"""
|
72
72
|
batch["img"] = batch["img"].to(self.device, non_blocking=True)
|
73
73
|
batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
|
74
|
-
for k in
|
74
|
+
for k in {"batch_idx", "cls", "bboxes"}:
|
75
75
|
batch[k] = batch[k].to(self.device)
|
76
76
|
|
77
77
|
return batch
|
@@ -153,7 +153,7 @@ class WorldTrainer(DetectionTrainer):
|
|
153
153
|
cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
|
154
154
|
if cache_path.exists():
|
155
155
|
LOGGER.info(f"Reading existed cache from '{cache_path}'")
|
156
|
-
txt_map = torch.load(cache_path)
|
156
|
+
txt_map = torch.load(cache_path, map_location=self.device)
|
157
157
|
if sorted(txt_map.keys()) == sorted(texts):
|
158
158
|
return txt_map
|
159
159
|
LOGGER.info(f"Caching text embeddings to '{cache_path}'")
|
@@ -1,9 +1,11 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from pathlib import Path
|
4
|
+
|
3
5
|
from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
|
4
6
|
from ultralytics.data.utils import check_det_dataset
|
5
7
|
from ultralytics.models.yolo.world import WorldTrainer
|
6
|
-
from ultralytics.utils import DEFAULT_CFG, LOGGER
|
8
|
+
from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
|
7
9
|
from ultralytics.utils.torch_utils import de_parallel
|
8
10
|
|
9
11
|
|
@@ -35,12 +37,12 @@ class WorldTrainerFromScratch(WorldTrainer):
|
|
35
37
|
... yolo_data=["Objects365.yaml"],
|
36
38
|
... grounding_data=[
|
37
39
|
... dict(
|
38
|
-
... img_path="
|
39
|
-
... json_file="
|
40
|
+
... img_path="flickr30k/images",
|
41
|
+
... json_file="flickr30k/final_flickr_separateGT_train.json",
|
40
42
|
... ),
|
41
43
|
... dict(
|
42
|
-
... img_path="
|
43
|
-
... json_file="
|
44
|
+
... img_path="GQA/images",
|
45
|
+
... json_file="GQA/final_mixed_train_no_coco.json",
|
44
46
|
... ),
|
45
47
|
... ],
|
46
48
|
... ),
|
@@ -70,8 +72,8 @@ class WorldTrainerFromScratch(WorldTrainer):
|
|
70
72
|
... yolo_data=["Objects365.yaml"],
|
71
73
|
... grounding_data=[
|
72
74
|
... dict(
|
73
|
-
... img_path="
|
74
|
-
... json_file="
|
75
|
+
... img_path="flickr30k/images",
|
76
|
+
... json_file="flickr30k/final_flickr_separateGT_train.json",
|
75
77
|
... ),
|
76
78
|
... ],
|
77
79
|
... ),
|
@@ -136,7 +138,7 @@ class WorldTrainerFromScratch(WorldTrainer):
|
|
136
138
|
if d.get("minival") is None: # for lvis dataset
|
137
139
|
continue
|
138
140
|
d["minival"] = str(d["path"] / d["minival"])
|
139
|
-
for s in
|
141
|
+
for s in {"train", "val"}:
|
140
142
|
final_data[s] = [d["train" if s == "train" else val_split] for d in data[s]]
|
141
143
|
# save grounding data if there's one
|
142
144
|
grounding_data = data_yaml[s].get("grounding_data")
|
@@ -145,8 +147,14 @@ class WorldTrainerFromScratch(WorldTrainer):
|
|
145
147
|
grounding_data = grounding_data if isinstance(grounding_data, list) else [grounding_data]
|
146
148
|
for g in grounding_data:
|
147
149
|
assert isinstance(g, dict), f"Grounding data should be provided in dict format, but got {type(g)}"
|
150
|
+
for k in {"img_path", "json_file"}:
|
151
|
+
path = Path(g[k])
|
152
|
+
if not path.exists() and not path.is_absolute():
|
153
|
+
g[k] = str((DATASETS_DIR / g[k]).resolve()) # path relative to DATASETS_DIR
|
148
154
|
final_data[s] += grounding_data
|
149
|
-
|
155
|
+
# assign the first val dataset as currently only one validation set is supported
|
156
|
+
data["val"] = data["val"][0]
|
157
|
+
final_data["val"] = final_data["val"][0]
|
150
158
|
# NOTE: to make training work properly, set `nc` and `names`
|
151
159
|
final_data["nc"] = data["val"]["nc"]
|
152
160
|
final_data["names"] = data["val"]["names"]
|
@@ -217,7 +217,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
|
|
217
217
|
cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
|
218
218
|
if cache_path.exists():
|
219
219
|
LOGGER.info(f"Reading existed cache from '{cache_path}'")
|
220
|
-
txt_map = torch.load(cache_path)
|
220
|
+
txt_map = torch.load(cache_path, map_location=self.device)
|
221
221
|
if sorted(txt_map.keys()) == sorted(texts):
|
222
222
|
return txt_map
|
223
223
|
LOGGER.info(f"Caching text embeddings to '{cache_path}'")
|
ultralytics/nn/autobackend.py
CHANGED
@@ -487,7 +487,13 @@ class AutoBackend(nn.Module):
|
|
487
487
|
# PaddlePaddle
|
488
488
|
elif paddle:
|
489
489
|
LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
|
490
|
-
check_requirements(
|
490
|
+
check_requirements(
|
491
|
+
"paddlepaddle-gpu"
|
492
|
+
if torch.cuda.is_available()
|
493
|
+
else "paddlepaddle==3.0.0" # pin 3.0.0 for ARM64
|
494
|
+
if ARM64
|
495
|
+
else "paddlepaddle>=3.0.0"
|
496
|
+
)
|
491
497
|
import paddle.inference as pdi # noqa
|
492
498
|
|
493
499
|
w = Path(w)
|
ultralytics/nn/tasks.py
CHANGED
@@ -80,6 +80,7 @@ from ultralytics.utils.loss import (
|
|
80
80
|
v8SegmentationLoss,
|
81
81
|
)
|
82
82
|
from ultralytics.utils.ops import make_divisible
|
83
|
+
from ultralytics.utils.patches import torch_load
|
83
84
|
from ultralytics.utils.plotting import feature_visualization
|
84
85
|
from ultralytics.utils.torch_utils import (
|
85
86
|
fuse_conv_and_bn,
|
@@ -1441,9 +1442,9 @@ def torch_safe_load(weight, safe_only=False):
|
|
1441
1442
|
safe_pickle.Unpickler = SafeUnpickler
|
1442
1443
|
safe_pickle.load = lambda file_obj: SafeUnpickler(file_obj).load()
|
1443
1444
|
with open(file, "rb") as f:
|
1444
|
-
ckpt =
|
1445
|
+
ckpt = torch_load(f, pickle_module=safe_pickle)
|
1445
1446
|
else:
|
1446
|
-
ckpt =
|
1447
|
+
ckpt = torch_load(file, map_location="cpu")
|
1447
1448
|
|
1448
1449
|
except ModuleNotFoundError as e: # e.name is missing module name
|
1449
1450
|
if e.name == "models":
|
@@ -1469,7 +1470,7 @@ def torch_safe_load(weight, safe_only=False):
|
|
1469
1470
|
f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolo11n.pt'"
|
1470
1471
|
)
|
1471
1472
|
check_requirements(e.name) # install missing module
|
1472
|
-
ckpt =
|
1473
|
+
ckpt = torch_load(file, map_location="cpu")
|
1473
1474
|
|
1474
1475
|
if not isinstance(ckpt, dict):
|
1475
1476
|
# File is likely a YOLO instance saved with i.e. torch.save(model, "saved_model.pt")
|
@@ -9,14 +9,14 @@ from PIL import Image
|
|
9
9
|
|
10
10
|
from ultralytics.data.utils import IMG_FORMATS
|
11
11
|
from ultralytics.nn.text_model import build_text_model
|
12
|
-
from ultralytics.
|
12
|
+
from ultralytics.utils import LOGGER
|
13
13
|
from ultralytics.utils.checks import check_requirements
|
14
14
|
from ultralytics.utils.torch_utils import select_device
|
15
15
|
|
16
16
|
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # Avoid OpenMP conflict on some systems
|
17
17
|
|
18
18
|
|
19
|
-
class VisualAISearch
|
19
|
+
class VisualAISearch:
|
20
20
|
"""
|
21
21
|
A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
|
22
22
|
FAISS for fast similarity-based retrieval.
|
@@ -48,19 +48,18 @@ class VisualAISearch(BaseSolution):
|
|
48
48
|
|
49
49
|
def __init__(self, **kwargs: Any) -> None:
|
50
50
|
"""Initialize the VisualAISearch class with FAISS index and CLIP model."""
|
51
|
-
super().__init__(**kwargs)
|
52
51
|
check_requirements("faiss-cpu")
|
53
52
|
|
54
53
|
self.faiss = __import__("faiss")
|
55
54
|
self.faiss_index = "faiss.index"
|
56
55
|
self.data_path_npy = "paths.npy"
|
57
|
-
self.data_dir = Path(
|
58
|
-
self.device = select_device(
|
56
|
+
self.data_dir = Path(kwargs.get("data", "images"))
|
57
|
+
self.device = select_device(kwargs.get("device", "cpu"))
|
59
58
|
|
60
59
|
if not self.data_dir.exists():
|
61
60
|
from ultralytics.utils import ASSETS_URL
|
62
61
|
|
63
|
-
|
62
|
+
LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
|
64
63
|
from ultralytics.utils.downloads import safe_download
|
65
64
|
|
66
65
|
safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
|
@@ -91,13 +90,13 @@ class VisualAISearch(BaseSolution):
|
|
91
90
|
"""
|
92
91
|
# Check if the FAISS index and corresponding image paths already exist
|
93
92
|
if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
|
94
|
-
|
93
|
+
LOGGER.info("Loading existing FAISS index...")
|
95
94
|
self.index = self.faiss.read_index(self.faiss_index) # Load the FAISS index from disk
|
96
95
|
self.image_paths = np.load(self.data_path_npy) # Load the saved image path list
|
97
96
|
return # Exit the function as the index is successfully loaded
|
98
97
|
|
99
98
|
# If the index doesn't exist, start building it from scratch
|
100
|
-
|
99
|
+
LOGGER.info("Building FAISS index from images...")
|
101
100
|
vectors = [] # List to store feature vectors of images
|
102
101
|
|
103
102
|
# Iterate over all image files in the data directory
|
@@ -110,7 +109,7 @@ class VisualAISearch(BaseSolution):
|
|
110
109
|
vectors.append(self.extract_image_feature(file))
|
111
110
|
self.image_paths.append(file.name) # Store the corresponding image name
|
112
111
|
except Exception as e:
|
113
|
-
|
112
|
+
LOGGER.warning(f"Skipping {file.name}: {e}")
|
114
113
|
|
115
114
|
# If no vectors were successfully created, raise an error
|
116
115
|
if not vectors:
|
@@ -124,7 +123,7 @@ class VisualAISearch(BaseSolution):
|
|
124
123
|
self.faiss.write_index(self.index, self.faiss_index) # Save the newly built FAISS index to disk
|
125
124
|
np.save(self.data_path_npy, np.array(self.image_paths)) # Save the list of image paths to disk
|
126
125
|
|
127
|
-
|
126
|
+
LOGGER.info(f"Indexed {len(self.image_paths)} images.")
|
128
127
|
|
129
128
|
def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> List[str]:
|
130
129
|
"""
|
@@ -152,9 +151,9 @@ class VisualAISearch(BaseSolution):
|
|
152
151
|
]
|
153
152
|
results.sort(key=lambda x: x[1], reverse=True)
|
154
153
|
|
155
|
-
|
154
|
+
LOGGER.info("\nRanked Results:")
|
156
155
|
for name, score in results:
|
157
|
-
|
156
|
+
LOGGER.info(f" - {name} | Similarity: {score:.4f}")
|
158
157
|
|
159
158
|
return [r[0] for r in results]
|
160
159
|
|
@@ -81,60 +81,59 @@ class BaseSolution:
|
|
81
81
|
self.CFG = vars(SolutionConfig().update(**kwargs))
|
82
82
|
self.LOGGER = LOGGER # Store logger object to be used in multiple solution classes
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
self.
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
if
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
self.
|
135
|
-
|
136
|
-
|
137
|
-
)
|
84
|
+
check_requirements("shapely>=2.0.0")
|
85
|
+
from shapely.geometry import LineString, Point, Polygon
|
86
|
+
from shapely.prepared import prep
|
87
|
+
|
88
|
+
self.LineString = LineString
|
89
|
+
self.Polygon = Polygon
|
90
|
+
self.Point = Point
|
91
|
+
self.prep = prep
|
92
|
+
self.annotator = None # Initialize annotator
|
93
|
+
self.tracks = None
|
94
|
+
self.track_data = None
|
95
|
+
self.boxes = []
|
96
|
+
self.clss = []
|
97
|
+
self.track_ids = []
|
98
|
+
self.track_line = None
|
99
|
+
self.masks = None
|
100
|
+
self.r_s = None
|
101
|
+
self.frame_no = -1 # Only for logging
|
102
|
+
|
103
|
+
self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
|
104
|
+
self.region = self.CFG["region"] # Store region data for other classes usage
|
105
|
+
self.line_width = self.CFG["line_width"]
|
106
|
+
|
107
|
+
# Load Model and store additional information (classes, show_conf, show_label)
|
108
|
+
if self.CFG["model"] is None:
|
109
|
+
self.CFG["model"] = "yolo11n.pt"
|
110
|
+
self.model = YOLO(self.CFG["model"])
|
111
|
+
self.names = self.model.names
|
112
|
+
self.classes = self.CFG["classes"]
|
113
|
+
self.show_conf = self.CFG["show_conf"]
|
114
|
+
self.show_labels = self.CFG["show_labels"]
|
115
|
+
self.device = self.CFG["device"]
|
116
|
+
|
117
|
+
self.track_add_args = { # Tracker additional arguments for advance configuration
|
118
|
+
k: self.CFG[k] for k in {"iou", "conf", "device", "max_det", "half", "tracker"}
|
119
|
+
} # verbose must be passed to track method; setting it False in YOLO still logs the track information.
|
120
|
+
|
121
|
+
if is_cli and self.CFG["source"] is None:
|
122
|
+
d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
|
123
|
+
self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
|
124
|
+
from ultralytics.utils.downloads import safe_download
|
125
|
+
|
126
|
+
safe_download(f"{ASSETS_URL}/{d_s}") # download source from ultralytics assets
|
127
|
+
self.CFG["source"] = d_s # set default source
|
128
|
+
|
129
|
+
# Initialize environment and region setup
|
130
|
+
self.env_check = check_imshow(warn=True)
|
131
|
+
self.track_history = defaultdict(list)
|
132
|
+
|
133
|
+
self.profilers = (
|
134
|
+
ops.Profile(device=self.device), # track
|
135
|
+
ops.Profile(device=self.device), # solution
|
136
|
+
)
|
138
137
|
|
139
138
|
def adjust_box_label(self, cls: int, conf: float, track_id: Optional[int] = None) -> Optional[str]:
|
140
139
|
"""
|