dgenerate-ultralytics-headless 8.3.236__py3-none-any.whl → 8.3.239__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/RECORD +117 -105
- tests/test_exports.py +3 -1
- tests/test_python.py +2 -2
- tests/test_solutions.py +6 -6
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +4 -4
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/kitti.yaml +1 -1
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/data/augment.py +1 -1
- ultralytics/data/base.py +4 -2
- ultralytics/data/build.py +4 -4
- ultralytics/data/loaders.py +17 -12
- ultralytics/data/utils.py +4 -4
- ultralytics/engine/exporter.py +40 -25
- ultralytics/engine/predictor.py +8 -6
- ultralytics/engine/results.py +12 -13
- ultralytics/engine/trainer.py +10 -2
- ultralytics/engine/tuner.py +2 -3
- ultralytics/engine/validator.py +2 -2
- ultralytics/models/fastsam/model.py +2 -2
- ultralytics/models/fastsam/predict.py +2 -3
- ultralytics/models/fastsam/val.py +4 -4
- ultralytics/models/rtdetr/predict.py +2 -3
- ultralytics/models/rtdetr/val.py +10 -5
- ultralytics/models/sam/__init__.py +14 -1
- ultralytics/models/sam/build.py +22 -13
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +13 -5
- ultralytics/models/sam/modules/blocks.py +20 -8
- ultralytics/models/sam/modules/decoders.py +2 -3
- ultralytics/models/sam/modules/encoders.py +4 -1
- ultralytics/models/sam/modules/memory_attention.py +6 -2
- ultralytics/models/sam/modules/sam.py +159 -10
- ultralytics/models/sam/modules/utils.py +134 -4
- ultralytics/models/sam/predict.py +2073 -139
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +535 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +198 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +546 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -1
- ultralytics/models/yolo/detect/val.py +7 -7
- ultralytics/models/yolo/obb/val.py +19 -8
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/val.py +1 -1
- ultralytics/nn/autobackend.py +9 -9
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/modules/transformer.py +21 -1
- ultralytics/nn/tasks.py +3 -3
- ultralytics/nn/text_model.py +2 -7
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +6 -6
- ultralytics/solutions/config.py +1 -1
- ultralytics/solutions/distance_calculation.py +1 -1
- ultralytics/solutions/object_counter.py +1 -1
- ultralytics/solutions/object_cropper.py +3 -6
- ultralytics/solutions/parking_management.py +21 -17
- ultralytics/solutions/queue_management.py +5 -5
- ultralytics/solutions/region_counter.py +2 -2
- ultralytics/solutions/security_alarm.py +1 -1
- ultralytics/solutions/solutions.py +45 -22
- ultralytics/solutions/speed_estimation.py +1 -1
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/bot_sort.py +4 -3
- ultralytics/trackers/byte_tracker.py +4 -4
- ultralytics/trackers/utils/gmc.py +6 -7
- ultralytics/trackers/utils/kalman_filter.py +2 -1
- ultralytics/trackers/utils/matching.py +4 -3
- ultralytics/utils/__init__.py +12 -3
- ultralytics/utils/benchmarks.py +2 -2
- ultralytics/utils/callbacks/tensorboard.py +19 -25
- ultralytics/utils/checks.py +4 -3
- ultralytics/utils/downloads.py +1 -1
- ultralytics/utils/export/tensorflow.py +16 -2
- ultralytics/utils/files.py +13 -12
- ultralytics/utils/logger.py +62 -27
- ultralytics/utils/metrics.py +1 -1
- ultralytics/utils/ops.py +7 -9
- ultralytics/utils/patches.py +3 -3
- ultralytics/utils/plotting.py +7 -12
- ultralytics/utils/tuner.py +1 -1
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/top_level.txt +0 -0
ultralytics/cfg/datasets/VOC.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
-# Documentation:
+# Documentation: https://docs.ultralytics.com/datasets/detect/voc/
 # Example usage: yolo train data=VOC.yaml
 # parent
 # ├── ultralytics
@@ -59,22 +59,21 @@ download: |
           x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
           return x * dw, y * dh, w * dw, h * dh
 
-
-
-
-
-
-
-          h = int(size.find("height").text)
+      with open(path / f"VOC{year}/Annotations/{image_id}.xml") as in_file, open(lb_path, "w", encoding="utf-8") as out_file:
+          tree = ET.parse(in_file)
+          root = tree.getroot()
+          size = root.find("size")
+          w = int(size.find("width").text)
+          h = int(size.find("height").text)
 
-
-
-
-
-
-
-
-
+      names = list(yaml["names"].values())  # names list
+      for obj in root.iter("object"):
+          cls = obj.find("name").text
+          if cls in names and int(obj.find("difficult").text) != 1:
+              xmlbox = obj.find("bndbox")
+              bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ("xmin", "xmax", "ymin", "ymax")])
+              cls_id = names.index(cls)  # class id
+              out_file.write(" ".join(str(a) for a in (cls_id, *bb)) + "\n")
 
 
   # Download
```
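The `convert_box` arithmetic kept as context at line 59 of this hunk is the core of the VOC-to-YOLO conversion: it maps VOC's pixel-space `(xmin, xmax, ymin, ymax)` corners to normalized center/size coordinates. A minimal standalone sketch (the function body comes from the hunk above; the example call and its values are illustrative):

```python
def convert_box(size, box):
    """Map a VOC (xmin, xmax, ymin, ymax) pixel box to normalized YOLO (x, y, w, h)."""
    dw, dh = 1.0 / size[0], 1.0 / size[1]  # size = (image width, image height)
    x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
    return x * dw, y * dh, w * dw, h * dh


# A 100x50 px box spanning x=150..250, y=125..175 in a 640x480 image:
print(convert_box((640, 480), (150, 250, 125, 175)))  # ≈ (0.311, 0.310, 0.156, 0.104)
```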
ultralytics/cfg/datasets/african-wildlife.yaml
CHANGED
```diff
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-# African
+# African Wildlife dataset by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/detect/african-wildlife/
 # Example usage: yolo train data=african-wildlife.yaml
 # parent
```
ultralytics/cfg/datasets/coco128-seg.yaml
CHANGED
```diff
@@ -2,7 +2,7 @@
 
 # COCO128-seg dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/segment/coco/
-# Example usage: yolo train data=coco128.yaml
+# Example usage: yolo train data=coco128-seg.yaml
 # parent
 # ├── ultralytics
 # └── datasets
```
ultralytics/cfg/datasets/dota8-multispectral.yaml
CHANGED
```diff
@@ -6,7 +6,7 @@
 # parent
 # ├── ultralytics
 # └── datasets
-#     └── dota8-multispectral ← downloads here (37.
+#     └── dota8-multispectral ← downloads here (37.3 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: dota8-multispectral # dataset root dir
```
ultralytics/cfg/datasets/dota8.yaml
CHANGED
```diff
@@ -1,12 +1,12 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-# DOTA8 dataset 8 images from
+# DOTA8 dataset (8 images from the DOTAv1 split) by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/obb/dota8/
 # Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml
 # parent
 # ├── ultralytics
 # └── datasets
-#     └── dota8 ← downloads here (
+#     └── dota8 ← downloads here (1 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: dota8 # dataset root dir
```
ultralytics/cfg/datasets/kitti.yaml
CHANGED
```diff
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-#
+# KITTI dataset by Karlsruhe Institute of Technology and Toyota Technological Institute at Chicago
 # Documentation: https://docs.ultralytics.com/datasets/detect/kitti/
 # Example usage: yolo train data=kitti.yaml
 # parent
```
ultralytics/cfg/datasets/xView.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
-# --------
+# DIUx xView 2018 Challenge dataset https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
+# -------- Download and extract data manually to `datasets/xView` before running the train command. --------
 # Documentation: https://docs.ultralytics.com/datasets/detect/xview/
 # Example usage: yolo train data=xView.yaml
 # parent
@@ -12,7 +12,7 @@
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: xView # dataset root dir
 train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
-val: images/autosplit_val.txt #
+val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
 
 # Classes
 names:
@@ -80,8 +80,8 @@ names:
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   import json
-  import os
   from pathlib import Path
+  import shutil
 
   import numpy as np
   from PIL import Image
@@ -92,15 +92,15 @@ download: |
 
 
   def convert_labels(fname=Path("xView/xView_train.geojson")):
-      """
+      """Convert xView GeoJSON labels to YOLO format (classes 0-59) and save them as text files."""
      path = fname.parent
      with open(fname, encoding="utf-8") as f:
          print(f"Loading {fname}...")
          data = json.load(f)
 
      # Make dirs
-      labels =
-
+      labels = path / "labels" / "train"
+      shutil.rmtree(labels, ignore_errors=True)
      labels.mkdir(parents=True, exist_ok=True)
 
      # xView classes 11-94 to 0-59
@@ -113,24 +113,24 @@ download: |
      for feature in TQDM(data["features"], desc=f"Converting {fname}"):
          p = feature["properties"]
          if p["bounds_imcoords"]:
-
-
-              if
+              image_id = p["image_id"]
+              image_file = path / "train_images" / image_id
+              if image_file.exists():  # 1395.tif missing
                  try:
                      box = np.array([int(num) for num in p["bounds_imcoords"].split(",")])
                      assert box.shape[0] == 4, f"incorrect box shape {box.shape[0]}"
                      cls = p["type_id"]
-                      cls = xview_class2index[int(cls)]  # xView class to 0-
+                      cls = xview_class2index[int(cls)]  # xView class to 0-59
                      assert 59 >= cls >= 0, f"incorrect class index {cls}"
 
                      # Write YOLO label
-                      if
-                      shapes[
-                      box = xyxy2xywhn(box[None].astype(
-                      with open((labels /
+                      if image_id not in shapes:
+                          shapes[image_id] = Image.open(image_file).size
+                      box = xyxy2xywhn(box[None].astype(float), w=shapes[image_id][0], h=shapes[image_id][1], clip=True)
+                      with open((labels / image_id).with_suffix(".txt"), "a", encoding="utf-8") as f:
                          f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
                  except Exception as e:
-                      print(f"WARNING: skipping one label for {
+                      print(f"WARNING: skipping one label for {image_file}: {e}")
 
 
  # Download manually from https://challenge.xviewdataset.org
```
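The rewritten label-writing code leans on `xyxy2xywhn` with `clip=True` to turn pixel corner boxes into clipped, normalized YOLO rows. A rough re-implementation for reference (an assumption: this mirrors the behavior of `ultralytics.utils.ops.xyxy2xywhn`, whose exact implementation may differ):

```python
import numpy as np


def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """Convert pixel (x1, y1, x2, y2) boxes to normalized (cx, cy, w, h)."""
    if clip:
        x[..., [0, 2]] = x[..., [0, 2]].clip(0, w - eps)  # clip x coords to image width
        x[..., [1, 3]] = x[..., [1, 3]].clip(0, h - eps)  # clip y coords to image height
    y = np.empty_like(x, dtype=float)
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2 / w  # normalized center x
    y[..., 1] = (x[..., 1] + x[..., 3]) / 2 / h  # normalized center y
    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # normalized width
    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # normalized height
    return y


box = np.array([[100.0, 200.0, 300.0, 400.0]])
print(xyxy2xywhn(box, w=640, h=512, clip=True))  # [[0.3125 0.5859 0.3125 0.3906]]
```

Note that `Image.open(image_file).size` returns `(width, height)`, which is why the call passes `shapes[image_id][0]` as `w` and `shapes[image_id][1]` as `h`.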
ultralytics/cfg/models/11/yolo11-pose.yaml
CHANGED
```diff
@@ -7,7 +7,7 @@
 # Parameters
 nc: 80 # number of classes
 kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
-scales: # model compound scaling constants, i.e. 'model=yolo11n-pose.yaml' will call yolo11.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolo11n-pose.yaml' will call yolo11-pose.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.50, 0.25, 1024] # summary: 196 layers, 2908507 parameters, 2908491 gradients, 7.7 GFLOPs
   s: [0.50, 0.50, 1024] # summary: 196 layers, 9948811 parameters, 9948795 gradients, 23.5 GFLOPs
```
ultralytics/cfg/models/11/yoloe-11-seg.yaml
CHANGED
```diff
@@ -1,10 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-#
+# Ultralytics YOLOE-11-seg instance segmentation model. For usage examples, see https://docs.ultralytics.com/tasks/segment
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=
+scales: # model compound scaling constants, i.e. 'model=yoloe-11n-seg.yaml' will call yoloe-11-seg.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.50, 0.25, 1024] # summary: 355 layers, 2876848 parameters, 2876832 gradients, 10.5 GFLOPs
   s: [0.50, 0.50, 1024] # summary: 355 layers, 10113248 parameters, 10113232 gradients, 35.8 GFLOPs
```
ultralytics/cfg/models/11/yoloe-11.yaml
CHANGED
```diff
@@ -1,10 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-#
+# Ultralytics YOLOE-11 object detection model with P3/8 - P5/32 outputs. For usage examples, see https://docs.ultralytics.com/tasks/detect
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=
+scales: # model compound scaling constants, i.e. 'model=yoloe-11n.yaml' will call yoloe-11.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
   s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
```
ultralytics/cfg/models/v8/yoloe-v8-seg.yaml
CHANGED
```diff
@@ -1,14 +1,17 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+# Ultralytics YOLOE-v8-seg instance segmentation model with P3/8 - P5/32 outputs
+# Task docs: https://docs.ultralytics.com/tasks/segment
+
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=
+scales: # model compound scaling constants, i.e. 'model=yoloe-v8n-seg.yaml' will call yoloe-v8-seg.yaml with scale 'n'
   # [depth, width, max_channels]
-  n: [0.33, 0.25, 1024] #
-  s: [0.33, 0.50, 1024] #
-  m: [0.67, 0.75, 768] #
-  l: [1.00, 1.00, 512] #
-  x: [1.00, 1.25, 512] #
+  n: [0.33, 0.25, 1024] # YOLOE-v8n-seg summary: 161 layers, 4204111 parameters, 4204095 gradients, 39.6 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOE-v8s-seg summary: 161 layers, 13383496 parameters, 13383480 gradients, 71.5 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOE-v8m-seg summary: 201 layers, 29065310 parameters, 29065294 gradients, 131.4 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOE-v8l-seg summary: 241 layers, 47553970 parameters, 47553954 gradients, 225.6 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOE-v8x-seg summary: 241 layers, 73690217 parameters, 73690201 gradients, 330.8 GFLOPs
 
 # YOLOv8.0n backbone
 backbone:
```
ultralytics/cfg/models/v8/yoloe-v8.yaml
CHANGED
```diff
@@ -1,14 +1,17 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+# Ultralytics YOLOE-v8 object detection model with P3/8 - P5/32 outputs
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=
+scales: # model compound scaling constants, i.e. 'model=yoloe-v8n.yaml' will call yoloe-v8.yaml with scale 'n'
   # [depth, width, max_channels]
-  n: [0.33, 0.25, 1024] #
-  s: [0.33, 0.50, 1024] #
-  m: [0.67, 0.75, 768] #
-  l: [1.00, 1.00, 512] #
-  x: [1.00, 1.25, 512] #
+  n: [0.33, 0.25, 1024] # YOLOE-v8n summary: 148 layers, 3695183 parameters, 3695167 gradients, 19.5 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOE-v8s summary: 148 layers, 12759880 parameters, 12759864 gradients, 51.0 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOE-v8m summary: 188 layers, 28376158 parameters, 28376142 gradients, 110.5 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOE-v8l summary: 228 layers, 46832050 parameters, 46832034 gradients, 204.5 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOE-v8x summary: 228 layers, 72886377 parameters, 72886361 gradients, 309.3 GFLOPs
 
 # YOLOv8.0n backbone
 backbone:
```
ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml
CHANGED
```diff
@@ -6,7 +6,7 @@
 
 # Parameters
 nc: 1000 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-cls-resnet101.yaml' will call yolov8-cls-resnet101.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024]
   s: [0.33, 0.50, 1024]
```
ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml
CHANGED
```diff
@@ -6,7 +6,7 @@
 
 # Parameters
 nc: 1000 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-cls-resnet50.yaml' will call yolov8-cls-resnet50.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024]
   s: [0.33, 0.50, 1024]
```
ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml
CHANGED
```diff
@@ -1,13 +1,13 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-# Ultralytics YOLOv8 object detection model with P2/4 - P5/32 outputs
+# Ultralytics YOLOv8-ghost object detection model with P2/4 - P5/32 outputs
 # Model docs: https://docs.ultralytics.com/models/yolov8
 # Task docs: https://docs.ultralytics.com/tasks/detect
 # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-ghost-p2.yaml' will call yolov8-ghost-p2.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 290 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs
   s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 290 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs
```
ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml
CHANGED
```diff
@@ -1,13 +1,13 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-# Ultralytics YOLOv8 object detection model with P3/8 - P6/64 outputs
+# Ultralytics YOLOv8-ghost object detection model with P3/8 - P6/64 outputs
 # Model docs: https://docs.ultralytics.com/models/yolov8
 # Task docs: https://docs.ultralytics.com/tasks/detect
 # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-ghost-p6.yaml' will call yolov8-ghost-p6.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 312 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
   s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 312 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
```
ultralytics/cfg/models/v8/yolov8-ghost.yaml
CHANGED
```diff
@@ -1,13 +1,13 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-# Ultralytics YOLOv8 object detection model with P3/8 - P5/32 outputs
+# Ultralytics YOLOv8-ghost object detection model with P3/8 - P5/32 outputs
 # Model docs: https://docs.ultralytics.com/models/yolov8
 # Task docs: https://docs.ultralytics.com/tasks/detect
 # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-ghost.yaml' will call yolov8-ghost.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 237 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs
   s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 237 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs
```
ultralytics/cfg/models/v8/yolov8-obb.yaml
CHANGED
```diff
@@ -6,7 +6,7 @@
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-obb.yaml' will call yolov8-obb.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024] # YOLOv8n-obb summary: 144 layers, 3228867 parameters, 3228851 gradients, 9.1 GFLOPs
   s: [0.33, 0.50, 1024] # YOLOv8s-obb summary: 144 layers, 11452739 parameters, 11452723 gradients, 29.8 GFLOPs
```
ultralytics/cfg/models/v8/yolov8-p2.yaml
CHANGED
```diff
@@ -6,7 +6,7 @@
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-p2.yaml' will call yolov8-p2.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024]
   s: [0.33, 0.50, 1024]
```
ultralytics/cfg/models/v8/yolov8-pose-p6.yaml
CHANGED
```diff
@@ -7,7 +7,7 @@
 # Parameters
 nc: 1 # number of classes
 kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
-scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-pose-p6.yaml' will call yolov8-pose-p6.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024]
   s: [0.33, 0.50, 1024]
```
ultralytics/cfg/models/v8/yolov8-rtdetr.yaml
CHANGED
```diff
@@ -6,7 +6,7 @@
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-rtdetr.yaml' will call yolov8-rtdetr.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024] # YOLOv8n-rtdetr summary: 235 layers, 9643868 parameters, 9643868 gradients, 17.1 GFLOPs
   s: [0.33, 0.50, 1024] # YOLOv8s-rtdetr summary: 235 layers, 16518572 parameters, 16518572 gradients, 32.8 GFLOPs
```
ultralytics/cfg/models/v8/yolov8-world.yaml
CHANGED
```diff
@@ -6,7 +6,7 @@
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-world.yaml' will call yolov8-world.yaml with scale 'n'
   # [depth, width, max_channels]
   n: [0.33, 0.25, 1024] # YOLOv8n-world summary: 161 layers, 4204111 parameters, 4204095 gradients, 39.6 GFLOPs
   s: [0.33, 0.50, 1024] # YOLOv8s-world summary: 161 layers, 13383496 parameters, 13383480 gradients, 71.5 GFLOPs
```
ultralytics/cfg/models/v8/yolov8-worldv2.yaml
CHANGED
```diff
@@ -6,13 +6,13 @@
 
 # Parameters
 nc: 80 # number of classes
-scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+scales: # model compound scaling constants, i.e. 'model=yolov8n-worldv2.yaml' will call yolov8-worldv2.yaml with scale 'n'
   # [depth, width, max_channels]
-  n: [0.33, 0.25, 1024] # YOLOv8n-worldv2 summary: 148 layers, 3695183 parameters, 3695167 gradients, 19.5
-  s: [0.33, 0.50, 1024] # YOLOv8s-worldv2 summary: 148 layers, 12759880 parameters, 12759864 gradients, 51.0
-  m: [0.67, 0.75, 768] # YOLOv8m-worldv2 summary: 188 layers, 28376158 parameters, 28376142 gradients, 110.5
-  l: [1.00, 1.00, 512] # YOLOv8l-worldv2 summary: 228 layers, 46832050 parameters, 46832034 gradients, 204.5
-  x: [1.00, 1.25, 512] # YOLOv8x-worldv2 summary: 228 layers, 72886377 parameters, 72886361 gradients, 309.3
+  n: [0.33, 0.25, 1024] # YOLOv8n-worldv2 summary: 148 layers, 3695183 parameters, 3695167 gradients, 19.5 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s-worldv2 summary: 148 layers, 12759880 parameters, 12759864 gradients, 51.0 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m-worldv2 summary: 188 layers, 28376158 parameters, 28376142 gradients, 110.5 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l-worldv2 summary: 228 layers, 46832050 parameters, 46832034 gradients, 204.5 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x-worldv2 summary: 228 layers, 72886377 parameters, 72886361 gradients, 309.3 GFLOPs
 
 # YOLOv8.0n backbone
 backbone:
```
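All of the `scales:` comment fixes above describe the same convention: the scale letter embedded in the requested model name selects a `[depth, width, max_channels]` triple from the matching base YAML. A hypothetical illustration of that lookup (the parsing regex, helper name, and inline config here are illustrative, not Ultralytics' actual loader):

```python
import re

import yaml

CFG = """
scales: # model compound scaling constants
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
"""


def resolve_scale(model_name: str, cfg_text: str = CFG):
    # e.g. 'yolov8n-worldv2.yaml' resolves base yolov8-worldv2.yaml with scale 'n'
    m = re.search(r"(yolov8|yolo11|yoloe-v8|yoloe-11)([nslmx])", model_name)
    scale = m.group(2) if m else "n"
    depth, width, max_channels = yaml.safe_load(cfg_text)["scales"][scale]
    return scale, depth, width, max_channels


print(resolve_scale("yolov8n-worldv2.yaml"))  # ('n', 0.33, 0.25, 1024)
```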
ultralytics/data/augment.py
CHANGED
```diff
@@ -1697,7 +1697,7 @@ class CopyPaste(BaseMixTransform):
     """
 
     def __init__(self, dataset=None, pre_transform=None, p: float = 0.5, mode: str = "flip") -> None:
-        """Initialize CopyPaste object with dataset, pre_transform, and probability of applying
+        """Initialize CopyPaste object with dataset, pre_transform, and probability of applying CopyPaste."""
         super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
         assert mode in {"flip", "mixup"}, f"Expected `mode` to be `flip` or `mixup`, but got {mode}."
         self.mode = mode
```
ultralytics/data/base.py
CHANGED
```diff
@@ -33,7 +33,8 @@ class BaseDataset(Dataset):
         single_cls (bool): Whether to treat all objects as a single class.
         prefix (str): Prefix to print in log messages.
         fraction (float): Fraction of dataset to utilize.
-        channels (int): Number of channels in the images (1 for grayscale, 3 for
+        channels (int): Number of channels in the images (1 for grayscale, 3 for color). Color images loaded with OpenCV
+            are in BGR channel order.
         cv2_flag (int): OpenCV flag for reading images.
         im_files (list[str]): List of image file paths.
         labels (list[dict]): List of label data dictionaries.
@@ -101,7 +102,8 @@ class BaseDataset(Dataset):
             single_cls (bool): If True, single class training is used.
             classes (list[int], optional): List of included classes.
             fraction (float): Fraction of dataset to utilize.
-            channels (int): Number of channels in the images (1 for grayscale, 3 for
+            channels (int): Number of channels in the images (1 for grayscale, 3 for color). Color images loaded with
+                OpenCV are in BGR channel order.
         """
         super().__init__()
         self.img_path = img_path
```
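The docstring fix above is worth making concrete: the `channels` value drives an OpenCV imread flag, and color reads come back in BGR order, not RGB. A minimal sketch of the pattern (the file path and the trailing-axis step are illustrative):

```python
import cv2

channels = 1  # 1 -> grayscale, 3 -> color (BGR), per the BaseDataset docstring
cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR

im = cv2.imread("path/to/image.jpg", cv2_flag)  # color reads return BGR arrays
if im is not None and im.ndim == 2:  # grayscale arrays have no channel axis
    im = im[..., None]  # add one so downstream code can index channels uniformly
```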
ultralytics/data/build.py
CHANGED
```diff
@@ -35,7 +35,7 @@ from ultralytics.utils.torch_utils import TORCH_2_0
 
 
 class InfiniteDataLoader(dataloader.DataLoader):
-    """
+    """DataLoader that reuses workers for infinite iteration.
 
     This dataloader extends the PyTorch DataLoader to provide infinite recycling of workers, which improves efficiency
     for training loops that need to iterate through the dataset multiple times without recreating workers.
@@ -51,7 +51,7 @@ class InfiniteDataLoader(dataloader.DataLoader):
         reset: Reset the iterator, useful when modifying dataset settings during training.
 
     Examples:
-        Create an infinite
+        Create an infinite DataLoader for training
         >>> dataset = YOLODataset(...)
         >>> dataloader = InfiniteDataLoader(dataset, batch_size=16, shuffle=True)
         >>> for batch in dataloader:  # Infinite iteration
@@ -76,7 +76,7 @@ class InfiniteDataLoader(dataloader.DataLoader):
             yield next(self.iterator)
 
     def __del__(self):
-        """Ensure that workers are properly terminated when the
+        """Ensure that workers are properly terminated when the DataLoader is deleted."""
         try:
             if not hasattr(self.iterator, "_workers"):
                 return
@@ -417,7 +417,7 @@ def load_inference_source(
     source, stream, screenshot, from_img, in_memory, tensor = check_source(source)
     source_type = source.source_type if in_memory else SourceTypes(stream, screenshot, from_img, tensor)
 
-    #
+    # DataLoader
     if tensor:
         dataset = LoadTensor(source)
     elif in_memory:
```
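For readers unfamiliar with the class whose docstring is completed here, the worker-reuse trick is to wrap the batch sampler in a sampler that repeats forever, so PyTorch never tears its workers down between epochs. A condensed sketch of the pattern (simplified from the yolov5-style implementation; not the exact Ultralytics source):

```python
from torch.utils.data import DataLoader


class _RepeatSampler:
    """Forwards a sampler's batches forever so DataLoader workers are never recycled."""

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            yield from iter(self.sampler)


class InfiniteDataLoader(DataLoader):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # DataLoader blocks reassigning batch_sampler after init, so bypass __setattr__
        object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        return len(self.batch_sampler.sampler)  # batches per epoch

    def __iter__(self):
        for _ in range(len(self)):
            yield next(self.iterator)  # the same workers serve every epoch
```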
ultralytics/data/loaders.py
CHANGED
```diff
@@ -69,7 +69,7 @@ class LoadStreams:
         shape (list[tuple[int, int, int]]): List of shapes for each stream.
         caps (list[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
         bs (int): Batch size for processing.
-        cv2_flag (int): OpenCV flag for image reading (grayscale or
+        cv2_flag (int): OpenCV flag for image reading (grayscale or color/BGR).
 
     Methods:
         update: Read stream frames in daemon thread.
@@ -98,14 +98,14 @@
             sources (str): Path to streams file or single stream URL.
             vid_stride (int): Video frame-rate stride.
             buffer (bool): Whether to buffer input streams.
-            channels (int): Number of image channels (1 for grayscale, 3 for
+            channels (int): Number of image channels (1 for grayscale, 3 for color).
         """
         torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
         self.buffer = buffer  # buffer input streams
         self.running = True  # running flag for Thread
         self.mode = "stream"
         self.vid_stride = vid_stride  # video frame-rate stride
-        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or
+        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or color (BGR)
 
         sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
         n = len(sources)
@@ -242,7 +242,7 @@ class LoadScreenshots:
         bs (int): Batch size, set to 1.
         fps (int): Frames per second, set to 30.
         monitor (dict[str, int]): Monitor configuration details.
-        cv2_flag (int): OpenCV flag for image reading (grayscale or
+        cv2_flag (int): OpenCV flag for image reading (grayscale or color/BGR).
 
     Methods:
         __iter__: Returns an iterator object.
@@ -259,7 +259,7 @@
 
         Args:
             source (str): Screen capture source string in format "screen_num left top width height".
-            channels (int): Number of image channels (1 for grayscale, 3 for
+            channels (int): Number of image channels (1 for grayscale, 3 for color).
         """
         check_requirements("mss")
         import mss
@@ -277,7 +277,7 @@
         self.sct = mss.mss()
         self.bs = 1
         self.fps = 30
-        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or
+        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or color (BGR)
 
         # Parse monitor shape
         monitor = self.sct.monitors[self.screen]
@@ -319,7 +319,7 @@ class LoadImagesAndVideos:
         frames (int): Total number of frames in the video.
         count (int): Counter for iteration, initialized at 0 during __iter__().
         ni (int): Number of images.
-        cv2_flag (int): OpenCV flag for image reading (grayscale or
+        cv2_flag (int): OpenCV flag for image reading (grayscale or color/BGR).
 
     Methods:
         __init__: Initialize the LoadImagesAndVideos object.
@@ -347,7 +347,7 @@
             path (str | Path | list): Path to images/videos, directory, or list of paths.
             batch (int): Batch size for processing.
             vid_stride (int): Video frame-rate stride.
-            channels (int): Number of image channels (1 for grayscale, 3 for
+            channels (int): Number of image channels (1 for grayscale, 3 for color).
         """
         parent = None
         if isinstance(path, str) and Path(path).suffix in {".txt", ".csv"}:  # txt/csv file with source paths
@@ -385,7 +385,7 @@
         self.mode = "video" if ni == 0 else "image"  # default to video if no images
         self.vid_stride = vid_stride  # video frame-rate stride
         self.bs = batch
-        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or
+        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or color (BGR)
         if any(videos):
             self._new_video(videos[0])  # new video
         else:
@@ -513,7 +513,7 @@ class LoadPilAndNumpy:
 
         Args:
             im0 (PIL.Image.Image | np.ndarray | list): Single image or list of images in PIL or numpy format.
-            channels (int): Number of image channels (1 for grayscale, 3 for
+            channels (int): Number of image channels (1 for grayscale, 3 for color).
         """
         if not isinstance(im0, list):
             im0 = [im0]
@@ -526,11 +526,16 @@
 
     @staticmethod
     def _single_check(im: Image.Image | np.ndarray, flag: str = "RGB") -> np.ndarray:
-        """Validate and format an image to
+        """Validate and format an image to a NumPy array.
+
+        Notes:
+            - PIL inputs are converted to NumPy and returned in OpenCV-compatible BGR order for color images.
+            - NumPy inputs are returned as-is (no channel-order conversion is applied).
+        """
         assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
         if isinstance(im, Image.Image):
             im = np.asarray(im.convert(flag))
-            #
+            # Add a new axis if grayscale; convert RGB -> BGR for OpenCV compatibility.
             im = im[..., None] if flag == "L" else im[..., ::-1]
             im = np.ascontiguousarray(im)  # contiguous
         elif im.ndim == 2:  # grayscale in numpy form
```
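The expanded `_single_check` docstring spells out a subtle contract: PIL color inputs get flipped into OpenCV's BGR order, while NumPy inputs pass through untouched. A standalone sketch of the PIL branch (hypothetical helper name; the logic follows the hunk above):

```python
import numpy as np
from PIL import Image


def to_opencv_array(im, flag="RGB"):
    """Hypothetical standalone version of the PIL branch in LoadPilAndNumpy._single_check."""
    if isinstance(im, Image.Image):
        im = np.asarray(im.convert(flag))
        im = im[..., None] if flag == "L" else im[..., ::-1]  # add channel axis, or RGB -> BGR
        im = np.ascontiguousarray(im)  # [::-1] creates negative strides; make contiguous
    return im  # NumPy inputs pass through unchanged


red = Image.new("RGB", (4, 4), (255, 0, 0))  # pure red in PIL's RGB order
print(to_opencv_array(red)[0, 0])  # [  0   0 255] -> red now sits in the last slot of B,G,R
```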
ultralytics/data/utils.py
CHANGED
```diff
@@ -258,12 +258,12 @@ def visualize_image_annotations(image_path: str, txt_path: str, label_map: dict[
         luminance.
 
     Args:
-        image_path (str):
-        txt_path (str):
+        image_path (str): Path to the image file to annotate. The file must be readable by PIL.
+        txt_path (str): Path to the annotation file in YOLO format, which should contain one line per object.
         label_map (dict[int, str]): A dictionary that maps class IDs (integers) to class labels (strings).
 
     Examples:
-        >>> label_map = {0: "cat", 1: "dog", 2: "bird"}  #
+        >>> label_map = {0: "cat", 1: "dog", 2: "bird"}  # Should include all annotated classes
        >>> visualize_image_annotations("path/to/image.jpg", "path/to/annotations.txt", label_map)
     """
     import matplotlib.pyplot as plt
@@ -283,7 +283,7 @@ def visualize_image_annotations(image_path: str, txt_path: str, label_map: dict[
             annotations.append((x, y, w, h, int(class_id)))
     _, ax = plt.subplots(1)  # Plot the image and annotations
     for x, y, w, h, label in annotations:
-        color = tuple(c / 255 for c in colors(label,
+        color = tuple(c / 255 for c in colors(label, False))  # Get and normalize an RGB color for Matplotlib
         rect = plt.Rectangle((x, y), w, h, linewidth=2, edgecolor=color, facecolor="none")  # Create a rectangle
         ax.add_patch(rect)
         luminance = 0.2126 * color[0] + 0.7152 * color[1] + 0.0722 * color[2]  # Formula for luminance
```