dgenerate_ultralytics_headless-8.3.190-py3-none-any.whl → dgenerate_ultralytics_headless-8.3.192-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/RECORD +103 -102
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +29 -23
- ultralytics/engine/exporter.py +2 -2
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +8 -6
- ultralytics/engine/results.py +54 -52
- ultralytics/engine/trainer.py +8 -3
- ultralytics/engine/tuner.py +230 -42
- ultralytics/hub/google/__init__.py +7 -6
- ultralytics/hub/session.py +8 -6
- ultralytics/hub/utils.py +3 -4
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +9 -7
- ultralytics/models/yolo/classify/val.py +11 -9
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +22 -20
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +11 -9
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +12 -10
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +13 -11
- ultralytics/models/yolo/world/train.py +10 -8
- ultralytics/models/yolo/yoloe/train.py +10 -10
- ultralytics/models/yolo/yoloe/val.py +11 -9
- ultralytics/nn/autobackend.py +17 -19
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +41 -37
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +16 -88
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/checks.py +17 -26
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/ops.py +3 -2
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +20 -18
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +18 -14
- ultralytics/utils/triton.py +3 -2
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/top_level.txt +0 -0
ultralytics/data/dataset.py
CHANGED
@@ -1,11 +1,13 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import json
 from collections import defaultdict
 from itertools import repeat
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 
 import cv2
 import numpy as np
@@ -70,7 +72,7 @@ class YOLODataset(BaseDataset):
         >>> dataset.get_labels()
     """
 
-    def __init__(self, *args, data: Optional[Dict] = None, task: str = "detect", **kwargs):
+    def __init__(self, *args, data: dict | None = None, task: str = "detect", **kwargs):
         """
         Initialize the YOLODataset.
 
@@ -87,7 +89,7 @@ class YOLODataset(BaseDataset):
         assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
         super().__init__(*args, channels=self.data.get("channels", 3), **kwargs)
 
-    def cache_labels(self, path: Path = Path("./labels.cache")) -> Dict:
+    def cache_labels(self, path: Path = Path("./labels.cache")) -> dict:
         """
         Cache dataset labels, check images and read shapes.
 
@@ -155,7 +157,7 @@ class YOLODataset(BaseDataset):
         save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
         return x
 
-    def get_labels(self) -> List[Dict]:
+    def get_labels(self) -> list[dict]:
         """
         Return dictionary of labels for YOLO training.
 
@@ -205,7 +207,7 @@ class YOLODataset(BaseDataset):
             LOGGER.warning(f"Labels are missing or empty in {cache_path}, training may not work correctly. {HELP_URL}")
         return labels
 
-    def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
+    def build_transforms(self, hyp: dict | None = None) -> Compose:
         """
         Build and append transforms to the list.
 
@@ -237,7 +239,7 @@ class YOLODataset(BaseDataset):
         )
         return transforms
 
-    def close_mosaic(self, hyp: Dict) -> None:
+    def close_mosaic(self, hyp: dict) -> None:
         """
         Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.
 
@@ -250,7 +252,7 @@ class YOLODataset(BaseDataset):
         hyp.cutmix = 0.0
         self.transforms = self.build_transforms(hyp)
 
-    def update_labels_info(self, label: Dict) -> Dict:
+    def update_labels_info(self, label: dict) -> dict:
         """
         Update label format for different tasks.
 
@@ -284,7 +286,7 @@ class YOLODataset(BaseDataset):
         return label
 
     @staticmethod
-    def collate_fn(batch: List[Dict]) -> Dict:
+    def collate_fn(batch: list[dict]) -> dict:
         """
         Collate data samples into batches.
 
@@ -331,7 +333,7 @@ class YOLOMultiModalDataset(YOLODataset):
         >>> print(batch.keys())  # Should include 'texts'
     """
 
-    def __init__(self, *args, data: Optional[Dict] = None, task: str = "detect", **kwargs):
+    def __init__(self, *args, data: dict | None = None, task: str = "detect", **kwargs):
         """
         Initialize a YOLOMultiModalDataset.
 
@@ -343,7 +345,7 @@ class YOLOMultiModalDataset(YOLODataset):
         """
         super().__init__(*args, data=data, task=task, **kwargs)
 
-    def update_labels_info(self, label: Dict) -> Dict:
+    def update_labels_info(self, label: dict) -> dict:
         """
         Add text information for multi-modal model training.
 
@@ -360,7 +362,7 @@ class YOLOMultiModalDataset(YOLODataset):
 
         return labels
 
-    def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
+    def build_transforms(self, hyp: dict | None = None) -> Compose:
         """
         Enhance data transformations with optional text augmentation for multi-modal training.
 
@@ -409,7 +411,7 @@ class YOLOMultiModalDataset(YOLODataset):
         return category_freq
 
     @staticmethod
-    def _get_neg_texts(category_freq: Dict, threshold: int = 100) -> List[str]:
+    def _get_neg_texts(category_freq: dict, threshold: int = 100) -> list[str]:
         """Get negative text samples based on frequency threshold."""
         threshold = min(max(category_freq.values()), 100)
         return [k for k, v in category_freq.items() if v >= threshold]
@@ -451,7 +453,7 @@ class GroundingDataset(YOLODataset):
         self.max_samples = max_samples
         super().__init__(*args, task=task, data={"channels": 3}, **kwargs)
 
-    def get_img_files(self, img_path: str) -> List:
+    def get_img_files(self, img_path: str) -> list:
         """
         The image files would be read in `get_labels` function, return empty list here.
 
@@ -463,7 +465,7 @@ class GroundingDataset(YOLODataset):
         """
         return []
 
-    def verify_labels(self, labels: List[Dict[str, Any]]) -> None:
+    def verify_labels(self, labels: list[dict[str, Any]]) -> None:
         """
         Verify the number of instances in the dataset matches expected counts.
 
@@ -498,7 +500,7 @@ class GroundingDataset(YOLODataset):
                 return
         LOGGER.warning(f"Skipping instance count verification for unrecognized dataset '{self.json_file}'")
 
-    def cache_labels(self, path: Path = Path("./labels.cache")) -> Dict[str, Any]:
+    def cache_labels(self, path: Path = Path("./labels.cache")) -> dict[str, Any]:
         """
         Load annotations from a JSON file, filter, and normalize bounding boxes for each image.
 
@@ -589,7 +591,7 @@ class GroundingDataset(YOLODataset):
         save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
         return x
 
-    def get_labels(self) -> List[Dict]:
+    def get_labels(self) -> list[dict]:
         """
         Load labels from cache or generate them from JSON file.
 
@@ -611,7 +613,7 @@ class GroundingDataset(YOLODataset):
             LOGGER.info(f"Load {self.json_file} from cache file {cache_path}")
         return labels
 
-    def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
+    def build_transforms(self, hyp: dict | None = None) -> Compose:
         """
         Configure augmentations for training with optional text loading.
 
@@ -652,7 +654,7 @@ class GroundingDataset(YOLODataset):
         return category_freq
 
     @staticmethod
-    def _get_neg_texts(category_freq: Dict, threshold: int = 100) -> List[str]:
+    def _get_neg_texts(category_freq: dict, threshold: int = 100) -> list[str]:
         """Get negative text samples based on frequency threshold."""
         threshold = min(max(category_freq.values()), 100)
         return [k for k, v in category_freq.items() if v >= threshold]
@@ -675,7 +677,7 @@ class YOLOConcatDataset(ConcatDataset):
     """
 
     @staticmethod
-    def collate_fn(batch: List[Dict]) -> Dict:
+    def collate_fn(batch: list[dict]) -> dict:
         """
         Collate data samples into batches.
 
@@ -687,7 +689,7 @@ class YOLOConcatDataset(ConcatDataset):
         """
         return YOLODataset.collate_fn(batch)
 
-    def close_mosaic(self, hyp: Dict) -> None:
+    def close_mosaic(self, hyp: dict) -> None:
         """
         Set mosaic, copy_paste and mixup options to 0.0 and build transformations.
 
@@ -784,7 +786,7 @@ class ClassificationDataset:
             else classify_transforms(size=args.imgsz)
         )
 
-    def __getitem__(self, i: int) -> Dict:
+    def __getitem__(self, i: int) -> dict:
         """
         Return subset of data and targets corresponding to given indices.
 
@@ -813,7 +815,7 @@ class ClassificationDataset:
         """Return the total number of samples in the dataset."""
         return len(self.samples)
 
-    def verify_images(self) -> List[Tuple]:
+    def verify_images(self) -> list[tuple]:
         """
         Verify all images in dataset.
 
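Note: the pattern in this file repeats across nearly every module in the release — each gains `from __future__ import annotations` and swaps the typing aliases (Dict, List, Optional, Tuple, Union) for builtin generics (PEP 585) and `X | Y` unions (PEP 604). A minimal sketch of why the future import makes this safe on pre-3.10 interpreters; the function below is illustrative, not taken from the package:

    # With PEP 563 deferred evaluation, annotations are stored as strings
    # and never executed at definition time, so `dict | None` and
    # `list[str]` parse cleanly even on Python 3.8/3.9.
    from __future__ import annotations

    def build(hyp: dict | None = None) -> list[str]:  # hypothetical helper
        """Return sorted hyperparameter names, or an empty list."""
        return sorted(hyp) if hyp else []

    print(build({"mosaic": 1.0, "mixup": 0.5}))  # ['mixup', 'mosaic']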
ultralytics/data/loaders.py
CHANGED
@@ -1,5 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import glob
 import math
 import os
@@ -8,7 +10,7 @@ import urllib
 from dataclasses import dataclass
 from pathlib import Path
 from threading import Thread
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any
 
 import cv2
 import numpy as np
@@ -192,7 +194,7 @@ class LoadStreams:
         self.count = -1
         return self
 
-    def __next__(self) -> Tuple[List[str], List[np.ndarray], List[str]]:
+    def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
         """Return the next batch of frames from multiple video streams for processing."""
         self.count += 1
 
@@ -294,7 +296,7 @@ class LoadScreenshots:
         """Yield the next screenshot image from the specified screen or region for processing."""
         return self
 
-    def __next__(self) -> Tuple[List[str], List[np.ndarray], List[str]]:
+    def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
         """Capture and return the next screenshot as a numpy array using the mss library."""
         im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3]  # BGRA to BGR
         im0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)[..., None] if self.cv2_flag == cv2.IMREAD_GRAYSCALE else im0
@@ -344,7 +346,7 @@ class LoadImagesAndVideos:
         - Can read from a text file containing paths to images and videos.
     """
 
-    def __init__(self, path: Union[str, Path, List], batch: int = 1, vid_stride: int = 1, channels: int = 3):
+    def __init__(self, path: str | Path | list, batch: int = 1, vid_stride: int = 1, channels: int = 3):
         """
         Initialize dataloader for images and videos, supporting various input formats.
 
@@ -403,7 +405,7 @@ class LoadImagesAndVideos:
         self.count = 0
         return self
 
-    def __next__(self) -> Tuple[List[str], List[np.ndarray], List[str]]:
+    def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
         """Return the next batch of images or video frames with their paths and metadata."""
         paths, imgs, info = [], [], []
         while len(imgs) < self.bs:
@@ -514,7 +516,7 @@ class LoadPilAndNumpy:
         Loaded 2 images
     """
 
-    def __init__(self, im0: Union[Image.Image, np.ndarray, List], channels: int = 3):
+    def __init__(self, im0: Image.Image | np.ndarray | list, channels: int = 3):
         """
         Initialize a loader for PIL and Numpy images, converting inputs to a standardized format.
 
@@ -532,7 +534,7 @@ class LoadPilAndNumpy:
         self.bs = len(self.im0)
 
     @staticmethod
-    def _single_check(im: Union[Image.Image, np.ndarray], flag: str = "RGB") -> np.ndarray:
+    def _single_check(im: Image.Image | np.ndarray, flag: str = "RGB") -> np.ndarray:
         """Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
         assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
         if isinstance(im, Image.Image):
@@ -548,7 +550,7 @@ class LoadPilAndNumpy:
         """Return the length of the 'im0' attribute, representing the number of loaded images."""
         return len(self.im0)
 
-    def __next__(self) -> Tuple[List[str], List[np.ndarray], List[str]]:
+    def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
         """Return the next batch of images, paths, and metadata for processing."""
         if self.count == 1:  # loop only once as it's batch inference
             raise StopIteration
@@ -624,7 +626,7 @@ class LoadTensor:
         self.count = 0
         return self
 
-    def __next__(self) -> Tuple[List[str], torch.Tensor, List[str]]:
+    def __next__(self) -> tuple[list[str], torch.Tensor, list[str]]:
         """Yield the next batch of tensor images and metadata for processing."""
         if self.count == 1:
             raise StopIteration
@@ -636,7 +638,7 @@ class LoadTensor:
         return self.bs
 
 
-def autocast_list(source: List[Any]) -> List[Union[Image.Image, np.ndarray]]:
+def autocast_list(source: list[Any]) -> list[Image.Image | np.ndarray]:
     """Merge a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
     files = []
     for im in source:
@@ -653,7 +655,7 @@ def autocast_list(source: List[Any]) -> List[Union[Image.Image, np.ndarray]]:
     return files
 
 
-def get_best_youtube_url(url: str, method: str = "pytube") -> Optional[str]:
+def get_best_youtube_url(url: str, method: str = "pytube") -> str | None:
     """
     Retrieve the URL of the best quality MP4 video stream from a given YouTube video.
 
ultralytics/data/split.py
CHANGED
@@ -1,15 +1,16 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import random
 import shutil
 from pathlib import Path
-from typing import Tuple, Union
 
 from ultralytics.data.utils import IMG_FORMATS, img2label_paths
 from ultralytics.utils import DATASETS_DIR, LOGGER, TQDM
 
 
-def split_classify_dataset(source_dir: Union[str, Path], train_ratio: float = 0.8) -> Path:
+def split_classify_dataset(source_dir: str | Path, train_ratio: float = 0.8) -> Path:
     """
     Split classification dataset into train and val directories in a new directory.
 
@@ -97,7 +98,7 @@ def split_classify_dataset(source_dir: Union[str, Path], train_ratio: float = 0.8) -> Path:
 
 def autosplit(
     path: Path = DATASETS_DIR / "coco8/images",
-    weights: Tuple[float, float, float] = (0.9, 0.1, 0.0),
+    weights: tuple[float, float, float] = (0.9, 0.1, 0.0),
     annotated_only: bool = False,
 ) -> None:
     """
ultralytics/data/split_dota.py
CHANGED
@@ -1,10 +1,12 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import itertools
 from glob import glob
 from math import ceil
 from pathlib import Path
-from typing import Any, Dict, List, Tuple
+from typing import Any
 
 import cv2
 import numpy as np
@@ -62,7 +64,7 @@ def bbox_iof(polygon1: np.ndarray, bbox2: np.ndarray, eps: float = 1e-6) -> np.ndarray:
     return outputs
 
 
-def load_yolo_dota(data_root: str, split: str = "train") -> List[Dict[str, Any]]:
+def load_yolo_dota(data_root: str, split: str = "train") -> list[dict[str, Any]]:
     """
     Load DOTA dataset annotations and image information.
 
@@ -99,9 +101,9 @@ def load_yolo_dota(data_root: str, split: str = "train") -> List[Dict[str, Any]]:
 
 
 def get_windows(
-    im_size: Tuple[int, int],
-    crop_sizes: Tuple[int, ...] = (1024,),
-    gaps: Tuple[int, ...] = (200,),
+    im_size: tuple[int, int],
+    crop_sizes: tuple[int, ...] = (1024,),
+    gaps: tuple[int, ...] = (200,),
     im_rate_thr: float = 0.6,
     eps: float = 0.01,
 ) -> np.ndarray:
@@ -151,7 +153,7 @@ def get_windows(
     return windows[im_rates > im_rate_thr]
 
 
-def get_window_obj(anno: Dict[str, Any], windows: np.ndarray, iof_thr: float = 0.7) -> List[np.ndarray]:
+def get_window_obj(anno: dict[str, Any], windows: np.ndarray, iof_thr: float = 0.7) -> list[np.ndarray]:
    """Get objects for each window based on IoF threshold."""
    h, w = anno["ori_size"]
    label = anno["label"]
@@ -166,9 +168,9 @@ def get_window_obj(anno: Dict[str, Any], windows: np.ndarray, iof_thr: float = 0.7) -> List[np.ndarray]:
 
 
 def crop_and_save(
-    anno: Dict[str, Any],
+    anno: dict[str, Any],
     windows: np.ndarray,
-    window_objs: List[np.ndarray],
+    window_objs: list[np.ndarray],
     im_dir: str,
     lb_dir: str,
     allow_background_images: bool = True,
@@ -221,8 +223,8 @@ def split_images_and_labels(
     data_root: str,
     save_dir: str,
     split: str = "train",
-    crop_sizes: Tuple[int, ...] = (1024,),
-    gaps: Tuple[int, ...] = (200,),
+    crop_sizes: tuple[int, ...] = (1024,),
+    gaps: tuple[int, ...] = (200,),
 ) -> None:
     """
     Split both images and labels for a given dataset split.
@@ -261,7 +263,7 @@ def split_images_and_labels(
 
 
 def split_trainval(
-    data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates: Tuple[float, ...] = (1.0,)
+    data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates: tuple[float, ...] = (1.0,)
 ) -> None:
     """
     Split train and val sets of DOTA dataset with multiple scaling rates.
@@ -300,7 +302,7 @@ def split_trainval(
 
 
 def split_test(
-    data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates: Tuple[float, ...] = (1.0,)
+    data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates: tuple[float, ...] = (1.0,)
 ) -> None:
     """
     Split test set of DOTA dataset, labels are not included within this set.
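The `tuple[int, ...]` form adopted for `crop_sizes` and `gaps` above is the variadic-tuple annotation: any length, homogeneous element type, unlike `tuple[int, int]`, which means exactly two elements. A short hedged illustration of how such paired defaults are typically consumed (names are stand-ins, not package code):

    from __future__ import annotations

    def window_plan(crop_sizes: tuple[int, ...] = (1024,), gaps: tuple[int, ...] = (200,)) -> list[tuple[int, int]]:
        """Pair each crop size with its gap, e.g. for multi-scale window generation."""
        assert len(crop_sizes) == len(gaps), "crop_sizes and gaps must have equal length"
        return list(zip(crop_sizes, gaps))

    print(window_plan((1024, 512), (200, 100)))  # [(1024, 200), (512, 100)]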
ultralytics/data/utils.py
CHANGED
@@ -1,5 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import json
 import os
 import random
@@ -9,7 +11,7 @@ import zipfile
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 from tarfile import is_tarfile
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any
 
 import cv2
 import numpy as np
@@ -39,14 +41,14 @@ VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"}
 FORMATS_HELP_MSG = f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
 
 
-def img2label_paths(img_paths: List[str]) -> List[str]:
+def img2label_paths(img_paths: list[str]) -> list[str]:
     """Convert image paths to label paths by replacing 'images' with 'labels' and extension with '.txt'."""
     sa, sb = f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}"  # /images/, /labels/ substrings
     return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths]
 
 
 def check_file_speeds(
-    files: List[str], threshold_ms: float = 10, threshold_mb: float = 50, max_files: int = 5, prefix: str = ""
+    files: list[str], threshold_ms: float = 10, threshold_mb: float = 50, max_files: int = 5, prefix: str = ""
 ):
     """
     Check dataset file access speed and provide performance feedback.
@@ -66,7 +68,7 @@ def check_file_speeds(
         >>> image_files = list(Path("dataset/images").glob("*.jpg"))
         >>> check_file_speeds(image_files, threshold_ms=15)
     """
-    if not files
+    if not files:
         LOGGER.warning(f"{prefix}Image speed checks: No files to check")
         return
 
@@ -123,7 +125,7 @@ def check_file_speeds(
     )
 
 
-def get_hash(paths: List[str]) -> str:
+def get_hash(paths: list[str]) -> str:
     """Return a single hash value of a list of paths (files or dirs)."""
     size = 0
     for p in paths:
@@ -136,7 +138,7 @@ def get_hash(paths: List[str]) -> str:
     return h.hexdigest()  # return hash
 
 
-def exif_size(img: Image.Image) -> Tuple[int, int]:
+def exif_size(img: Image.Image) -> tuple[int, int]:
     """Return exif-corrected PIL size."""
     s = img.size  # (width, height)
     if img.format == "JPEG":  # only support JPEG images
@@ -150,7 +152,7 @@ def exif_size(img: Image.Image) -> Tuple[int, int]:
     return s
 
 
-def verify_image(args: Tuple) -> Tuple:
+def verify_image(args: tuple) -> tuple:
     """Verify one image."""
     (im_file, cls), prefix = args
     # Number (found, corrupt), message
@@ -175,7 +177,7 @@ def verify_image(args: Tuple) -> Tuple:
     return (im_file, cls), nf, nc, msg
 
 
-def verify_image_label(args: Tuple) -> List:
+def verify_image_label(args: tuple) -> list:
     """Verify one image-label pair."""
     im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim, single_cls = args
     # Number (missing, found, empty, corrupt), message, segments, keypoints
@@ -247,7 +249,7 @@ def verify_image_label(args: Tuple) -> List:
         return [None, None, None, None, None, nm, nf, ne, nc, msg]
 
 
-def visualize_image_annotations(image_path: str, txt_path: str, label_map: Dict[int, str]):
+def visualize_image_annotations(image_path: str, txt_path: str, label_map: dict[int, str]):
     """
     Visualize YOLO annotations (bounding boxes and class labels) on an image.
 
@@ -292,7 +294,7 @@ def visualize_image_annotations(image_path: str, txt_path: str, label_map: Dict[int, str]):
 
 
 def polygon2mask(
-    imgsz: Tuple[int, int], polygons: List[np.ndarray], color: int = 1, downsample_ratio: int = 1
+    imgsz: tuple[int, int], polygons: list[np.ndarray], color: int = 1, downsample_ratio: int = 1
 ) -> np.ndarray:
     """
     Convert a list of polygons to a binary mask of the specified image size.
@@ -317,7 +319,7 @@ def polygon2mask(
 
 
 def polygons2masks(
-    imgsz: Tuple[int, int], polygons: List[np.ndarray], color: int, downsample_ratio: int = 1
+    imgsz: tuple[int, int], polygons: list[np.ndarray], color: int, downsample_ratio: int = 1
 ) -> np.ndarray:
     """
     Convert a list of polygons to a set of binary masks of the specified image size.
@@ -336,8 +338,8 @@ def polygons2masks(
 
 
 def polygons2masks_overlap(
-    imgsz: Tuple[int, int], segments: List[np.ndarray], downsample_ratio: int = 1
-) -> Tuple[np.ndarray, np.ndarray]:
+    imgsz: tuple[int, int], segments: list[np.ndarray], downsample_ratio: int = 1
+) -> tuple[np.ndarray, np.ndarray]:
     """Return a (640, 640) overlap mask."""
     masks = np.zeros(
         (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio),
@@ -345,8 +347,13 @@ def polygons2masks_overlap(
     )
     areas = []
     ms = []
-    for si in range(len(segments)):
-        mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1)
+    for segment in segments:
+        mask = polygon2mask(
+            imgsz,
+            [segment.reshape(-1)],
+            downsample_ratio=downsample_ratio,
+            color=1,
+        )
         ms.append(mask.astype(masks.dtype))
         areas.append(mask.sum())
     areas = np.asarray(areas)
@@ -380,7 +387,7 @@ def find_dataset_yaml(path: Path) -> Path:
     return files[0]
 
 
-def check_det_dataset(dataset: str, autodownload: bool = True) -> Dict[str, Any]:
+def check_det_dataset(dataset: str, autodownload: bool = True) -> dict[str, Any]:
     """
     Download, verify, and/or unzip a dataset if not found locally.
 
@@ -475,7 +482,7 @@ def check_det_dataset(dataset: str, autodownload: bool = True) -> Dict[str, Any]:
     return data  # dictionary
 
 
-def check_cls_dataset(dataset: Union[str, Path], split: str = "") -> Dict[str, Any]:
+def check_cls_dataset(dataset: str | Path, split: str = "") -> dict[str, Any]:
     """
     Check a classification dataset such as Imagenet.
 
@@ -517,8 +524,7 @@ def check_cls_dataset(dataset: Union[str, Path], split: str = "") -> Dict[str, Any]:
     train_set = data_dir / "train"
     if not train_set.is_dir():
         LOGGER.warning(f"Dataset 'split=train' not found at {train_set}")
-        image_files = list(data_dir.rglob("*.jpg")) + list(data_dir.rglob("*.png"))
-        if image_files:
+        if image_files := list(data_dir.rglob("*.jpg")) + list(data_dir.rglob("*.png")):
             from ultralytics.data.split import split_classify_dataset
 
             LOGGER.info(f"Found {len(image_files)} images in subdirectories. Attempting to split...")
@@ -632,7 +638,7 @@ class HUBDatasetStats:
         self.data = data
 
     @staticmethod
-    def _unzip(path: Path) -> Tuple[bool, str, Path]:
+    def _unzip(path: Path) -> tuple[bool, str, Path]:
         """Unzip data.zip."""
         if not str(path).endswith(".zip"):  # path is data.yaml
             return False, None, path
@@ -646,7 +652,7 @@ class HUBDatasetStats:
         """Save a compressed image for HUB previews."""
         compress_one_image(f, self.im_dir / Path(f).name)  # save to dataset-hub
 
-    def get_json(self, save: bool = False, verbose: bool = False) -> Dict:
+    def get_json(self, save: bool = False, verbose: bool = False) -> dict:
         """Return dataset JSON for Ultralytics HUB."""
 
         def _round(labels):
@@ -773,7 +779,7 @@ def compress_one_image(f: str, f_new: str = None, max_dim: int = 1920, quality:
     cv2.imwrite(str(f_new or f), im)
 
 
-def load_dataset_cache_file(path: Path) -> Dict:
+def load_dataset_cache_file(path: Path) -> dict:
     """Load an Ultralytics *.cache dictionary from path."""
     import gc
 
@@ -783,7 +789,7 @@ def load_dataset_cache_file(path: Path) -> Dict:
     return cache
 
 
-def save_dataset_cache_file(prefix: str, path: Path, x: Dict, version: str):
+def save_dataset_cache_file(prefix: str, path: Path, x: dict, version: str):
     """Save an Ultralytics dataset *.cache dictionary x to path."""
     x["version"] = version  # add cache version
     if is_dir_writeable(path.parent):
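Besides the annotation updates, the `check_cls_dataset` hunk above folds an assignment and its truthiness test into a single walrus expression. A small sketch of the pattern under the same shape (the path is illustrative):

    from pathlib import Path

    data_dir = Path("datasets/example")  # hypothetical dataset root
    # Assignment and test in one expression; `image_files` remains bound
    # inside the block, exactly as in the refactored hunk.
    if image_files := list(data_dir.rglob("*.jpg")) + list(data_dir.rglob("*.png")):
        print(f"Found {len(image_files)} images in subdirectories.")
    else:
        print(f"No images found under {data_dir}")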
ultralytics/engine/exporter.py
CHANGED
@@ -349,7 +349,7 @@ class Exporter:
             assert not getattr(model, "end2end", False), "TFLite INT8 export not supported for end2end models."
         if self.args.nms:
             assert not isinstance(model, ClassificationModel), "'nms=True' is not valid for classification models."
-            assert not
+            assert not tflite or not ARM64 or not LINUX, "TFLite export with NMS unsupported on ARM64 Linux"
         if getattr(model, "end2end", False):
             LOGGER.warning("'nms=True' is not available for end2end models. Forcing 'nms=False'.")
             self.args.nms = False
@@ -436,7 +436,7 @@ class Exporter:
 
         y = None
         for _ in range(2):  # dry runs
-            y = NMSModel(model, self.args)(im) if self.args.nms and not
+            y = NMSModel(model, self.args)(im) if self.args.nms and not coreml and not imx else model(im)
         if self.args.half and onnx and self.device.type != "cpu":
            im, model = im.half(), model.half()  # to FP16
 
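Both exporter.py edits read as boolean flattenings; the removed halves are truncated in this view, but if the old expressions were the parenthesized forms `not (tflite and ARM64 and LINUX)` and `not (coreml or imx)` — an assumption, not confirmed by the diff — the new flat forms are exact De Morgan equivalents. A quick exhaustive check:

    # Hedged sketch: verify the assumed De Morgan equivalences over all inputs.
    from itertools import product

    for tflite, arm64, linux in product([False, True], repeat=3):
        assert (not (tflite and arm64 and linux)) == (not tflite or not arm64 or not linux)

    for coreml, imx in product([False, True], repeat=2):
        assert (not (coreml or imx)) == (not coreml and not imx)

    print("Equivalent for all truth assignments.")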