dgenerate-ultralytics-headless 8.3.159__py3-none-any.whl → 8.3.161__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/RECORD +62 -62
- tests/test_python.py +2 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +0 -2
- ultralytics/cfg/datasets/Argoverse.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +1 -1
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/Objects365.yaml +1 -1
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +1 -1
- ultralytics/cfg/datasets/VisDrone.yaml +6 -3
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/brain-tumor.yaml +1 -1
- ultralytics/cfg/datasets/carparts-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +1 -1
- ultralytics/cfg/datasets/coco.yaml +1 -1
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco128.yaml +1 -1
- ultralytics/cfg/datasets/coco8-grayscale.yaml +1 -1
- ultralytics/cfg/datasets/coco8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +1 -1
- ultralytics/cfg/datasets/coco8-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8.yaml +1 -1
- ultralytics/cfg/datasets/crack-seg.yaml +1 -1
- ultralytics/cfg/datasets/dog-pose.yaml +1 -1
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +1 -1
- ultralytics/cfg/datasets/hand-keypoints.yaml +1 -1
- ultralytics/cfg/datasets/lvis.yaml +1 -1
- ultralytics/cfg/datasets/medical-pills.yaml +1 -1
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/package-seg.yaml +1 -1
- ultralytics/cfg/datasets/signature.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +1 -1
- ultralytics/cfg/datasets/xView.yaml +1 -1
- ultralytics/data/augment.py +8 -8
- ultralytics/data/converter.py +3 -5
- ultralytics/data/dataset.py +1 -1
- ultralytics/data/split.py +1 -1
- ultralytics/engine/exporter.py +11 -2
- ultralytics/engine/model.py +2 -0
- ultralytics/engine/results.py +1 -6
- ultralytics/models/yolo/model.py +25 -24
- ultralytics/models/yolo/world/train.py +1 -1
- ultralytics/models/yolo/world/train_world.py +6 -6
- ultralytics/models/yolo/yoloe/train.py +1 -1
- ultralytics/nn/autobackend.py +7 -1
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/object_counter.py +9 -9
- ultralytics/solutions/similarity_search.py +11 -12
- ultralytics/solutions/solutions.py +55 -56
- ultralytics/utils/__init__.py +1 -4
- ultralytics/utils/instance.py +2 -0
- ultralytics/utils/metrics.py +24 -36
- {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,7 @@
 # └── tiger-pose ← downloads here (75.3 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path:
+path: tiger-pose # dataset root dir
 train: train # train images (relative to 'path') 210 images
 val: val # val images (relative to 'path') 53 images
 
ultralytics/cfg/datasets/xView.yaml
CHANGED
@@ -10,7 +10,7 @@
 # └── xView ← downloads here (20.7 GB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path:
+path: xView # dataset root dir
 train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
 val: images/autosplit_val.txt # train images (relative to 'path') 10% of 847 train images
 
ultralytics/data/augment.py
CHANGED
@@ -251,8 +251,7 @@ class Compose:
         >>> multiple_transforms = compose[0:2]  # Returns a Compose object with RandomFlip and RandomPerspective
         """
         assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
-
-        return Compose([self.transforms[i] for i in index])
+        return Compose([self.transforms[i] for i in index]) if isinstance(index, list) else self.transforms[index]
 
     def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:
         """
@@ -1560,14 +1559,15 @@ class RandomFlip:
         h = 1 if instances.normalized else h
         w = 1 if instances.normalized else w
 
-        #
+        # WARNING: two separate if and calls to random.random() intentional for reproducibility with older versions
         if self.direction == "vertical" and random.random() < self.p:
             img = np.flipud(img)
             instances.flipud(h)
+            if self.flip_idx is not None and instances.keypoints is not None:
+                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         if self.direction == "horizontal" and random.random() < self.p:
             img = np.fliplr(img)
             instances.fliplr(w)
-            # For keypoints
             if self.flip_idx is not None and instances.keypoints is not None:
                 instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         labels["img"] = np.ascontiguousarray(img)
@@ -2533,9 +2533,9 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
     flip_idx = dataset.data.get("flip_idx", [])  # for keypoints augmentation
     if dataset.use_keypoints:
         kpt_shape = dataset.data.get("kpt_shape", None)
-        if len(flip_idx) == 0 and hyp.fliplr > 0.0:
-            hyp.fliplr = 0.0
-            LOGGER.warning("No 'flip_idx' array defined in data.yaml,
+        if len(flip_idx) == 0 and (hyp.fliplr > 0.0 or hyp.flipud > 0.0):
+            hyp.fliplr = hyp.flipud = 0.0  # both fliplr and flipud require flip_idx
+            LOGGER.warning("No 'flip_idx' array defined in data.yaml, disabling 'fliplr' and 'flipud' augmentations.")
         elif flip_idx and (len(flip_idx) != kpt_shape[0]):
             raise ValueError(f"data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}")
 
@@ -2546,7 +2546,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
             CutMix(dataset, pre_transform=pre_transform, p=hyp.cutmix),
             Albumentations(p=1.0),
             RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
-            RandomFlip(direction="vertical", p=hyp.flipud),
+            RandomFlip(direction="vertical", p=hyp.flipud, flip_idx=flip_idx),
             RandomFlip(direction="horizontal", p=hyp.fliplr, flip_idx=flip_idx),
         ]
     )  # transforms
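The Compose.__getitem__ change above means an int index now returns the underlying transform itself, while a list index still returns a Compose. A minimal sketch of both behaviors; the no-op lambda is a placeholder transform, not part of the library:

from ultralytics.data.augment import Compose

identity = lambda labels: labels  # placeholder transform for illustration only
compose = Compose([identity, identity, identity])

single = compose[1]        # int index: the transform itself, no longer wrapped in a Compose
subset = compose[[0, 2]]   # list index: a Compose containing the selected transforms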
ultralytics/data/converter.py
CHANGED
@@ -248,12 +248,10 @@ def convert_coco(
         >>> from ultralytics.data.converter import convert_coco
 
         Convert COCO annotations to YOLO format
-        >>> convert_coco("
+        >>> convert_coco("coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
 
         Convert LVIS annotations to YOLO format
-        >>> convert_coco(
-        ...     "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
-        ... )
+        >>> convert_coco("lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
     """
     # Create dataset directory
     save_dir = increment_path(save_dir)  # increment if save directory already exists
@@ -724,7 +722,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
         >>> convert_to_multispectral("path/to/image.jpg", n_channels=10)
 
         Convert a dataset
-        >>> convert_to_multispectral("
+        >>> convert_to_multispectral("coco8", n_channels=10)
     """
     from scipy.interpolate import interp1d
 
ultralytics/data/dataset.py
CHANGED
@@ -482,7 +482,7 @@ class GroundingDataset(YOLODataset):
             a warning is logged and verification is skipped.
         """
         expected_counts = {
-            "final_mixed_train_no_coco_segm":
+            "final_mixed_train_no_coco_segm": 3662412,
             "final_mixed_train_no_coco": 3681235,
             "final_flickr_separateGT_train_segm": 638214,
             "final_flickr_separateGT_train": 640704,
ultralytics/data/split.py
CHANGED
ultralytics/engine/exporter.py
CHANGED
@@ -706,7 +706,16 @@ class Exporter:
     def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
         """Export YOLO model to PaddlePaddle format."""
         assert not IS_JETSON, "Jetson Paddle exports not supported yet"
-        check_requirements(
+        check_requirements(
+            (
+                "paddlepaddle-gpu"
+                if torch.cuda.is_available()
+                else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
+                if ARM64
+                else "paddlepaddle>=3.0.0",
+                "x2paddle",
+            )
+        )
         import x2paddle  # noqa
         from x2paddle.convert import pytorch2paddle  # noqa
 
@@ -1495,7 +1504,7 @@ class NMSModel(torch.nn.Module):
         scores, classes = scores.max(dim=-1)
         self.args.max_det = min(pred.shape[1], self.args.max_det)  # in case num_anchors < max_det
         # (N, max_det, 4 coords + 1 class score + 1 class label + extra_shape).
-        out = torch.zeros(
+        out = torch.zeros(pred.shape[0], self.args.max_det, boxes.shape[-1] + 2 + extra_shape, **kwargs)
         for i in range(bs):
             box, cls, score, extra = boxes[i], classes[i], scores[i], extras[i]
             mask = score > self.args.conf
ultralytics/engine/model.py
CHANGED
@@ -777,6 +777,8 @@ class Model(torch.nn.Module):
 
         checks.check_pip_update_available()
 
+        if isinstance(kwargs.get("pretrained", None), (str, Path)):
+            self.load(kwargs["pretrained"])  # load pretrained weights if provided
         overrides = YAML.load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
         custom = {
             # NOTE: handle the case when 'cfg' includes 'data'.
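With the Model.train() change above, a pretrained argument given as a path is now loaded into the model before training starts. A minimal sketch of the call; the dataset and weight filenames are placeholders:

from ultralytics import YOLO

model = YOLO("yolo11n.yaml")  # build the model from a config
model.train(data="coco8.yaml", epochs=3, pretrained="yolo11n.pt")  # the weights path is loaded via self.load() first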
ultralytics/engine/results.py
CHANGED
@@ -16,7 +16,6 @@ import torch
 from ultralytics.data.augment import LetterBox
 from ultralytics.utils import LOGGER, DataExportMixin, SimpleClass, ops
 from ultralytics.utils.plotting import Annotator, colors, save_one_box
-from ultralytics.utils.torch_utils import smart_inference_mode
 
 
 class BaseTensor(SimpleClass):
@@ -801,7 +800,7 @@ class Results(SimpleClass, DataExportMixin):
             decimals (int): Number of decimal places to round the output values to.
 
         Returns:
-            (List[Dict]): A list of dictionaries, each containing summarized information for a single detection
+            (List[Dict[str, Any]]): A list of dictionaries, each containing summarized information for a single detection
                 or classification result. The structure of each dictionary varies based on the task type
                 (classification or detection) and available information (boxes, masks, keypoints).
 
@@ -1204,7 +1203,6 @@ class Keypoints(BaseTensor):
        >>> keypoints_cpu = keypoints.cpu()  # Move keypoints to CPU
    """
 
-    @smart_inference_mode()  # avoid keypoints < conf in-place error
     def __init__(self, keypoints: Union[torch.Tensor, np.ndarray], orig_shape: Tuple[int, int]) -> None:
         """
         Initialize the Keypoints object with detection keypoints and original image dimensions.
@@ -1225,9 +1223,6 @@ class Keypoints(BaseTensor):
         """
         if keypoints.ndim == 2:
             keypoints = keypoints[None, :]
-        if keypoints.shape[2] == 3:  # x, y, conf
-            mask = keypoints[..., 2] < 0.5  # points with conf < 0.5 (not visible)
-            keypoints[..., :2][mask] = 0
         super().__init__(keypoints, orig_shape)
         self.has_visible = self.data.shape[-1] == 3
 
ultralytics/models/yolo/model.py
CHANGED
@@ -406,18 +406,18 @@ class YOLOE(Model):
                 f"Expected equal number of bounding boxes and classes, but got {len(visual_prompts['bboxes'])} and "
                 f"{len(visual_prompts['cls'])} respectively"
             )
-            [10 removed lines, truncated in the source diff view]
+            if not isinstance(self.predictor, yolo.yoloe.YOLOEVPDetectPredictor):
+                self.predictor = (predictor or yolo.yoloe.YOLOEVPDetectPredictor)(
+                    overrides={
+                        "task": self.model.task,
+                        "mode": "predict",
+                        "save": False,
+                        "verbose": refer_image is None,
+                        "batch": 1,
+                    },
+                    _callbacks=self.callbacks,
+                )
 
-        if len(visual_prompts):
             num_cls = (
                 max(len(set(c)) for c in visual_prompts["cls"])
                 if isinstance(source, list) and refer_image is None  # means multiple images
@@ -426,18 +426,19 @@ class YOLOE(Model):
             self.model.model[-1].nc = num_cls
             self.model.names = [f"object{i}" for i in range(num_cls)]
             self.predictor.set_prompts(visual_prompts.copy())
-            [13 removed lines, truncated in the source diff view]
+            self.predictor.setup_model(model=self.model)
+
+            if refer_image is None and source is not None:
+                dataset = load_inference_source(source)
+                if dataset.mode in {"video", "stream"}:
+                    # NOTE: set the first frame as refer image for videos/streams inference
+                    refer_image = next(iter(dataset))[1][0]
+            if refer_image is not None:
+                vpe = self.predictor.get_vpe(refer_image)
+                self.model.set_classes(self.model.names, vpe)
+                self.task = "segment" if isinstance(self.predictor, yolo.segment.SegmentationPredictor) else "detect"
+                self.predictor = None  # reset predictor
+        elif isinstance(self.predictor, yolo.yoloe.YOLOEVPDetectPredictor):
+            self.predictor = None  # reset predictor if no visual prompts
 
         return super().predict(source, stream, **kwargs)
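For reference, a hedged sketch of how the visual-prompt path rebuilt above is typically driven from user code; the checkpoint name and image path are assumptions, while the "bboxes" and "cls" keys follow the asserts shown in the hunk:

import numpy as np
from ultralytics import YOLOE

model = YOLOE("yoloe-11s-seg.pt")  # assumed checkpoint name
visual_prompts = dict(
    bboxes=np.array([[120.0, 80.0, 320.0, 240.0]]),  # one prompt box as (x1, y1, x2, y2)
    cls=np.array([0]),                               # class id for each prompt box
)
results = model.predict("image.jpg", visual_prompts=visual_prompts)  # predictor is set up as in the diff above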
ultralytics/models/yolo/world/train.py
CHANGED
@@ -158,7 +158,7 @@ class WorldTrainer(DetectionTrainer):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = self.model.get_text_pe(texts, batch, cache_clip_model=False)
+        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
ultralytics/models/yolo/world/train_world.py
CHANGED
@@ -35,12 +35,12 @@ class WorldTrainerFromScratch(WorldTrainer):
         ...         yolo_data=["Objects365.yaml"],
         ...         grounding_data=[
         ...             dict(
-        ...                 img_path="
-        ...                 json_file="
+        ...                 img_path="flickr30k/images",
+        ...                 json_file="flickr30k/final_flickr_separateGT_train.json",
         ...             ),
         ...             dict(
-        ...                 img_path="
-        ...                 json_file="
+        ...                 img_path="GQA/images",
+        ...                 json_file="GQA/final_mixed_train_no_coco.json",
         ...             ),
         ...         ],
         ...     ),
@@ -70,8 +70,8 @@ class WorldTrainerFromScratch(WorldTrainer):
         ...         yolo_data=["Objects365.yaml"],
         ...         grounding_data=[
         ...             dict(
-        ...                 img_path="
-        ...                 json_file="
+        ...                 img_path="flickr30k/images",
+        ...                 json_file="flickr30k/final_flickr_separateGT_train.json",
         ...             ),
         ...         ],
         ...     ),
ultralytics/models/yolo/yoloe/train.py
CHANGED
@@ -222,7 +222,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
+        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
ultralytics/nn/autobackend.py
CHANGED
@@ -487,7 +487,13 @@ class AutoBackend(nn.Module):
         # PaddlePaddle
         elif paddle:
             LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
-            check_requirements(
+            check_requirements(
+                "paddlepaddle-gpu"
+                if torch.cuda.is_available()
+                else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
+                if ARM64
+                else "paddlepaddle>=3.0.0"
+            )
             import paddle.inference as pdi  # noqa
 
             w = Path(w)
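Both Paddle branches above pick the requirement string with a chained conditional expression. A standalone sketch of how that expression resolves; the booleans stand in for torch.cuda.is_available() and ARM64:

cuda_available, is_arm64 = False, True  # stand-in values for illustration

requirement = (
    "paddlepaddle-gpu"
    if cuda_available
    else "paddlepaddle==3.0.0"  # ARM64 pin
    if is_arm64
    else "paddlepaddle>=3.0.0"
)
print(requirement)  # -> paddlepaddle==3.0.0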
ultralytics/solutions/heatmap.py
CHANGED
ultralytics/solutions/object_counter.py
CHANGED
@@ -43,7 +43,7 @@ class ObjectCounter(BaseSolution):
         self.in_count = 0  # Counter for objects moving inward
         self.out_count = 0  # Counter for objects moving outward
         self.counted_ids = []  # List of IDs of objects that have been counted
-        self.
+        self.classwise_count = defaultdict(lambda: {"IN": 0, "OUT": 0})  # Dictionary for counts, categorized by class
         self.region_initialized = False  # Flag indicating whether the region has been initialized
 
         self.show_in = self.CFG["show_in"]
@@ -85,17 +85,17 @@ class ObjectCounter(BaseSolution):
                     # Vertical region: Compare x-coordinates to determine direction
                     if current_centroid[0] > prev_position[0]:  # Moving right
                         self.in_count += 1
-                        self.
+                        self.classwise_count[self.names[cls]]["IN"] += 1
                     else:  # Moving left
                         self.out_count += 1
-                        self.
+                        self.classwise_count[self.names[cls]]["OUT"] += 1
                 # Horizontal region: Compare y-coordinates to determine direction
                 elif current_centroid[1] > prev_position[1]:  # Moving downward
                     self.in_count += 1
-                    self.
+                    self.classwise_count[self.names[cls]]["IN"] += 1
                 else:  # Moving upward
                     self.out_count += 1
-                    self.
+                    self.classwise_count[self.names[cls]]["OUT"] += 1
                 self.counted_ids.append(track_id)
 
         elif len(self.region) > 2:  # Polygonal region
@@ -111,10 +111,10 @@ class ObjectCounter(BaseSolution):
                     and current_centroid[1] > prev_position[1]
                 ):  # Moving right or downward
                     self.in_count += 1
-                    self.
+                    self.classwise_count[self.names[cls]]["IN"] += 1
                 else:  # Moving left or upward
                     self.out_count += 1
-                    self.
+                    self.classwise_count[self.names[cls]]["OUT"] += 1
                 self.counted_ids.append(track_id)
 
     def display_counts(self, plot_im) -> None:
@@ -132,7 +132,7 @@ class ObjectCounter(BaseSolution):
         labels_dict = {
             str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
             f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
-            for key, value in self.
+            for key, value in self.classwise_count.items()
             if value["IN"] != 0 or value["OUT"] != 0 and (self.show_in or self.show_out)
         }
         if labels_dict:
@@ -190,6 +190,6 @@ class ObjectCounter(BaseSolution):
             plot_im=plot_im,
             in_count=self.in_count,
             out_count=self.out_count,
-            classwise_count=dict(self.
+            classwise_count=dict(self.classwise_count),
             total_tracks=len(self.track_ids),
         )
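A hedged usage sketch for the renamed classwise_count bookkeeping above; the video path and region points are placeholders:

import cv2
from ultralytics import solutions

counter = solutions.ObjectCounter(region=[(20, 400), (1260, 400)], model="yolo11n.pt", show=False)

cap = cv2.VideoCapture("traffic.mp4")  # placeholder video
ok, frame = cap.read()
if ok:
    results = counter(frame)                    # SolutionResults for this frame
    print(results.in_count, results.out_count)  # overall totals
    print(results.classwise_count)              # per-class {"IN": ..., "OUT": ...} counts
cap.release()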
ultralytics/solutions/similarity_search.py
CHANGED
@@ -9,14 +9,14 @@ from PIL import Image
 
 from ultralytics.data.utils import IMG_FORMATS
 from ultralytics.nn.text_model import build_text_model
-from ultralytics.
+from ultralytics.utils import LOGGER
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.torch_utils import select_device
 
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # Avoid OpenMP conflict on some systems
 
 
-class VisualAISearch
+class VisualAISearch:
     """
     A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
     FAISS for fast similarity-based retrieval.
@@ -48,19 +48,18 @@ class VisualAISearch(BaseSolution):
 
     def __init__(self, **kwargs: Any) -> None:
         """Initialize the VisualAISearch class with FAISS index and CLIP model."""
-        super().__init__(**kwargs)
         check_requirements("faiss-cpu")
 
         self.faiss = __import__("faiss")
         self.faiss_index = "faiss.index"
         self.data_path_npy = "paths.npy"
-        self.data_dir = Path(
-        self.device = select_device(
+        self.data_dir = Path(kwargs.get("data", "images"))
+        self.device = select_device(kwargs.get("device", "cpu"))
 
         if not self.data_dir.exists():
             from ultralytics.utils import ASSETS_URL
 
-
+            LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
             from ultralytics.utils.downloads import safe_download
 
             safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
@@ -91,13 +90,13 @@ class VisualAISearch(BaseSolution):
         """
         # Check if the FAISS index and corresponding image paths already exist
         if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
-
+            LOGGER.info("Loading existing FAISS index...")
             self.index = self.faiss.read_index(self.faiss_index)  # Load the FAISS index from disk
             self.image_paths = np.load(self.data_path_npy)  # Load the saved image path list
             return  # Exit the function as the index is successfully loaded
 
         # If the index doesn't exist, start building it from scratch
-
+        LOGGER.info("Building FAISS index from images...")
         vectors = []  # List to store feature vectors of images
 
         # Iterate over all image files in the data directory
@@ -110,7 +109,7 @@ class VisualAISearch(BaseSolution):
                 vectors.append(self.extract_image_feature(file))
                 self.image_paths.append(file.name)  # Store the corresponding image name
             except Exception as e:
-
+                LOGGER.warning(f"Skipping {file.name}: {e}")
 
         # If no vectors were successfully created, raise an error
         if not vectors:
@@ -124,7 +123,7 @@ class VisualAISearch(BaseSolution):
         self.faiss.write_index(self.index, self.faiss_index)  # Save the newly built FAISS index to disk
         np.save(self.data_path_npy, np.array(self.image_paths))  # Save the list of image paths to disk
 
-
+        LOGGER.info(f"Indexed {len(self.image_paths)} images.")
 
     def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> List[str]:
         """
@@ -152,9 +151,9 @@ class VisualAISearch(BaseSolution):
         ]
         results.sort(key=lambda x: x[1], reverse=True)
 
-
+        LOGGER.info("\nRanked Results:")
         for name, score in results:
-
+            LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
 
         return [r[0] for r in results]
 
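A hedged usage sketch for the reworked VisualAISearch class above; the data directory and query text are placeholders, and the import path assumes the class is re-exported from ultralytics.solutions:

from ultralytics.solutions import VisualAISearch

searcher = VisualAISearch(data="images", device="cpu")  # builds or loads faiss.index over ./images
matches = searcher.search("a dog playing in a park", k=10, similarity_thresh=0.1)
print(matches[:3])  # best-matching image filenames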
ultralytics/solutions/solutions.py
CHANGED
@@ -81,60 +81,59 @@ class BaseSolution:
         self.CFG = vars(SolutionConfig().update(**kwargs))
         self.LOGGER = LOGGER  # Store logger object to be used in multiple solution classes
 
-        [54 removed lines, truncated in the source diff view]
+        check_requirements("shapely>=2.0.0")
+        from shapely.geometry import LineString, Point, Polygon
+        from shapely.prepared import prep
+
+        self.LineString = LineString
+        self.Polygon = Polygon
+        self.Point = Point
+        self.prep = prep
+        self.annotator = None  # Initialize annotator
+        self.tracks = None
+        self.track_data = None
+        self.boxes = []
+        self.clss = []
+        self.track_ids = []
+        self.track_line = None
+        self.masks = None
+        self.r_s = None
+        self.frame_no = -1  # Only for logging
+
+        self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
+        self.region = self.CFG["region"]  # Store region data for other classes usage
+        self.line_width = self.CFG["line_width"]
+
+        # Load Model and store additional information (classes, show_conf, show_label)
+        if self.CFG["model"] is None:
+            self.CFG["model"] = "yolo11n.pt"
+        self.model = YOLO(self.CFG["model"])
+        self.names = self.model.names
+        self.classes = self.CFG["classes"]
+        self.show_conf = self.CFG["show_conf"]
+        self.show_labels = self.CFG["show_labels"]
+        self.device = self.CFG["device"]
+
+        self.track_add_args = {  # Tracker additional arguments for advance configuration
+            k: self.CFG[k] for k in ["iou", "conf", "device", "max_det", "half", "tracker"]
+        }  # verbose must be passed to track method; setting it False in YOLO still logs the track information.
+
+        if is_cli and self.CFG["source"] is None:
+            d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
+            self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
+            from ultralytics.utils.downloads import safe_download
+
+            safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
+            self.CFG["source"] = d_s  # set default source
+
+        # Initialize environment and region setup
+        self.env_check = check_imshow(warn=True)
+        self.track_history = defaultdict(list)
+
+        self.profilers = (
+            ops.Profile(device=self.device),  # track
+            ops.Profile(device=self.device),  # solution
+        )
 
     def adjust_box_label(self, cls: int, conf: float, track_id: Optional[int] = None) -> Optional[str]:
         """
@@ -808,10 +807,10 @@ class SolutionResults:
         filled_slots (int): The number of filled slots in a monitored area.
         email_sent (bool): A flag indicating whether an email notification was sent.
         total_tracks (int): The total number of tracked objects.
-        region_counts (Dict): The count of objects within a specific region.
+        region_counts (Dict[str, int]): The count of objects within a specific region.
         speed_dict (Dict[str, float]): A dictionary containing speed information for tracked objects.
         total_crop_objects (int): Total number of cropped objects using ObjectCropper class.
-        speed (Dict): Performance timing information for tracking and solution processing.
+        speed (Dict[str, float]): Performance timing information for tracking and solution processing.
     """
 
     def __init__(self, **kwargs):
ultralytics/utils/__init__.py
CHANGED
@@ -255,11 +255,8 @@ class DataExportMixin:
         Notes:
             Requires `lxml` package to be installed.
         """
-        from ultralytics.utils.checks import check_requirements
-
-        check_requirements("lxml")
         df = self.to_df(normalize=normalize, decimals=decimals)
-        return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml()
+        return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml(parser="etree")
 
     def to_html(self, normalize=False, decimals=5, index=False):
         """
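For reference, a small standalone sketch of the pandas call the new to_xml() line relies on; the DataFrame contents are placeholders:

import pandas as pd

df = pd.DataFrame([{"name": "person", "confidence": 0.91}])
print(df.to_xml(parser="etree"))  # stdlib ElementTree backend, so lxml is no longer required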
ultralytics/utils/instance.py
CHANGED
@@ -406,6 +406,8 @@ class Instances:
             | (self.keypoints[..., 1] < 0)
             | (self.keypoints[..., 1] > h)
         ] = 0.0
+        self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w)
+        self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
 
     def remove_zero_area_boxes(self):
         """