dgenerate-ultralytics-headless 8.3.159__py3-none-any.whl → 8.3.161__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/RECORD +62 -62
  3. tests/test_python.py +2 -1
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +0 -2
  6. ultralytics/cfg/datasets/Argoverse.yaml +1 -1
  7. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  8. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  9. ultralytics/cfg/datasets/GlobalWheat2020.yaml +1 -1
  10. ultralytics/cfg/datasets/HomeObjects-3K.yaml +1 -1
  11. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  12. ultralytics/cfg/datasets/Objects365.yaml +1 -1
  13. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  14. ultralytics/cfg/datasets/VOC.yaml +1 -1
  15. ultralytics/cfg/datasets/VisDrone.yaml +6 -3
  16. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  17. ultralytics/cfg/datasets/brain-tumor.yaml +1 -1
  18. ultralytics/cfg/datasets/carparts-seg.yaml +1 -1
  19. ultralytics/cfg/datasets/coco-pose.yaml +1 -1
  20. ultralytics/cfg/datasets/coco.yaml +1 -1
  21. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  22. ultralytics/cfg/datasets/coco128.yaml +1 -1
  23. ultralytics/cfg/datasets/coco8-grayscale.yaml +1 -1
  24. ultralytics/cfg/datasets/coco8-multispectral.yaml +1 -1
  25. ultralytics/cfg/datasets/coco8-pose.yaml +1 -1
  26. ultralytics/cfg/datasets/coco8-seg.yaml +1 -1
  27. ultralytics/cfg/datasets/coco8.yaml +1 -1
  28. ultralytics/cfg/datasets/crack-seg.yaml +1 -1
  29. ultralytics/cfg/datasets/dog-pose.yaml +1 -1
  30. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  31. ultralytics/cfg/datasets/dota8.yaml +1 -1
  32. ultralytics/cfg/datasets/hand-keypoints.yaml +1 -1
  33. ultralytics/cfg/datasets/lvis.yaml +1 -1
  34. ultralytics/cfg/datasets/medical-pills.yaml +1 -1
  35. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  36. ultralytics/cfg/datasets/package-seg.yaml +1 -1
  37. ultralytics/cfg/datasets/signature.yaml +1 -1
  38. ultralytics/cfg/datasets/tiger-pose.yaml +1 -1
  39. ultralytics/cfg/datasets/xView.yaml +1 -1
  40. ultralytics/data/augment.py +8 -8
  41. ultralytics/data/converter.py +3 -5
  42. ultralytics/data/dataset.py +1 -1
  43. ultralytics/data/split.py +1 -1
  44. ultralytics/engine/exporter.py +11 -2
  45. ultralytics/engine/model.py +2 -0
  46. ultralytics/engine/results.py +1 -6
  47. ultralytics/models/yolo/model.py +25 -24
  48. ultralytics/models/yolo/world/train.py +1 -1
  49. ultralytics/models/yolo/world/train_world.py +6 -6
  50. ultralytics/models/yolo/yoloe/train.py +1 -1
  51. ultralytics/nn/autobackend.py +7 -1
  52. ultralytics/solutions/heatmap.py +1 -1
  53. ultralytics/solutions/object_counter.py +9 -9
  54. ultralytics/solutions/similarity_search.py +11 -12
  55. ultralytics/solutions/solutions.py +55 -56
  56. ultralytics/utils/__init__.py +1 -4
  57. ultralytics/utils/instance.py +2 -0
  58. ultralytics/utils/metrics.py +24 -36
  59. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/WHEEL +0 -0
  60. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/entry_points.txt +0 -0
  61. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/licenses/LICENSE +0 -0
  62. {dgenerate_ultralytics_headless-8.3.159.dist-info → dgenerate_ultralytics_headless-8.3.161.dist-info}/top_level.txt +0 -0
ultralytics/cfg/datasets/tiger-pose.yaml CHANGED
@@ -9,7 +9,7 @@
 # └── tiger-pose ← downloads here (75.3 MB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/tiger-pose # dataset root dir
+path: tiger-pose # dataset root dir
 train: train # train images (relative to 'path') 210 images
 val: val # val images (relative to 'path') 53 images
 
ultralytics/cfg/datasets/xView.yaml CHANGED
@@ -10,7 +10,7 @@
 # └── xView ← downloads here (20.7 GB)
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/xView # dataset root dir
+path: xView # dataset root dir
 train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
 val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
 
ultralytics/data/augment.py CHANGED
@@ -251,8 +251,7 @@ class Compose:
             >>> multiple_transforms = compose[0:2]  # Returns a Compose object with RandomFlip and RandomPerspective
         """
         assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
-        index = [index] if isinstance(index, int) else index
-        return Compose([self.transforms[i] for i in index])
+        return Compose([self.transforms[i] for i in index]) if isinstance(index, list) else self.transforms[index]
 
     def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:
         """
@@ -1560,14 +1559,15 @@ class RandomFlip:
         h = 1 if instances.normalized else h
         w = 1 if instances.normalized else w
 
-        # Flip up-down
+        # WARNING: two separate if and calls to random.random() intentional for reproducibility with older versions
         if self.direction == "vertical" and random.random() < self.p:
             img = np.flipud(img)
             instances.flipud(h)
+            if self.flip_idx is not None and instances.keypoints is not None:
+                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         if self.direction == "horizontal" and random.random() < self.p:
             img = np.fliplr(img)
             instances.fliplr(w)
-            # For keypoints
             if self.flip_idx is not None and instances.keypoints is not None:
                 instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         labels["img"] = np.ascontiguousarray(img)
@@ -2533,9 +2533,9 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
     flip_idx = dataset.data.get("flip_idx", [])  # for keypoints augmentation
     if dataset.use_keypoints:
         kpt_shape = dataset.data.get("kpt_shape", None)
-        if len(flip_idx) == 0 and hyp.fliplr > 0.0:
-            hyp.fliplr = 0.0
-            LOGGER.warning("No 'flip_idx' array defined in data.yaml, setting augmentation 'fliplr=0.0'")
+        if len(flip_idx) == 0 and (hyp.fliplr > 0.0 or hyp.flipud > 0.0):
+            hyp.fliplr = hyp.flipud = 0.0  # both fliplr and flipud require flip_idx
+            LOGGER.warning("No 'flip_idx' array defined in data.yaml, disabling 'fliplr' and 'flipud' augmentations.")
         elif flip_idx and (len(flip_idx) != kpt_shape[0]):
             raise ValueError(f"data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}")
 
@@ -2546,7 +2546,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
             CutMix(dataset, pre_transform=pre_transform, p=hyp.cutmix),
             Albumentations(p=1.0),
             RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
-            RandomFlip(direction="vertical", p=hyp.flipud),
+            RandomFlip(direction="vertical", p=hyp.flipud, flip_idx=flip_idx),
             RandomFlip(direction="horizontal", p=hyp.fliplr, flip_idx=flip_idx),
         ]
     )  # transforms
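The net effect of these three hunks: vertical flips now remap symmetric keypoint pairs via flip_idx just like horizontal flips, and pose datasets without flip_idx get both flip augmentations disabled instead of only fliplr. A hedged construction sketch (the 5-point flip_idx is illustrative, not from a real dataset):

from ultralytics.data.augment import RandomFlip

# flip_idx[i] is the index of keypoint i's mirror partner (e.g. left eye <-> right eye)
flip_idx = [1, 0, 2, 4, 3]
flip_ud = RandomFlip(direction="vertical", p=0.5, flip_idx=flip_idx)  # now keypoint-aware
flip_lr = RandomFlip(direction="horizontal", p=0.5, flip_idx=flip_idx)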
ultralytics/data/converter.py CHANGED
@@ -248,12 +248,10 @@ def convert_coco(
        >>> from ultralytics.data.converter import convert_coco
 
        Convert COCO annotations to YOLO format
-       >>> convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
+       >>> convert_coco("coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
 
        Convert LVIS annotations to YOLO format
-       >>> convert_coco(
-       ...     "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
-       ... )
+       >>> convert_coco("lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
    """
    # Create dataset directory
    save_dir = increment_path(save_dir)  # increment if save directory already exists
@@ -724,7 +722,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
        >>> convert_to_multispectral("path/to/image.jpg", n_channels=10)
 
        Convert a dataset
-       >>> convert_to_multispectral("../datasets/coco8", n_channels=10)
+       >>> convert_to_multispectral("coco8", n_channels=10)
    """
    from scipy.interpolate import interp1d
 
ultralytics/data/dataset.py CHANGED
@@ -482,7 +482,7 @@ class GroundingDataset(YOLODataset):
            a warning is logged and verification is skipped.
        """
        expected_counts = {
-           "final_mixed_train_no_coco_segm": 3662344,
+           "final_mixed_train_no_coco_segm": 3662412,
            "final_mixed_train_no_coco": 3681235,
            "final_flickr_separateGT_train_segm": 638214,
            "final_flickr_separateGT_train": 640704,
ultralytics/data/split.py CHANGED
@@ -135,4 +135,4 @@ def autosplit(
 
 
 if __name__ == "__main__":
-    split_classify_dataset("../datasets/caltech101")
+    split_classify_dataset("caltech101")
ultralytics/engine/exporter.py CHANGED
@@ -706,7 +706,16 @@
     def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
         """Export YOLO model to PaddlePaddle format."""
         assert not IS_JETSON, "Jetson Paddle exports not supported yet"
-        check_requirements(("paddlepaddle-gpu" if torch.cuda.is_available() else "paddlepaddle>=3.0.0", "x2paddle"))
+        check_requirements(
+            (
+                "paddlepaddle-gpu"
+                if torch.cuda.is_available()
+                else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
+                if ARM64
+                else "paddlepaddle>=3.0.0",
+                "x2paddle",
+            )
+        )
         import x2paddle  # noqa
         from x2paddle.convert import pytorch2paddle  # noqa
 
@@ -1495,7 +1504,7 @@ class NMSModel(torch.nn.Module):
         scores, classes = scores.max(dim=-1)
         self.args.max_det = min(pred.shape[1], self.args.max_det)  # in case num_anchors < max_det
         # (N, max_det, 4 coords + 1 class score + 1 class label + extra_shape).
-        out = torch.zeros(bs, self.args.max_det, boxes.shape[-1] + 2 + extra_shape, **kwargs)
+        out = torch.zeros(pred.shape[0], self.args.max_det, boxes.shape[-1] + 2 + extra_shape, **kwargs)
         for i in range(bs):
             box, cls, score, extra = boxes[i], classes[i], scores[i], extras[i]
             mask = score > self.args.conf
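Both this exporter hunk and the matching autobackend.py hunk below use the same chained conditional; spelled out, it selects one of three pins (assuming the ARM64 constant from ultralytics.utils):

import torch
from ultralytics.utils import ARM64

if torch.cuda.is_available():
    pkg = "paddlepaddle-gpu"      # CUDA available: GPU wheel
elif ARM64:
    pkg = "paddlepaddle==3.0.0"   # ARM64: exact pin to 3.0.0
else:
    pkg = "paddlepaddle>=3.0.0"   # default: 3.0.0 or newer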
ultralytics/engine/model.py CHANGED
@@ -777,6 +777,8 @@ class Model(torch.nn.Module):
 
         checks.check_pip_update_available()
 
+        if isinstance(kwargs.get("pretrained", None), (str, Path)):
+            self.load(kwargs["pretrained"])  # load pretrained weights if provided
         overrides = YAML.load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
         custom = {
             # NOTE: handle the case when 'cfg' includes 'data'.
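The new check means train() accepts a weights path through the pretrained argument and loads it before training starts. A minimal sketch (model and dataset names are standard Ultralytics examples):

from ultralytics import YOLO

model = YOLO("yolo11n.yaml")  # architecture only, random weights
model.train(data="coco8.yaml", epochs=1, pretrained="yolo11n.pt")  # weights loaded via the new isinstance check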
ultralytics/engine/results.py CHANGED
@@ -16,7 +16,6 @@ import torch
 from ultralytics.data.augment import LetterBox
 from ultralytics.utils import LOGGER, DataExportMixin, SimpleClass, ops
 from ultralytics.utils.plotting import Annotator, colors, save_one_box
-from ultralytics.utils.torch_utils import smart_inference_mode
 
 
 class BaseTensor(SimpleClass):
@@ -801,7 +800,7 @@ class Results(SimpleClass, DataExportMixin):
             decimals (int): Number of decimal places to round the output values to.
 
         Returns:
-            (List[Dict]): A list of dictionaries, each containing summarized information for a single detection
+            (List[Dict[str, Any]]): A list of dictionaries, each containing summarized information for a single detection
                 or classification result. The structure of each dictionary varies based on the task type
                 (classification or detection) and available information (boxes, masks, keypoints).
 
@@ -1204,7 +1203,6 @@ class Keypoints(BaseTensor):
         >>> keypoints_cpu = keypoints.cpu()  # Move keypoints to CPU
     """
 
-    @smart_inference_mode()  # avoid keypoints < conf in-place error
     def __init__(self, keypoints: Union[torch.Tensor, np.ndarray], orig_shape: Tuple[int, int]) -> None:
         """
         Initialize the Keypoints object with detection keypoints and original image dimensions.
@@ -1225,9 +1223,6 @@
         """
         if keypoints.ndim == 2:
             keypoints = keypoints[None, :]
-        if keypoints.shape[2] == 3:  # x, y, conf
-            mask = keypoints[..., 2] < 0.5  # points with conf < 0.5 (not visible)
-            keypoints[..., :2][mask] = 0
         super().__init__(keypoints, orig_shape)
         self.has_visible = self.data.shape[-1] == 3
 
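Since Keypoints.__init__ no longer zeroes points with confidence below 0.5 (and the smart_inference_mode workaround for that in-place write is gone), callers that relied on the implicit masking can filter explicitly. A hedged sketch (the 0.5 threshold mirrors the removed code):

from ultralytics import YOLO

kpts = YOLO("yolo11n-pose.pt")("bus.jpg")[0].keypoints
xy = kpts.xy.clone()
if kpts.has_visible:
    xy[kpts.conf < 0.5] = 0  # reproduce the old zeroing explicitly, outside the Keypoints object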
ultralytics/models/yolo/model.py CHANGED
@@ -406,18 +406,18 @@
                 f"Expected equal number of bounding boxes and classes, but got {len(visual_prompts['bboxes'])} and "
                 f"{len(visual_prompts['cls'])} respectively"
             )
-            self.predictor = (predictor or self._smart_load("predictor"))(
-                overrides={
-                    "task": self.model.task,
-                    "mode": "predict",
-                    "save": False,
-                    "verbose": refer_image is None,
-                    "batch": 1,
-                },
-                _callbacks=self.callbacks,
-            )
+            if not isinstance(self.predictor, yolo.yoloe.YOLOEVPDetectPredictor):
+                self.predictor = (predictor or yolo.yoloe.YOLOEVPDetectPredictor)(
+                    overrides={
+                        "task": self.model.task,
+                        "mode": "predict",
+                        "save": False,
+                        "verbose": refer_image is None,
+                        "batch": 1,
+                    },
+                    _callbacks=self.callbacks,
+                )
 
-        if len(visual_prompts):
             num_cls = (
                 max(len(set(c)) for c in visual_prompts["cls"])
                 if isinstance(source, list) and refer_image is None  # means multiple images
@@ -426,18 +426,19 @@
             self.model.model[-1].nc = num_cls
             self.model.names = [f"object{i}" for i in range(num_cls)]
             self.predictor.set_prompts(visual_prompts.copy())
-
-        self.predictor.setup_model(model=self.model)
-
-        if refer_image is None and source is not None:
-            dataset = load_inference_source(source)
-            if dataset.mode in {"video", "stream"}:
-                # NOTE: set the first frame as refer image for videos/streams inference
-                refer_image = next(iter(dataset))[1][0]
-        if refer_image is not None and len(visual_prompts):
-            vpe = self.predictor.get_vpe(refer_image)
-            self.model.set_classes(self.model.names, vpe)
-            self.task = "segment" if isinstance(self.predictor, yolo.segment.SegmentationPredictor) else "detect"
-            self.predictor = None  # reset predictor
+            self.predictor.setup_model(model=self.model)
+
+            if refer_image is None and source is not None:
+                dataset = load_inference_source(source)
+                if dataset.mode in {"video", "stream"}:
+                    # NOTE: set the first frame as refer image for videos/streams inference
+                    refer_image = next(iter(dataset))[1][0]
+            if refer_image is not None:
+                vpe = self.predictor.get_vpe(refer_image)
+                self.model.set_classes(self.model.names, vpe)
+                self.task = "segment" if isinstance(self.predictor, yolo.segment.SegmentationPredictor) else "detect"
+                self.predictor = None  # reset predictor
+        elif isinstance(self.predictor, yolo.yoloe.YOLOEVPDetectPredictor):
+            self.predictor = None  # reset predictor if no visual prompts
 
         return super().predict(source, stream, **kwargs)
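Taken together, the two hunks keep a YOLOEVPDetectPredictor alive across repeated visual-prompt calls and only reset it when prompts are absent. A hedged usage sketch (checkpoint name and box coordinates are illustrative):

import numpy as np
from ultralytics import YOLOE

model = YOLOE("yoloe-11s-seg.pt")  # illustrative checkpoint name
prompts = {"bboxes": np.array([[100, 200, 300, 400]]), "cls": np.array([0])}
results = model.predict("bus.jpg", visual_prompts=prompts)
results = model.predict("zidane.jpg", visual_prompts=prompts)  # reuses the same VP predictor now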
ultralytics/models/yolo/world/train.py CHANGED
@@ -158,7 +158,7 @@ class WorldTrainer(DetectionTrainer):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = self.model.get_text_pe(texts, batch, cache_clip_model=False)
+        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
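Under multi-GPU (DDP) training, self.model is wrapped and custom methods like get_text_pe live on the inner .module; de_parallel unwraps it. A sketch of the idea (the real helper lives in ultralytics.utils.torch_utils); the same fix is applied in yoloe/train.py below:

import torch.nn as nn

def de_parallel(model: nn.Module) -> nn.Module:
    """Return the underlying module if model is wrapped by DataParallel/DistributedDataParallel."""
    return model.module if isinstance(model, (nn.DataParallel, nn.parallel.DistributedDataParallel)) else model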
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -35,12 +35,12 @@ class WorldTrainerFromScratch(WorldTrainer):
         ...     yolo_data=["Objects365.yaml"],
         ...     grounding_data=[
         ...         dict(
-        ...             img_path="../datasets/flickr30k/images",
-        ...             json_file="../datasets/flickr30k/final_flickr_separateGT_train.json",
+        ...             img_path="flickr30k/images",
+        ...             json_file="flickr30k/final_flickr_separateGT_train.json",
         ...         ),
         ...         dict(
-        ...             img_path="../datasets/GQA/images",
-        ...             json_file="../datasets/GQA/final_mixed_train_no_coco.json",
+        ...             img_path="GQA/images",
+        ...             json_file="GQA/final_mixed_train_no_coco.json",
         ...         ),
         ...     ],
         ... ),
@@ -70,8 +70,8 @@ class WorldTrainerFromScratch(WorldTrainer):
         ...     yolo_data=["Objects365.yaml"],
         ...     grounding_data=[
         ...         dict(
-        ...             img_path="../datasets/flickr30k/images",
-        ...             json_file="../datasets/flickr30k/final_flickr_separateGT_train.json",
+        ...             img_path="flickr30k/images",
+        ...             json_file="flickr30k/final_flickr_separateGT_train.json",
         ...         ),
         ...     ],
         ... ),
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -222,7 +222,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
+        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
ultralytics/nn/autobackend.py CHANGED
@@ -487,7 +487,13 @@ class AutoBackend(nn.Module):
         # PaddlePaddle
         elif paddle:
             LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
-            check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle>=3.0.0")
+            check_requirements(
+                "paddlepaddle-gpu"
+                if torch.cuda.is_available()
+                else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
+                if ARM64
+                else "paddlepaddle>=3.0.0"
+            )
             import paddle.inference as pdi  # noqa
 
             w = Path(w)
ultralytics/solutions/heatmap.py CHANGED
@@ -124,6 +124,6 @@ class Heatmap(ObjectCounter):
             plot_im=plot_im,
             in_count=self.in_count,
             out_count=self.out_count,
-            classwise_count=dict(self.classwise_counts),
+            classwise_count=dict(self.classwise_count),
             total_tracks=len(self.track_ids),
         )
ultralytics/solutions/object_counter.py CHANGED
@@ -43,7 +43,7 @@ class ObjectCounter(BaseSolution):
         self.in_count = 0  # Counter for objects moving inward
         self.out_count = 0  # Counter for objects moving outward
         self.counted_ids = []  # List of IDs of objects that have been counted
-        self.classwise_counts = defaultdict(lambda: {"IN": 0, "OUT": 0})  # Dictionary for counts, categorized by class
+        self.classwise_count = defaultdict(lambda: {"IN": 0, "OUT": 0})  # Dictionary for counts, categorized by class
         self.region_initialized = False  # Flag indicating whether the region has been initialized
 
         self.show_in = self.CFG["show_in"]
@@ -85,17 +85,17 @@
                 # Vertical region: Compare x-coordinates to determine direction
                 if current_centroid[0] > prev_position[0]:  # Moving right
                     self.in_count += 1
-                    self.classwise_counts[self.names[cls]]["IN"] += 1
+                    self.classwise_count[self.names[cls]]["IN"] += 1
                 else:  # Moving left
                     self.out_count += 1
-                    self.classwise_counts[self.names[cls]]["OUT"] += 1
+                    self.classwise_count[self.names[cls]]["OUT"] += 1
             # Horizontal region: Compare y-coordinates to determine direction
             elif current_centroid[1] > prev_position[1]:  # Moving downward
                 self.in_count += 1
-                self.classwise_counts[self.names[cls]]["IN"] += 1
+                self.classwise_count[self.names[cls]]["IN"] += 1
             else:  # Moving upward
                 self.out_count += 1
-                self.classwise_counts[self.names[cls]]["OUT"] += 1
+                self.classwise_count[self.names[cls]]["OUT"] += 1
             self.counted_ids.append(track_id)
 
         elif len(self.region) > 2:  # Polygonal region
@@ -111,10 +111,10 @@
                     and current_centroid[1] > prev_position[1]
                 ):  # Moving right or downward
                     self.in_count += 1
-                    self.classwise_counts[self.names[cls]]["IN"] += 1
+                    self.classwise_count[self.names[cls]]["IN"] += 1
                 else:  # Moving left or upward
                     self.out_count += 1
-                    self.classwise_counts[self.names[cls]]["OUT"] += 1
+                    self.classwise_count[self.names[cls]]["OUT"] += 1
                 self.counted_ids.append(track_id)
 
     def display_counts(self, plot_im) -> None:
@@ -132,7 +132,7 @@
         labels_dict = {
             str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
             f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
-            for key, value in self.classwise_counts.items()
+            for key, value in self.classwise_count.items()
             if value["IN"] != 0 or value["OUT"] != 0 and (self.show_in or self.show_out)
         }
         if labels_dict:
@@ -190,6 +190,6 @@
             plot_im=plot_im,
             in_count=self.in_count,
             out_count=self.out_count,
-            classwise_count=dict(self.classwise_counts),
+            classwise_count=dict(self.classwise_count),
             total_tracks=len(self.track_ids),
         )
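Note the attribute rename is user-visible: code that read counter.classwise_counts must switch to counter.classwise_count (the SolutionResults kwarg classwise_count is unchanged). A short sketch:

from ultralytics import solutions

counter = solutions.ObjectCounter(model="yolo11n.pt", region=[(20, 400), (1080, 400)])
# ... after processing frames:
print(counter.classwise_count)  # was counter.classwise_counts in 8.3.159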
ultralytics/solutions/similarity_search.py CHANGED
@@ -9,14 +9,14 @@ from PIL import Image
 
 from ultralytics.data.utils import IMG_FORMATS
 from ultralytics.nn.text_model import build_text_model
-from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.torch_utils import select_device
 
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # Avoid OpenMP conflict on some systems
 
 
-class VisualAISearch(BaseSolution):
+class VisualAISearch:
     """
     A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
     FAISS for fast similarity-based retrieval.
@@ -48,19 +48,18 @@ class VisualAISearch(BaseSolution):
 
     def __init__(self, **kwargs: Any) -> None:
         """Initialize the VisualAISearch class with FAISS index and CLIP model."""
-        super().__init__(**kwargs)
         check_requirements("faiss-cpu")
 
         self.faiss = __import__("faiss")
         self.faiss_index = "faiss.index"
         self.data_path_npy = "paths.npy"
-        self.data_dir = Path(self.CFG["data"])
-        self.device = select_device(self.CFG["device"])
+        self.data_dir = Path(kwargs.get("data", "images"))
+        self.device = select_device(kwargs.get("device", "cpu"))
 
         if not self.data_dir.exists():
             from ultralytics.utils import ASSETS_URL
 
-            self.LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
+            LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
             from ultralytics.utils.downloads import safe_download
 
             safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
@@ -91,13 +90,13 @@ class VisualAISearch(BaseSolution):
         """
         # Check if the FAISS index and corresponding image paths already exist
         if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
-            self.LOGGER.info("Loading existing FAISS index...")
+            LOGGER.info("Loading existing FAISS index...")
             self.index = self.faiss.read_index(self.faiss_index)  # Load the FAISS index from disk
             self.image_paths = np.load(self.data_path_npy)  # Load the saved image path list
             return  # Exit the function as the index is successfully loaded
 
         # If the index doesn't exist, start building it from scratch
-        self.LOGGER.info("Building FAISS index from images...")
+        LOGGER.info("Building FAISS index from images...")
         vectors = []  # List to store feature vectors of images
 
         # Iterate over all image files in the data directory
@@ -110,7 +109,7 @@
                 vectors.append(self.extract_image_feature(file))
                 self.image_paths.append(file.name)  # Store the corresponding image name
             except Exception as e:
-                self.LOGGER.warning(f"Skipping {file.name}: {e}")
+                LOGGER.warning(f"Skipping {file.name}: {e}")
 
         # If no vectors were successfully created, raise an error
         if not vectors:
@@ -124,7 +123,7 @@
         self.faiss.write_index(self.index, self.faiss_index)  # Save the newly built FAISS index to disk
         np.save(self.data_path_npy, np.array(self.image_paths))  # Save the list of image paths to disk
 
-        self.LOGGER.info(f"Indexed {len(self.image_paths)} images.")
+        LOGGER.info(f"Indexed {len(self.image_paths)} images.")
 
     def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> List[str]:
         """
@@ -152,9 +151,9 @@
         ]
         results.sort(key=lambda x: x[1], reverse=True)
 
-        self.LOGGER.info("\nRanked Results:")
+        LOGGER.info("\nRanked Results:")
         for name, score in results:
-            self.LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
+            LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
 
         return [r[0] for r in results]
 
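VisualAISearch is now a plain class: it no longer runs BaseSolution.__init__ (so no YOLO model load or shapely checks) and reads data/device straight from kwargs. A hedged sketch of the resulting usage (paths and query are illustrative):

from ultralytics.solutions.similarity_search import VisualAISearch

searcher = VisualAISearch(data="images", device="cpu")  # kwargs defaults match the new code
hits = searcher.search("a dog playing in a park", k=10)
print(hits[:3])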
ultralytics/solutions/solutions.py CHANGED
@@ -81,60 +81,59 @@ class BaseSolution:
         self.CFG = vars(SolutionConfig().update(**kwargs))
         self.LOGGER = LOGGER  # Store logger object to be used in multiple solution classes
 
-        if self.__class__.__name__ != "VisualAISearch":
-            check_requirements("shapely>=2.0.0")
-            from shapely.geometry import LineString, Point, Polygon
-            from shapely.prepared import prep
-
-            self.LineString = LineString
-            self.Polygon = Polygon
-            self.Point = Point
-            self.prep = prep
-            self.annotator = None  # Initialize annotator
-            self.tracks = None
-            self.track_data = None
-            self.boxes = []
-            self.clss = []
-            self.track_ids = []
-            self.track_line = None
-            self.masks = None
-            self.r_s = None
-            self.frame_no = -1  # Only for logging
-
-            self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
-            self.region = self.CFG["region"]  # Store region data for other classes usage
-            self.line_width = self.CFG["line_width"]
-
-            # Load Model and store additional information (classes, show_conf, show_label)
-            if self.CFG["model"] is None:
-                self.CFG["model"] = "yolo11n.pt"
-            self.model = YOLO(self.CFG["model"])
-            self.names = self.model.names
-            self.classes = self.CFG["classes"]
-            self.show_conf = self.CFG["show_conf"]
-            self.show_labels = self.CFG["show_labels"]
-            self.device = self.CFG["device"]
-
-            self.track_add_args = {  # Tracker additional arguments for advance configuration
-                k: self.CFG[k] for k in ["iou", "conf", "device", "max_det", "half", "tracker"]
-            }  # verbose must be passed to track method; setting it False in YOLO still logs the track information.
-
-            if is_cli and self.CFG["source"] is None:
-                d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
-                self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
-                from ultralytics.utils.downloads import safe_download
-
-                safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
-                self.CFG["source"] = d_s  # set default source
-
-            # Initialize environment and region setup
-            self.env_check = check_imshow(warn=True)
-            self.track_history = defaultdict(list)
-
-            self.profilers = (
-                ops.Profile(device=self.device),  # track
-                ops.Profile(device=self.device),  # solution
-            )
+        check_requirements("shapely>=2.0.0")
+        from shapely.geometry import LineString, Point, Polygon
+        from shapely.prepared import prep
+
+        self.LineString = LineString
+        self.Polygon = Polygon
+        self.Point = Point
+        self.prep = prep
+        self.annotator = None  # Initialize annotator
+        self.tracks = None
+        self.track_data = None
+        self.boxes = []
+        self.clss = []
+        self.track_ids = []
+        self.track_line = None
+        self.masks = None
+        self.r_s = None
+        self.frame_no = -1  # Only for logging
+
+        self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
+        self.region = self.CFG["region"]  # Store region data for other classes usage
+        self.line_width = self.CFG["line_width"]
+
+        # Load Model and store additional information (classes, show_conf, show_label)
+        if self.CFG["model"] is None:
+            self.CFG["model"] = "yolo11n.pt"
+        self.model = YOLO(self.CFG["model"])
+        self.names = self.model.names
+        self.classes = self.CFG["classes"]
+        self.show_conf = self.CFG["show_conf"]
+        self.show_labels = self.CFG["show_labels"]
+        self.device = self.CFG["device"]
+
+        self.track_add_args = {  # Tracker additional arguments for advance configuration
+            k: self.CFG[k] for k in ["iou", "conf", "device", "max_det", "half", "tracker"]
+        }  # verbose must be passed to track method; setting it False in YOLO still logs the track information.
+
+        if is_cli and self.CFG["source"] is None:
+            d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
+            self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
+            from ultralytics.utils.downloads import safe_download
+
+            safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
+            self.CFG["source"] = d_s  # set default source
+
+        # Initialize environment and region setup
+        self.env_check = check_imshow(warn=True)
+        self.track_history = defaultdict(list)
+
+        self.profilers = (
+            ops.Profile(device=self.device),  # track
+            ops.Profile(device=self.device),  # solution
+        )
 
     def adjust_box_label(self, cls: int, conf: float, track_id: Optional[int] = None) -> Optional[str]:
         """
@@ -808,10 +807,10 @@ class SolutionResults:
         filled_slots (int): The number of filled slots in a monitored area.
         email_sent (bool): A flag indicating whether an email notification was sent.
         total_tracks (int): The total number of tracked objects.
-        region_counts (Dict): The count of objects within a specific region.
+        region_counts (Dict[str, int]): The count of objects within a specific region.
         speed_dict (Dict[str, float]): A dictionary containing speed information for tracked objects.
         total_crop_objects (int): Total number of cropped objects using ObjectCropper class.
-        speed (Dict): Performance timing information for tracking and solution processing.
+        speed (Dict[str, float]): Performance timing information for tracking and solution processing.
     """
 
     def __init__(self, **kwargs):
ultralytics/utils/__init__.py CHANGED
@@ -255,11 +255,8 @@ class DataExportMixin:
         Notes:
             Requires `lxml` package to be installed.
         """
-        from ultralytics.utils.checks import check_requirements
-
-        check_requirements("lxml")
         df = self.to_df(normalize=normalize, decimals=decimals)
-        return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml()
+        return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml(parser="etree")
 
     def to_html(self, normalize=False, decimals=5, index=False):
         """
ultralytics/utils/instance.py CHANGED
@@ -406,6 +406,8 @@ class Instances:
                 | (self.keypoints[..., 1] < 0)
                 | (self.keypoints[..., 1] > h)
             ] = 0.0
+            self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w)
+            self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
 
     def remove_zero_area_boxes(self):
         """