dgenerate-ultralytics-headless 8.3.237-py3-none-any.whl → 8.3.240-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
Files changed (106)
  1. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.240.dist-info}/METADATA +2 -1
  2. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.240.dist-info}/RECORD +105 -106
  3. tests/test_exports.py +3 -1
  4. tests/test_python.py +2 -2
  5. tests/test_solutions.py +6 -6
  6. ultralytics/__init__.py +1 -1
  7. ultralytics/cfg/__init__.py +4 -4
  8. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  9. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  10. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  11. ultralytics/cfg/datasets/VOC.yaml +15 -16
  12. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  13. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  14. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  15. ultralytics/cfg/datasets/dota8.yaml +2 -2
  16. ultralytics/cfg/datasets/kitti.yaml +1 -1
  17. ultralytics/cfg/datasets/xView.yaml +16 -16
  18. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  19. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  20. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  21. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  22. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  23. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  24. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  25. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  26. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  27. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  28. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  29. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  30. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  31. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  32. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  33. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  34. ultralytics/data/augment.py +1 -1
  35. ultralytics/data/base.py +4 -2
  36. ultralytics/data/build.py +4 -4
  37. ultralytics/data/loaders.py +17 -12
  38. ultralytics/data/utils.py +4 -4
  39. ultralytics/engine/exporter.py +24 -16
  40. ultralytics/engine/predictor.py +5 -4
  41. ultralytics/engine/results.py +12 -13
  42. ultralytics/engine/trainer.py +2 -2
  43. ultralytics/engine/tuner.py +2 -3
  44. ultralytics/engine/validator.py +2 -2
  45. ultralytics/models/fastsam/model.py +2 -2
  46. ultralytics/models/fastsam/predict.py +2 -3
  47. ultralytics/models/fastsam/val.py +4 -4
  48. ultralytics/models/rtdetr/predict.py +2 -3
  49. ultralytics/models/rtdetr/val.py +5 -4
  50. ultralytics/models/sam/build.py +5 -5
  51. ultralytics/models/sam/build_sam3.py +9 -6
  52. ultralytics/models/sam/model.py +1 -1
  53. ultralytics/models/sam/modules/sam.py +10 -5
  54. ultralytics/models/sam/modules/utils.py +8 -3
  55. ultralytics/models/sam/predict.py +53 -62
  56. ultralytics/models/sam/sam3/encoder.py +4 -4
  57. ultralytics/models/sam/sam3/geometry_encoders.py +3 -3
  58. ultralytics/models/sam/sam3/necks.py +17 -17
  59. ultralytics/models/sam/sam3/sam3_image.py +3 -21
  60. ultralytics/models/sam/sam3/vl_combiner.py +1 -6
  61. ultralytics/models/yolo/classify/val.py +1 -1
  62. ultralytics/models/yolo/detect/train.py +1 -1
  63. ultralytics/models/yolo/detect/val.py +7 -7
  64. ultralytics/models/yolo/obb/val.py +1 -1
  65. ultralytics/models/yolo/pose/val.py +1 -1
  66. ultralytics/models/yolo/segment/val.py +1 -1
  67. ultralytics/nn/autobackend.py +9 -9
  68. ultralytics/nn/modules/block.py +1 -1
  69. ultralytics/nn/tasks.py +3 -3
  70. ultralytics/nn/text_model.py +2 -7
  71. ultralytics/solutions/ai_gym.py +1 -1
  72. ultralytics/solutions/analytics.py +6 -6
  73. ultralytics/solutions/config.py +1 -1
  74. ultralytics/solutions/distance_calculation.py +1 -1
  75. ultralytics/solutions/object_counter.py +1 -1
  76. ultralytics/solutions/object_cropper.py +3 -6
  77. ultralytics/solutions/parking_management.py +21 -17
  78. ultralytics/solutions/queue_management.py +5 -5
  79. ultralytics/solutions/region_counter.py +2 -2
  80. ultralytics/solutions/security_alarm.py +1 -1
  81. ultralytics/solutions/solutions.py +45 -22
  82. ultralytics/solutions/speed_estimation.py +1 -1
  83. ultralytics/trackers/basetrack.py +1 -1
  84. ultralytics/trackers/bot_sort.py +4 -3
  85. ultralytics/trackers/byte_tracker.py +4 -4
  86. ultralytics/trackers/utils/gmc.py +6 -7
  87. ultralytics/trackers/utils/kalman_filter.py +2 -1
  88. ultralytics/trackers/utils/matching.py +4 -3
  89. ultralytics/utils/__init__.py +12 -3
  90. ultralytics/utils/benchmarks.py +2 -2
  91. ultralytics/utils/callbacks/tensorboard.py +19 -25
  92. ultralytics/utils/checks.py +2 -1
  93. ultralytics/utils/downloads.py +1 -1
  94. ultralytics/utils/export/tensorflow.py +16 -2
  95. ultralytics/utils/files.py +13 -12
  96. ultralytics/utils/logger.py +62 -27
  97. ultralytics/utils/metrics.py +1 -1
  98. ultralytics/utils/ops.py +6 -6
  99. ultralytics/utils/patches.py +3 -3
  100. ultralytics/utils/plotting.py +18 -23
  101. ultralytics/utils/tuner.py +1 -1
  102. ultralytics/models/sam/sam3/tokenizer_ve.py +0 -242
  103. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.240.dist-info}/WHEEL +0 -0
  104. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.240.dist-info}/entry_points.txt +0 -0
  105. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.240.dist-info}/licenses/LICENSE +0 -0
  106. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.240.dist-info}/top_level.txt +0 -0
@@ -196,12 +196,7 @@ class MobileCLIP(TextModel):
196
196
  device (torch.device): Device to load the model on.
197
197
  """
198
198
  try:
199
- import warnings
200
-
201
- # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
202
- with warnings.catch_warnings():
203
- warnings.filterwarnings("ignore", category=FutureWarning)
204
- import mobileclip
199
+ import mobileclip
205
200
  except ImportError:
206
201
  # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
207
202
  checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
@@ -308,7 +303,7 @@ class MobileCLIPTS(TextModel):
308
303
  (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
309
304
 
310
305
  Examples:
311
- >>> model = MobileCLIPTS("cpu")
306
+ >>> model = MobileCLIPTS(device=torch.device("cpu"))
312
307
  >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
313
308
  >>> strict_tokens = model.tokenize(
314
309
  ... ["a very long caption"], truncate=False
@@ -13,7 +13,7 @@ class AIGym(BaseSolution):
13
13
  repetitions of exercises based on predefined angle thresholds for up and down positions.
14
14
 
15
15
  Attributes:
16
- states (dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
16
+ states (dict[int, dict[str, float | int | str]]): Per-track angle, rep count, and stage for workout monitoring.
17
17
  up_angle (float): Angle threshold for considering the 'up' position of an exercise.
18
18
  down_angle (float): Angle threshold for considering the 'down' position of an exercise.
19
19
  kpts (list[int]): Indices of keypoints used for angle calculation.
@@ -56,7 +56,7 @@ class Analytics(BaseSolution):
56
56
  from matplotlib.backends.backend_agg import FigureCanvasAgg
57
57
  from matplotlib.figure import Figure
58
58
 
59
- self.type = self.CFG["analytics_type"] # type of analytics i.e "line", "pie", "bar" or "area" charts.
59
+ self.type = self.CFG["analytics_type"] # Chart type: "line", "pie", "bar", or "area".
60
60
  self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#"
61
61
  self.y_label = "Total Counts"
62
62
 
@@ -66,10 +66,10 @@ class Analytics(BaseSolution):
66
66
  self.title = "Ultralytics Solutions" # window name
67
67
  self.max_points = 45 # maximum points to be drawn on window
68
68
  self.fontsize = 25 # text font size for display
69
- figsize = self.CFG["figsize"] # set output image size i.e (12.8, 7.2) -> w = 1280, h = 720
69
+ figsize = self.CFG["figsize"] # Output size, e.g. (12.8, 7.2) -> 1280x720.
70
70
  self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
71
71
 
72
- self.total_counts = 0 # count variable for storing total counts i.e. for line
72
+ self.total_counts = 0 # Stores total counts for line charts.
73
73
  self.clswise_count = {} # dictionary for class-wise counts
74
74
  self.update_every = kwargs.get("update_every", 30) # Only update graph every 30 frames by default
75
75
  self.last_plot_im = None # Cache of the last rendered chart
@@ -104,7 +104,7 @@ class Analytics(BaseSolution):
104
104
  and 'classwise_count' (dict, per-class object count).
105
105
 
106
106
  Raises:
107
- ModuleNotFoundError: If an unsupported chart type is specified.
107
+ ValueError: If an unsupported chart type is specified.
108
108
 
109
109
  Examples:
110
110
  >>> analytics = Analytics(analytics_type="line")
@@ -131,9 +131,9 @@ class Analytics(BaseSolution):
131
131
  )
132
132
  plot_im = self.last_plot_im
133
133
  else:
134
- raise ModuleNotFoundError(f"{self.type} chart is not supported ")
134
+ raise ValueError(f"Unsupported analytics_type='{self.type}'. Supported types: line, bar, pie, area.")
135
135
 
136
- # return output dictionary with summary for more usage
136
+ # Return results for downstream use.
137
137
  return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)
138
138
 
139
139
  def update_graph(
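With this hunk an unsupported chart type now raises a ValueError instead of a ModuleNotFoundError. A hedged usage sketch based on the docstring example shown above (the keyword arguments are illustrative):

    from ultralytics import solutions

    analytics = solutions.Analytics(analytics_type="line", show=False)  # supported: "line", "bar", "pie", "area"
    # An unsupported value such as analytics_type="scatter" would now surface as a ValueError during processing.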
@@ -35,7 +35,7 @@ class SolutionConfig:
35
35
  vision_point (tuple[int, int]): Reference point for directional tracking or perspective drawing.
36
36
  crop_dir (str): Directory path to save cropped detection images.
37
37
  json_file (str): Path to a JSON file containing data for parking areas.
38
- line_width (int): Width for visual display i.e. bounding boxes, keypoints, counts.
38
+ line_width (int): Width for visual display, e.g. bounding boxes, keypoints, and counts.
39
39
  records (int): Number of detection records to send email alerts.
40
40
  fps (float): Frame rate (Frames Per Second) for speed estimation calculation.
41
41
  max_hist (int): Maximum number of historical points or states stored per tracked object for speed estimation.
@@ -17,7 +17,7 @@ class DistanceCalculation(BaseSolution):
17
17
 
18
18
  Attributes:
19
19
  left_mouse_count (int): Counter for left mouse button clicks.
20
- selected_boxes (dict[int, list[float]]): Dictionary to store selected bounding boxes and their track IDs.
20
+ selected_boxes (dict[int, Any]): Dictionary to store selected bounding boxes keyed by track ID.
21
21
  centroids (list[list[int]]): List to store centroids of selected bounding boxes.
22
22
 
23
23
  Methods:
@@ -19,7 +19,7 @@ class ObjectCounter(BaseSolution):
19
19
  in_count (int): Counter for objects moving inward.
20
20
  out_count (int): Counter for objects moving outward.
21
21
  counted_ids (list[int]): List of IDs of objects that have been counted.
22
- classwise_counts (dict[str, dict[str, int]]): Dictionary for counts, categorized by object class.
22
+ classwise_count (dict[str, dict[str, int]]): Dictionary for counts, categorized by object class.
23
23
  region_initialized (bool): Flag indicating whether the counting region has been initialized.
24
24
  show_in (bool): Flag to control display of inward count.
25
25
  show_out (bool): Flag to control display of outward count.
@@ -1,6 +1,5 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
- import os
4
3
  from pathlib import Path
5
4
  from typing import Any
6
5
 
@@ -40,12 +39,10 @@ class ObjectCropper(BaseSolution):
40
39
  super().__init__(**kwargs)
41
40
 
42
41
  self.crop_dir = self.CFG["crop_dir"] # Directory for storing cropped detections
43
- if not os.path.exists(self.crop_dir):
44
- os.mkdir(self.crop_dir) # Create directory if it does not exist
42
+ Path(self.crop_dir).mkdir(parents=True, exist_ok=True)
45
43
  if self.CFG["show"]:
46
- self.LOGGER.warning(
47
- f"show=True disabled for crop solution, results will be saved in the directory named: {self.crop_dir}"
48
- )
44
+ self.LOGGER.warning(f"show=True is not supported for ObjectCropper; saving crops to '{self.crop_dir}'.")
45
+ self.CFG["show"] = False
49
46
  self.crop_idx = 0 # Initialize counter for total cropped objects
50
47
  self.iou = self.CFG["iou"]
51
48
  self.conf = self.CFG["conf"]
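For context, replacing os.mkdir with Path.mkdir(parents=True, exist_ok=True) also creates missing parent directories and tolerates re-runs. A minimal sketch of the difference (the directory name is illustrative):

    from pathlib import Path

    # os.mkdir("runs/crops") raises FileNotFoundError if "runs" is missing and FileExistsError on a second run.
    Path("runs/crops").mkdir(parents=True, exist_ok=True)  # creates the full tree; safe to call repeatedly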
@@ -92,7 +92,7 @@ class ParkingPtsSelection:
92
92
 
93
93
  for text, cmd in [
94
94
  ("Upload Image", self.upload_image),
95
- ("Remove Last BBox", self.remove_last_bounding_box),
95
+ ("Remove Last Bounding Box", self.remove_last_bounding_box),
96
96
  ("Save", self.save_to_json),
97
97
  ]:
98
98
  self.tk.Button(button_frame, text=text, command=cmd).pack(side=self.tk.LEFT)
@@ -186,9 +186,9 @@ class ParkingManagement(BaseSolution):
186
186
  json_file (str): Path to the JSON file containing parking region details.
187
187
  json (list[dict]): Loaded JSON data containing parking region information.
188
188
  pr_info (dict[str, int]): Dictionary storing parking information (Occupancy and Available spaces).
189
- arc (tuple[int, int, int]): RGB color tuple for available region visualization.
190
- occ (tuple[int, int, int]): RGB color tuple for occupied region visualization.
191
- dc (tuple[int, int, int]): RGB color tuple for centroid visualization of detected objects.
189
+ arc (tuple[int, int, int]): BGR color tuple for available region visualization.
190
+ occ (tuple[int, int, int]): BGR color tuple for occupied region visualization.
191
+ dc (tuple[int, int, int]): BGR color tuple for centroid visualization of detected objects.
192
192
 
193
193
  Methods:
194
194
  process: Process the input image for parking lot management and visualization.
@@ -205,11 +205,11 @@ class ParkingManagement(BaseSolution):
205
205
  super().__init__(**kwargs)
206
206
 
207
207
  self.json_file = self.CFG["json_file"] # Load parking regions JSON data
208
- if self.json_file is None:
209
- LOGGER.warning("json_file argument missing. Parking region details required.")
210
- raise ValueError("❌ Json file path can not be empty")
208
+ if not self.json_file:
209
+ LOGGER.warning("ParkingManagement requires `json_file` with parking region coordinates.")
210
+ raise ValueError("❌ JSON file path cannot be empty.")
211
211
 
212
- with open(self.json_file) as f:
212
+ with open(self.json_file, encoding="utf-8") as f:
213
213
  self.json = json.load(f)
214
214
 
215
215
  self.pr_info = {"Occupancy": 0, "Available": 0} # Dictionary for parking information
@@ -239,28 +239,32 @@ class ParkingManagement(BaseSolution):
239
239
  >>> results = parking_manager.process(image)
240
240
  """
241
241
  self.extract_tracks(im0) # Extract tracks from im0
242
- es, fs = len(self.json), 0 # Empty slots, filled slots
242
+ available_slots, occupied_slots = len(self.json), 0
243
243
  annotator = SolutionAnnotator(im0, self.line_width) # Initialize annotator
244
244
 
245
245
  for region in self.json:
246
246
  # Convert points to a NumPy array with the correct dtype and reshape properly
247
- pts_array = np.array(region["points"], dtype=np.int32).reshape((-1, 1, 2))
248
- rg_occupied = False # Occupied region initialization
247
+ region_polygon = np.array(region["points"], dtype=np.int32).reshape((-1, 1, 2))
248
+ region_occupied = False
249
249
  for box, cls in zip(self.boxes, self.clss):
250
250
  xc, yc = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
251
- dist = cv2.pointPolygonTest(pts_array, (xc, yc), False)
252
- if dist >= 0:
251
+ inside_distance = cv2.pointPolygonTest(region_polygon, (xc, yc), False)
252
+ if inside_distance >= 0:
253
253
  # cv2.circle(im0, (xc, yc), radius=self.line_width * 4, color=self.dc, thickness=-1)
254
254
  annotator.display_objects_labels(
255
255
  im0, self.model.names[int(cls)], (104, 31, 17), (255, 255, 255), xc, yc, 10
256
256
  )
257
- rg_occupied = True
257
+ region_occupied = True
258
258
  break
259
- fs, es = (fs + 1, es - 1) if rg_occupied else (fs, es)
259
+ if region_occupied:
260
+ occupied_slots += 1
261
+ available_slots -= 1
260
262
  # Plot regions
261
- cv2.polylines(im0, [pts_array], isClosed=True, color=self.occ if rg_occupied else self.arc, thickness=2)
263
+ cv2.polylines(
264
+ im0, [region_polygon], isClosed=True, color=self.occ if region_occupied else self.arc, thickness=2
265
+ )
262
266
 
263
- self.pr_info["Occupancy"], self.pr_info["Available"] = fs, es
267
+ self.pr_info["Occupancy"], self.pr_info["Available"] = occupied_slots, available_slots
264
268
 
265
269
  annotator.display_analytics(im0, self.pr_info, (104, 31, 17), (255, 255, 255), 10)
266
270
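The occupancy test in this hunk relies on cv2.pointPolygonTest, which returns a non-negative value when a point lies inside or on the polygon. A minimal standalone sketch (polygon and box values are illustrative):

    import cv2
    import numpy as np

    region_polygon = np.array([[10, 10], [100, 10], [100, 100], [10, 100]], dtype=np.int32).reshape((-1, 1, 2))
    box = (40, 40, 80, 90)  # x1, y1, x2, y2
    xc, yc = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
    occupied = cv2.pointPolygonTest(region_polygon, (xc, yc), False) >= 0  # False -> +1/0/-1 instead of distance
    print(occupied)  # True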
 
@@ -14,7 +14,7 @@ class QueueManager(BaseSolution):
14
14
 
15
15
  Attributes:
16
16
  counts (int): The current count of objects in the queue.
17
- rect_color (tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
17
+ rect_color (tuple[int, int, int]): BGR color tuple for drawing the queue region rectangle.
18
18
  region_length (int): The number of points defining the queue region.
19
19
  track_line (list[tuple[int, int]]): List of track line coordinates.
20
20
  track_history (dict[int, list[tuple[int, int]]]): Dictionary storing tracking history for each object.
@@ -30,10 +30,10 @@ class QueueManager(BaseSolution):
30
30
  >>> cap = cv2.VideoCapture("path/to/video.mp4")
31
31
  >>> queue_manager = QueueManager(region=[100, 100, 200, 200, 300, 300])
32
32
  >>> while cap.isOpened():
33
- >>> success, im0 = cap.read()
34
- >>> if not success:
35
- >>> break
36
- >>> results = queue_manager.process(im0)
33
+ ... success, im0 = cap.read()
34
+ ... if not success:
35
+ ... break
36
+ ... results = queue_manager.process(im0)
37
37
  """
38
38
 
39
39
  def __init__(self, **kwargs: Any) -> None:
@@ -67,7 +67,7 @@ class RegionCounter(BaseSolution):
67
67
  text_color (tuple[int, int, int]): BGR color for the text within the region.
68
68
 
69
69
  Returns:
70
- (dict[str, any]): Returns a dictionary including the region information i.e. name, region_color etc.
70
+ (dict[str, Any]): Region information including name, polygon, and display colors.
71
71
  """
72
72
  region = self.region_template.copy()
73
73
  region.update(
@@ -82,7 +82,7 @@ class RegionCounter(BaseSolution):
82
82
  return region
83
83
 
84
84
  def initialize_regions(self):
85
- """Initialize regions only once."""
85
+ """Initialize regions from `self.region` only once."""
86
86
  if self.region is None:
87
87
  self.initialize_region()
88
88
  if not isinstance(self.region, dict): # Ensure self.region is initialized and structured as a dictionary
@@ -98,7 +98,7 @@ class SecurityAlarm(BaseSolution):
98
98
  message["Subject"] = "Security Alert"
99
99
 
100
100
  # Add the text message body
101
- message_body = f"Ultralytics ALERT!!! {records} objects have been detected!!"
101
+ message_body = f"Ultralytics alert: {records} object(s) detected."
102
102
  message.attach(MIMEText(message_body))
103
103
 
104
104
  # Attach the image
@@ -177,7 +177,7 @@ class BaseSolution:
177
177
  self.track_ids = self.track_data.id.int().cpu().tolist()
178
178
  self.confs = self.track_data.conf.cpu().tolist()
179
179
  else:
180
- self.LOGGER.warning("no tracks found!")
180
+ self.LOGGER.warning("No tracks found.")
181
181
  self.boxes, self.clss, self.track_ids, self.confs = [], [], [], []
182
182
 
183
183
  def store_tracking_history(self, track_id: int, box) -> None:
@@ -271,7 +271,7 @@ class SolutionAnnotator(Annotator):
271
271
  font_size (int): Size of the font used for text annotations.
272
272
  font (str): Path to the font file used for text rendering.
273
273
  pil (bool): Whether to use PIL for text rendering.
274
- example (str): An example attribute for demonstration purposes.
274
+ example (str): Example text used to detect non-ASCII labels for PIL rendering.
275
275
 
276
276
  Methods:
277
277
  draw_region: Draw a region using specified points, colors, and thickness.
@@ -312,7 +312,7 @@ class SolutionAnnotator(Annotator):
312
312
  font_size (int, optional): Font size for text annotations.
313
313
  font (str): Path to the font file.
314
314
  pil (bool): Indicates whether to use PIL for rendering text.
315
- example (str): An example parameter for demonstration purposes.
315
+ example (str): Example text used to detect non-ASCII labels for PIL rendering.
316
316
  """
317
317
  super().__init__(im, line_width, font_size, font, pil, example)
318
318
 
@@ -326,7 +326,7 @@ class SolutionAnnotator(Annotator):
326
326
 
327
327
  Args:
328
328
  reg_pts (list[tuple[int, int]], optional): Region points (for line 2 points, for region 4+ points).
329
- color (tuple[int, int, int]): RGB color value for the region.
329
+ color (tuple[int, int, int]): BGR color value for the region (OpenCV format).
330
330
  thickness (int): Line thickness for drawing the region.
331
331
  """
332
332
  cv2.polylines(self.im, [np.array(reg_pts, dtype=np.int32)], isClosed=True, color=color, thickness=thickness)
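Several docstring fixes in this release change the documented color convention from RGB to BGR, matching OpenCV's channel order. A small sketch of the practical difference (the image and polygon are illustrative):

    import cv2
    import numpy as np

    im = np.zeros((100, 100, 3), dtype=np.uint8)
    red_bgr = (0, 0, 255)  # red in OpenCV's BGR order; (255, 0, 0) would draw blue instead
    pts = np.array([[10, 10], [90, 10], [90, 90], [10, 90]], dtype=np.int32)
    cv2.polylines(im, [pts], isClosed=True, color=red_bgr, thickness=2)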
@@ -347,8 +347,8 @@ class SolutionAnnotator(Annotator):
347
347
  Args:
348
348
  label (str): Queue counts label.
349
349
  points (list[tuple[int, int]], optional): Region points for center point calculation to display text.
350
- region_color (tuple[int, int, int]): RGB queue region color.
351
- txt_color (tuple[int, int, int]): RGB text display color.
350
+ region_color (tuple[int, int, int]): BGR queue region color (OpenCV format).
351
+ txt_color (tuple[int, int, int]): BGR text color (OpenCV format).
352
352
  """
353
353
  x_values = [point[0] for point in points]
354
354
  y_values = [point[1] for point in points]
@@ -388,13 +388,13 @@ class SolutionAnnotator(Annotator):
388
388
  bg_color: tuple[int, int, int],
389
389
  margin: int,
390
390
  ):
391
- """Display the overall statistics for parking lots, object counter etc.
391
+ """Display overall statistics for Solutions (e.g., parking management and object counting).
392
392
 
393
393
  Args:
394
394
  im0 (np.ndarray): Inference image.
395
395
  text (dict[str, Any]): Labels dictionary.
396
- txt_color (tuple[int, int, int]): Display color for text foreground.
397
- bg_color (tuple[int, int, int]): Display color for text background.
396
+ txt_color (tuple[int, int, int]): Text color (BGR, OpenCV format).
397
+ bg_color (tuple[int, int, int]): Background color (BGR, OpenCV format).
398
398
  margin (int): Gap between text and rectangle for better display.
399
399
  """
400
400
  horizontal_gap = int(im0.shape[1] * 0.02)
@@ -415,22 +415,45 @@ class SolutionAnnotator(Annotator):
415
415
  cv2.putText(im0, txt, (text_x, text_y), 0, self.sf, txt_color, self.tf, lineType=cv2.LINE_AA)
416
416
  text_y_offset = rect_y2
417
417
 
418
+ @staticmethod
419
+ def _point_xy(point: Any) -> tuple[float, float]:
420
+ """Convert a keypoint-like object to an (x, y) tuple of floats."""
421
+ if hasattr(point, "detach"): # torch.Tensor
422
+ point = point.detach()
423
+ if hasattr(point, "cpu"): # torch.Tensor
424
+ point = point.cpu()
425
+ if hasattr(point, "numpy"): # torch.Tensor
426
+ point = point.numpy()
427
+ if hasattr(point, "tolist"): # numpy / torch
428
+ point = point.tolist()
429
+ return float(point[0]), float(point[1])
430
+
418
431
  @staticmethod
419
432
  @lru_cache(maxsize=256)
420
- def estimate_pose_angle(a: list[float], b: list[float], c: list[float]) -> float:
433
+ def _estimate_pose_angle_cached(a: tuple[float, float], b: tuple[float, float], c: tuple[float, float]) -> float:
434
+ """Calculate the angle between three points for workout monitoring (cached)."""
435
+ radians = math.atan2(c[1] - b[1], c[0] - b[0]) - math.atan2(a[1] - b[1], a[0] - b[0])
436
+ angle = abs(radians * 180.0 / math.pi)
437
+ return angle if angle <= 180.0 else (360 - angle)
438
+
439
+ @staticmethod
440
+ def estimate_pose_angle(a: Any, b: Any, c: Any) -> float:
421
441
  """Calculate the angle between three points for workout monitoring.
422
442
 
423
443
  Args:
424
- a (list[float]): The coordinates of the first point.
425
- b (list[float]): The coordinates of the second point (vertex).
426
- c (list[float]): The coordinates of the third point.
444
+ a (Any): The coordinates of the first point (e.g. list/tuple/NumPy array/torch tensor).
445
+ b (Any): The coordinates of the second point (vertex).
446
+ c (Any): The coordinates of the third point.
427
447
 
428
448
  Returns:
429
449
  (float): The angle in degrees between the three points.
430
450
  """
431
- radians = math.atan2(c[1] - b[1], c[0] - b[0]) - math.atan2(a[1] - b[1], a[0] - b[0])
432
- angle = abs(radians * 180.0 / math.pi)
433
- return angle if angle <= 180.0 else (360 - angle)
451
+ a_xy, b_xy, c_xy = (
452
+ SolutionAnnotator._point_xy(a),
453
+ SolutionAnnotator._point_xy(b),
454
+ SolutionAnnotator._point_xy(c),
455
+ )
456
+ return SolutionAnnotator._estimate_pose_angle_cached(a_xy, b_xy, c_xy)
434
457
 
435
458
  def draw_specific_kpts(
436
459
  self,
@@ -543,7 +566,7 @@ class SolutionAnnotator(Annotator):
543
566
  """Plot the distance and line between two centroids on the frame.
544
567
 
545
568
  Args:
546
- pixels_distance (float): Pixels distance between two bbox centroids.
569
+ pixels_distance (float): Pixel distance between two bounding-box centroids.
547
570
  centroids (list[tuple[int, int]]): Bounding box centroids data.
548
571
  line_color (tuple[int, int, int]): Distance line color.
549
572
  centroid_color (tuple[int, int, int]): Bounding box centroid color.
@@ -634,8 +657,8 @@ class SolutionAnnotator(Annotator):
634
657
  line_x (int): The x-coordinate of the sweep line.
635
658
  line_y (int): The y-coordinate limit of the sweep line.
636
659
  label (str, optional): Text label to be drawn in center of sweep line. If None, no label is drawn.
637
- color (tuple[int, int, int]): RGB color for the line and label background.
638
- txt_color (tuple[int, int, int]): RGB color for the label text.
660
+ color (tuple[int, int, int]): BGR color for the line and label background (OpenCV format).
661
+ txt_color (tuple[int, int, int]): BGR color for the label text (OpenCV format).
639
662
  """
640
663
  # Draw the sweep line
641
664
  cv2.line(self.im, (line_x, 0), (line_x, line_y), color, self.tf * 2)
@@ -695,15 +718,15 @@ class SolutionAnnotator(Annotator):
695
718
  box (tuple[float, float, float, float]): The bounding box coordinates (x1, y1, x2, y2).
696
719
  label (str): The text label to be displayed.
697
720
  color (tuple[int, int, int]): The background color of the rectangle (B, G, R).
698
- txt_color (tuple[int, int, int]): The color of the text (R, G, B).
699
- shape (str): The shape of the label i.e "circle" or "rect"
721
+ txt_color (tuple[int, int, int]): The color of the text (B, G, R).
722
+ shape (str): Label shape. Options: "circle" or "rect".
700
723
  margin (int): The margin between the text and the rectangle border.
701
724
  """
702
725
  if shape == "circle" and len(label) > 3:
703
726
  LOGGER.warning(f"Length of label is {len(label)}, only first 3 letters will be used for circle annotation.")
704
727
  label = label[:3]
705
728
 
706
- x_center, y_center = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2) # Calculate center of the bbox
729
+ x_center, y_center = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2) # Bounding-box center
707
730
  text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf - 0.15, self.tf)[0] # Get size of the text
708
731
  text_x, text_y = x_center - text_size[0] // 2, y_center + text_size[1] // 2 # Calculate top-left corner of text
709
732
 
@@ -62,7 +62,7 @@ class SpeedEstimator(BaseSolution):
62
62
  """Process an input frame to estimate object speeds based on tracking data.
63
63
 
64
64
  Args:
65
- im0 (np.ndarray): Input image for processing with shape (H, W, C) for RGB images.
65
+ im0 (np.ndarray): Input image for processing with shape (H, W, C) in OpenCV BGR format.
66
66
 
67
67
  Returns:
68
68
  (SolutionResults): Contains processed image `plot_im` and `total_tracks` (number of tracked objects).
@@ -19,7 +19,7 @@ class TrackState:
19
19
  Examples:
20
20
  >>> state = TrackState.New
21
21
  >>> if state == TrackState.New:
22
- >>> print("Object is newly detected.")
22
+ ... print("Object is newly detected.")
23
23
  """
24
24
 
25
25
  New = 0
@@ -45,9 +45,9 @@ class BOTrack(STrack):
45
45
 
46
46
  Examples:
47
47
  Create a BOTrack instance and update its features
48
- >>> bo_track = BOTrack(tlwh=[100, 50, 80, 40], score=0.9, cls=1, feat=np.random.rand(128))
48
+ >>> bo_track = BOTrack(xywh=np.array([100, 50, 80, 40, 0]), score=0.9, cls=1, feat=np.random.rand(128))
49
49
  >>> bo_track.predict()
50
- >>> new_track = BOTrack(tlwh=[110, 60, 80, 40], score=0.85, cls=1, feat=np.random.rand(128))
50
+ >>> new_track = BOTrack(xywh=np.array([110, 60, 80, 40, 0]), score=0.85, cls=1, feat=np.random.rand(128))
51
51
  >>> bo_track.update(new_track, frame_id=2)
52
52
  """
53
53
 
@@ -59,7 +59,8 @@ class BOTrack(STrack):
59
59
  """Initialize a BOTrack object with temporal parameters, such as feature history, alpha, and current features.
60
60
 
61
61
  Args:
62
- xywh (np.ndarray): Bounding box coordinates in xywh format (center x, center y, width, height).
62
+ xywh (np.ndarray): Bounding box in `(x, y, w, h, idx)` or `(x, y, w, h, angle, idx)` format, where (x, y) is
63
+ the center, (w, h) are width and height, and `idx` is the detection index.
63
64
  score (float): Confidence score of the detection.
64
65
  cls (int): Class ID of the detected object.
65
66
  feat (np.ndarray, optional): Feature vector associated with the detection.
@@ -56,8 +56,8 @@ class STrack(BaseTrack):
56
56
  """Initialize a new STrack instance.
57
57
 
58
58
  Args:
59
- xywh (list[float]): Bounding box coordinates and dimensions in the format (x, y, w, h, [a], idx), where (x,
60
- y) is the center, (w, h) are width and height, [a] is optional aspect ratio, and idx is the id.
59
+ xywh (list[float]): Bounding box in `(x, y, w, h, idx)` or `(x, y, w, h, angle, idx)` format, where (x, y)
60
+ is the center, (w, h) are width and height, and `idx` is the detection index.
61
61
  score (float): Confidence score of the detection.
62
62
  cls (Any): Class label for the detected object.
63
63
  """
@@ -338,7 +338,7 @@ class BYTETracker:
338
338
  # Step 3: Second association, with low score detection boxes association the untrack to the low score detections
339
339
  detections_second = self.init_track(results_second, feats_second)
340
340
  r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
341
- # TODO
341
+ # TODO: consider fusing scores or appearance features for second association.
342
342
  dists = matching.iou_distance(r_tracked_stracks, detections_second)
343
343
  matches, u_track, _u_detection_second = matching.linear_assignment(dists, thresh=0.5)
344
344
  for itracked, idet in matches:
@@ -389,7 +389,7 @@ class BYTETracker:
389
389
  self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
390
390
  self.removed_stracks.extend(removed_stracks)
391
391
  if len(self.removed_stracks) > 1000:
392
- self.removed_stracks = self.removed_stracks[-999:] # clip remove stracks to 1000 maximum
392
+ self.removed_stracks = self.removed_stracks[-1000:] # clip removed stracks to 1000 maximum
393
393
 
394
394
  return np.asarray([x.result for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
395
395
 
@@ -34,11 +34,10 @@ class GMC:
34
34
  Examples:
35
35
  Create a GMC object and apply it to a frame
36
36
  >>> gmc = GMC(method="sparseOptFlow", downscale=2)
37
- >>> frame = np.array([[1, 2, 3], [4, 5, 6]])
38
- >>> processed_frame = gmc.apply(frame)
39
- >>> print(processed_frame)
40
- array([[1, 2, 3],
41
- [4, 5, 6]])
37
+ >>> frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
38
+ >>> warp = gmc.apply(frame)
39
+ >>> print(warp.shape)
40
+ (2, 3)
42
41
  """
43
42
 
44
43
  def __init__(self, method: str = "sparseOptFlow", downscale: int = 2) -> None:
@@ -85,7 +84,7 @@ class GMC:
85
84
  self.initializedFirstFrame = False
86
85
 
87
86
  def apply(self, raw_frame: np.ndarray, detections: list | None = None) -> np.ndarray:
88
- """Apply object detection on a raw frame using the specified method.
87
+ """Estimate a 2×3 motion compensation warp for a frame.
89
88
 
90
89
  Args:
91
90
  raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
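The clarified docstring notes that apply returns a 2x3 affine warp rather than a processed frame. A sketch of how such a warp is typically applied to box centers for motion compensation (the warp and points are illustrative):

    import numpy as np

    H = np.array([[1.0, 0.0, 5.0],
                  [0.0, 1.0, -3.0]])  # 2x3 affine warp, e.g. as returned by GMC.apply
    pts = np.array([[100.0, 200.0], [150.0, 250.0]])  # x, y centers to compensate

    pts_h = np.hstack([pts, np.ones((len(pts), 1))])  # homogeneous coordinates
    warped = pts_h @ H.T  # shape (N, 2): each point shifted by (+5, -3)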
@@ -145,7 +144,7 @@ class GMC:
145
144
  try:
146
145
  (_, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
147
146
  except Exception as e:
148
- LOGGER.warning(f"find transform failed. Set warp as identity {e}")
147
+ LOGGER.warning(f"findTransformECC failed; using identity warp. {e}")
149
148
 
150
149
  return H
151
150
 
@@ -167,9 +167,10 @@ class KalmanFilterXYAH:
167
167
  covariance (np.ndarray): Covariance matrix of the predicted states with shape (N, 8, 8).
168
168
 
169
169
  Examples:
170
+ >>> kf = KalmanFilterXYAH()
170
171
  >>> mean = np.random.rand(10, 8) # 10 object states
171
172
  >>> covariance = np.random.rand(10, 8, 8) # Covariance matrices for 10 object states
172
- >>> predicted_mean, predicted_covariance = kalman_filter.multi_predict(mean, covariance)
173
+ >>> predicted_mean, predicted_covariance = kf.multi_predict(mean, covariance)
173
174
  """
174
175
  std_pos = [
175
176
  self._std_weight_position * mean[:, 3],
@@ -26,9 +26,10 @@ def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = Tr
26
26
  use_lap (bool): Use lap.lapjv for the assignment. If False, scipy.optimize.linear_sum_assignment is used.
27
27
 
28
28
  Returns:
29
- matched_indices (np.ndarray): Array of matched indices of shape (K, 2), where K is the number of matches.
30
- unmatched_a (np.ndarray): Array of unmatched indices from the first set, with shape (L,).
31
- unmatched_b (np.ndarray): Array of unmatched indices from the second set, with shape (M,).
29
+ matched_indices (list[list[int]] | np.ndarray): Matched indices of shape (K, 2), where K is the number of
30
+ matches.
31
+ unmatched_a (np.ndarray): Unmatched indices from the first set, with shape (L,).
32
+ unmatched_b (np.ndarray): Unmatched indices from the second set, with shape (M,).
32
33
 
33
34
  Examples:
34
35
  >>> cost_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
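For reference, the SciPy fallback mentioned above pairs rows with columns by minimizing total cost, after which pairs above the distance threshold are dropped. A minimal sketch (cost values are illustrative):

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    cost = np.array([[0.2, 0.9, 0.8], [0.7, 0.1, 0.9]])
    rows, cols = linear_sum_assignment(cost)  # optimal pairing by total cost
    matches = [(int(r), int(c)) for r, c in zip(rows, cols) if cost[r, c] <= 0.5]
    print(matches)  # [(0, 0), (1, 1)]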
@@ -14,6 +14,7 @@ import socket
14
14
  import sys
15
15
  import threading
16
16
  import time
17
+ import warnings
17
18
  from functools import lru_cache
18
19
  from pathlib import Path
19
20
  from threading import Lock
@@ -132,6 +133,14 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # suppress verbose TF compiler warning
132
133
  os.environ["TORCH_CPP_LOG_LEVEL"] = "ERROR" # suppress "NNPACK.cpp could not initialize NNPACK" warnings
133
134
  os.environ["KINETO_LOG_LEVEL"] = "5" # suppress verbose PyTorch profiler output when computing FLOPs
134
135
 
136
+ # Centralized warning suppression
137
+ warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated") # PyTorch deprecation
138
+ warnings.filterwarnings("ignore", message="The figure layout has changed to tight") # matplotlib>=3.7.2
139
+ warnings.filterwarnings("ignore", category=FutureWarning, module="timm") # mobileclip timm.layers deprecation
140
+ warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # ONNX/TorchScript export tracer warnings
141
+ warnings.filterwarnings("ignore", category=UserWarning, message=".*prim::Constant.*") # ONNX shape warning
142
+ warnings.filterwarnings("ignore", category=DeprecationWarning, module="coremltools") # CoreML np.bool deprecation
143
+
135
144
  # Precompiled type tuples for faster isinstance() checks
136
145
  FLOAT_OR_INT = (float, int)
137
146
  STR_OR_PATH = (str, Path)
@@ -142,7 +151,7 @@ class DataExportMixin:
142
151
 
143
152
  This class provides utilities to export performance metrics (e.g., mAP, precision, recall) or prediction results
144
153
  from classification, object detection, segmentation, or pose estimation tasks into various formats: Polars
145
- DataFrame, CSV and JSON.
154
+ DataFrame, CSV, and JSON.
146
155
 
147
156
  Methods:
148
157
  to_df: Convert summary to a Polars DataFrame.
@@ -159,14 +168,14 @@ class DataExportMixin:
159
168
  """
160
169
 
161
170
  def to_df(self, normalize=False, decimals=5):
162
- """Create a polars DataFrame from the prediction results summary or validation metrics.
171
+ """Create a Polars DataFrame from the prediction results summary or validation metrics.
163
172
 
164
173
  Args:
165
174
  normalize (bool, optional): Normalize numerical values for easier comparison.
166
175
  decimals (int, optional): Decimal places to round floats.
167
176
 
168
177
  Returns:
169
- (DataFrame): DataFrame containing the summary data.
178
+ (polars.DataFrame): Polars DataFrame containing the summary data.
170
179
  """
171
180
  import polars as pl # scope for faster 'import ultralytics'
172
181
 
@@ -1,6 +1,6 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
  """
3
- Benchmark a YOLO model formats for speed and accuracy.
3
+ Benchmark YOLO model formats for speed and accuracy.
4
4
 
5
5
  Usage:
6
6
  from ultralytics.utils.benchmarks import ProfileModels, benchmark
@@ -78,7 +78,7 @@ def benchmark(
78
78
  **kwargs (Any): Additional keyword arguments for exporter.
79
79
 
80
80
  Returns:
81
- (polars.DataFrame): A polars DataFrame with benchmark results for each format, including file size, metric, and
81
+ (polars.DataFrame): A Polars DataFrame with benchmark results for each format, including file size, metric, and
82
82
  inference time.
83
83
 
84
84
  Examples: