dgenerate-ultralytics-headless 8.3.237__py3-none-any.whl → 8.3.239__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/RECORD +104 -105
  3. tests/test_exports.py +3 -1
  4. tests/test_python.py +2 -2
  5. tests/test_solutions.py +6 -6
  6. ultralytics/__init__.py +1 -1
  7. ultralytics/cfg/__init__.py +4 -4
  8. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  9. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  10. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  11. ultralytics/cfg/datasets/VOC.yaml +15 -16
  12. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  13. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  14. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  15. ultralytics/cfg/datasets/dota8.yaml +2 -2
  16. ultralytics/cfg/datasets/kitti.yaml +1 -1
  17. ultralytics/cfg/datasets/xView.yaml +16 -16
  18. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  19. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  20. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  21. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  22. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  23. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  24. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  25. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  26. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  27. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  28. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  29. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  30. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  31. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  32. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  33. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  34. ultralytics/data/augment.py +1 -1
  35. ultralytics/data/base.py +4 -2
  36. ultralytics/data/build.py +4 -4
  37. ultralytics/data/loaders.py +17 -12
  38. ultralytics/data/utils.py +4 -4
  39. ultralytics/engine/exporter.py +24 -16
  40. ultralytics/engine/predictor.py +5 -4
  41. ultralytics/engine/results.py +12 -13
  42. ultralytics/engine/trainer.py +2 -2
  43. ultralytics/engine/tuner.py +2 -3
  44. ultralytics/engine/validator.py +2 -2
  45. ultralytics/models/fastsam/model.py +2 -2
  46. ultralytics/models/fastsam/predict.py +2 -3
  47. ultralytics/models/fastsam/val.py +4 -4
  48. ultralytics/models/rtdetr/predict.py +2 -3
  49. ultralytics/models/rtdetr/val.py +5 -4
  50. ultralytics/models/sam/build.py +5 -5
  51. ultralytics/models/sam/build_sam3.py +9 -6
  52. ultralytics/models/sam/model.py +1 -1
  53. ultralytics/models/sam/modules/sam.py +10 -5
  54. ultralytics/models/sam/predict.py +24 -48
  55. ultralytics/models/sam/sam3/encoder.py +4 -4
  56. ultralytics/models/sam/sam3/geometry_encoders.py +3 -3
  57. ultralytics/models/sam/sam3/necks.py +17 -17
  58. ultralytics/models/sam/sam3/sam3_image.py +3 -21
  59. ultralytics/models/sam/sam3/vl_combiner.py +1 -6
  60. ultralytics/models/yolo/classify/val.py +1 -1
  61. ultralytics/models/yolo/detect/train.py +1 -1
  62. ultralytics/models/yolo/detect/val.py +7 -7
  63. ultralytics/models/yolo/obb/val.py +1 -1
  64. ultralytics/models/yolo/pose/val.py +1 -1
  65. ultralytics/models/yolo/segment/val.py +1 -1
  66. ultralytics/nn/autobackend.py +9 -9
  67. ultralytics/nn/modules/block.py +1 -1
  68. ultralytics/nn/tasks.py +3 -3
  69. ultralytics/nn/text_model.py +2 -7
  70. ultralytics/solutions/ai_gym.py +1 -1
  71. ultralytics/solutions/analytics.py +6 -6
  72. ultralytics/solutions/config.py +1 -1
  73. ultralytics/solutions/distance_calculation.py +1 -1
  74. ultralytics/solutions/object_counter.py +1 -1
  75. ultralytics/solutions/object_cropper.py +3 -6
  76. ultralytics/solutions/parking_management.py +21 -17
  77. ultralytics/solutions/queue_management.py +5 -5
  78. ultralytics/solutions/region_counter.py +2 -2
  79. ultralytics/solutions/security_alarm.py +1 -1
  80. ultralytics/solutions/solutions.py +45 -22
  81. ultralytics/solutions/speed_estimation.py +1 -1
  82. ultralytics/trackers/basetrack.py +1 -1
  83. ultralytics/trackers/bot_sort.py +4 -3
  84. ultralytics/trackers/byte_tracker.py +4 -4
  85. ultralytics/trackers/utils/gmc.py +6 -7
  86. ultralytics/trackers/utils/kalman_filter.py +2 -1
  87. ultralytics/trackers/utils/matching.py +4 -3
  88. ultralytics/utils/__init__.py +12 -3
  89. ultralytics/utils/benchmarks.py +2 -2
  90. ultralytics/utils/callbacks/tensorboard.py +19 -25
  91. ultralytics/utils/checks.py +2 -1
  92. ultralytics/utils/downloads.py +1 -1
  93. ultralytics/utils/export/tensorflow.py +16 -2
  94. ultralytics/utils/files.py +13 -12
  95. ultralytics/utils/logger.py +62 -27
  96. ultralytics/utils/metrics.py +1 -1
  97. ultralytics/utils/ops.py +6 -6
  98. ultralytics/utils/patches.py +3 -3
  99. ultralytics/utils/plotting.py +7 -12
  100. ultralytics/utils/tuner.py +1 -1
  101. ultralytics/models/sam/sam3/tokenizer_ve.py +0 -242
  102. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/WHEEL +0 -0
  103. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/entry_points.txt +0 -0
  104. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/licenses/LICENSE +0 -0
  105. {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/top_level.txt +0 -0
ultralytics/utils/files.py CHANGED
@@ -28,15 +28,15 @@ class WorkingDirectory(contextlib.ContextDecorator):
28
28
 
29
29
  Examples:
30
30
  Using as a context manager:
31
- >>> with WorkingDirectory('/path/to/new/dir'):
32
- >>> # Perform operations in the new directory
33
- >>> pass
31
+ >>> with WorkingDirectory("/path/to/new/dir"):
32
+ ... # Perform operations in the new directory
33
+ ... pass
34
34
 
35
35
  Using as a decorator:
36
- >>> @WorkingDirectory('/path/to/new/dir')
37
- >>> def some_function():
38
- >>> # Perform operations in the new directory
39
- >>> pass
36
+ >>> @WorkingDirectory("/path/to/new/dir")
37
+ ... def some_function():
38
+ ... # Perform operations in the new directory
39
+ ... pass
40
40
  """
41
41
 
42
42
  def __init__(self, new_dir: str | Path):
@@ -67,9 +67,9 @@ def spaces_in_path(path: str | Path):
67
67
  (Path | str): Temporary path with any spaces replaced by underscores.
68
68
 
69
69
  Examples:
70
- >>> with spaces_in_path('/path/with spaces') as new_path:
71
- >>> # Your code here
72
- >>> pass
70
+ >>> with spaces_in_path("/path/with spaces") as new_path:
71
+ ... # Your code here
72
+ ... pass
73
73
  """
74
74
  # If path has spaces, replace them with underscores
75
75
  if " " in str(path):
@@ -196,13 +196,14 @@ def update_models(model_names: tuple = ("yolo11n.pt",), source_dir: Path = Path(
196
196
  """
197
197
  from ultralytics import YOLO
198
198
  from ultralytics.nn.autobackend import default_class_names
199
+ from ultralytics.utils import LOGGER
199
200
 
200
201
  target_dir = source_dir / "updated_models"
201
202
  target_dir.mkdir(parents=True, exist_ok=True) # Ensure target directory exists
202
203
 
203
204
  for model_name in model_names:
204
205
  model_path = source_dir / model_name
205
- print(f"Loading model from {model_path}")
206
+ LOGGER.info(f"Loading model from {model_path}")
206
207
 
207
208
  # Load model
208
209
  model = YOLO(model_path)
@@ -214,5 +215,5 @@ def update_models(model_names: tuple = ("yolo11n.pt",), source_dir: Path = Path(
214
215
  save_path = target_dir / model_name
215
216
 
216
217
  # Save model using model.save()
217
- print(f"Re-saving {model_name} model to {save_path}")
218
+ LOGGER.info(f"Re-saving {model_name} model to {save_path}")
218
219
  model.save(save_path)
ultralytics/utils/logger.py CHANGED
@@ -267,6 +267,11 @@ class SystemLogger:
267
267
  self.net_start = psutil.net_io_counters()
268
268
  self.disk_start = psutil.disk_io_counters()
269
269
 
270
+ # For rate calculation
271
+ self._prev_net = self.net_start
272
+ self._prev_disk = self.disk_start
273
+ self._prev_time = time.time()
274
+
270
275
  def _init_nvidia(self):
271
276
  """Initialize NVIDIA GPU monitoring with pynvml."""
272
277
  try:
@@ -278,42 +283,49 @@ class SystemLogger:
278
283
  except Exception:
279
284
  return False
280
285
 
281
- def get_metrics(self):
282
- """Get current system metrics.
286
+ def get_metrics(self, rates=False):
287
+ """Get current system metrics including CPU, RAM, disk, network, and GPU usage.
283
288
 
284
289
  Collects comprehensive system metrics including CPU usage, RAM usage, disk I/O statistics, network I/O
285
- statistics, and GPU metrics (if available). Example output:
290
+ statistics, and GPU metrics (if available).
286
291
 
292
+ Example output (rates=False, default):
287
293
  ```python
288
- metrics = {
294
+ {
289
295
  "cpu": 45.2,
290
296
  "ram": 78.9,
291
297
  "disk": {"read_mb": 156.7, "write_mb": 89.3, "used_gb": 256.8},
292
298
  "network": {"recv_mb": 157.2, "sent_mb": 89.1},
293
299
  "gpus": {
294
- 0: {"usage": 95.6, "memory": 85.4, "temp": 72, "power": 285},
295
- 1: {"usage": 94.1, "memory": 82.7, "temp": 70, "power": 278},
300
+ "0": {"usage": 95.6, "memory": 85.4, "temp": 72, "power": 285},
301
+ "1": {"usage": 94.1, "memory": 82.7, "temp": 70, "power": 278},
296
302
  },
297
303
  }
298
304
  ```
299
305
 
300
- - cpu (float): CPU usage percentage (0-100%)
301
- - ram (float): RAM usage percentage (0-100%)
302
- - disk (dict):
303
- - read_mb (float): Cumulative disk read in MB since initialization
304
- - write_mb (float): Cumulative disk write in MB since initialization
305
- - used_gb (float): Total disk space used in GB
306
- - network (dict):
307
- - recv_mb (float): Cumulative network received in MB since initialization
308
- - sent_mb (float): Cumulative network sent in MB since initialization
309
- - gpus (dict): GPU metrics by device index (e.g., 0, 1) containing:
310
- - usage (int): GPU utilization percentage (0-100%)
311
- - memory (float): CUDA memory usage percentage (0-100%)
312
- - temp (int): GPU temperature in degrees Celsius
313
- - power (int): GPU power consumption in watts
306
+ Example output (rates=True):
307
+ ```python
308
+ {
309
+ "cpu": 45.2,
310
+ "ram": 78.9,
311
+ "disk": {"read_mbs": 12.5, "write_mbs": 8.3, "used_gb": 256.8},
312
+ "network": {"recv_mbs": 5.2, "sent_mbs": 1.1},
313
+ "gpus": {
314
+ "0": {"usage": 95.6, "memory": 85.4, "temp": 72, "power": 285},
315
+ },
316
+ }
317
+ ```
318
+
319
+ Args:
320
+ rates (bool): If True, return disk/network as MB/s rates instead of cumulative MB.
314
321
 
315
322
  Returns:
316
- metrics (dict): System metrics containing 'cpu', 'ram', 'disk', 'network', 'gpus' with usage data.
323
+ (dict): Metrics dictionary with cpu, ram, disk, network, and gpus keys.
324
+
325
+ Examples:
326
+ >>> logger = SystemLogger()
327
+ >>> logger.get_metrics()["cpu"] # CPU percentage
328
+ >>> logger.get_metrics(rates=True)["network"]["recv_mbs"] # MB/s download rate
317
329
  """
318
330
  import psutil # scoped as slow import
319
331
 
@@ -321,21 +333,44 @@ class SystemLogger:
321
333
  disk = psutil.disk_io_counters()
322
334
  memory = psutil.virtual_memory()
323
335
  disk_usage = shutil.disk_usage("/")
336
+ now = time.time()
324
337
 
325
338
  metrics = {
326
339
  "cpu": round(psutil.cpu_percent(), 3),
327
340
  "ram": round(memory.percent, 3),
328
- "disk": {
341
+ "gpus": {},
342
+ }
343
+
344
+ # Calculate elapsed time since last call
345
+ elapsed = max(0.1, now - self._prev_time) # Avoid division by zero
346
+
347
+ if rates:
348
+ # Calculate MB/s rates from delta since last call
349
+ metrics["disk"] = {
350
+ "read_mbs": round(max(0, (disk.read_bytes - self._prev_disk.read_bytes) / (1 << 20) / elapsed), 3),
351
+ "write_mbs": round(max(0, (disk.write_bytes - self._prev_disk.write_bytes) / (1 << 20) / elapsed), 3),
352
+ "used_gb": round(disk_usage.used / (1 << 30), 3),
353
+ }
354
+ metrics["network"] = {
355
+ "recv_mbs": round(max(0, (net.bytes_recv - self._prev_net.bytes_recv) / (1 << 20) / elapsed), 3),
356
+ "sent_mbs": round(max(0, (net.bytes_sent - self._prev_net.bytes_sent) / (1 << 20) / elapsed), 3),
357
+ }
358
+ else:
359
+ # Cumulative MB since initialization (original behavior)
360
+ metrics["disk"] = {
329
361
  "read_mb": round((disk.read_bytes - self.disk_start.read_bytes) / (1 << 20), 3),
330
362
  "write_mb": round((disk.write_bytes - self.disk_start.write_bytes) / (1 << 20), 3),
331
363
  "used_gb": round(disk_usage.used / (1 << 30), 3),
332
- },
333
- "network": {
364
+ }
365
+ metrics["network"] = {
334
366
  "recv_mb": round((net.bytes_recv - self.net_start.bytes_recv) / (1 << 20), 3),
335
367
  "sent_mb": round((net.bytes_sent - self.net_start.bytes_sent) / (1 << 20), 3),
336
- },
337
- "gpus": {},
338
- }
368
+ }
369
+
370
+ # Always update previous values for accurate rate calculation on next call
371
+ self._prev_net = net
372
+ self._prev_disk = disk
373
+ self._prev_time = now
339
374
 
340
375
  # Add GPU metrics (NVIDIA only)
341
376
  if self.nvidia_initialized:
ultralytics/utils/metrics.py CHANGED
@@ -309,7 +309,7 @@ class ConfusionMatrix(DataExportMixin):
309
309
  Attributes:
310
310
  task (str): The type of task, either 'detect' or 'classify'.
311
311
  matrix (np.ndarray): The confusion matrix, with dimensions depending on the task.
312
- nc (int): The number of category.
312
+ nc (int): The number of classes.
313
313
  names (list[str]): The names of the classes, used as labels on the plot.
314
314
  matches (dict): Contains the indices of ground truths and predictions categorized into TP, FP and FN.
315
315
  """
ultralytics/utils/ops.py CHANGED
@@ -298,7 +298,7 @@ def xywh2ltwh(x):
298
298
  x (np.ndarray | torch.Tensor): Input bounding box coordinates in xywh format.
299
299
 
300
300
  Returns:
301
- (np.ndarray | torch.Tensor): Bounding box coordinates in xyltwh format.
301
+ (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format.
302
302
  """
303
303
  y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
304
304
  y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x
@@ -313,7 +313,7 @@ def xyxy2ltwh(x):
313
313
  x (np.ndarray | torch.Tensor): Input bounding box coordinates in xyxy format.
314
314
 
315
315
  Returns:
316
- (np.ndarray | torch.Tensor): Bounding box coordinates in xyltwh format.
316
+ (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format.
317
317
  """
318
318
  y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
319
319
  y[..., 2] = x[..., 2] - x[..., 0] # width
@@ -325,7 +325,7 @@ def ltwh2xywh(x):
325
325
  """Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
326
326
 
327
327
  Args:
328
- x (torch.Tensor): Input bounding box coordinates.
328
+ x (np.ndarray | torch.Tensor): Input bounding box coordinates.
329
329
 
330
330
  Returns:
331
331
  (np.ndarray | torch.Tensor): Bounding box coordinates in xywh format.
@@ -398,8 +398,8 @@ def ltwh2xyxy(x):
398
398
  (np.ndarray | torch.Tensor): Bounding box coordinates in xyxy format.
399
399
  """
400
400
  y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
401
- y[..., 2] = x[..., 2] + x[..., 0] # width
402
- y[..., 3] = x[..., 3] + x[..., 1] # height
401
+ y[..., 2] = x[..., 2] + x[..., 0] # x2
402
+ y[..., 3] = x[..., 3] + x[..., 1] # y2
403
403
  return y
404
404
 
405
405
 
@@ -655,7 +655,7 @@ def clean_str(s):
655
655
  Returns:
656
656
  (str): A string with special characters replaced by an underscore _.
657
657
  """
658
- return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
658
+ return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨`><+]", repl="_", string=s)
659
659
 
660
660
 
661
661
  def empty_like(x):
ultralytics/utils/patches.py CHANGED
@@ -35,7 +35,7 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
35
35
  if filename.endswith((".tiff", ".tif")):
36
36
  success, frames = cv2.imdecodemulti(file_bytes, cv2.IMREAD_UNCHANGED)
37
37
  if success:
38
- # Handle RGB images in tif/tiff format
38
+ # Handle multi-frame TIFFs and color images
39
39
  return frames[0] if len(frames) == 1 and frames[0].ndim == 3 else np.stack(frames, axis=2)
40
40
  return None
41
41
  else:
@@ -105,8 +105,8 @@ def torch_load(*args, **kwargs):
105
105
  (Any): The loaded PyTorch object.
106
106
 
107
107
  Notes:
108
- For PyTorch versions 2.0 and above, this function automatically sets 'weights_only=False'
109
- if the argument is not provided, to avoid deprecation warnings.
108
+ For PyTorch versions 1.13 and above, this function automatically sets `weights_only=False` if the argument is
109
+ not provided, to avoid deprecation warnings.
110
110
  """
111
111
  from ultralytics.utils.torch_utils import TORCH_1_13
112
112
 
ultralytics/utils/plotting.py CHANGED
@@ -3,7 +3,6 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import math
6
- import warnings
7
6
  from collections.abc import Callable
8
7
  from pathlib import Path
9
8
  from typing import Any
@@ -511,7 +510,7 @@ class Annotator:
511
510
  cv2.putText(self.im, text, xy, 0, self.sf, txt_color, thickness=self.tf, lineType=cv2.LINE_AA)
512
511
 
513
512
  def fromarray(self, im):
514
- """Update self.im from a numpy array."""
513
+ """Update `self.im` from a NumPy array or PIL image."""
515
514
  self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
516
515
  self.draw = ImageDraw.Draw(self.im)
517
516
 
@@ -522,8 +521,8 @@ class Annotator:
522
521
 
523
522
  def show(self, title: str | None = None):
524
523
  """Show the annotated image."""
525
- im = Image.fromarray(np.asarray(self.im)[..., ::-1]) # Convert numpy array to PIL Image with RGB to BGR
526
- if IS_COLAB or IS_KAGGLE: # can not use IS_JUPYTER as will run for all ipython environments
524
+ im = Image.fromarray(np.asarray(self.im)[..., ::-1]) # Convert BGR NumPy array to RGB PIL Image
525
+ if IS_COLAB or IS_KAGGLE: # cannot use IS_JUPYTER as it runs for all IPython environments
527
526
  try:
528
527
  display(im) # noqa - display() function only available in ipython environments
529
528
  except ImportError as e:
@@ -536,11 +535,11 @@ class Annotator:
536
535
  cv2.imwrite(filename, np.asarray(self.im))
537
536
 
538
537
  @staticmethod
539
- def get_bbox_dimension(bbox: tuple | None = None):
538
+ def get_bbox_dimension(bbox: tuple | list):
540
539
  """Calculate the dimensions and area of a bounding box.
541
540
 
542
541
  Args:
543
- bbox (tuple): Bounding box coordinates in the format (x_min, y_min, x_max, y_max).
542
+ bbox (tuple | list): Bounding box coordinates in the format (x_min, y_min, x_max, y_max).
544
543
 
545
544
  Returns:
546
545
  width (float): Width of the bounding box.
@@ -575,10 +574,6 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
575
574
  import polars
576
575
  from matplotlib.colors import LinearSegmentedColormap
577
576
 
578
- # Filter matplotlib>=3.7.2 warning
579
- warnings.filterwarnings("ignore", category=UserWarning, message="The figure layout has changed to tight")
580
- warnings.filterwarnings("ignore", category=FutureWarning)
581
-
582
577
  # Plot dataset labels
583
578
  LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
584
579
  nc = int(cls.max() + 1) # number of classes
@@ -600,8 +595,8 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
600
595
  ax[0].set_xlabel("classes")
601
596
  boxes = np.column_stack([0.5 - boxes[:, 2:4] / 2, 0.5 + boxes[:, 2:4] / 2]) * 1000
602
597
  img = Image.fromarray(np.ones((1000, 1000, 3), dtype=np.uint8) * 255)
603
- for cls, box in zip(cls[:500], boxes[:500]):
604
- ImageDraw.Draw(img).rectangle(box.tolist(), width=1, outline=colors(cls)) # plot
598
+ for class_id, box in zip(cls[:500], boxes[:500]):
599
+ ImageDraw.Draw(img).rectangle(box.tolist(), width=1, outline=colors(class_id)) # plot
605
600
  ax[1].imshow(img)
606
601
  ax[1].axis("off")
607
602
 
ultralytics/utils/tuner.py CHANGED
@@ -77,7 +77,7 @@ def run_ray_tune(
77
77
  "perspective": tune.uniform(0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
78
78
  "flipud": tune.uniform(0.0, 1.0), # image flip up-down (probability)
79
79
  "fliplr": tune.uniform(0.0, 1.0), # image flip left-right (probability)
80
- "bgr": tune.uniform(0.0, 1.0), # image channel BGR (probability)
80
+ "bgr": tune.uniform(0.0, 1.0), # swap RGB↔BGR channels (probability)
81
81
  "mosaic": tune.uniform(0.0, 1.0), # image mosaic (probability)
82
82
  "mixup": tune.uniform(0.0, 1.0), # image mixup (probability)
83
83
  "cutmix": tune.uniform(0.0, 1.0), # image cutmix (probability)
ultralytics/models/sam/sam3/tokenizer_ve.py DELETED
@@ -1,242 +0,0 @@
1
- # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
-
3
- # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
4
-
5
- """
6
- Text Tokenizer.
7
-
8
- Copied and lightly adapted from VE repo, which in turn copied
9
- from open_clip and openAI CLIP.
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- import gzip
15
- import html
16
- import io
17
- import os
18
- import string
19
- from functools import lru_cache
20
-
21
- import ftfy
22
- import regex as re
23
- import torch
24
- from iopath.common.file_io import g_pathmgr
25
-
26
-
27
- @lru_cache
28
- def bytes_to_unicode():
29
- """Returns list of utf-8 byte and a corresponding list of unicode strings. The reversible bpe codes work on unicode
30
- strings. This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. When
31
- you're at something like a 10B token dataset you end up needing around 5K for decent coverage. This is a
32
- significant percentage of your normal, say, 32K bpe vocab. To avoid that, we want lookup tables between utf-8
33
- bytes and unicode strings. And avoids mapping to whitespace/control characters the bpe code barfs on.
34
- """
35
- bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1))
36
- cs = bs[:]
37
- n = 0
38
- for b in range(2**8):
39
- if b not in bs:
40
- bs.append(b)
41
- cs.append(2**8 + n)
42
- n += 1
43
- cs = [chr(n) for n in cs]
44
- return dict(zip(bs, cs))
45
-
46
-
47
- def get_pairs(word):
48
- """Return set of symbol pairs in a word. Word is represented as tuple of symbols (symbols being variable-length
49
- strings).
50
- """
51
- pairs = set()
52
- prev_char = word[0]
53
- for char in word[1:]:
54
- pairs.add((prev_char, char))
55
- prev_char = char
56
- return pairs
57
-
58
-
59
- def basic_clean(text):
60
- """Basic text cleaning: fix unicode and unescape HTML entities."""
61
- text = ftfy.fix_text(text)
62
- text = html.unescape(html.unescape(text))
63
- return text.strip()
64
-
65
-
66
- def whitespace_clean(text):
67
- """Remove redundant whitespace."""
68
- text = re.sub(r"\s+", " ", text)
69
- text = text.strip()
70
- return text
71
-
72
-
73
- def _clean_canonicalize(x):
74
- """Clean text and canonicalize it."""
75
- # basic, remove whitespace, remove punctuation, lower case
76
- return canonicalize_text(basic_clean(x))
77
-
78
-
79
- def _clean_lower(x):
80
- """Clean text and return lowercase."""
81
- # basic, remove whitespace, lower case
82
- return whitespace_clean(basic_clean(x)).lower()
83
-
84
-
85
- def _clean_whitespace(x):
86
- """Clean text and remove redundant whitespace."""
87
- # basic, remove whitespace
88
- return whitespace_clean(basic_clean(x))
89
-
90
-
91
- def get_clean_fn(type: str):
92
- """Get text cleaning function by name."""
93
- if type == "canonicalize":
94
- return _clean_canonicalize
95
- elif type == "lower":
96
- return _clean_lower
97
- elif type == "whitespace":
98
- return _clean_whitespace
99
- else:
100
- assert False, f"Invalid clean function ({type})."
101
-
102
-
103
- def canonicalize_text(text, *, keep_punctuation_exact_string=None):
104
- """Returns canonicalized `text` (lowercase and punctuation removed). From:
105
- https://github.com/google-research/big_vision/blob/53f18caf27a9419231bbf08d3388b07671616d3d/big_vision/evaluators/proj/image_text/prompt_engineering.py#L94.
106
-
107
- Args:
108
- text: string to be canonicalized.
109
- keep_punctuation_exact_string: If provided, then this exact string kept. For example providing '{}' will keep
110
- any occurrences of '{}' (but will still remove '{' and '}' that appear separately).
111
- """
112
- text = text.replace("_", " ")
113
- if keep_punctuation_exact_string:
114
- text = keep_punctuation_exact_string.join(
115
- part.translate(str.maketrans("", "", string.punctuation))
116
- for part in text.split(keep_punctuation_exact_string)
117
- )
118
- else:
119
- text = text.translate(str.maketrans("", "", string.punctuation))
120
- text = text.lower()
121
- text = re.sub(r"\s+", " ", text)
122
- return text.strip()
123
-
124
-
125
- class SimpleTokenizer:
126
- """A simple tokenizer for text inputs."""
127
-
128
- def __init__(
129
- self,
130
- bpe_path: str | os.PathLike,
131
- additional_special_tokens: list[str] | None = None,
132
- context_length: int = 77,
133
- clean: str = "lower",
134
- ):
135
- """The tokenizer for text inputs."""
136
- self.byte_encoder = bytes_to_unicode()
137
- self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
138
- with g_pathmgr.open(bpe_path, "rb") as fh:
139
- bpe_bytes = io.BytesIO(fh.read())
140
- merges = gzip.open(bpe_bytes).read().decode("utf-8").split("\n")
141
- # merges = gzip.open(bpe_path).read().decode("utf-8").split("\n")
142
- merges = merges[1 : 49152 - 256 - 2 + 1]
143
- merges = [tuple(merge.split()) for merge in merges]
144
- vocab = list(bytes_to_unicode().values())
145
- vocab = vocab + [v + "</w>" for v in vocab]
146
- for merge in merges:
147
- vocab.append("".join(merge))
148
- special_tokens = ["<start_of_text>", "<end_of_text>"]
149
- if additional_special_tokens:
150
- special_tokens += additional_special_tokens
151
- vocab.extend(special_tokens)
152
- self.encoder = dict(zip(vocab, range(len(vocab))))
153
- self.decoder = {v: k for k, v in self.encoder.items()}
154
- self.bpe_ranks = dict(zip(merges, range(len(merges))))
155
- self.cache = {t: t for t in special_tokens}
156
- special = "|".join(special_tokens)
157
- self.pat = re.compile(
158
- special + r"""|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""",
159
- re.IGNORECASE,
160
- )
161
- self.vocab_size = len(self.encoder)
162
- self.all_special_ids = [self.encoder[t] for t in special_tokens]
163
- self.sot_token_id = self.all_special_ids[0]
164
- self.eot_token_id = self.all_special_ids[1]
165
- self.context_length = context_length
166
- self.clean_fn = get_clean_fn(clean)
167
-
168
- def bpe(self, token):
169
- """Byte Pair Encoding."""
170
- if token in self.cache:
171
- return self.cache[token]
172
- word = (*tuple(token[:-1]), token[-1] + "</w>")
173
- pairs = get_pairs(word)
174
- if not pairs:
175
- return token + "</w>"
176
- while True:
177
- bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf")))
178
- if bigram not in self.bpe_ranks:
179
- break
180
- first, second = bigram
181
- new_word = []
182
- i = 0
183
- while i < len(word):
184
- try:
185
- j = word.index(first, i)
186
- new_word.extend(word[i:j])
187
- i = j
188
- except Exception:
189
- new_word.extend(word[i:])
190
- break
191
- if word[i] == first and i < len(word) - 1 and word[i + 1] == second:
192
- new_word.append(first + second)
193
- i += 2
194
- else:
195
- new_word.append(word[i])
196
- i += 1
197
- new_word = tuple(new_word)
198
- word = new_word
199
- if len(word) == 1:
200
- break
201
- else:
202
- pairs = get_pairs(word)
203
- word = " ".join(word)
204
- self.cache[token] = word
205
- return word
206
-
207
- def encode(self, text):
208
- """Encode text to a sequence of BPE tokens."""
209
- bpe_tokens = []
210
- text = self.clean_fn(text)
211
- for token in re.findall(self.pat, text):
212
- token = "".join(self.byte_encoder[b] for b in token.encode("utf-8"))
213
- bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" "))
214
- return bpe_tokens
215
-
216
- def decode(self, tokens):
217
- """Decodes a sequence of tokens back into a text string."""
218
- text = "".join([self.decoder[token] for token in tokens])
219
- text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors="replace").replace("</w>", " ")
220
- return text
221
-
222
- def __call__(self, texts: str | list[str], context_length: int | None = None) -> torch.LongTensor:
223
- """Returns the tokenized representation of given input string(s) Parameters. ---------- texts : Union[str,
224
- list[str]] An input string or a list of input strings to tokenize context_length : int The context
225
- length to use; all CLIP models use 77 as the context length.
226
-
227
- Returns:
228
- -------: A two-dimensional tensor containing the resulting tokens, shape = [number of input strings,
229
- context_length]
230
- """
231
- if isinstance(texts, str):
232
- texts = [texts]
233
- context_length = context_length or self.context_length
234
- assert context_length, "Please set a valid context length"
235
- all_tokens = [[self.sot_token_id, *self.encode(text), self.eot_token_id] for text in texts]
236
- result = torch.zeros(len(all_tokens), context_length, dtype=torch.long)
237
- for i, tokens in enumerate(all_tokens):
238
- if len(tokens) > context_length:
239
- tokens = tokens[:context_length] # Truncate
240
- tokens[-1] = self.eot_token_id
241
- result[i, : len(tokens)] = torch.tensor(tokens)
242
- return result