dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
  2. dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
  3. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -6
  5. tests/conftest.py +15 -39
  6. tests/test_cli.py +17 -17
  7. tests/test_cuda.py +17 -8
  8. tests/test_engine.py +36 -10
  9. tests/test_exports.py +98 -37
  10. tests/test_integrations.py +12 -15
  11. tests/test_python.py +126 -82
  12. tests/test_solutions.py +319 -135
  13. ultralytics/__init__.py +27 -9
  14. ultralytics/cfg/__init__.py +83 -87
  15. ultralytics/cfg/datasets/Argoverse.yaml +4 -4
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
  17. ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
  18. ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
  19. ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
  20. ultralytics/cfg/datasets/ImageNet.yaml +3 -3
  21. ultralytics/cfg/datasets/Objects365.yaml +24 -20
  22. ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
  23. ultralytics/cfg/datasets/VOC.yaml +10 -13
  24. ultralytics/cfg/datasets/VisDrone.yaml +43 -33
  25. ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
  26. ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
  27. ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
  28. ultralytics/cfg/datasets/coco-pose.yaml +26 -4
  29. ultralytics/cfg/datasets/coco.yaml +4 -4
  30. ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
  31. ultralytics/cfg/datasets/coco128.yaml +2 -2
  32. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  33. ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
  34. ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
  35. ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
  36. ultralytics/cfg/datasets/coco8.yaml +2 -2
  37. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  38. ultralytics/cfg/datasets/crack-seg.yaml +5 -5
  39. ultralytics/cfg/datasets/dog-pose.yaml +32 -4
  40. ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
  41. ultralytics/cfg/datasets/dota8.yaml +2 -2
  42. ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
  43. ultralytics/cfg/datasets/lvis.yaml +9 -9
  44. ultralytics/cfg/datasets/medical-pills.yaml +4 -5
  45. ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
  46. ultralytics/cfg/datasets/package-seg.yaml +5 -5
  47. ultralytics/cfg/datasets/signature.yaml +4 -4
  48. ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
  49. ultralytics/cfg/datasets/xView.yaml +5 -5
  50. ultralytics/cfg/default.yaml +96 -93
  51. ultralytics/cfg/trackers/botsort.yaml +16 -17
  52. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  53. ultralytics/data/__init__.py +4 -4
  54. ultralytics/data/annotator.py +12 -12
  55. ultralytics/data/augment.py +531 -564
  56. ultralytics/data/base.py +76 -81
  57. ultralytics/data/build.py +206 -42
  58. ultralytics/data/converter.py +179 -78
  59. ultralytics/data/dataset.py +121 -121
  60. ultralytics/data/loaders.py +114 -91
  61. ultralytics/data/split.py +28 -15
  62. ultralytics/data/split_dota.py +67 -48
  63. ultralytics/data/utils.py +110 -89
  64. ultralytics/engine/exporter.py +422 -460
  65. ultralytics/engine/model.py +224 -252
  66. ultralytics/engine/predictor.py +94 -89
  67. ultralytics/engine/results.py +345 -595
  68. ultralytics/engine/trainer.py +231 -134
  69. ultralytics/engine/tuner.py +279 -73
  70. ultralytics/engine/validator.py +53 -46
  71. ultralytics/hub/__init__.py +26 -28
  72. ultralytics/hub/auth.py +30 -16
  73. ultralytics/hub/google/__init__.py +34 -36
  74. ultralytics/hub/session.py +53 -77
  75. ultralytics/hub/utils.py +23 -109
  76. ultralytics/models/__init__.py +1 -1
  77. ultralytics/models/fastsam/__init__.py +1 -1
  78. ultralytics/models/fastsam/model.py +36 -18
  79. ultralytics/models/fastsam/predict.py +33 -44
  80. ultralytics/models/fastsam/utils.py +4 -5
  81. ultralytics/models/fastsam/val.py +12 -14
  82. ultralytics/models/nas/__init__.py +1 -1
  83. ultralytics/models/nas/model.py +16 -20
  84. ultralytics/models/nas/predict.py +12 -14
  85. ultralytics/models/nas/val.py +4 -5
  86. ultralytics/models/rtdetr/__init__.py +1 -1
  87. ultralytics/models/rtdetr/model.py +9 -9
  88. ultralytics/models/rtdetr/predict.py +22 -17
  89. ultralytics/models/rtdetr/train.py +20 -16
  90. ultralytics/models/rtdetr/val.py +79 -59
  91. ultralytics/models/sam/__init__.py +8 -2
  92. ultralytics/models/sam/amg.py +53 -38
  93. ultralytics/models/sam/build.py +29 -31
  94. ultralytics/models/sam/model.py +33 -38
  95. ultralytics/models/sam/modules/blocks.py +159 -182
  96. ultralytics/models/sam/modules/decoders.py +38 -47
  97. ultralytics/models/sam/modules/encoders.py +114 -133
  98. ultralytics/models/sam/modules/memory_attention.py +38 -31
  99. ultralytics/models/sam/modules/sam.py +114 -93
  100. ultralytics/models/sam/modules/tiny_encoder.py +268 -291
  101. ultralytics/models/sam/modules/transformer.py +59 -66
  102. ultralytics/models/sam/modules/utils.py +55 -72
  103. ultralytics/models/sam/predict.py +745 -341
  104. ultralytics/models/utils/loss.py +118 -107
  105. ultralytics/models/utils/ops.py +118 -71
  106. ultralytics/models/yolo/__init__.py +1 -1
  107. ultralytics/models/yolo/classify/predict.py +28 -26
  108. ultralytics/models/yolo/classify/train.py +50 -81
  109. ultralytics/models/yolo/classify/val.py +68 -61
  110. ultralytics/models/yolo/detect/predict.py +12 -15
  111. ultralytics/models/yolo/detect/train.py +56 -46
  112. ultralytics/models/yolo/detect/val.py +279 -223
  113. ultralytics/models/yolo/model.py +167 -86
  114. ultralytics/models/yolo/obb/predict.py +7 -11
  115. ultralytics/models/yolo/obb/train.py +23 -25
  116. ultralytics/models/yolo/obb/val.py +107 -99
  117. ultralytics/models/yolo/pose/__init__.py +1 -1
  118. ultralytics/models/yolo/pose/predict.py +12 -14
  119. ultralytics/models/yolo/pose/train.py +31 -69
  120. ultralytics/models/yolo/pose/val.py +119 -254
  121. ultralytics/models/yolo/segment/predict.py +21 -25
  122. ultralytics/models/yolo/segment/train.py +12 -66
  123. ultralytics/models/yolo/segment/val.py +126 -305
  124. ultralytics/models/yolo/world/train.py +53 -45
  125. ultralytics/models/yolo/world/train_world.py +51 -32
  126. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  127. ultralytics/models/yolo/yoloe/predict.py +30 -37
  128. ultralytics/models/yolo/yoloe/train.py +89 -71
  129. ultralytics/models/yolo/yoloe/train_seg.py +15 -17
  130. ultralytics/models/yolo/yoloe/val.py +56 -41
  131. ultralytics/nn/__init__.py +9 -11
  132. ultralytics/nn/autobackend.py +179 -107
  133. ultralytics/nn/modules/__init__.py +67 -67
  134. ultralytics/nn/modules/activation.py +8 -7
  135. ultralytics/nn/modules/block.py +302 -323
  136. ultralytics/nn/modules/conv.py +61 -104
  137. ultralytics/nn/modules/head.py +488 -186
  138. ultralytics/nn/modules/transformer.py +183 -123
  139. ultralytics/nn/modules/utils.py +15 -20
  140. ultralytics/nn/tasks.py +327 -203
  141. ultralytics/nn/text_model.py +81 -65
  142. ultralytics/py.typed +1 -0
  143. ultralytics/solutions/__init__.py +12 -12
  144. ultralytics/solutions/ai_gym.py +19 -27
  145. ultralytics/solutions/analytics.py +36 -26
  146. ultralytics/solutions/config.py +29 -28
  147. ultralytics/solutions/distance_calculation.py +23 -24
  148. ultralytics/solutions/heatmap.py +17 -19
  149. ultralytics/solutions/instance_segmentation.py +21 -19
  150. ultralytics/solutions/object_blurrer.py +16 -17
  151. ultralytics/solutions/object_counter.py +48 -53
  152. ultralytics/solutions/object_cropper.py +22 -16
  153. ultralytics/solutions/parking_management.py +61 -58
  154. ultralytics/solutions/queue_management.py +19 -19
  155. ultralytics/solutions/region_counter.py +63 -50
  156. ultralytics/solutions/security_alarm.py +22 -25
  157. ultralytics/solutions/similarity_search.py +107 -60
  158. ultralytics/solutions/solutions.py +343 -262
  159. ultralytics/solutions/speed_estimation.py +35 -31
  160. ultralytics/solutions/streamlit_inference.py +104 -40
  161. ultralytics/solutions/templates/similarity-search.html +31 -24
  162. ultralytics/solutions/trackzone.py +24 -24
  163. ultralytics/solutions/vision_eye.py +11 -12
  164. ultralytics/trackers/__init__.py +1 -1
  165. ultralytics/trackers/basetrack.py +18 -27
  166. ultralytics/trackers/bot_sort.py +48 -39
  167. ultralytics/trackers/byte_tracker.py +94 -94
  168. ultralytics/trackers/track.py +7 -16
  169. ultralytics/trackers/utils/gmc.py +37 -69
  170. ultralytics/trackers/utils/kalman_filter.py +68 -76
  171. ultralytics/trackers/utils/matching.py +13 -17
  172. ultralytics/utils/__init__.py +251 -275
  173. ultralytics/utils/autobatch.py +19 -7
  174. ultralytics/utils/autodevice.py +68 -38
  175. ultralytics/utils/benchmarks.py +169 -130
  176. ultralytics/utils/callbacks/base.py +12 -13
  177. ultralytics/utils/callbacks/clearml.py +14 -15
  178. ultralytics/utils/callbacks/comet.py +139 -66
  179. ultralytics/utils/callbacks/dvc.py +19 -27
  180. ultralytics/utils/callbacks/hub.py +8 -6
  181. ultralytics/utils/callbacks/mlflow.py +6 -10
  182. ultralytics/utils/callbacks/neptune.py +11 -19
  183. ultralytics/utils/callbacks/platform.py +73 -0
  184. ultralytics/utils/callbacks/raytune.py +3 -4
  185. ultralytics/utils/callbacks/tensorboard.py +9 -12
  186. ultralytics/utils/callbacks/wb.py +33 -30
  187. ultralytics/utils/checks.py +163 -114
  188. ultralytics/utils/cpu.py +89 -0
  189. ultralytics/utils/dist.py +24 -20
  190. ultralytics/utils/downloads.py +176 -146
  191. ultralytics/utils/errors.py +11 -13
  192. ultralytics/utils/events.py +113 -0
  193. ultralytics/utils/export/__init__.py +7 -0
  194. ultralytics/utils/{export.py → export/engine.py} +81 -63
  195. ultralytics/utils/export/imx.py +294 -0
  196. ultralytics/utils/export/tensorflow.py +217 -0
  197. ultralytics/utils/files.py +33 -36
  198. ultralytics/utils/git.py +137 -0
  199. ultralytics/utils/instance.py +105 -120
  200. ultralytics/utils/logger.py +404 -0
  201. ultralytics/utils/loss.py +99 -61
  202. ultralytics/utils/metrics.py +649 -478
  203. ultralytics/utils/nms.py +337 -0
  204. ultralytics/utils/ops.py +263 -451
  205. ultralytics/utils/patches.py +70 -31
  206. ultralytics/utils/plotting.py +253 -223
  207. ultralytics/utils/tal.py +48 -61
  208. ultralytics/utils/torch_utils.py +244 -251
  209. ultralytics/utils/tqdm.py +438 -0
  210. ultralytics/utils/triton.py +22 -23
  211. ultralytics/utils/tuner.py +11 -10
  212. dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
  213. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
  214. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
  215. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
ultralytics/solutions/queue_management.py

@@ -1,29 +1,30 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from typing import Any
+
 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
 from ultralytics.utils.plotting import colors
 
 
 class QueueManager(BaseSolution):
-    """
-    Manages queue counting in real-time video streams based on object tracks.
+    """Manages queue counting in real-time video streams based on object tracks.
 
-    This class extends BaseSolution to provide functionality for tracking and counting objects within a specified
-    region in video frames.
+    This class extends BaseSolution to provide functionality for tracking and counting objects within a specified region
+    in video frames.
 
     Attributes:
         counts (int): The current count of objects in the queue.
-        rect_color (Tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
+        rect_color (tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
         region_length (int): The number of points defining the queue region.
-        track_line (List[Tuple[int, int]]): List of track line coordinates.
-        track_history (Dict[int, List[Tuple[int, int]]]): Dictionary storing tracking history for each object.
+        track_line (list[tuple[int, int]]): List of track line coordinates.
+        track_history (dict[int, list[tuple[int, int]]]): Dictionary storing tracking history for each object.
 
     Methods:
-        initialize_region: Initializes the queue region.
-        process: Processes a single frame for queue management.
-        extract_tracks: Extracts object tracks from the current frame.
-        store_tracking_history: Stores the tracking history for an object.
-        display_output: Displays the processed output.
+        initialize_region: Initialize the queue region.
+        process: Process a single frame for queue management.
+        extract_tracks: Extract object tracks from the current frame.
+        store_tracking_history: Store the tracking history for an object.
+        display_output: Display the processed output.
 
     Examples:
         >>> cap = cv2.VideoCapture("path/to/video.mp4")
@@ -35,20 +36,19 @@ class QueueManager(BaseSolution):
         >>> results = queue_manager.process(im0)
     """
 
-    def __init__(self, **kwargs):
-        """Initializes the QueueManager with parameters for tracking and counting objects in a video stream."""
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the QueueManager with parameters for tracking and counting objects in a video stream."""
         super().__init__(**kwargs)
         self.initialize_region()
         self.counts = 0  # Queue counts information
         self.rect_color = (255, 255, 255)  # Rectangle color for visualization
         self.region_length = len(self.region)  # Store region length for further usage
 
-    def process(self, im0):
-        """
-        Process queue management for a single frame of video.
+    def process(self, im0) -> SolutionResults:
+        """Process queue management for a single frame of video.
 
         Args:
-            im0 (numpy.ndarray): Input image for processing, typically a frame from a video stream.
+            im0 (np.ndarray): Input image for processing, typically a frame from a video stream.
 
         Returns:
             (SolutionResults): Contains processed image `im0`, 'queue_count' (int, number of objects in the queue) and
@@ -81,7 +81,7 @@ class QueueManager(BaseSolution):
 
         # Display queue counts
         annotator.queue_counts_display(
-            f"Queue Counts : {str(self.counts)}",
+            f"Queue Counts : {self.counts}",
             points=self.region,
             region_color=self.rect_color,
             txt_color=(104, 31, 17),
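
The queue-management changes are cosmetic: built-in generics (`tuple`, `list`, `dict`) in the docstrings, an `Any`-typed `**kwargs`, an explicit `SolutionResults` return annotation, and a redundant `str()` dropped from an f-string. A minimal usage sketch assembled from the docstring example, assuming the standard `BaseSolution` kwargs `model` and `region` (the region points and video path are illustrative):

import cv2

from ultralytics import solutions

cap = cv2.VideoCapture("path/to/video.mp4")
queue_region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]  # illustrative polygon
queue_manager = solutions.QueueManager(model="yolo11n.pt", region=queue_region)

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    results = queue_manager.process(im0)  # returns SolutionResults as of 8.3.224
    print(results.queue_count)  # objects currently inside the region

cap.release()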
ultralytics/solutions/region_counter.py

@@ -1,5 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
+from typing import Any
+
 import numpy as np
 
 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
@@ -7,8 +11,7 @@ from ultralytics.utils.plotting import colors
 
 
 class RegionCounter(BaseSolution):
-    """
-    A class for real-time counting of objects within user-defined regions in a video stream.
+    """A class for real-time counting of objects within user-defined regions in a video stream.
 
     This class inherits from `BaseSolution` and provides functionality to define polygonal regions in a video frame,
     track objects, and count those objects that pass through each defined region. Useful for applications requiring
@@ -17,38 +20,54 @@ class RegionCounter(BaseSolution):
     Attributes:
         region_template (dict): Template for creating new counting regions with default attributes including name,
             polygon coordinates, and display colors.
-        counting_regions (list): List storing all defined regions, where each entry is based on `region_template`
-            and includes specific region settings like name, coordinates, and color.
+        counting_regions (list): List storing all defined regions, where each entry is based on `region_template` and
+            includes specific region settings like name, coordinates, and color.
         region_counts (dict): Dictionary storing the count of objects for each named region.
 
     Methods:
-        add_region: Adds a new counting region with specified attributes.
-        process: Processes video frames to count objects in each region.
+        add_region: Add a new counting region with specified attributes.
+        process: Process video frames to count objects in each region.
+        initialize_regions: Initialize zones to count the objects in each one. Zones could be multiple as well.
+
+    Examples:
+        Initialize a RegionCounter and add a counting region
+        >>> counter = RegionCounter()
+        >>> counter.add_region("Zone1", [(100, 100), (200, 100), (200, 200), (100, 200)], (255, 0, 0), (255, 255, 255))
+        >>> results = counter.process(frame)
+        >>> print(f"Total tracks: {results.total_tracks}")
     """
 
-    def __init__(self, **kwargs):
-        """Initializes the RegionCounter class for real-time counting in different regions of video streams."""
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the RegionCounter for real-time object counting in user-defined regions."""
         super().__init__(**kwargs)
         self.region_template = {
             "name": "Default Region",
             "polygon": None,
             "counts": 0,
-            "dragging": False,
             "region_color": (255, 255, 255),
             "text_color": (0, 0, 0),
         }
         self.region_counts = {}
         self.counting_regions = []
+        self.initialize_regions()
 
-    def add_region(self, name, polygon_points, region_color, text_color):
-        """
-        Add a new region to the counting list based on the provided template with specific attributes.
+    def add_region(
+        self,
+        name: str,
+        polygon_points: list[tuple],
+        region_color: tuple[int, int, int],
+        text_color: tuple[int, int, int],
+    ) -> dict[str, Any]:
+        """Add a new region to the counting list based on the provided template with specific attributes.
 
         Args:
            name (str): Name assigned to the new region.
-            polygon_points (List[Tuple]): List of (x, y) coordinates defining the region's polygon.
-            region_color (tuple): BGR color for region visualization.
-            text_color (tuple): BGR color for the text within the region.
+            polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon.
+            region_color (tuple[int, int, int]): BGR color for region visualization.
+            text_color (tuple[int, int, int]): BGR color for the text within the region.
+
+        Returns:
+            (dict[str, any]): Returns a dictionary including the region information i.e. name, region_color etc.
         """
         region = self.region_template.copy()
         region.update(
@@ -60,58 +79,52 @@ class RegionCounter(BaseSolution):
             }
         )
         self.counting_regions.append(region)
+        return region
 
-    def process(self, im0):
-        """
-        Process the input frame to detect and count objects within each defined region.
+    def initialize_regions(self):
+        """Initialize regions only once."""
+        if self.region is None:
+            self.initialize_region()
+        if not isinstance(self.region, dict):  # Ensure self.region is initialized and structured as a dictionary
+            self.region = {"Region#01": self.region}
+        for i, (name, pts) in enumerate(self.region.items()):
+            region = self.add_region(name, pts, colors(i, True), (255, 255, 255))
+            region["prepared_polygon"] = self.prep(region["polygon"])
+
+    def process(self, im0: np.ndarray) -> SolutionResults:
+        """Process the input frame to detect and count objects within each defined region.
 
         Args:
             im0 (np.ndarray): Input image frame where objects and regions are annotated.
 
         Returns:
-            (SolutionResults): Contains processed image `plot_im`, 'total_tracks' (int, total number of tracked objects),
-            and 'region_counts' (dict, counts of objects per region).
+            (SolutionResults): Contains processed image `plot_im`, 'total_tracks' (int, total number of tracked
+                objects), and 'region_counts' (dict, counts of objects per region).
         """
         self.extract_tracks(im0)
         annotator = SolutionAnnotator(im0, line_width=self.line_width)
 
-        # Ensure self.region is initialized and structured as a dictionary
-        if not isinstance(self.region, dict):
-            self.region = {"Region#01": self.region or self.initialize_region()}
-
-        # Draw only valid regions
-        for idx, (region_name, reg_pts) in enumerate(self.region.items(), start=1):
-            color = colors(idx, True)
-            annotator.draw_region(reg_pts, color, self.line_width * 2)
-            self.add_region(region_name, reg_pts, color, annotator.get_txt_color())
-
-        # Prepare regions for containment check (only process valid ones)
-        for region in self.counting_regions:
-            if "prepared_polygon" not in region:
-                region["prepared_polygon"] = self.prep(region["polygon"])
-
-        # Convert bounding boxes to NumPy array for center points
-        boxes_np = np.array([((box[0] + box[2]) / 2, (box[1] + box[3]) / 2) for box in self.boxes], dtype=np.float32)
-        points = [self.Point(pt) for pt in boxes_np]  # Convert centers to Point objects
-
-        # Process bounding boxes & check containment
-        if points:
-            for point, cls, track_id, box, conf in zip(points, self.clss, self.track_ids, self.boxes, self.confs):
-                annotator.box_label(box, label=self.adjust_box_label(cls, conf, track_id), color=colors(track_id, True))
-
-                for region in self.counting_regions:
-                    if region["prepared_polygon"].contains(point):
-                        region["counts"] += 1
-                        self.region_counts[region["name"]] = region["counts"]
+        for box, cls, track_id, conf in zip(self.boxes, self.clss, self.track_ids, self.confs):
+            annotator.box_label(box, label=self.adjust_box_label(cls, conf, track_id), color=colors(track_id, True))
+            center = self.Point(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
+            for region in self.counting_regions:
+                if region["prepared_polygon"].contains(center):
+                    region["counts"] += 1
+                    self.region_counts[region["name"]] = region["counts"]
 
         # Display region counts
         for region in self.counting_regions:
-            annotator.text_label(
-                region["polygon"].bounds,
+            poly = region["polygon"]
+            pts = list(map(tuple, np.array(poly.exterior.coords, dtype=np.int32)))
+            (x1, y1), (x2, y2) = [(int(poly.centroid.x), int(poly.centroid.y))] * 2
+            annotator.draw_region(pts, region["region_color"], self.line_width * 2)
+            annotator.adaptive_label(
+                [x1, y1, x2, y2],
                 label=str(region["counts"]),
                 color=region["region_color"],
                 txt_color=region["text_color"],
                 margin=self.line_width * 4,
+                shape="rect",
             )
             region["counts"] = 0  # Reset for next frame
         plot_im = annotator.result()
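
Unlike the queue manager, this is a behavioral change, not just a typing cleanup: region setup moves out of the per-frame `process` loop into the new `initialize_regions` method called once from `__init__`, `add_region` now returns the region dict so its prepared polygon can be cached immediately, the unused `dragging` key is gone, and counts are labeled with `adaptive_label` at each polygon's centroid rather than `text_label` on its bounds. A sketch of counting across two named zones, assuming `region` accepts a dict of named point lists as `initialize_regions` implies (a bare point list becomes "Region#01"; names and coordinates are illustrative):

import cv2

from ultralytics import solutions

regions = {
    "Entrance": [(50, 50), (250, 50), (250, 250), (50, 250)],
    "Checkout": [(300, 50), (500, 50), (500, 250), (300, 250)],
}
counter = solutions.RegionCounter(model="yolo11n.pt", region=regions)

cap = cv2.VideoCapture("path/to/video.mp4")
while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    results = counter.process(im0)
    print(results.region_counts)  # per-region totals, e.g. {"Entrance": 3, "Checkout": 1}
cap.release()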
ultralytics/solutions/security_alarm.py

@@ -1,13 +1,14 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from typing import Any
+
 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
 from ultralytics.utils import LOGGER
 from ultralytics.utils.plotting import colors
 
 
 class SecurityAlarm(BaseSolution):
-    """
-    A class to manage security alarm functionalities for real-time monitoring.
+    """A class to manage security alarm functionalities for real-time monitoring.
 
     This class extends the BaseSolution class and provides features to monitor objects in a frame, send email
     notifications when specific thresholds are exceeded for total detections, and annotate the output frame for
@@ -32,9 +33,8 @@ class SecurityAlarm(BaseSolution):
         >>> results = security.process(frame)
     """
 
-    def __init__(self, **kwargs):
-        """
-        Initialize the SecurityAlarm class with parameters for real-time object monitoring.
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the SecurityAlarm class with parameters for real-time object monitoring.
 
         Args:
             **kwargs (Any): Additional keyword arguments passed to the parent class.
@@ -46,17 +46,16 @@ class SecurityAlarm(BaseSolution):
         self.to_email = ""
         self.from_email = ""
 
-    def authenticate(self, from_email, password, to_email):
-        """
-        Authenticate the email server for sending alert notifications.
+    def authenticate(self, from_email: str, password: str, to_email: str) -> None:
+        """Authenticate the email server for sending alert notifications.
+
+        This method initializes a secure connection with the SMTP server and logs in using the provided credentials.
 
         Args:
             from_email (str): Sender's email address.
             password (str): Password for the sender's email account.
             to_email (str): Recipient's email address.
 
-        This method initializes a secure connection with the SMTP server and logs in using the provided credentials.
-
         Examples:
             >>> alarm = SecurityAlarm()
             >>> alarm.authenticate("sender@example.com", "password123", "recipient@example.com")
@@ -69,17 +68,16 @@ class SecurityAlarm(BaseSolution):
         self.to_email = to_email
         self.from_email = from_email
 
-    def send_email(self, im0, records=5):
-        """
-        Send an email notification with an image attachment indicating the number of objects detected.
-
-        Args:
-            im0 (numpy.ndarray): The input image or frame to be attached to the email.
-            records (int): The number of detected objects to be included in the email message.
+    def send_email(self, im0, records: int = 5) -> None:
+        """Send an email notification with an image attachment indicating the number of objects detected.
 
         This method encodes the input image, composes the email message with details about the detection, and sends it
         to the specified recipient.
 
+        Args:
+            im0 (np.ndarray): The input image or frame to be attached to the email.
+            records (int, optional): The number of detected objects to be included in the email message.
+
         Examples:
             >>> alarm = SecurityAlarm()
             >>> frame = cv2.imread("path/to/image.jpg")
@@ -114,21 +112,20 @@ class SecurityAlarm(BaseSolution):
         except Exception as e:
             LOGGER.error(f"Failed to send email: {e}")
 
-    def process(self, im0):
-        """
-        Monitor the frame, process object detections, and trigger alerts if thresholds are exceeded.
+    def process(self, im0) -> SolutionResults:
+        """Monitor the frame, process object detections, and trigger alerts if thresholds are exceeded.
+
+        This method processes the input frame, extracts detections, annotates the frame with bounding boxes, and sends
+        an email notification if the number of detected objects surpasses the specified threshold and an alert has not
+        already been sent.
 
         Args:
-            im0 (numpy.ndarray): The input image or frame to be processed and annotated.
+            im0 (np.ndarray): The input image or frame to be processed and annotated.
 
         Returns:
             (SolutionResults): Contains processed image `plot_im`, 'total_tracks' (total number of tracked objects) and
                 'email_sent' (whether an email alert was triggered).
 
-        This method processes the input frame, extracts detections, annotates the frame with bounding boxes, and sends
-        an email notification if the number of detected objects surpasses the specified threshold and an alert has not
-        already been sent.
-
         Examples:
             >>> alarm = SecurityAlarm()
             >>> frame = cv2.imread("path/to/image.jpg")
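
The SecurityAlarm edits are documentation-order and typing fixes: the prose summary moves above the Args block in each docstring, parameters gain annotations, and `numpy.ndarray` becomes `np.ndarray`. A minimal wiring sketch follows; treating `records` as a constructor kwarg for the alert threshold is an assumption inferred from the `send_email(records=...)` signature, and the SMTP credentials are placeholders (most providers require an app-specific password):

import cv2

from ultralytics import solutions

alarm = solutions.SecurityAlarm(model="yolo11n.pt", records=5)  # records threshold is assumed
alarm.authenticate("sender@example.com", "app-password", "recipient@example.com")

cap = cv2.VideoCapture("path/to/video.mp4")
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    results = alarm.process(frame)  # results.email_sent reports whether an alert fired
cap.release()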
ultralytics/solutions/similarity_search.py

@@ -1,87 +1,104 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import os
 from pathlib import Path
+from typing import Any
 
 import numpy as np
-import torch
 from PIL import Image
 
 from ultralytics.data.utils import IMG_FORMATS
-from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER, TORCH_VERSION
 from ultralytics.utils.checks import check_requirements
-from ultralytics.utils.torch_utils import select_device
+from ultralytics.utils.torch_utils import TORCH_2_4, select_device
 
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # Avoid OpenMP conflict on some systems
 
 
-class VisualAISearch(BaseSolution):
-    """
-    VisualAISearch leverages OpenCLIP to generate high-quality image and text embeddings, aligning them in a shared
-    semantic space. It then uses FAISS to perform fast and scalable similarity-based retrieval, allowing users to search
-    large collections of images using natural language queries with high accuracy and speed.
+class VisualAISearch:
+    """A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
+    FAISS for fast similarity-based retrieval.
+
+    This class aligns image and text embeddings in a shared semantic space, enabling users to search large collections
+    of images using natural language queries with high accuracy and speed.
 
     Attributes:
         data (str): Directory containing images.
         device (str): Computation device, e.g., 'cpu' or 'cuda'.
+        faiss_index (str): Path to the FAISS index file.
+        data_path_npy (str): Path to the numpy file storing image paths.
+        data_dir (Path): Path object for the data directory.
+        model: Loaded CLIP model.
+        index: FAISS index for similarity search.
+        image_paths (list[str]): List of image file paths.
+
+    Methods:
+        extract_image_feature: Extract CLIP embedding from an image.
+        extract_text_feature: Extract CLIP embedding from text.
+        load_or_build_index: Load existing FAISS index or build new one.
+        search: Perform semantic search for similar images.
+
+    Examples:
+        Initialize and search for images
+        >>> searcher = VisualAISearch(data="path/to/images", device="cuda")
+        >>> results = searcher.search("a cat sitting on a chair", k=10)
     """
 
-    def __init__(self, **kwargs):
-        """Initializes the VisualAISearch class with the FAISS index file and CLIP model."""
-        super().__init__(**kwargs)
-        check_requirements(["git+https://github.com/ultralytics/CLIP.git", "faiss-cpu"])
-        import clip
-        import faiss
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the VisualAISearch class with FAISS index and CLIP model."""
+        assert TORCH_2_4, f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})"
+        from ultralytics.nn.text_model import build_text_model
 
-        self.faiss = faiss
-        self.clip = clip
+        check_requirements("faiss-cpu")
 
+        self.faiss = __import__("faiss")
         self.faiss_index = "faiss.index"
         self.data_path_npy = "paths.npy"
-        self.model_name = "ViT-B/32"
-        self.data_dir = Path(self.CFG["data"])
-        self.device = select_device(self.CFG["device"])
+        self.data_dir = Path(kwargs.get("data", "images"))
+        self.device = select_device(kwargs.get("device", "cpu"))
 
         if not self.data_dir.exists():
             from ultralytics.utils import ASSETS_URL
 
-            self.LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
+            LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
             from ultralytics.utils.downloads import safe_download
 
             safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
             self.data_dir = Path("images")
 
-        self.model, self.preprocess = clip.load(self.model_name, device=self.device)
+        self.model = build_text_model("clip:ViT-B/32", device=self.device)
 
         self.index = None
         self.image_paths = []
 
         self.load_or_build_index()
 
-    def extract_image_feature(self, path):
-        """Extract CLIP image embedding."""
-        image = Image.open(path)
-        tensor = self.preprocess(image).unsqueeze(0).to(self.device)
-        with torch.no_grad():
-            return self.model.encode_image(tensor).cpu().numpy()
-
-    def extract_text_feature(self, text):
-        """Extract CLIP text embedding."""
-        tokens = self.clip.tokenize([text]).to(self.device)
-        with torch.no_grad():
-            return self.model.encode_text(tokens).cpu().numpy()
-
-    def load_or_build_index(self):
-        """Loads FAISS index or builds a new one from image features."""
+    def extract_image_feature(self, path: Path) -> np.ndarray:
+        """Extract CLIP image embedding from the given image path."""
+        return self.model.encode_image(Image.open(path)).detach().cpu().numpy()
+
+    def extract_text_feature(self, text: str) -> np.ndarray:
+        """Extract CLIP text embedding from the given text query."""
+        return self.model.encode_text(self.model.tokenize([text])).detach().cpu().numpy()
+
+    def load_or_build_index(self) -> None:
+        """Load existing FAISS index or build a new one from image features.
+
+        Checks if FAISS index and image paths exist on disk. If found, loads them directly. Otherwise, builds a new
+        index by extracting features from all images in the data directory, normalizes the features, and saves both the
+        index and image paths for future use.
+        """
         # Check if the FAISS index and corresponding image paths already exist
         if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
-            self.LOGGER.info("Loading existing FAISS index...")
+            LOGGER.info("Loading existing FAISS index...")
            self.index = self.faiss.read_index(self.faiss_index)  # Load the FAISS index from disk
            self.image_paths = np.load(self.data_path_npy)  # Load the saved image path list
            return  # Exit the function as the index is successfully loaded
 
         # If the index doesn't exist, start building it from scratch
-        self.LOGGER.info("Building FAISS index from images...")
+        LOGGER.info("Building FAISS index from images...")
         vectors = []  # List to store feature vectors of images
 
         # Iterate over all image files in the data directory
@@ -94,7 +111,7 @@ class VisualAISearch(BaseSolution):
                 vectors.append(self.extract_image_feature(file))
                 self.image_paths.append(file.name)  # Store the corresponding image name
             except Exception as e:
-                self.LOGGER.warning(f"Skipping {file.name}: {e}")
+                LOGGER.warning(f"Skipping {file.name}: {e}")
 
         # If no vectors were successfully created, raise an error
         if not vectors:
@@ -108,10 +125,24 @@ class VisualAISearch(BaseSolution):
         self.faiss.write_index(self.index, self.faiss_index)  # Save the newly built FAISS index to disk
         np.save(self.data_path_npy, np.array(self.image_paths))  # Save the list of image paths to disk
 
-        self.LOGGER.info(f"Indexed {len(self.image_paths)} images.")
+        LOGGER.info(f"Indexed {len(self.image_paths)} images.")
+
+    def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> list[str]:
+        """Return top-k semantically similar images to the given query.
 
-    def search(self, query, k=30, similarity_thresh=0.1):
-        """Returns top-k semantically similar images to the given query."""
+        Args:
+            query (str): Natural language text query to search for.
+            k (int, optional): Maximum number of results to return.
+            similarity_thresh (float, optional): Minimum similarity threshold for filtering results.
+
+        Returns:
+            (list[str]): List of image filenames ranked by similarity score.
+
+        Examples:
+            Search for images matching a query
+            >>> searcher = VisualAISearch(data="images")
+            >>> results = searcher.search("red car", k=5, similarity_thresh=0.2)
+        """
         text_feat = self.extract_text_feature(query).astype("float32")
         self.faiss.normalize_L2(text_feat)
@@ -121,31 +152,47 @@ class VisualAISearch(BaseSolution):
         ]
         results.sort(key=lambda x: x[1], reverse=True)
 
-        self.LOGGER.info("\nRanked Results:")
+        LOGGER.info("\nRanked Results:")
         for name, score in results:
-            self.LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
+            LOGGER.info(f"  - {name} | Similarity: {score:.4f}")
 
         return [r[0] for r in results]
 
-    def __call__(self, query):
-        """Direct call for search function."""
+    def __call__(self, query: str) -> list[str]:
+        """Direct call interface for the search function."""
         return self.search(query)
 
 
 class SearchApp:
-    """
-    A Flask-based web interface powers the semantic image search experience, enabling users to input natural language
-    queries and instantly view the most relevant images retrieved from the indexed database—all through a clean,
-    responsive, and easily customizable frontend.
+    """A Flask-based web interface for semantic image search with natural language queries.
 
-    Args:
-        data (str): Path to images to index and search.
-        device (str): Device to run inference on (e.g. 'cpu', 'cuda').
+    This class provides a clean, responsive frontend that enables users to input natural language queries and instantly
+    view the most relevant images retrieved from the indexed database.
+
+    Attributes:
+        render_template: Flask template rendering function.
+        request: Flask request object.
+        searcher (VisualAISearch): Instance of the VisualAISearch class.
+        app (Flask): Flask application instance.
+
+    Methods:
+        index: Process user queries and display search results.
+        run: Start the Flask web application.
+
+    Examples:
+        Start a search application
+        >>> app = SearchApp(data="path/to/images", device="cuda")
+        >>> app.run(debug=True)
     """
 
-    def __init__(self, data="images", device=None):
-        """Initialization of the VisualAISearch class for performing semantic image search."""
-        check_requirements("flask")
+    def __init__(self, data: str = "images", device: str | None = None) -> None:
+        """Initialize the SearchApp with VisualAISearch backend.
+
+        Args:
+            data (str, optional): Path to directory containing images to index and search.
+            device (str, optional): Device to run inference on (e.g. 'cpu', 'cuda').
+        """
+        check_requirements("flask>=3.0.1")
         from flask import Flask, render_template, request
 
         self.render_template = render_template
@@ -159,14 +206,14 @@ class SearchApp:
         )
         self.app.add_url_rule("/", view_func=self.index, methods=["GET", "POST"])
 
-    def index(self):
-        """Function to process the user query and display output."""
+    def index(self) -> str:
+        """Process user query and display search results in the web interface."""
         results = []
         if self.request.method == "POST":
            query = self.request.form.get("query", "").strip()
            results = self.searcher(query)
         return self.render_template("similarity-search.html", results=results)
 
-    def run(self, debug=False):
-        """Runs the Flask web app."""
+    def run(self, debug: bool = False) -> None:
+        """Start the Flask web application server."""
         self.app.run(debug=debug)
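
This is the largest refactor of the four: `VisualAISearch` no longer subclasses `BaseSolution`, drops the `git+https://github.com/ultralytics/CLIP.git` requirement in favor of the bundled `ultralytics.nn.text_model.build_text_model("clip:ViT-B/32")`, asserts torch>=2.4, and reads `data` and `device` straight from `**kwargs` instead of `self.CFG`. A usage sketch under those assumptions; paths are illustrative, the classes are imported from the module shown in this diff, and faiss-cpu is required (plus flask>=3.0.1 for the web app):

from ultralytics.solutions.similarity_search import SearchApp, VisualAISearch

# Headless search: builds (or reloads) faiss.index and paths.npy in the working directory.
searcher = VisualAISearch(data="path/to/images", device="cpu")
hits = searcher.search("a red car parked outside", k=5, similarity_thresh=0.1)
print(hits)  # image filenames ranked by CLIP similarity

# Or serve the Flask frontend on top of the same index:
app = SearchApp(data="path/to/images", device="cpu")
app.run(debug=True)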