dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.3.248__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +13 -14
  2. dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
  3. tests/__init__.py +5 -7
  4. tests/conftest.py +8 -15
  5. tests/test_cli.py +1 -1
  6. tests/test_cuda.py +5 -8
  7. tests/test_engine.py +1 -1
  8. tests/test_exports.py +57 -12
  9. tests/test_integrations.py +4 -4
  10. tests/test_python.py +84 -53
  11. tests/test_solutions.py +160 -151
  12. ultralytics/__init__.py +1 -1
  13. ultralytics/cfg/__init__.py +56 -62
  14. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  15. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  16. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  17. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  18. ultralytics/cfg/datasets/VOC.yaml +15 -16
  19. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  20. ultralytics/cfg/datasets/coco-pose.yaml +21 -0
  21. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  22. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  23. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  24. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  25. ultralytics/cfg/datasets/dota8.yaml +2 -2
  26. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  27. ultralytics/cfg/datasets/kitti.yaml +27 -0
  28. ultralytics/cfg/datasets/lvis.yaml +5 -5
  29. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  30. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  31. ultralytics/cfg/datasets/xView.yaml +16 -16
  32. ultralytics/cfg/default.yaml +1 -1
  33. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  34. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  35. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  36. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  37. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  38. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  39. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  40. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  41. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  42. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  43. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  44. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  45. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  46. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  47. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  48. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  49. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  50. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  51. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  52. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  53. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  54. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  55. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  56. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  57. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  58. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  59. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  60. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  61. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  62. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  63. ultralytics/data/__init__.py +4 -4
  64. ultralytics/data/annotator.py +3 -4
  65. ultralytics/data/augment.py +285 -475
  66. ultralytics/data/base.py +18 -26
  67. ultralytics/data/build.py +147 -25
  68. ultralytics/data/converter.py +36 -46
  69. ultralytics/data/dataset.py +46 -74
  70. ultralytics/data/loaders.py +42 -49
  71. ultralytics/data/split.py +5 -6
  72. ultralytics/data/split_dota.py +8 -15
  73. ultralytics/data/utils.py +34 -43
  74. ultralytics/engine/exporter.py +319 -237
  75. ultralytics/engine/model.py +148 -188
  76. ultralytics/engine/predictor.py +29 -38
  77. ultralytics/engine/results.py +177 -311
  78. ultralytics/engine/trainer.py +83 -59
  79. ultralytics/engine/tuner.py +23 -34
  80. ultralytics/engine/validator.py +39 -22
  81. ultralytics/hub/__init__.py +16 -19
  82. ultralytics/hub/auth.py +6 -12
  83. ultralytics/hub/google/__init__.py +7 -10
  84. ultralytics/hub/session.py +15 -25
  85. ultralytics/hub/utils.py +5 -8
  86. ultralytics/models/__init__.py +1 -1
  87. ultralytics/models/fastsam/__init__.py +1 -1
  88. ultralytics/models/fastsam/model.py +8 -10
  89. ultralytics/models/fastsam/predict.py +17 -29
  90. ultralytics/models/fastsam/utils.py +1 -2
  91. ultralytics/models/fastsam/val.py +5 -7
  92. ultralytics/models/nas/__init__.py +1 -1
  93. ultralytics/models/nas/model.py +5 -8
  94. ultralytics/models/nas/predict.py +7 -9
  95. ultralytics/models/nas/val.py +1 -2
  96. ultralytics/models/rtdetr/__init__.py +1 -1
  97. ultralytics/models/rtdetr/model.py +5 -8
  98. ultralytics/models/rtdetr/predict.py +15 -19
  99. ultralytics/models/rtdetr/train.py +10 -13
  100. ultralytics/models/rtdetr/val.py +21 -23
  101. ultralytics/models/sam/__init__.py +15 -2
  102. ultralytics/models/sam/amg.py +14 -20
  103. ultralytics/models/sam/build.py +26 -19
  104. ultralytics/models/sam/build_sam3.py +377 -0
  105. ultralytics/models/sam/model.py +29 -32
  106. ultralytics/models/sam/modules/blocks.py +83 -144
  107. ultralytics/models/sam/modules/decoders.py +19 -37
  108. ultralytics/models/sam/modules/encoders.py +44 -101
  109. ultralytics/models/sam/modules/memory_attention.py +16 -30
  110. ultralytics/models/sam/modules/sam.py +200 -73
  111. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  112. ultralytics/models/sam/modules/transformer.py +18 -28
  113. ultralytics/models/sam/modules/utils.py +174 -50
  114. ultralytics/models/sam/predict.py +2248 -350
  115. ultralytics/models/sam/sam3/__init__.py +3 -0
  116. ultralytics/models/sam/sam3/decoder.py +546 -0
  117. ultralytics/models/sam/sam3/encoder.py +529 -0
  118. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  119. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  120. ultralytics/models/sam/sam3/model_misc.py +199 -0
  121. ultralytics/models/sam/sam3/necks.py +129 -0
  122. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  123. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  124. ultralytics/models/sam/sam3/vitdet.py +547 -0
  125. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  126. ultralytics/models/utils/loss.py +14 -26
  127. ultralytics/models/utils/ops.py +13 -17
  128. ultralytics/models/yolo/__init__.py +1 -1
  129. ultralytics/models/yolo/classify/predict.py +9 -12
  130. ultralytics/models/yolo/classify/train.py +11 -32
  131. ultralytics/models/yolo/classify/val.py +29 -28
  132. ultralytics/models/yolo/detect/predict.py +7 -10
  133. ultralytics/models/yolo/detect/train.py +11 -20
  134. ultralytics/models/yolo/detect/val.py +70 -58
  135. ultralytics/models/yolo/model.py +36 -53
  136. ultralytics/models/yolo/obb/predict.py +5 -14
  137. ultralytics/models/yolo/obb/train.py +11 -14
  138. ultralytics/models/yolo/obb/val.py +39 -36
  139. ultralytics/models/yolo/pose/__init__.py +1 -1
  140. ultralytics/models/yolo/pose/predict.py +6 -21
  141. ultralytics/models/yolo/pose/train.py +10 -15
  142. ultralytics/models/yolo/pose/val.py +38 -57
  143. ultralytics/models/yolo/segment/predict.py +14 -18
  144. ultralytics/models/yolo/segment/train.py +3 -6
  145. ultralytics/models/yolo/segment/val.py +93 -45
  146. ultralytics/models/yolo/world/train.py +8 -14
  147. ultralytics/models/yolo/world/train_world.py +11 -34
  148. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  149. ultralytics/models/yolo/yoloe/predict.py +16 -23
  150. ultralytics/models/yolo/yoloe/train.py +30 -43
  151. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  152. ultralytics/models/yolo/yoloe/val.py +15 -20
  153. ultralytics/nn/__init__.py +7 -7
  154. ultralytics/nn/autobackend.py +145 -77
  155. ultralytics/nn/modules/__init__.py +60 -60
  156. ultralytics/nn/modules/activation.py +4 -6
  157. ultralytics/nn/modules/block.py +132 -216
  158. ultralytics/nn/modules/conv.py +52 -97
  159. ultralytics/nn/modules/head.py +50 -103
  160. ultralytics/nn/modules/transformer.py +76 -88
  161. ultralytics/nn/modules/utils.py +16 -21
  162. ultralytics/nn/tasks.py +94 -154
  163. ultralytics/nn/text_model.py +40 -67
  164. ultralytics/solutions/__init__.py +12 -12
  165. ultralytics/solutions/ai_gym.py +11 -17
  166. ultralytics/solutions/analytics.py +15 -16
  167. ultralytics/solutions/config.py +5 -6
  168. ultralytics/solutions/distance_calculation.py +10 -13
  169. ultralytics/solutions/heatmap.py +7 -13
  170. ultralytics/solutions/instance_segmentation.py +5 -8
  171. ultralytics/solutions/object_blurrer.py +7 -10
  172. ultralytics/solutions/object_counter.py +12 -19
  173. ultralytics/solutions/object_cropper.py +8 -14
  174. ultralytics/solutions/parking_management.py +33 -31
  175. ultralytics/solutions/queue_management.py +10 -12
  176. ultralytics/solutions/region_counter.py +9 -12
  177. ultralytics/solutions/security_alarm.py +15 -20
  178. ultralytics/solutions/similarity_search.py +10 -15
  179. ultralytics/solutions/solutions.py +75 -74
  180. ultralytics/solutions/speed_estimation.py +7 -10
  181. ultralytics/solutions/streamlit_inference.py +2 -4
  182. ultralytics/solutions/templates/similarity-search.html +7 -18
  183. ultralytics/solutions/trackzone.py +7 -10
  184. ultralytics/solutions/vision_eye.py +5 -8
  185. ultralytics/trackers/__init__.py +1 -1
  186. ultralytics/trackers/basetrack.py +3 -5
  187. ultralytics/trackers/bot_sort.py +10 -27
  188. ultralytics/trackers/byte_tracker.py +14 -30
  189. ultralytics/trackers/track.py +3 -6
  190. ultralytics/trackers/utils/gmc.py +11 -22
  191. ultralytics/trackers/utils/kalman_filter.py +37 -48
  192. ultralytics/trackers/utils/matching.py +12 -15
  193. ultralytics/utils/__init__.py +116 -116
  194. ultralytics/utils/autobatch.py +2 -4
  195. ultralytics/utils/autodevice.py +17 -18
  196. ultralytics/utils/benchmarks.py +32 -46
  197. ultralytics/utils/callbacks/base.py +8 -10
  198. ultralytics/utils/callbacks/clearml.py +5 -13
  199. ultralytics/utils/callbacks/comet.py +32 -46
  200. ultralytics/utils/callbacks/dvc.py +13 -18
  201. ultralytics/utils/callbacks/mlflow.py +4 -5
  202. ultralytics/utils/callbacks/neptune.py +7 -15
  203. ultralytics/utils/callbacks/platform.py +314 -38
  204. ultralytics/utils/callbacks/raytune.py +3 -4
  205. ultralytics/utils/callbacks/tensorboard.py +23 -31
  206. ultralytics/utils/callbacks/wb.py +10 -13
  207. ultralytics/utils/checks.py +99 -76
  208. ultralytics/utils/cpu.py +3 -8
  209. ultralytics/utils/dist.py +8 -12
  210. ultralytics/utils/downloads.py +20 -30
  211. ultralytics/utils/errors.py +6 -14
  212. ultralytics/utils/events.py +2 -4
  213. ultralytics/utils/export/__init__.py +4 -236
  214. ultralytics/utils/export/engine.py +237 -0
  215. ultralytics/utils/export/imx.py +91 -55
  216. ultralytics/utils/export/tensorflow.py +231 -0
  217. ultralytics/utils/files.py +24 -28
  218. ultralytics/utils/git.py +9 -11
  219. ultralytics/utils/instance.py +30 -51
  220. ultralytics/utils/logger.py +212 -114
  221. ultralytics/utils/loss.py +14 -22
  222. ultralytics/utils/metrics.py +126 -155
  223. ultralytics/utils/nms.py +13 -16
  224. ultralytics/utils/ops.py +107 -165
  225. ultralytics/utils/patches.py +33 -21
  226. ultralytics/utils/plotting.py +72 -80
  227. ultralytics/utils/tal.py +25 -39
  228. ultralytics/utils/torch_utils.py +52 -78
  229. ultralytics/utils/tqdm.py +20 -20
  230. ultralytics/utils/triton.py +13 -19
  231. ultralytics/utils/tuner.py +17 -5
  232. dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
  233. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
  234. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
  235. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
  236. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
@@ -13,58 +13,56 @@ from .conv import Conv, DWConv, GhostConv, LightConv, RepConv, autopad
13
13
  from .transformer import TransformerBlock
14
14
 
15
15
  __all__ = (
16
- "DFL",
17
- "HGBlock",
18
- "HGStem",
19
- "SPP",
20
- "SPPF",
21
16
  "C1",
22
17
  "C2",
18
+ "C2PSA",
23
19
  "C3",
20
+ "C3TR",
21
+ "CIB",
22
+ "DFL",
23
+ "ELAN1",
24
+ "PSA",
25
+ "SPP",
26
+ "SPPELAN",
27
+ "SPPF",
28
+ "AConv",
29
+ "ADown",
30
+ "Attention",
31
+ "BNContrastiveHead",
32
+ "Bottleneck",
33
+ "BottleneckCSP",
24
34
  "C2f",
25
35
  "C2fAttn",
26
- "ImagePoolingAttn",
27
- "ContrastiveHead",
28
- "BNContrastiveHead",
29
- "C3x",
30
- "C3TR",
36
+ "C2fCIB",
37
+ "C2fPSA",
31
38
  "C3Ghost",
39
+ "C3k2",
40
+ "C3x",
41
+ "CBFuse",
42
+ "CBLinear",
43
+ "ContrastiveHead",
32
44
  "GhostBottleneck",
33
- "Bottleneck",
34
- "BottleneckCSP",
45
+ "HGBlock",
46
+ "HGStem",
47
+ "ImagePoolingAttn",
35
48
  "Proto",
36
49
  "RepC3",
37
- "ResNetLayer",
38
50
  "RepNCSPELAN4",
39
- "ELAN1",
40
- "ADown",
41
- "AConv",
42
- "SPPELAN",
43
- "CBFuse",
44
- "CBLinear",
45
- "C3k2",
46
- "C2fPSA",
47
- "C2PSA",
48
51
  "RepVGGDW",
49
- "CIB",
50
- "C2fCIB",
51
- "Attention",
52
- "PSA",
52
+ "ResNetLayer",
53
53
  "SCDown",
54
54
  "TorchVision",
55
55
  )
56
56
 
57
57
 
58
58
  class DFL(nn.Module):
59
- """
60
- Integral module of Distribution Focal Loss (DFL).
59
+ """Integral module of Distribution Focal Loss (DFL).
61
60
 
62
61
  Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
63
62
  """
64
63
 
65
64
  def __init__(self, c1: int = 16):
66
- """
67
- Initialize a convolutional layer with a given number of input channels.
65
+ """Initialize a convolutional layer with a given number of input channels.
68
66
 
69
67
  Args:
70
68
  c1 (int): Number of input channels.
@@ -86,8 +84,7 @@ class Proto(nn.Module):
86
84
  """Ultralytics YOLO models mask Proto module for segmentation models."""
87
85
 
88
86
  def __init__(self, c1: int, c_: int = 256, c2: int = 32):
89
- """
90
- Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
87
+ """Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
91
88
 
92
89
  Args:
93
90
  c1 (int): Input channels.
@@ -106,15 +103,13 @@ class Proto(nn.Module):
106
103
 
107
104
 
108
105
  class HGStem(nn.Module):
109
- """
110
- StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
106
+ """StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
111
107
 
112
108
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
113
109
  """
114
110
 
115
111
  def __init__(self, c1: int, cm: int, c2: int):
116
- """
117
- Initialize the StemBlock of PPHGNetV2.
112
+ """Initialize the StemBlock of PPHGNetV2.
118
113
 
119
114
  Args:
120
115
  c1 (int): Input channels.
@@ -144,8 +139,7 @@ class HGStem(nn.Module):
144
139
 
145
140
 
146
141
  class HGBlock(nn.Module):
147
- """
148
- HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
142
+ """HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
149
143
 
150
144
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
151
145
  """
@@ -161,8 +155,7 @@ class HGBlock(nn.Module):
161
155
  shortcut: bool = False,
162
156
  act: nn.Module = nn.ReLU(),
163
157
  ):
164
- """
165
- Initialize HGBlock with specified parameters.
158
+ """Initialize HGBlock with specified parameters.
166
159
 
167
160
  Args:
168
161
  c1 (int): Input channels.
@@ -193,8 +186,7 @@ class SPP(nn.Module):
193
186
  """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
194
187
 
195
188
  def __init__(self, c1: int, c2: int, k: tuple[int, ...] = (5, 9, 13)):
196
- """
197
- Initialize the SPP layer with input/output channels and pooling kernel sizes.
189
+ """Initialize the SPP layer with input/output channels and pooling kernel sizes.
198
190
 
199
191
  Args:
200
192
  c1 (int): Input channels.
@@ -217,8 +209,7 @@ class SPPF(nn.Module):
217
209
  """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
218
210
 
219
211
  def __init__(self, c1: int, c2: int, k: int = 5):
220
- """
221
- Initialize the SPPF layer with given input/output channels and kernel size.
212
+ """Initialize the SPPF layer with given input/output channels and kernel size.
222
213
 
223
214
  Args:
224
215
  c1 (int): Input channels.
@@ -245,8 +236,7 @@ class C1(nn.Module):
245
236
  """CSP Bottleneck with 1 convolution."""
246
237
 
247
238
  def __init__(self, c1: int, c2: int, n: int = 1):
248
- """
249
- Initialize the CSP Bottleneck with 1 convolution.
239
+ """Initialize the CSP Bottleneck with 1 convolution.
250
240
 
251
241
  Args:
252
242
  c1 (int): Input channels.
@@ -267,8 +257,7 @@ class C2(nn.Module):
267
257
  """CSP Bottleneck with 2 convolutions."""
268
258
 
269
259
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
270
- """
271
- Initialize a CSP Bottleneck with 2 convolutions.
260
+ """Initialize a CSP Bottleneck with 2 convolutions.
272
261
 
273
262
  Args:
274
263
  c1 (int): Input channels.
@@ -295,8 +284,7 @@ class C2f(nn.Module):
295
284
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
296
285
 
297
286
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
298
- """
299
- Initialize a CSP bottleneck with 2 convolutions.
287
+ """Initialize a CSP bottleneck with 2 convolutions.
300
288
 
301
289
  Args:
302
290
  c1 (int): Input channels.
@@ -330,8 +318,7 @@ class C3(nn.Module):
330
318
  """CSP Bottleneck with 3 convolutions."""
331
319
 
332
320
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
333
- """
334
- Initialize the CSP Bottleneck with 3 convolutions.
321
+ """Initialize the CSP Bottleneck with 3 convolutions.
335
322
 
336
323
  Args:
337
324
  c1 (int): Input channels.
@@ -357,8 +344,7 @@ class C3x(C3):
357
344
  """C3 module with cross-convolutions."""
358
345
 
359
346
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
360
- """
361
- Initialize C3 module with cross-convolutions.
347
+ """Initialize C3 module with cross-convolutions.
362
348
 
363
349
  Args:
364
350
  c1 (int): Input channels.
@@ -377,8 +363,7 @@ class RepC3(nn.Module):
377
363
  """Rep C3."""
378
364
 
379
365
  def __init__(self, c1: int, c2: int, n: int = 3, e: float = 1.0):
380
- """
381
- Initialize CSP Bottleneck with a single convolution.
366
+ """Initialize CSP Bottleneck with a single convolution.
382
367
 
383
368
  Args:
384
369
  c1 (int): Input channels.
@@ -402,8 +387,7 @@ class C3TR(C3):
402
387
  """C3 module with TransformerBlock()."""
403
388
 
404
389
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
405
- """
406
- Initialize C3 module with TransformerBlock.
390
+ """Initialize C3 module with TransformerBlock.
407
391
 
408
392
  Args:
409
393
  c1 (int): Input channels.
@@ -422,8 +406,7 @@ class C3Ghost(C3):
422
406
  """C3 module with GhostBottleneck()."""
423
407
 
424
408
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
425
- """
426
- Initialize C3 module with GhostBottleneck.
409
+ """Initialize C3 module with GhostBottleneck.
427
410
 
428
411
  Args:
429
412
  c1 (int): Input channels.
@@ -442,8 +425,7 @@ class GhostBottleneck(nn.Module):
442
425
  """Ghost Bottleneck https://github.com/huawei-noah/Efficient-AI-Backbones."""
443
426
 
444
427
  def __init__(self, c1: int, c2: int, k: int = 3, s: int = 1):
445
- """
446
- Initialize Ghost Bottleneck module.
428
+ """Initialize Ghost Bottleneck module.
447
429
 
448
430
  Args:
449
431
  c1 (int): Input channels.
@@ -473,8 +455,7 @@ class Bottleneck(nn.Module):
473
455
  def __init__(
474
456
  self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
475
457
  ):
476
- """
477
- Initialize a standard bottleneck module.
458
+ """Initialize a standard bottleneck module.
478
459
 
479
460
  Args:
480
461
  c1 (int): Input channels.
@@ -499,8 +480,7 @@ class BottleneckCSP(nn.Module):
499
480
  """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
500
481
 
501
482
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
502
- """
503
- Initialize CSP Bottleneck.
483
+ """Initialize CSP Bottleneck.
504
484
 
505
485
  Args:
506
486
  c1 (int): Input channels.
@@ -531,8 +511,7 @@ class ResNetBlock(nn.Module):
531
511
  """ResNet block with standard convolution layers."""
532
512
 
533
513
  def __init__(self, c1: int, c2: int, s: int = 1, e: int = 4):
534
- """
535
- Initialize ResNet block.
514
+ """Initialize ResNet block.
536
515
 
537
516
  Args:
538
517
  c1 (int): Input channels.
@@ -556,8 +535,7 @@ class ResNetLayer(nn.Module):
556
535
  """ResNet layer with multiple ResNet blocks."""
557
536
 
558
537
  def __init__(self, c1: int, c2: int, s: int = 1, is_first: bool = False, n: int = 1, e: int = 4):
559
- """
560
- Initialize ResNet layer.
538
+ """Initialize ResNet layer.
561
539
 
562
540
  Args:
563
541
  c1 (int): Input channels.
@@ -588,8 +566,7 @@ class MaxSigmoidAttnBlock(nn.Module):
588
566
  """Max Sigmoid attention block."""
589
567
 
590
568
  def __init__(self, c1: int, c2: int, nh: int = 1, ec: int = 128, gc: int = 512, scale: bool = False):
591
- """
592
- Initialize MaxSigmoidAttnBlock.
569
+ """Initialize MaxSigmoidAttnBlock.
593
570
 
594
571
  Args:
595
572
  c1 (int): Input channels.
@@ -609,8 +586,7 @@ class MaxSigmoidAttnBlock(nn.Module):
609
586
  self.scale = nn.Parameter(torch.ones(1, nh, 1, 1)) if scale else 1.0
610
587
 
611
588
  def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
612
- """
613
- Forward pass of MaxSigmoidAttnBlock.
589
+ """Forward pass of MaxSigmoidAttnBlock.
614
590
 
615
591
  Args:
616
592
  x (torch.Tensor): Input tensor.
@@ -653,8 +629,7 @@ class C2fAttn(nn.Module):
653
629
  g: int = 1,
654
630
  e: float = 0.5,
655
631
  ):
656
- """
657
- Initialize C2f module with attention mechanism.
632
+ """Initialize C2f module with attention mechanism.
658
633
 
659
634
  Args:
660
635
  c1 (int): Input channels.
@@ -675,8 +650,7 @@ class C2fAttn(nn.Module):
675
650
  self.attn = MaxSigmoidAttnBlock(self.c, self.c, gc=gc, ec=ec, nh=nh)
676
651
 
677
652
  def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
678
- """
679
- Forward pass through C2f layer with attention.
653
+ """Forward pass through C2f layer with attention.
680
654
 
681
655
  Args:
682
656
  x (torch.Tensor): Input tensor.
@@ -691,8 +665,7 @@ class C2fAttn(nn.Module):
691
665
  return self.cv2(torch.cat(y, 1))
692
666
 
693
667
  def forward_split(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
694
- """
695
- Forward pass using split() instead of chunk().
668
+ """Forward pass using split() instead of chunk().
696
669
 
697
670
  Args:
698
671
  x (torch.Tensor): Input tensor.
@@ -713,8 +686,7 @@ class ImagePoolingAttn(nn.Module):
713
686
  def __init__(
714
687
  self, ec: int = 256, ch: tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
715
688
  ):
716
- """
717
- Initialize ImagePoolingAttn module.
689
+ """Initialize ImagePoolingAttn module.
718
690
 
719
691
  Args:
720
692
  ec (int): Embedding channels.
@@ -741,8 +713,7 @@ class ImagePoolingAttn(nn.Module):
741
713
  self.k = k
742
714
 
743
715
  def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
744
- """
745
- Forward pass of ImagePoolingAttn.
716
+ """Forward pass of ImagePoolingAttn.
746
717
 
747
718
  Args:
748
719
  x (list[torch.Tensor]): List of input feature maps.
@@ -785,8 +756,7 @@ class ContrastiveHead(nn.Module):
785
756
  self.logit_scale = nn.Parameter(torch.ones([]) * torch.tensor(1 / 0.07).log())
786
757
 
787
758
  def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
788
- """
789
- Forward function of contrastive learning.
759
+ """Forward function of contrastive learning.
790
760
 
791
761
  Args:
792
762
  x (torch.Tensor): Image features.
@@ -802,16 +772,14 @@ class ContrastiveHead(nn.Module):
802
772
 
803
773
 
804
774
  class BNContrastiveHead(nn.Module):
805
- """
806
- Batch Norm Contrastive Head using batch norm instead of l2-normalization.
775
+ """Batch Norm Contrastive Head using batch norm instead of l2-normalization.
807
776
 
808
777
  Args:
809
778
  embed_dims (int): Embed dimensions of text and image features.
810
779
  """
811
780
 
812
781
  def __init__(self, embed_dims: int):
813
- """
814
- Initialize BNContrastiveHead.
782
+ """Initialize BNContrastiveHead.
815
783
 
816
784
  Args:
817
785
  embed_dims (int): Embedding dimensions for features.
@@ -830,13 +798,13 @@ class BNContrastiveHead(nn.Module):
830
798
  del self.logit_scale
831
799
  self.forward = self.forward_fuse
832
800
 
833
- def forward_fuse(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
801
+ @staticmethod
802
+ def forward_fuse(x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
834
803
  """Passes input out unchanged."""
835
804
  return x
836
805
 
837
806
  def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
838
- """
839
- Forward function of contrastive learning with batch normalization.
807
+ """Forward function of contrastive learning with batch normalization.
840
808
 
841
809
  Args:
842
810
  x (torch.Tensor): Image features.
@@ -858,8 +826,7 @@ class RepBottleneck(Bottleneck):
858
826
  def __init__(
859
827
  self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
860
828
  ):
861
- """
862
- Initialize RepBottleneck.
829
+ """Initialize RepBottleneck.
863
830
 
864
831
  Args:
865
832
  c1 (int): Input channels.
@@ -878,8 +845,7 @@ class RepCSP(C3):
878
845
  """Repeatable Cross Stage Partial Network (RepCSP) module for efficient feature extraction."""
879
846
 
880
847
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
881
- """
882
- Initialize RepCSP layer.
848
+ """Initialize RepCSP layer.
883
849
 
884
850
  Args:
885
851
  c1 (int): Input channels.
@@ -898,8 +864,7 @@ class RepNCSPELAN4(nn.Module):
898
864
  """CSP-ELAN."""
899
865
 
900
866
  def __init__(self, c1: int, c2: int, c3: int, c4: int, n: int = 1):
901
- """
902
- Initialize CSP-ELAN layer.
867
+ """Initialize CSP-ELAN layer.
903
868
 
904
869
  Args:
905
870
  c1 (int): Input channels.
@@ -932,8 +897,7 @@ class ELAN1(RepNCSPELAN4):
932
897
  """ELAN1 module with 4 convolutions."""
933
898
 
934
899
  def __init__(self, c1: int, c2: int, c3: int, c4: int):
935
- """
936
- Initialize ELAN1 layer.
900
+ """Initialize ELAN1 layer.
937
901
 
938
902
  Args:
939
903
  c1 (int): Input channels.
@@ -953,8 +917,7 @@ class AConv(nn.Module):
953
917
  """AConv."""
954
918
 
955
919
  def __init__(self, c1: int, c2: int):
956
- """
957
- Initialize AConv module.
920
+ """Initialize AConv module.
958
921
 
959
922
  Args:
960
923
  c1 (int): Input channels.
@@ -973,8 +936,7 @@ class ADown(nn.Module):
973
936
  """ADown."""
974
937
 
975
938
  def __init__(self, c1: int, c2: int):
976
- """
977
- Initialize ADown module.
939
+ """Initialize ADown module.
978
940
 
979
941
  Args:
980
942
  c1 (int): Input channels.
@@ -999,8 +961,7 @@ class SPPELAN(nn.Module):
999
961
  """SPP-ELAN."""
1000
962
 
1001
963
  def __init__(self, c1: int, c2: int, c3: int, k: int = 5):
1002
- """
1003
- Initialize SPP-ELAN block.
964
+ """Initialize SPP-ELAN block.
1004
965
 
1005
966
  Args:
1006
967
  c1 (int): Input channels.
@@ -1027,8 +988,7 @@ class CBLinear(nn.Module):
1027
988
  """CBLinear."""
1028
989
 
1029
990
  def __init__(self, c1: int, c2s: list[int], k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
1030
- """
1031
- Initialize CBLinear module.
991
+ """Initialize CBLinear module.
1032
992
 
1033
993
  Args:
1034
994
  c1 (int): Input channels.
@@ -1051,8 +1011,7 @@ class CBFuse(nn.Module):
1051
1011
  """CBFuse."""
1052
1012
 
1053
1013
  def __init__(self, idx: list[int]):
1054
- """
1055
- Initialize CBFuse module.
1014
+ """Initialize CBFuse module.
1056
1015
 
1057
1016
  Args:
1058
1017
  idx (list[int]): Indices for feature selection.
@@ -1061,8 +1020,7 @@ class CBFuse(nn.Module):
1061
1020
  self.idx = idx
1062
1021
 
1063
1022
  def forward(self, xs: list[torch.Tensor]) -> torch.Tensor:
1064
- """
1065
- Forward pass through CBFuse layer.
1023
+ """Forward pass through CBFuse layer.
1066
1024
 
1067
1025
  Args:
1068
1026
  xs (list[torch.Tensor]): List of input tensors.
@@ -1079,8 +1037,7 @@ class C3f(nn.Module):
1079
1037
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
1080
1038
 
1081
1039
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
1082
- """
1083
- Initialize CSP bottleneck layer with two convolutions.
1040
+ """Initialize CSP bottleneck layer with two convolutions.
1084
1041
 
1085
1042
  Args:
1086
1043
  c1 (int): Input channels.
@@ -1110,8 +1067,7 @@ class C3k2(C2f):
1110
1067
  def __init__(
1111
1068
  self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
1112
1069
  ):
1113
- """
1114
- Initialize C3k2 module.
1070
+ """Initialize C3k2 module.
1115
1071
 
1116
1072
  Args:
1117
1073
  c1 (int): Input channels.
@@ -1132,8 +1088,7 @@ class C3k(C3):
1132
1088
  """C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks."""
1133
1089
 
1134
1090
  def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5, k: int = 3):
1135
- """
1136
- Initialize C3k module.
1091
+ """Initialize C3k module.
1137
1092
 
1138
1093
  Args:
1139
1094
  c1 (int): Input channels.
@@ -1154,8 +1109,7 @@ class RepVGGDW(torch.nn.Module):
1154
1109
  """RepVGGDW is a class that represents a depth wise separable convolutional block in RepVGG architecture."""
1155
1110
 
1156
1111
  def __init__(self, ed: int) -> None:
1157
- """
1158
- Initialize RepVGGDW module.
1112
+ """Initialize RepVGGDW module.
1159
1113
 
1160
1114
  Args:
1161
1115
  ed (int): Input and output channels.
@@ -1167,8 +1121,7 @@ class RepVGGDW(torch.nn.Module):
1167
1121
  self.act = nn.SiLU()
1168
1122
 
1169
1123
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1170
- """
1171
- Perform a forward pass of the RepVGGDW block.
1124
+ """Perform a forward pass of the RepVGGDW block.
1172
1125
 
1173
1126
  Args:
1174
1127
  x (torch.Tensor): Input tensor.
@@ -1179,8 +1132,7 @@ class RepVGGDW(torch.nn.Module):
1179
1132
  return self.act(self.conv(x) + self.conv1(x))
1180
1133
 
1181
1134
  def forward_fuse(self, x: torch.Tensor) -> torch.Tensor:
1182
- """
1183
- Perform a forward pass of the RepVGGDW block without fusing the convolutions.
1135
+ """Perform a forward pass of the RepVGGDW block without fusing the convolutions.
1184
1136
 
1185
1137
  Args:
1186
1138
  x (torch.Tensor): Input tensor.
@@ -1192,8 +1144,7 @@ class RepVGGDW(torch.nn.Module):
1192
1144
 
1193
1145
  @torch.no_grad()
1194
1146
  def fuse(self):
1195
- """
1196
- Fuse the convolutional layers in the RepVGGDW block.
1147
+ """Fuse the convolutional layers in the RepVGGDW block.
1197
1148
 
1198
1149
  This method fuses the convolutional layers and updates the weights and biases accordingly.
1199
1150
  """
@@ -1218,8 +1169,7 @@ class RepVGGDW(torch.nn.Module):
1218
1169
 
1219
1170
 
1220
1171
  class CIB(nn.Module):
1221
- """
1222
- Conditional Identity Block (CIB) module.
1172
+ """Compact Inverted Block (CIB) module.
1223
1173
 
1224
1174
  Args:
1225
1175
  c1 (int): Number of input channels.
@@ -1230,8 +1180,7 @@ class CIB(nn.Module):
1230
1180
  """
1231
1181
 
1232
1182
  def __init__(self, c1: int, c2: int, shortcut: bool = True, e: float = 0.5, lk: bool = False):
1233
- """
1234
- Initialize the CIB module.
1183
+ """Initialize the CIB module.
1235
1184
 
1236
1185
  Args:
1237
1186
  c1 (int): Input channels.
@@ -1253,8 +1202,7 @@ class CIB(nn.Module):
1253
1202
  self.add = shortcut and c1 == c2
1254
1203
 
1255
1204
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1256
- """
1257
- Forward pass of the CIB module.
1205
+ """Forward pass of the CIB module.
1258
1206
 
1259
1207
  Args:
1260
1208
  x (torch.Tensor): Input tensor.
@@ -1266,15 +1214,14 @@ class CIB(nn.Module):
1266
1214
 
1267
1215
 
1268
1216
  class C2fCIB(C2f):
1269
- """
1270
- C2fCIB class represents a convolutional block with C2f and CIB modules.
1217
+ """C2fCIB class represents a convolutional block with C2f and CIB modules.
1271
1218
 
1272
1219
  Args:
1273
1220
  c1 (int): Number of input channels.
1274
1221
  c2 (int): Number of output channels.
1275
1222
  n (int, optional): Number of CIB modules to stack. Defaults to 1.
1276
1223
  shortcut (bool, optional): Whether to use shortcut connection. Defaults to False.
1277
- lk (bool, optional): Whether to use local key connection. Defaults to False.
1224
+ lk (bool, optional): Whether to use large kernel. Defaults to False.
1278
1225
  g (int, optional): Number of groups for grouped convolution. Defaults to 1.
1279
1226
  e (float, optional): Expansion ratio for CIB modules. Defaults to 0.5.
1280
1227
  """
@@ -1282,15 +1229,14 @@ class C2fCIB(C2f):
1282
1229
  def __init__(
1283
1230
  self, c1: int, c2: int, n: int = 1, shortcut: bool = False, lk: bool = False, g: int = 1, e: float = 0.5
1284
1231
  ):
1285
- """
1286
- Initialize C2fCIB module.
1232
+ """Initialize C2fCIB module.
1287
1233
 
1288
1234
  Args:
1289
1235
  c1 (int): Input channels.
1290
1236
  c2 (int): Output channels.
1291
1237
  n (int): Number of CIB modules.
1292
1238
  shortcut (bool): Whether to use shortcut connection.
1293
- lk (bool): Whether to use local key connection.
1239
+ lk (bool): Whether to use large kernel.
1294
1240
  g (int): Groups for convolutions.
1295
1241
  e (float): Expansion ratio.
1296
1242
  """
@@ -1299,8 +1245,7 @@ class C2fCIB(C2f):
1299
1245
 
1300
1246
 
1301
1247
  class Attention(nn.Module):
1302
- """
1303
- Attention module that performs self-attention on the input tensor.
1248
+ """Attention module that performs self-attention on the input tensor.
1304
1249
 
1305
1250
  Args:
1306
1251
  dim (int): The input tensor dimension.
@@ -1318,8 +1263,7 @@ class Attention(nn.Module):
1318
1263
  """
1319
1264
 
1320
1265
  def __init__(self, dim: int, num_heads: int = 8, attn_ratio: float = 0.5):
1321
- """
1322
- Initialize multi-head attention module.
1266
+ """Initialize multi-head attention module.
1323
1267
 
1324
1268
  Args:
1325
1269
  dim (int): Input dimension.
@@ -1338,8 +1282,7 @@ class Attention(nn.Module):
1338
1282
  self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)
1339
1283
 
1340
1284
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1341
- """
1342
- Forward pass of the Attention module.
1285
+ """Forward pass of the Attention module.
1343
1286
 
1344
1287
  Args:
1345
1288
  x (torch.Tensor): The input tensor.
@@ -1362,8 +1305,7 @@ class Attention(nn.Module):
1362
1305
 
1363
1306
 
1364
1307
  class PSABlock(nn.Module):
1365
- """
1366
- PSABlock class implementing a Position-Sensitive Attention block for neural networks.
1308
+ """PSABlock class implementing a Position-Sensitive Attention block for neural networks.
1367
1309
 
1368
1310
  This class encapsulates the functionality for applying multi-head attention and feed-forward neural network layers
1369
1311
  with optional shortcut connections.
@@ -1384,8 +1326,7 @@ class PSABlock(nn.Module):
1384
1326
  """
1385
1327
 
1386
1328
  def __init__(self, c: int, attn_ratio: float = 0.5, num_heads: int = 4, shortcut: bool = True) -> None:
1387
- """
1388
- Initialize the PSABlock.
1329
+ """Initialize the PSABlock.
1389
1330
 
1390
1331
  Args:
1391
1332
  c (int): Input and output channels.
@@ -1400,8 +1341,7 @@ class PSABlock(nn.Module):
1400
1341
  self.add = shortcut
1401
1342
 
1402
1343
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1403
- """
1404
- Execute a forward pass through PSABlock.
1344
+ """Execute a forward pass through PSABlock.
1405
1345
 
1406
1346
  Args:
1407
1347
  x (torch.Tensor): Input tensor.
@@ -1415,8 +1355,7 @@ class PSABlock(nn.Module):
1415
1355
 
1416
1356
 
1417
1357
  class PSA(nn.Module):
1418
- """
1419
- PSA class for implementing Position-Sensitive Attention in neural networks.
1358
+ """PSA class for implementing Position-Sensitive Attention in neural networks.
1420
1359
 
1421
1360
  This class encapsulates the functionality for applying position-sensitive attention and feed-forward networks to
1422
1361
  input tensors, enhancing feature extraction and processing capabilities.
@@ -1439,8 +1378,7 @@ class PSA(nn.Module):
1439
1378
  """
1440
1379
 
1441
1380
  def __init__(self, c1: int, c2: int, e: float = 0.5):
1442
- """
1443
- Initialize PSA module.
1381
+ """Initialize PSA module.
1444
1382
 
1445
1383
  Args:
1446
1384
  c1 (int): Input channels.
@@ -1457,8 +1395,7 @@ class PSA(nn.Module):
1457
1395
  self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
1458
1396
 
1459
1397
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1460
- """
1461
- Execute forward pass in PSA module.
1398
+ """Execute forward pass in PSA module.
1462
1399
 
1463
1400
  Args:
1464
1401
  x (torch.Tensor): Input tensor.
@@ -1473,8 +1410,7 @@ class PSA(nn.Module):
1473
1410
 
1474
1411
 
1475
1412
  class C2PSA(nn.Module):
1476
- """
1477
- C2PSA module with attention mechanism for enhanced feature extraction and processing.
1413
+ """C2PSA module with attention mechanism for enhanced feature extraction and processing.
1478
1414
 
1479
1415
  This module implements a convolutional block with attention mechanisms to enhance feature extraction and processing
1480
1416
  capabilities. It includes a series of PSABlock modules for self-attention and feed-forward operations.
@@ -1488,18 +1424,17 @@ class C2PSA(nn.Module):
1488
1424
  Methods:
1489
1425
  forward: Performs a forward pass through the C2PSA module, applying attention and feed-forward operations.
1490
1426
 
1491
- Notes:
1492
- This module essentially is the same as PSA module, but refactored to allow stacking more PSABlock modules.
1493
-
1494
1427
  Examples:
1495
1428
  >>> c2psa = C2PSA(c1=256, c2=256, n=3, e=0.5)
1496
1429
  >>> input_tensor = torch.randn(1, 256, 64, 64)
1497
1430
  >>> output_tensor = c2psa(input_tensor)
1431
+
1432
+ Notes:
1433
+ This module essentially is the same as PSA module, but refactored to allow stacking more PSABlock modules.
1498
1434
  """
1499
1435
 
1500
1436
  def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
1501
- """
1502
- Initialize C2PSA module.
1437
+ """Initialize C2PSA module.
1503
1438
 
1504
1439
  Args:
1505
1440
  c1 (int): Input channels.
@@ -1516,8 +1451,7 @@ class C2PSA(nn.Module):
1516
1451
  self.m = nn.Sequential(*(PSABlock(self.c, attn_ratio=0.5, num_heads=self.c // 64) for _ in range(n)))
1517
1452
 
1518
1453
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1519
- """
1520
- Process the input tensor through a series of PSA blocks.
1454
+ """Process the input tensor through a series of PSA blocks.
1521
1455
 
1522
1456
  Args:
1523
1457
  x (torch.Tensor): Input tensor.
@@ -1531,10 +1465,10 @@ class C2PSA(nn.Module):
1531
1465
 
1532
1466
 
1533
1467
  class C2fPSA(C2f):
1534
- """
1535
- C2fPSA module with enhanced feature extraction using PSA blocks.
1468
+ """C2fPSA module with enhanced feature extraction using PSA blocks.
1536
1469
 
1537
- This class extends the C2f module by incorporating PSA blocks for improved attention mechanisms and feature extraction.
1470
+ This class extends the C2f module by incorporating PSA blocks for improved attention mechanisms and feature
1471
+ extraction.
1538
1472
 
1539
1473
  Attributes:
1540
1474
  c (int): Number of hidden channels.
@@ -1556,8 +1490,7 @@ class C2fPSA(C2f):
1556
1490
  """
1557
1491
 
1558
1492
  def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
1559
- """
1560
- Initialize C2fPSA module.
1493
+ """Initialize C2fPSA module.
1561
1494
 
1562
1495
  Args:
1563
1496
  c1 (int): Input channels.
@@ -1571,8 +1504,7 @@ class C2fPSA(C2f):
1571
1504
 
1572
1505
 
1573
1506
  class SCDown(nn.Module):
1574
- """
1575
- SCDown module for downsampling with separable convolutions.
1507
+ """SCDown module for downsampling with separable convolutions.
1576
1508
 
1577
1509
  This module performs downsampling using a combination of pointwise and depthwise convolutions, which helps in
1578
1510
  efficiently reducing the spatial dimensions of the input tensor while maintaining the channel information.
@@ -1595,8 +1527,7 @@ class SCDown(nn.Module):
1595
1527
  """
1596
1528
 
1597
1529
  def __init__(self, c1: int, c2: int, k: int, s: int):
1598
- """
1599
- Initialize SCDown module.
1530
+ """Initialize SCDown module.
1600
1531
 
1601
1532
  Args:
1602
1533
  c1 (int): Input channels.
@@ -1609,8 +1540,7 @@ class SCDown(nn.Module):
1609
1540
  self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)
1610
1541
 
1611
1542
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1612
- """
1613
- Apply convolution and downsampling to the input tensor.
1543
+ """Apply convolution and downsampling to the input tensor.
1614
1544
 
1615
1545
  Args:
1616
1546
  x (torch.Tensor): Input tensor.
@@ -1622,27 +1552,26 @@ class SCDown(nn.Module):
1622
1552
 
1623
1553
 
1624
1554
  class TorchVision(nn.Module):
1625
- """
1626
- TorchVision module to allow loading any torchvision model.
1555
+ """TorchVision module to allow loading any torchvision model.
1627
1556
 
1628
- This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and customize the model by truncating or unwrapping layers.
1629
-
1630
- Attributes:
1631
- m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
1557
+ This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and
1558
+ customize the model by truncating or unwrapping layers.
1632
1559
 
1633
1560
  Args:
1634
1561
  model (str): Name of the torchvision model to load.
1635
1562
  weights (str, optional): Pre-trained weights to load. Default is "DEFAULT".
1636
- unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True.
1563
+ unwrap (bool, optional): Unwraps the model to a sequential containing all but the last `truncate` layers.
1637
1564
  truncate (int, optional): Number of layers to truncate from the end if `unwrap` is True. Default is 2.
1638
1565
  split (bool, optional): Returns output from intermediate child modules as list. Default is False.
1566
+
1567
+ Attributes:
1568
+ m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
1639
1569
  """
1640
1570
 
1641
1571
  def __init__(
1642
1572
  self, model: str, weights: str = "DEFAULT", unwrap: bool = True, truncate: int = 2, split: bool = False
1643
1573
  ):
1644
- """
1645
- Load the model and weights from torchvision.
1574
+ """Load the model and weights from torchvision.
1646
1575
 
1647
1576
  Args:
1648
1577
  model (str): Name of the torchvision model to load.
@@ -1669,8 +1598,7 @@ class TorchVision(nn.Module):
1669
1598
  self.m.head = self.m.heads = nn.Identity()
1670
1599
 
1671
1600
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1672
- """
1673
- Forward pass through the model.
1601
+ """Forward pass through the model.
1674
1602
 
1675
1603
  Args:
1676
1604
  x (torch.Tensor): Input tensor.
@@ -1687,8 +1615,7 @@ class TorchVision(nn.Module):
1687
1615
 
1688
1616
 
1689
1617
  class AAttn(nn.Module):
1690
- """
1691
- Area-attention module for YOLO models, providing efficient attention mechanisms.
1618
+ """Area-attention module for YOLO models, providing efficient attention mechanisms.
1692
1619
 
1693
1620
  This module implements an area-based attention mechanism that processes input features in a spatially-aware manner,
1694
1621
  making it particularly effective for object detection tasks.
@@ -1713,8 +1640,7 @@ class AAttn(nn.Module):
1713
1640
  """
1714
1641
 
1715
1642
  def __init__(self, dim: int, num_heads: int, area: int = 1):
1716
- """
1717
- Initialize an Area-attention module for YOLO models.
1643
+ """Initialize an Area-attention module for YOLO models.
1718
1644
 
1719
1645
  Args:
1720
1646
  dim (int): Number of hidden channels.
@@ -1733,8 +1659,7 @@ class AAttn(nn.Module):
1733
1659
  self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False)
1734
1660
 
1735
1661
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1736
- """
1737
- Process the input tensor through the area-attention.
1662
+ """Process the input tensor through the area-attention.
1738
1663
 
1739
1664
  Args:
1740
1665
  x (torch.Tensor): Input tensor.
@@ -1773,8 +1698,7 @@ class AAttn(nn.Module):
1773
1698
 
1774
1699
 
1775
1700
  class ABlock(nn.Module):
1776
- """
1777
- Area-attention block module for efficient feature extraction in YOLO models.
1701
+ """Area-attention block module for efficient feature extraction in YOLO models.
1778
1702
 
1779
1703
  This module implements an area-attention mechanism combined with a feed-forward network for processing feature maps.
1780
1704
  It uses a novel area-based attention approach that is more efficient than traditional self-attention while
@@ -1797,8 +1721,7 @@ class ABlock(nn.Module):
1797
1721
  """
1798
1722
 
1799
1723
  def __init__(self, dim: int, num_heads: int, mlp_ratio: float = 1.2, area: int = 1):
1800
- """
1801
- Initialize an Area-attention block module.
1724
+ """Initialize an Area-attention block module.
1802
1725
 
1803
1726
  Args:
1804
1727
  dim (int): Number of input channels.
@@ -1814,9 +1737,9 @@ class ABlock(nn.Module):
1814
1737
 
1815
1738
  self.apply(self._init_weights)
1816
1739
 
1817
- def _init_weights(self, m: nn.Module):
1818
- """
1819
- Initialize weights using a truncated normal distribution.
1740
+ @staticmethod
1741
+ def _init_weights(m: nn.Module):
1742
+ """Initialize weights using a truncated normal distribution.
1820
1743
 
1821
1744
  Args:
1822
1745
  m (nn.Module): Module to initialize.
@@ -1827,8 +1750,7 @@ class ABlock(nn.Module):
1827
1750
  nn.init.constant_(m.bias, 0)
1828
1751
 
1829
1752
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1830
- """
1831
- Forward pass through ABlock.
1753
+ """Forward pass through ABlock.
1832
1754
 
1833
1755
  Args:
1834
1756
  x (torch.Tensor): Input tensor.
@@ -1841,8 +1763,7 @@ class ABlock(nn.Module):
1841
1763
 
1842
1764
 
1843
1765
  class A2C2f(nn.Module):
1844
- """
1845
- Area-Attention C2f module for enhanced feature extraction with area-based attention mechanisms.
1766
+ """Area-Attention C2f module for enhanced feature extraction with area-based attention mechanisms.
1846
1767
 
1847
1768
  This module extends the C2f architecture by incorporating area-attention and ABlock layers for improved feature
1848
1769
  processing. It supports both area-attention and standard convolution modes.
@@ -1877,8 +1798,7 @@ class A2C2f(nn.Module):
1877
1798
  g: int = 1,
1878
1799
  shortcut: bool = True,
1879
1800
  ):
1880
- """
1881
- Initialize Area-Attention C2f module.
1801
+ """Initialize Area-Attention C2f module.
1882
1802
 
1883
1803
  Args:
1884
1804
  c1 (int): Number of input channels.
@@ -1894,7 +1814,7 @@ class A2C2f(nn.Module):
1894
1814
  """
1895
1815
  super().__init__()
1896
1816
  c_ = int(c2 * e) # hidden channels
1897
- assert c_ % 32 == 0, "Dimension of ABlock be a multiple of 32."
1817
+ assert c_ % 32 == 0, "Dimension of ABlock must be a multiple of 32."
1898
1818
 
1899
1819
  self.cv1 = Conv(c1, c_, 1, 1)
1900
1820
  self.cv2 = Conv((1 + n) * c_, c2, 1)
@@ -1908,8 +1828,7 @@ class A2C2f(nn.Module):
1908
1828
  )
1909
1829
 
1910
1830
  def forward(self, x: torch.Tensor) -> torch.Tensor:
1911
- """
1912
- Forward pass through A2C2f layer.
1831
+ """Forward pass through A2C2f layer.
1913
1832
 
1914
1833
  Args:
1915
1834
  x (torch.Tensor): Input tensor.
@@ -1929,8 +1848,7 @@ class SwiGLUFFN(nn.Module):
1929
1848
  """SwiGLU Feed-Forward Network for transformer-based architectures."""
1930
1849
 
1931
1850
  def __init__(self, gc: int, ec: int, e: int = 4) -> None:
1932
- """
1933
- Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor.
1851
+ """Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor.
1934
1852
 
1935
1853
  Args:
1936
1854
  gc (int): Guide channels.
@@ -1953,8 +1871,7 @@ class Residual(nn.Module):
1953
1871
  """Residual connection wrapper for neural network modules."""
1954
1872
 
1955
1873
  def __init__(self, m: nn.Module) -> None:
1956
- """
1957
- Initialize residual module with the wrapped module.
1874
+ """Initialize residual module with the wrapped module.
1958
1875
 
1959
1876
  Args:
1960
1877
  m (nn.Module): Module to wrap with residual connection.
@@ -1975,8 +1892,7 @@ class SAVPE(nn.Module):
1975
1892
  """Spatial-Aware Visual Prompt Embedding module for feature enhancement."""
1976
1893
 
1977
1894
  def __init__(self, ch: list[int], c3: int, embed: int):
1978
- """
1979
- Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
1895
+ """Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
1980
1896
 
1981
1897
  Args:
1982
1898
  ch (list[int]): List of input channel dimensions.