dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215):
  1. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
  2. dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
  3. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -6
  5. tests/conftest.py +15 -39
  6. tests/test_cli.py +17 -17
  7. tests/test_cuda.py +17 -8
  8. tests/test_engine.py +36 -10
  9. tests/test_exports.py +98 -37
  10. tests/test_integrations.py +12 -15
  11. tests/test_python.py +126 -82
  12. tests/test_solutions.py +319 -135
  13. ultralytics/__init__.py +27 -9
  14. ultralytics/cfg/__init__.py +83 -87
  15. ultralytics/cfg/datasets/Argoverse.yaml +4 -4
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
  17. ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
  18. ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
  19. ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
  20. ultralytics/cfg/datasets/ImageNet.yaml +3 -3
  21. ultralytics/cfg/datasets/Objects365.yaml +24 -20
  22. ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
  23. ultralytics/cfg/datasets/VOC.yaml +10 -13
  24. ultralytics/cfg/datasets/VisDrone.yaml +43 -33
  25. ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
  26. ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
  27. ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
  28. ultralytics/cfg/datasets/coco-pose.yaml +26 -4
  29. ultralytics/cfg/datasets/coco.yaml +4 -4
  30. ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
  31. ultralytics/cfg/datasets/coco128.yaml +2 -2
  32. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  33. ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
  34. ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
  35. ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
  36. ultralytics/cfg/datasets/coco8.yaml +2 -2
  37. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  38. ultralytics/cfg/datasets/crack-seg.yaml +5 -5
  39. ultralytics/cfg/datasets/dog-pose.yaml +32 -4
  40. ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
  41. ultralytics/cfg/datasets/dota8.yaml +2 -2
  42. ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
  43. ultralytics/cfg/datasets/lvis.yaml +9 -9
  44. ultralytics/cfg/datasets/medical-pills.yaml +4 -5
  45. ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
  46. ultralytics/cfg/datasets/package-seg.yaml +5 -5
  47. ultralytics/cfg/datasets/signature.yaml +4 -4
  48. ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
  49. ultralytics/cfg/datasets/xView.yaml +5 -5
  50. ultralytics/cfg/default.yaml +96 -93
  51. ultralytics/cfg/trackers/botsort.yaml +16 -17
  52. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  53. ultralytics/data/__init__.py +4 -4
  54. ultralytics/data/annotator.py +12 -12
  55. ultralytics/data/augment.py +531 -564
  56. ultralytics/data/base.py +76 -81
  57. ultralytics/data/build.py +206 -42
  58. ultralytics/data/converter.py +179 -78
  59. ultralytics/data/dataset.py +121 -121
  60. ultralytics/data/loaders.py +114 -91
  61. ultralytics/data/split.py +28 -15
  62. ultralytics/data/split_dota.py +67 -48
  63. ultralytics/data/utils.py +110 -89
  64. ultralytics/engine/exporter.py +422 -460
  65. ultralytics/engine/model.py +224 -252
  66. ultralytics/engine/predictor.py +94 -89
  67. ultralytics/engine/results.py +345 -595
  68. ultralytics/engine/trainer.py +231 -134
  69. ultralytics/engine/tuner.py +279 -73
  70. ultralytics/engine/validator.py +53 -46
  71. ultralytics/hub/__init__.py +26 -28
  72. ultralytics/hub/auth.py +30 -16
  73. ultralytics/hub/google/__init__.py +34 -36
  74. ultralytics/hub/session.py +53 -77
  75. ultralytics/hub/utils.py +23 -109
  76. ultralytics/models/__init__.py +1 -1
  77. ultralytics/models/fastsam/__init__.py +1 -1
  78. ultralytics/models/fastsam/model.py +36 -18
  79. ultralytics/models/fastsam/predict.py +33 -44
  80. ultralytics/models/fastsam/utils.py +4 -5
  81. ultralytics/models/fastsam/val.py +12 -14
  82. ultralytics/models/nas/__init__.py +1 -1
  83. ultralytics/models/nas/model.py +16 -20
  84. ultralytics/models/nas/predict.py +12 -14
  85. ultralytics/models/nas/val.py +4 -5
  86. ultralytics/models/rtdetr/__init__.py +1 -1
  87. ultralytics/models/rtdetr/model.py +9 -9
  88. ultralytics/models/rtdetr/predict.py +22 -17
  89. ultralytics/models/rtdetr/train.py +20 -16
  90. ultralytics/models/rtdetr/val.py +79 -59
  91. ultralytics/models/sam/__init__.py +8 -2
  92. ultralytics/models/sam/amg.py +53 -38
  93. ultralytics/models/sam/build.py +29 -31
  94. ultralytics/models/sam/model.py +33 -38
  95. ultralytics/models/sam/modules/blocks.py +159 -182
  96. ultralytics/models/sam/modules/decoders.py +38 -47
  97. ultralytics/models/sam/modules/encoders.py +114 -133
  98. ultralytics/models/sam/modules/memory_attention.py +38 -31
  99. ultralytics/models/sam/modules/sam.py +114 -93
  100. ultralytics/models/sam/modules/tiny_encoder.py +268 -291
  101. ultralytics/models/sam/modules/transformer.py +59 -66
  102. ultralytics/models/sam/modules/utils.py +55 -72
  103. ultralytics/models/sam/predict.py +745 -341
  104. ultralytics/models/utils/loss.py +118 -107
  105. ultralytics/models/utils/ops.py +118 -71
  106. ultralytics/models/yolo/__init__.py +1 -1
  107. ultralytics/models/yolo/classify/predict.py +28 -26
  108. ultralytics/models/yolo/classify/train.py +50 -81
  109. ultralytics/models/yolo/classify/val.py +68 -61
  110. ultralytics/models/yolo/detect/predict.py +12 -15
  111. ultralytics/models/yolo/detect/train.py +56 -46
  112. ultralytics/models/yolo/detect/val.py +279 -223
  113. ultralytics/models/yolo/model.py +167 -86
  114. ultralytics/models/yolo/obb/predict.py +7 -11
  115. ultralytics/models/yolo/obb/train.py +23 -25
  116. ultralytics/models/yolo/obb/val.py +107 -99
  117. ultralytics/models/yolo/pose/__init__.py +1 -1
  118. ultralytics/models/yolo/pose/predict.py +12 -14
  119. ultralytics/models/yolo/pose/train.py +31 -69
  120. ultralytics/models/yolo/pose/val.py +119 -254
  121. ultralytics/models/yolo/segment/predict.py +21 -25
  122. ultralytics/models/yolo/segment/train.py +12 -66
  123. ultralytics/models/yolo/segment/val.py +126 -305
  124. ultralytics/models/yolo/world/train.py +53 -45
  125. ultralytics/models/yolo/world/train_world.py +51 -32
  126. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  127. ultralytics/models/yolo/yoloe/predict.py +30 -37
  128. ultralytics/models/yolo/yoloe/train.py +89 -71
  129. ultralytics/models/yolo/yoloe/train_seg.py +15 -17
  130. ultralytics/models/yolo/yoloe/val.py +56 -41
  131. ultralytics/nn/__init__.py +9 -11
  132. ultralytics/nn/autobackend.py +179 -107
  133. ultralytics/nn/modules/__init__.py +67 -67
  134. ultralytics/nn/modules/activation.py +8 -7
  135. ultralytics/nn/modules/block.py +302 -323
  136. ultralytics/nn/modules/conv.py +61 -104
  137. ultralytics/nn/modules/head.py +488 -186
  138. ultralytics/nn/modules/transformer.py +183 -123
  139. ultralytics/nn/modules/utils.py +15 -20
  140. ultralytics/nn/tasks.py +327 -203
  141. ultralytics/nn/text_model.py +81 -65
  142. ultralytics/py.typed +1 -0
  143. ultralytics/solutions/__init__.py +12 -12
  144. ultralytics/solutions/ai_gym.py +19 -27
  145. ultralytics/solutions/analytics.py +36 -26
  146. ultralytics/solutions/config.py +29 -28
  147. ultralytics/solutions/distance_calculation.py +23 -24
  148. ultralytics/solutions/heatmap.py +17 -19
  149. ultralytics/solutions/instance_segmentation.py +21 -19
  150. ultralytics/solutions/object_blurrer.py +16 -17
  151. ultralytics/solutions/object_counter.py +48 -53
  152. ultralytics/solutions/object_cropper.py +22 -16
  153. ultralytics/solutions/parking_management.py +61 -58
  154. ultralytics/solutions/queue_management.py +19 -19
  155. ultralytics/solutions/region_counter.py +63 -50
  156. ultralytics/solutions/security_alarm.py +22 -25
  157. ultralytics/solutions/similarity_search.py +107 -60
  158. ultralytics/solutions/solutions.py +343 -262
  159. ultralytics/solutions/speed_estimation.py +35 -31
  160. ultralytics/solutions/streamlit_inference.py +104 -40
  161. ultralytics/solutions/templates/similarity-search.html +31 -24
  162. ultralytics/solutions/trackzone.py +24 -24
  163. ultralytics/solutions/vision_eye.py +11 -12
  164. ultralytics/trackers/__init__.py +1 -1
  165. ultralytics/trackers/basetrack.py +18 -27
  166. ultralytics/trackers/bot_sort.py +48 -39
  167. ultralytics/trackers/byte_tracker.py +94 -94
  168. ultralytics/trackers/track.py +7 -16
  169. ultralytics/trackers/utils/gmc.py +37 -69
  170. ultralytics/trackers/utils/kalman_filter.py +68 -76
  171. ultralytics/trackers/utils/matching.py +13 -17
  172. ultralytics/utils/__init__.py +251 -275
  173. ultralytics/utils/autobatch.py +19 -7
  174. ultralytics/utils/autodevice.py +68 -38
  175. ultralytics/utils/benchmarks.py +169 -130
  176. ultralytics/utils/callbacks/base.py +12 -13
  177. ultralytics/utils/callbacks/clearml.py +14 -15
  178. ultralytics/utils/callbacks/comet.py +139 -66
  179. ultralytics/utils/callbacks/dvc.py +19 -27
  180. ultralytics/utils/callbacks/hub.py +8 -6
  181. ultralytics/utils/callbacks/mlflow.py +6 -10
  182. ultralytics/utils/callbacks/neptune.py +11 -19
  183. ultralytics/utils/callbacks/platform.py +73 -0
  184. ultralytics/utils/callbacks/raytune.py +3 -4
  185. ultralytics/utils/callbacks/tensorboard.py +9 -12
  186. ultralytics/utils/callbacks/wb.py +33 -30
  187. ultralytics/utils/checks.py +163 -114
  188. ultralytics/utils/cpu.py +89 -0
  189. ultralytics/utils/dist.py +24 -20
  190. ultralytics/utils/downloads.py +176 -146
  191. ultralytics/utils/errors.py +11 -13
  192. ultralytics/utils/events.py +113 -0
  193. ultralytics/utils/export/__init__.py +7 -0
  194. ultralytics/utils/{export.py → export/engine.py} +81 -63
  195. ultralytics/utils/export/imx.py +294 -0
  196. ultralytics/utils/export/tensorflow.py +217 -0
  197. ultralytics/utils/files.py +33 -36
  198. ultralytics/utils/git.py +137 -0
  199. ultralytics/utils/instance.py +105 -120
  200. ultralytics/utils/logger.py +404 -0
  201. ultralytics/utils/loss.py +99 -61
  202. ultralytics/utils/metrics.py +649 -478
  203. ultralytics/utils/nms.py +337 -0
  204. ultralytics/utils/ops.py +263 -451
  205. ultralytics/utils/patches.py +70 -31
  206. ultralytics/utils/plotting.py +253 -223
  207. ultralytics/utils/tal.py +48 -61
  208. ultralytics/utils/torch_utils.py +244 -251
  209. ultralytics/utils/tqdm.py +438 -0
  210. ultralytics/utils/triton.py +22 -23
  211. ultralytics/utils/tuner.py +11 -10
  212. dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
  213. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
  214. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
  215. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
  """Block modules."""
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import torch
5
7
  import torch.nn as nn
6
8
  import torch.nn.functional as F
@@ -11,64 +13,67 @@ from .conv import Conv, DWConv, GhostConv, LightConv, RepConv, autopad
11
13
  from .transformer import TransformerBlock
12
14
 
13
15
  __all__ = (
14
- "DFL",
15
- "HGBlock",
16
- "HGStem",
17
- "SPP",
18
- "SPPF",
19
16
  "C1",
20
17
  "C2",
18
+ "C2PSA",
21
19
  "C3",
20
+ "C3TR",
21
+ "CIB",
22
+ "DFL",
23
+ "ELAN1",
24
+ "PSA",
25
+ "SPP",
26
+ "SPPELAN",
27
+ "SPPF",
28
+ "AConv",
29
+ "ADown",
30
+ "Attention",
31
+ "BNContrastiveHead",
32
+ "Bottleneck",
33
+ "BottleneckCSP",
22
34
  "C2f",
23
35
  "C2fAttn",
24
- "ImagePoolingAttn",
25
- "ContrastiveHead",
26
- "BNContrastiveHead",
27
- "C3x",
28
- "C3TR",
36
+ "C2fCIB",
37
+ "C2fPSA",
29
38
  "C3Ghost",
39
+ "C3k2",
40
+ "C3x",
41
+ "CBFuse",
42
+ "CBLinear",
43
+ "ContrastiveHead",
30
44
  "GhostBottleneck",
31
- "Bottleneck",
32
- "BottleneckCSP",
45
+ "HGBlock",
46
+ "HGStem",
47
+ "ImagePoolingAttn",
33
48
  "Proto",
34
49
  "RepC3",
35
- "ResNetLayer",
36
50
  "RepNCSPELAN4",
37
- "ELAN1",
38
- "ADown",
39
- "AConv",
40
- "SPPELAN",
41
- "CBFuse",
42
- "CBLinear",
43
- "C3k2",
44
- "C2fPSA",
45
- "C2PSA",
46
51
  "RepVGGDW",
47
- "CIB",
48
- "C2fCIB",
49
- "Attention",
50
- "PSA",
52
+ "ResNetLayer",
51
53
  "SCDown",
52
54
  "TorchVision",
53
55
  )
54
56
 
55
57
 
56
58
  class DFL(nn.Module):
57
- """
58
- Integral module of Distribution Focal Loss (DFL).
59
+ """Integral module of Distribution Focal Loss (DFL).
59
60
 
60
61
  Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
61
62
  """
62
63
 
63
- def __init__(self, c1=16):
64
- """Initialize a convolutional layer with a given number of input channels."""
64
+ def __init__(self, c1: int = 16):
65
+ """Initialize a convolutional layer with a given number of input channels.
66
+
67
+ Args:
68
+ c1 (int): Number of input channels.
69
+ """
65
70
  super().__init__()
66
71
  self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
67
72
  x = torch.arange(c1, dtype=torch.float)
68
73
  self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
69
74
  self.c1 = c1
70
75
 
71
- def forward(self, x):
76
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
72
77
  """Apply the DFL module to input tensor and return transformed output."""
73
78
  b, _, a = x.shape # batch, channels, anchors
74
79
  return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
@@ -78,9 +83,8 @@ class DFL(nn.Module):
78
83
  class Proto(nn.Module):
79
84
  """Ultralytics YOLO models mask Proto module for segmentation models."""
80
85
 
81
- def __init__(self, c1, c_=256, c2=32):
82
- """
83
- Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
86
+ def __init__(self, c1: int, c_: int = 256, c2: int = 32):
87
+ """Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
84
88
 
85
89
  Args:
86
90
  c1 (int): Input channels.
@@ -93,21 +97,19 @@ class Proto(nn.Module):
93
97
  self.cv2 = Conv(c_, c_, k=3)
94
98
  self.cv3 = Conv(c_, c2)
95
99
 
96
- def forward(self, x):
100
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
97
101
  """Perform a forward pass through layers using an upsampled input image."""
98
102
  return self.cv3(self.cv2(self.upsample(self.cv1(x))))
99
103
 
100
104
 
101
105
  class HGStem(nn.Module):
102
- """
103
- StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
106
+ """StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
104
107
 
105
108
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
106
109
  """
107
110
 
108
- def __init__(self, c1, cm, c2):
109
- """
110
- Initialize the StemBlock of PPHGNetV2.
111
+ def __init__(self, c1: int, cm: int, c2: int):
112
+ """Initialize the StemBlock of PPHGNetV2.
111
113
 
112
114
  Args:
113
115
  c1 (int): Input channels.
@@ -122,7 +124,7 @@ class HGStem(nn.Module):
122
124
  self.stem4 = Conv(cm, c2, 1, 1, act=nn.ReLU())
123
125
  self.pool = nn.MaxPool2d(kernel_size=2, stride=1, padding=0, ceil_mode=True)
124
126
 
125
- def forward(self, x):
127
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
126
128
  """Forward pass of a PPHGNetV2 backbone layer."""
127
129
  x = self.stem1(x)
128
130
  x = F.pad(x, [0, 1, 0, 1])
@@ -137,15 +139,23 @@ class HGStem(nn.Module):
137
139
 
138
140
 
139
141
  class HGBlock(nn.Module):
140
- """
141
- HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
142
+ """HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
142
143
 
143
144
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
144
145
  """
145
146
 
146
- def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
147
- """
148
- Initialize HGBlock with specified parameters.
147
+ def __init__(
148
+ self,
149
+ c1: int,
150
+ cm: int,
151
+ c2: int,
152
+ k: int = 3,
153
+ n: int = 6,
154
+ lightconv: bool = False,
155
+ shortcut: bool = False,
156
+ act: nn.Module = nn.ReLU(),
157
+ ):
158
+ """Initialize HGBlock with specified parameters.
149
159
 
150
160
  Args:
151
161
  c1 (int): Input channels.
@@ -164,7 +174,7 @@ class HGBlock(nn.Module):
164
174
  self.ec = Conv(c2 // 2, c2, 1, 1, act=act) # excitation conv
165
175
  self.add = shortcut and c1 == c2
166
176
 
167
- def forward(self, x):
177
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
168
178
  """Forward pass of a PPHGNetV2 backbone layer."""
169
179
  y = [x]
170
180
  y.extend(m(y[-1]) for m in self.m)
@@ -175,14 +185,13 @@ class HGBlock(nn.Module):
175
185
  class SPP(nn.Module):
176
186
  """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
177
187
 
178
- def __init__(self, c1, c2, k=(5, 9, 13)):
179
- """
180
- Initialize the SPP layer with input/output channels and pooling kernel sizes.
188
+ def __init__(self, c1: int, c2: int, k: tuple[int, ...] = (5, 9, 13)):
189
+ """Initialize the SPP layer with input/output channels and pooling kernel sizes.
181
190
 
182
191
  Args:
183
192
  c1 (int): Input channels.
184
193
  c2 (int): Output channels.
185
- k (Tuple[int, int, int]): Kernel sizes for max pooling.
194
+ k (tuple): Kernel sizes for max pooling.
186
195
  """
187
196
  super().__init__()
188
197
  c_ = c1 // 2 # hidden channels
@@ -190,7 +199,7 @@ class SPP(nn.Module):
190
199
  self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
191
200
  self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
192
201
 
193
- def forward(self, x):
202
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
194
203
  """Forward pass of the SPP layer, performing spatial pyramid pooling."""
195
204
  x = self.cv1(x)
196
205
  return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
@@ -199,9 +208,8 @@ class SPP(nn.Module):
199
208
  class SPPF(nn.Module):
200
209
  """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
201
210
 
202
- def __init__(self, c1, c2, k=5):
203
- """
204
- Initialize the SPPF layer with given input/output channels and kernel size.
211
+ def __init__(self, c1: int, c2: int, k: int = 5):
212
+ """Initialize the SPPF layer with given input/output channels and kernel size.
205
213
 
206
214
  Args:
207
215
  c1 (int): Input channels.
@@ -217,7 +225,7 @@ class SPPF(nn.Module):
217
225
  self.cv2 = Conv(c_ * 4, c2, 1, 1)
218
226
  self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
219
227
 
220
- def forward(self, x):
228
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
221
229
  """Apply sequential pooling operations to input and return concatenated feature maps."""
222
230
  y = [self.cv1(x)]
223
231
  y.extend(self.m(y[-1]) for _ in range(3))
@@ -227,9 +235,8 @@ class SPPF(nn.Module):
227
235
  class C1(nn.Module):
228
236
  """CSP Bottleneck with 1 convolution."""
229
237
 
230
- def __init__(self, c1, c2, n=1):
231
- """
232
- Initialize the CSP Bottleneck with 1 convolution.
238
+ def __init__(self, c1: int, c2: int, n: int = 1):
239
+ """Initialize the CSP Bottleneck with 1 convolution.
233
240
 
234
241
  Args:
235
242
  c1 (int): Input channels.
@@ -240,7 +247,7 @@ class C1(nn.Module):
240
247
  self.cv1 = Conv(c1, c2, 1, 1)
241
248
  self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
242
249
 
243
- def forward(self, x):
250
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
244
251
  """Apply convolution and residual connection to input tensor."""
245
252
  y = self.cv1(x)
246
253
  return self.m(y) + y
@@ -249,9 +256,8 @@ class C1(nn.Module):
249
256
  class C2(nn.Module):
250
257
  """CSP Bottleneck with 2 convolutions."""
251
258
 
252
- def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
253
- """
254
- Initialize a CSP Bottleneck with 2 convolutions.
259
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
260
+ """Initialize a CSP Bottleneck with 2 convolutions.
255
261
 
256
262
  Args:
257
263
  c1 (int): Input channels.
@@ -268,7 +274,7 @@ class C2(nn.Module):
268
274
  # self.attention = ChannelAttention(2 * self.c) # or SpatialAttention()
269
275
  self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
270
276
 
271
- def forward(self, x):
277
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
272
278
  """Forward pass through the CSP bottleneck with 2 convolutions."""
273
279
  a, b = self.cv1(x).chunk(2, 1)
274
280
  return self.cv2(torch.cat((self.m(a), b), 1))
@@ -277,9 +283,8 @@ class C2(nn.Module):
277
283
  class C2f(nn.Module):
278
284
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
279
285
 
280
- def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
281
- """
282
- Initialize a CSP bottleneck with 2 convolutions.
286
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
287
+ """Initialize a CSP bottleneck with 2 convolutions.
283
288
 
284
289
  Args:
285
290
  c1 (int): Input channels.
@@ -295,13 +300,13 @@ class C2f(nn.Module):
295
300
  self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2)
296
301
  self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
297
302
 
298
- def forward(self, x):
303
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
299
304
  """Forward pass through C2f layer."""
300
305
  y = list(self.cv1(x).chunk(2, 1))
301
306
  y.extend(m(y[-1]) for m in self.m)
302
307
  return self.cv2(torch.cat(y, 1))
303
308
 
304
- def forward_split(self, x):
309
+ def forward_split(self, x: torch.Tensor) -> torch.Tensor:
305
310
  """Forward pass using split() instead of chunk()."""
306
311
  y = self.cv1(x).split((self.c, self.c), 1)
307
312
  y = [y[0], y[1]]
@@ -312,9 +317,8 @@ class C2f(nn.Module):
312
317
  class C3(nn.Module):
313
318
  """CSP Bottleneck with 3 convolutions."""
314
319
 
315
- def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
316
- """
317
- Initialize the CSP Bottleneck with 3 convolutions.
320
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
321
+ """Initialize the CSP Bottleneck with 3 convolutions.
318
322
 
319
323
  Args:
320
324
  c1 (int): Input channels.
@@ -331,7 +335,7 @@ class C3(nn.Module):
331
335
  self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
332
336
  self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
333
337
 
334
- def forward(self, x):
338
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
335
339
  """Forward pass through the CSP bottleneck with 3 convolutions."""
336
340
  return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
337
341
 
@@ -339,9 +343,8 @@ class C3(nn.Module):
339
343
  class C3x(C3):
340
344
  """C3 module with cross-convolutions."""
341
345
 
342
- def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
343
- """
344
- Initialize C3 module with cross-convolutions.
346
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
347
+ """Initialize C3 module with cross-convolutions.
345
348
 
346
349
  Args:
347
350
  c1 (int): Input channels.
@@ -359,9 +362,8 @@ class C3x(C3):
359
362
  class RepC3(nn.Module):
360
363
  """Rep C3."""
361
364
 
362
- def __init__(self, c1, c2, n=3, e=1.0):
363
- """
364
- Initialize CSP Bottleneck with a single convolution.
365
+ def __init__(self, c1: int, c2: int, n: int = 3, e: float = 1.0):
366
+ """Initialize CSP Bottleneck with a single convolution.
365
367
 
366
368
  Args:
367
369
  c1 (int): Input channels.
@@ -376,7 +378,7 @@ class RepC3(nn.Module):
376
378
  self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)])
377
379
  self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity()
378
380
 
379
- def forward(self, x):
381
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
380
382
  """Forward pass of RepC3 module."""
381
383
  return self.cv3(self.m(self.cv1(x)) + self.cv2(x))
382
384
 
@@ -384,9 +386,8 @@ class RepC3(nn.Module):
384
386
  class C3TR(C3):
385
387
  """C3 module with TransformerBlock()."""
386
388
 
387
- def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
388
- """
389
- Initialize C3 module with TransformerBlock.
389
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
390
+ """Initialize C3 module with TransformerBlock.
390
391
 
391
392
  Args:
392
393
  c1 (int): Input channels.
@@ -404,9 +405,8 @@ class C3TR(C3):
404
405
  class C3Ghost(C3):
405
406
  """C3 module with GhostBottleneck()."""
406
407
 
407
- def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
408
- """
409
- Initialize C3 module with GhostBottleneck.
408
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
409
+ """Initialize C3 module with GhostBottleneck.
410
410
 
411
411
  Args:
412
412
  c1 (int): Input channels.
@@ -424,9 +424,8 @@ class C3Ghost(C3):
424
424
  class GhostBottleneck(nn.Module):
425
425
  """Ghost Bottleneck https://github.com/huawei-noah/Efficient-AI-Backbones."""
426
426
 
427
- def __init__(self, c1, c2, k=3, s=1):
428
- """
429
- Initialize Ghost Bottleneck module.
427
+ def __init__(self, c1: int, c2: int, k: int = 3, s: int = 1):
428
+ """Initialize Ghost Bottleneck module.
430
429
 
431
430
  Args:
432
431
  c1 (int): Input channels.
@@ -445,7 +444,7 @@ class GhostBottleneck(nn.Module):
445
444
  nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
446
445
  )
447
446
 
448
- def forward(self, x):
447
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
449
448
  """Apply skip connection and concatenation to input tensor."""
450
449
  return self.conv(x) + self.shortcut(x)
451
450
 
@@ -453,16 +452,17 @@ class GhostBottleneck(nn.Module):
453
452
  class Bottleneck(nn.Module):
454
453
  """Standard bottleneck."""
455
454
 
456
- def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
457
- """
458
- Initialize a standard bottleneck module.
455
+ def __init__(
456
+ self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
457
+ ):
458
+ """Initialize a standard bottleneck module.
459
459
 
460
460
  Args:
461
461
  c1 (int): Input channels.
462
462
  c2 (int): Output channels.
463
463
  shortcut (bool): Whether to use shortcut connection.
464
464
  g (int): Groups for convolutions.
465
- k (Tuple[int, int]): Kernel sizes for convolutions.
465
+ k (tuple): Kernel sizes for convolutions.
466
466
  e (float): Expansion ratio.
467
467
  """
468
468
  super().__init__()
@@ -471,7 +471,7 @@ class Bottleneck(nn.Module):
471
471
  self.cv2 = Conv(c_, c2, k[1], 1, g=g)
472
472
  self.add = shortcut and c1 == c2
473
473
 
474
- def forward(self, x):
474
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
475
475
  """Apply bottleneck with optional shortcut connection."""
476
476
  return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
477
477
 
@@ -479,9 +479,8 @@ class Bottleneck(nn.Module):
479
479
  class BottleneckCSP(nn.Module):
480
480
  """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
481
481
 
482
- def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
483
- """
484
- Initialize CSP Bottleneck.
482
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
483
+ """Initialize CSP Bottleneck.
485
484
 
486
485
  Args:
487
486
  c1 (int): Input channels.
@@ -501,7 +500,7 @@ class BottleneckCSP(nn.Module):
501
500
  self.act = nn.SiLU()
502
501
  self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
503
502
 
504
- def forward(self, x):
503
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
505
504
  """Apply CSP bottleneck with 3 convolutions."""
506
505
  y1 = self.cv3(self.m(self.cv1(x)))
507
506
  y2 = self.cv2(x)
@@ -511,9 +510,8 @@ class BottleneckCSP(nn.Module):
511
510
  class ResNetBlock(nn.Module):
512
511
  """ResNet block with standard convolution layers."""
513
512
 
514
- def __init__(self, c1, c2, s=1, e=4):
515
- """
516
- Initialize ResNet block.
513
+ def __init__(self, c1: int, c2: int, s: int = 1, e: int = 4):
514
+ """Initialize ResNet block.
517
515
 
518
516
  Args:
519
517
  c1 (int): Input channels.
@@ -528,7 +526,7 @@ class ResNetBlock(nn.Module):
528
526
  self.cv3 = Conv(c2, c3, k=1, act=False)
529
527
  self.shortcut = nn.Sequential(Conv(c1, c3, k=1, s=s, act=False)) if s != 1 or c1 != c3 else nn.Identity()
530
528
 
531
- def forward(self, x):
529
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
532
530
  """Forward pass through the ResNet block."""
533
531
  return F.relu(self.cv3(self.cv2(self.cv1(x))) + self.shortcut(x))
534
532
 
@@ -536,9 +534,8 @@ class ResNetBlock(nn.Module):
536
534
  class ResNetLayer(nn.Module):
537
535
  """ResNet layer with multiple ResNet blocks."""
538
536
 
539
- def __init__(self, c1, c2, s=1, is_first=False, n=1, e=4):
540
- """
541
- Initialize ResNet layer.
537
+ def __init__(self, c1: int, c2: int, s: int = 1, is_first: bool = False, n: int = 1, e: int = 4):
538
+ """Initialize ResNet layer.
542
539
 
543
540
  Args:
544
541
  c1 (int): Input channels.
@@ -560,7 +557,7 @@ class ResNetLayer(nn.Module):
560
557
  blocks.extend([ResNetBlock(e * c2, c2, 1, e=e) for _ in range(n - 1)])
561
558
  self.layer = nn.Sequential(*blocks)
562
559
 
563
- def forward(self, x):
560
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
564
561
  """Forward pass through the ResNet layer."""
565
562
  return self.layer(x)
566
563
 
@@ -568,9 +565,8 @@ class ResNetLayer(nn.Module):
568
565
  class MaxSigmoidAttnBlock(nn.Module):
569
566
  """Max Sigmoid attention block."""
570
567
 
571
- def __init__(self, c1, c2, nh=1, ec=128, gc=512, scale=False):
572
- """
573
- Initialize MaxSigmoidAttnBlock.
568
+ def __init__(self, c1: int, c2: int, nh: int = 1, ec: int = 128, gc: int = 512, scale: bool = False):
569
+ """Initialize MaxSigmoidAttnBlock.
574
570
 
575
571
  Args:
576
572
  c1 (int): Input channels.
@@ -589,9 +585,8 @@ class MaxSigmoidAttnBlock(nn.Module):
589
585
  self.proj_conv = Conv(c1, c2, k=3, s=1, act=False)
590
586
  self.scale = nn.Parameter(torch.ones(1, nh, 1, 1)) if scale else 1.0
591
587
 
592
- def forward(self, x, guide):
593
- """
594
- Forward pass of MaxSigmoidAttnBlock.
588
+ def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
589
+ """Forward pass of MaxSigmoidAttnBlock.
595
590
 
596
591
  Args:
597
592
  x (torch.Tensor): Input tensor.
@@ -622,9 +617,19 @@ class MaxSigmoidAttnBlock(nn.Module):
622
617
  class C2fAttn(nn.Module):
623
618
  """C2f module with an additional attn module."""
624
619
 
625
- def __init__(self, c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5):
626
- """
627
- Initialize C2f module with attention mechanism.
620
+ def __init__(
621
+ self,
622
+ c1: int,
623
+ c2: int,
624
+ n: int = 1,
625
+ ec: int = 128,
626
+ nh: int = 1,
627
+ gc: int = 512,
628
+ shortcut: bool = False,
629
+ g: int = 1,
630
+ e: float = 0.5,
631
+ ):
632
+ """Initialize C2f module with attention mechanism.
628
633
 
629
634
  Args:
630
635
  c1 (int): Input channels.
@@ -644,9 +649,8 @@ class C2fAttn(nn.Module):
644
649
  self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
645
650
  self.attn = MaxSigmoidAttnBlock(self.c, self.c, gc=gc, ec=ec, nh=nh)
646
651
 
647
- def forward(self, x, guide):
648
- """
649
- Forward pass through C2f layer with attention.
652
+ def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
653
+ """Forward pass through C2f layer with attention.
650
654
 
651
655
  Args:
652
656
  x (torch.Tensor): Input tensor.
@@ -660,9 +664,8 @@ class C2fAttn(nn.Module):
660
664
  y.append(self.attn(y[-1], guide))
661
665
  return self.cv2(torch.cat(y, 1))
662
666
 
663
- def forward_split(self, x, guide):
664
- """
665
- Forward pass using split() instead of chunk().
667
+ def forward_split(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
668
+ """Forward pass using split() instead of chunk().
666
669
 
667
670
  Args:
668
671
  x (torch.Tensor): Input tensor.
@@ -680,9 +683,10 @@ class C2fAttn(nn.Module):
680
683
  class ImagePoolingAttn(nn.Module):
681
684
  """ImagePoolingAttn: Enhance the text embeddings with image-aware information."""
682
685
 
683
- def __init__(self, ec=256, ch=(), ct=512, nh=8, k=3, scale=False):
684
- """
685
- Initialize ImagePoolingAttn module.
686
+ def __init__(
687
+ self, ec: int = 256, ch: tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
688
+ ):
689
+ """Initialize ImagePoolingAttn module.
686
690
 
687
691
  Args:
688
692
  ec (int): Embedding channels.
@@ -708,12 +712,11 @@ class ImagePoolingAttn(nn.Module):
708
712
  self.hc = ec // nh
709
713
  self.k = k
710
714
 
711
- def forward(self, x, text):
712
- """
713
- Forward pass of ImagePoolingAttn.
715
+ def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
716
+ """Forward pass of ImagePoolingAttn.
714
717
 
715
718
  Args:
716
- x (List[torch.Tensor]): List of input feature maps.
719
+ x (list[torch.Tensor]): List of input feature maps.
717
720
  text (torch.Tensor): Text embeddings.
718
721
 
719
722
  Returns:
@@ -752,9 +755,8 @@ class ContrastiveHead(nn.Module):
752
755
  self.bias = nn.Parameter(torch.tensor([-10.0]))
753
756
  self.logit_scale = nn.Parameter(torch.ones([]) * torch.tensor(1 / 0.07).log())
754
757
 
755
- def forward(self, x, w):
756
- """
757
- Forward function of contrastive learning.
758
+ def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
759
+ """Forward function of contrastive learning.
758
760
 
759
761
  Args:
760
762
  x (torch.Tensor): Image features.
@@ -770,16 +772,14 @@ class ContrastiveHead(nn.Module):
770
772
 
771
773
 
772
774
  class BNContrastiveHead(nn.Module):
773
- """
774
- Batch Norm Contrastive Head using batch norm instead of l2-normalization.
775
+ """Batch Norm Contrastive Head using batch norm instead of l2-normalization.
775
776
 
776
777
  Args:
777
778
  embed_dims (int): Embed dimensions of text and image features.
778
779
  """
779
780
 
780
781
  def __init__(self, embed_dims: int):
781
- """
782
- Initialize BNContrastiveHead.
782
+ """Initialize BNContrastiveHead.
783
783
 
784
784
  Args:
785
785
  embed_dims (int): Embedding dimensions for features.
@@ -798,17 +798,12 @@ class BNContrastiveHead(nn.Module):
798
798
  del self.logit_scale
799
799
  self.forward = self.forward_fuse
800
800
 
801
- def forward_fuse(self, x, w):
802
- """
803
- Passes input out unchanged.
804
-
805
- TODO: Update or remove?
806
- """
801
+ def forward_fuse(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
802
+ """Passes input out unchanged."""
807
803
  return x
808
804
 
809
- def forward(self, x, w):
810
- """
811
- Forward function of contrastive learning with batch normalization.
805
+ def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
806
+ """Forward function of contrastive learning with batch normalization.
812
807
 
813
808
  Args:
814
809
  x (torch.Tensor): Image features.
@@ -827,16 +822,17 @@ class BNContrastiveHead(nn.Module):
827
822
  class RepBottleneck(Bottleneck):
828
823
  """Rep bottleneck."""
829
824
 
830
- def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
831
- """
832
- Initialize RepBottleneck.
825
+ def __init__(
826
+ self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
827
+ ):
828
+ """Initialize RepBottleneck.
833
829
 
834
830
  Args:
835
831
  c1 (int): Input channels.
836
832
  c2 (int): Output channels.
837
833
  shortcut (bool): Whether to use shortcut connection.
838
834
  g (int): Groups for convolutions.
839
- k (Tuple[int, int]): Kernel sizes for convolutions.
835
+ k (tuple): Kernel sizes for convolutions.
840
836
  e (float): Expansion ratio.
841
837
  """
842
838
  super().__init__(c1, c2, shortcut, g, k, e)
@@ -847,9 +843,8 @@ class RepBottleneck(Bottleneck):
847
843
  class RepCSP(C3):
848
844
  """Repeatable Cross Stage Partial Network (RepCSP) module for efficient feature extraction."""
849
845
 
850
- def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
851
- """
852
- Initialize RepCSP layer.
846
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
847
+ """Initialize RepCSP layer.
853
848
 
854
849
  Args:
855
850
  c1 (int): Input channels.
@@ -867,9 +862,8 @@ class RepCSP(C3):
867
862
  class RepNCSPELAN4(nn.Module):
868
863
  """CSP-ELAN."""
869
864
 
870
- def __init__(self, c1, c2, c3, c4, n=1):
871
- """
872
- Initialize CSP-ELAN layer.
865
+ def __init__(self, c1: int, c2: int, c3: int, c4: int, n: int = 1):
866
+ """Initialize CSP-ELAN layer.
873
867
 
874
868
  Args:
875
869
  c1 (int): Input channels.
@@ -885,13 +879,13 @@ class RepNCSPELAN4(nn.Module):
885
879
  self.cv3 = nn.Sequential(RepCSP(c4, c4, n), Conv(c4, c4, 3, 1))
886
880
  self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)
887
881
 
888
- def forward(self, x):
882
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
889
883
  """Forward pass through RepNCSPELAN4 layer."""
890
884
  y = list(self.cv1(x).chunk(2, 1))
891
885
  y.extend((m(y[-1])) for m in [self.cv2, self.cv3])
892
886
  return self.cv4(torch.cat(y, 1))
893
887
 
894
- def forward_split(self, x):
888
+ def forward_split(self, x: torch.Tensor) -> torch.Tensor:
895
889
  """Forward pass using split() instead of chunk()."""
896
890
  y = list(self.cv1(x).split((self.c, self.c), 1))
897
891
  y.extend(m(y[-1]) for m in [self.cv2, self.cv3])
@@ -901,9 +895,8 @@ class RepNCSPELAN4(nn.Module):
901
895
  class ELAN1(RepNCSPELAN4):
902
896
  """ELAN1 module with 4 convolutions."""
903
897
 
904
- def __init__(self, c1, c2, c3, c4):
905
- """
906
- Initialize ELAN1 layer.
898
+ def __init__(self, c1: int, c2: int, c3: int, c4: int):
899
+ """Initialize ELAN1 layer.
907
900
 
908
901
  Args:
909
902
  c1 (int): Input channels.
@@ -922,9 +915,8 @@ class ELAN1(RepNCSPELAN4):
922
915
  class AConv(nn.Module):
923
916
  """AConv."""
924
917
 
925
- def __init__(self, c1, c2):
926
- """
927
- Initialize AConv module.
918
+ def __init__(self, c1: int, c2: int):
919
+ """Initialize AConv module.
928
920
 
929
921
  Args:
930
922
  c1 (int): Input channels.
@@ -933,7 +925,7 @@ class AConv(nn.Module):
933
925
  super().__init__()
934
926
  self.cv1 = Conv(c1, c2, 3, 2, 1)
935
927
 
936
- def forward(self, x):
928
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
937
929
  """Forward pass through AConv layer."""
938
930
  x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
939
931
  return self.cv1(x)
@@ -942,9 +934,8 @@ class AConv(nn.Module):
942
934
  class ADown(nn.Module):
943
935
  """ADown."""
944
936
 
945
- def __init__(self, c1, c2):
946
- """
947
- Initialize ADown module.
937
+ def __init__(self, c1: int, c2: int):
938
+ """Initialize ADown module.
948
939
 
949
940
  Args:
950
941
  c1 (int): Input channels.
@@ -955,7 +946,7 @@ class ADown(nn.Module):
955
946
  self.cv1 = Conv(c1 // 2, self.c, 3, 2, 1)
956
947
  self.cv2 = Conv(c1 // 2, self.c, 1, 1, 0)
957
948
 
958
- def forward(self, x):
949
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
959
950
  """Forward pass through ADown layer."""
960
951
  x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
961
952
  x1, x2 = x.chunk(2, 1)
@@ -968,9 +959,8 @@ class ADown(nn.Module):
968
959
  class SPPELAN(nn.Module):
969
960
  """SPP-ELAN."""
970
961
 
971
- def __init__(self, c1, c2, c3, k=5):
972
- """
973
- Initialize SPP-ELAN block.
962
+ def __init__(self, c1: int, c2: int, c3: int, k: int = 5):
963
+ """Initialize SPP-ELAN block.
974
964
 
975
965
  Args:
976
966
  c1 (int): Input channels.
@@ -986,7 +976,7 @@ class SPPELAN(nn.Module):
986
976
  self.cv4 = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
987
977
  self.cv5 = Conv(4 * c3, c2, 1, 1)
988
978
 
989
- def forward(self, x):
979
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
990
980
  """Forward pass through SPPELAN layer."""
991
981
  y = [self.cv1(x)]
992
982
  y.extend(m(y[-1]) for m in [self.cv2, self.cv3, self.cv4])
@@ -996,13 +986,12 @@ class SPPELAN(nn.Module):
996
986
  class CBLinear(nn.Module):
997
987
  """CBLinear."""
998
988
 
999
- def __init__(self, c1, c2s, k=1, s=1, p=None, g=1):
1000
- """
1001
- Initialize CBLinear module.
989
+ def __init__(self, c1: int, c2s: list[int], k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
990
+ """Initialize CBLinear module.
1002
991
 
1003
992
  Args:
1004
993
  c1 (int): Input channels.
1005
- c2s (List[int]): List of output channel sizes.
994
+ c2s (list[int]): List of output channel sizes.
1006
995
  k (int): Kernel size.
1007
996
  s (int): Stride.
1008
997
  p (int | None): Padding.
@@ -1012,7 +1001,7 @@ class CBLinear(nn.Module):
1012
1001
  self.c2s = c2s
1013
1002
  self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True)
1014
1003
 
1015
- def forward(self, x):
1004
+ def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
1016
1005
  """Forward pass through CBLinear layer."""
1017
1006
  return self.conv(x).split(self.c2s, dim=1)
1018
1007
 
@@ -1020,22 +1009,20 @@ class CBLinear(nn.Module):
1020
1009
  class CBFuse(nn.Module):
1021
1010
  """CBFuse."""
1022
1011
 
1023
- def __init__(self, idx):
1024
- """
1025
- Initialize CBFuse module.
1012
+ def __init__(self, idx: list[int]):
1013
+ """Initialize CBFuse module.
1026
1014
 
1027
1015
  Args:
1028
- idx (List[int]): Indices for feature selection.
1016
+ idx (list[int]): Indices for feature selection.
1029
1017
  """
1030
1018
  super().__init__()
1031
1019
  self.idx = idx
1032
1020
 
1033
- def forward(self, xs):
1034
- """
1035
- Forward pass through CBFuse layer.
1021
+ def forward(self, xs: list[torch.Tensor]) -> torch.Tensor:
1022
+ """Forward pass through CBFuse layer.
1036
1023
 
1037
1024
  Args:
1038
- xs (List[torch.Tensor]): List of input tensors.
1025
+ xs (list[torch.Tensor]): List of input tensors.
1039
1026
 
1040
1027
  Returns:
1041
1028
  (torch.Tensor): Fused output tensor.
@@ -1048,9 +1035,8 @@ class CBFuse(nn.Module):
1048
1035
  class C3f(nn.Module):
1049
1036
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
1050
1037
 
1051
- def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
1052
- """
1053
- Initialize CSP bottleneck layer with two convolutions.
1038
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
1039
+ """Initialize CSP bottleneck layer with two convolutions.
1054
1040
 
1055
1041
  Args:
1056
1042
  c1 (int): Input channels.
@@ -1067,7 +1053,7 @@ class C3f(nn.Module):
1067
1053
  self.cv3 = Conv((2 + n) * c_, c2, 1) # optional act=FReLU(c2)
1068
1054
  self.m = nn.ModuleList(Bottleneck(c_, c_, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
1069
1055
 
1070
- def forward(self, x):
1056
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1071
1057
  """Forward pass through C3f layer."""
1072
1058
  y = [self.cv2(x), self.cv1(x)]
1073
1059
  y.extend(m(y[-1]) for m in self.m)
@@ -1077,9 +1063,10 @@ class C3f(nn.Module):
1077
1063
  class C3k2(C2f):
1078
1064
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
1079
1065
 
1080
- def __init__(self, c1, c2, n=1, c3k=False, e=0.5, g=1, shortcut=True):
1081
- """
1082
- Initialize C3k2 module.
1066
+ def __init__(
1067
+ self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
1068
+ ):
1069
+ """Initialize C3k2 module.
1083
1070
 
1084
1071
  Args:
1085
1072
  c1 (int): Input channels.
@@ -1099,9 +1086,8 @@ class C3k2(C2f):
1099
1086
  class C3k(C3):
1100
1087
  """C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks."""
1101
1088
 
1102
- def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, k=3):
1103
- """
1104
- Initialize C3k module.
1089
+ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5, k: int = 3):
1090
+ """Initialize C3k module.
1105
1091
 
1106
1092
  Args:
1107
1093
  c1 (int): Input channels.
@@ -1121,9 +1107,8 @@ class C3k(C3):
1121
1107
  class RepVGGDW(torch.nn.Module):
1122
1108
  """RepVGGDW is a class that represents a depth wise separable convolutional block in RepVGG architecture."""
1123
1109
 
1124
- def __init__(self, ed) -> None:
1125
- """
1126
- Initialize RepVGGDW module.
1110
+ def __init__(self, ed: int) -> None:
1111
+ """Initialize RepVGGDW module.
1127
1112
 
1128
1113
  Args:
1129
1114
  ed (int): Input and output channels.
@@ -1134,9 +1119,8 @@ class RepVGGDW(torch.nn.Module):
1134
1119
  self.dim = ed
1135
1120
  self.act = nn.SiLU()
1136
1121
 
1137
- def forward(self, x):
1138
- """
1139
- Perform a forward pass of the RepVGGDW block.
1122
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1123
+ """Perform a forward pass of the RepVGGDW block.
1140
1124
 
1141
1125
  Args:
1142
1126
  x (torch.Tensor): Input tensor.
@@ -1146,9 +1130,8 @@ class RepVGGDW(torch.nn.Module):
1146
1130
  """
1147
1131
  return self.act(self.conv(x) + self.conv1(x))
1148
1132
 
1149
- def forward_fuse(self, x):
1150
- """
1151
- Perform a forward pass of the RepVGGDW block without fusing the convolutions.
1133
+ def forward_fuse(self, x: torch.Tensor) -> torch.Tensor:
1134
+ """Perform a forward pass of the RepVGGDW block without fusing the convolutions.
1152
1135
 
1153
1136
  Args:
1154
1137
  x (torch.Tensor): Input tensor.
@@ -1160,8 +1143,7 @@ class RepVGGDW(torch.nn.Module):
1160
1143
 
1161
1144
  @torch.no_grad()
1162
1145
  def fuse(self):
1163
- """
1164
- Fuse the convolutional layers in the RepVGGDW block.
1146
+ """Fuse the convolutional layers in the RepVGGDW block.
1165
1147
 
1166
1148
  This method fuses the convolutional layers and updates the weights and biases accordingly.
1167
1149
  """
@@ -1186,8 +1168,7 @@ class RepVGGDW(torch.nn.Module):
1186
1168
 
1187
1169
 
1188
1170
  class CIB(nn.Module):
1189
- """
1190
- Conditional Identity Block (CIB) module.
1171
+ """Conditional Identity Block (CIB) module.
1191
1172
 
1192
1173
  Args:
1193
1174
  c1 (int): Number of input channels.
@@ -1197,9 +1178,8 @@ class CIB(nn.Module):
1197
1178
  lk (bool, optional): Whether to use RepVGGDW for the third convolutional layer. Defaults to False.
1198
1179
  """
1199
1180
 
1200
- def __init__(self, c1, c2, shortcut=True, e=0.5, lk=False):
1201
- """
1202
- Initialize the CIB module.
1181
+ def __init__(self, c1: int, c2: int, shortcut: bool = True, e: float = 0.5, lk: bool = False):
1182
+ """Initialize the CIB module.
1203
1183
 
1204
1184
  Args:
1205
1185
  c1 (int): Input channels.
@@ -1220,9 +1200,8 @@ class CIB(nn.Module):
1220
1200
 
1221
1201
  self.add = shortcut and c1 == c2
1222
1202
 
1223
- def forward(self, x):
1224
- """
1225
- Forward pass of the CIB module.
1203
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1204
+ """Forward pass of the CIB module.
1226
1205
 
1227
1206
  Args:
1228
1207
  x (torch.Tensor): Input tensor.
@@ -1234,8 +1213,7 @@ class CIB(nn.Module):
1234
1213
 
1235
1214
 
1236
1215
  class C2fCIB(C2f):
1237
- """
1238
- C2fCIB class represents a convolutional block with C2f and CIB modules.
1216
+ """C2fCIB class represents a convolutional block with C2f and CIB modules.
1239
1217
 
1240
1218
  Args:
1241
1219
  c1 (int): Number of input channels.
@@ -1247,9 +1225,10 @@ class C2fCIB(C2f):
1247
1225
  e (float, optional): Expansion ratio for CIB modules. Defaults to 0.5.
1248
1226
  """
1249
1227
 
1250
- def __init__(self, c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5):
1251
- """
1252
- Initialize C2fCIB module.
1228
+ def __init__(
1229
+ self, c1: int, c2: int, n: int = 1, shortcut: bool = False, lk: bool = False, g: int = 1, e: float = 0.5
1230
+ ):
1231
+ """Initialize C2fCIB module.
1253
1232
 
1254
1233
  Args:
1255
1234
  c1 (int): Input channels.
@@ -1265,8 +1244,7 @@ class C2fCIB(C2f):
1265
1244
 
1266
1245
 
1267
1246
  class Attention(nn.Module):
1268
- """
1269
- Attention module that performs self-attention on the input tensor.
1247
+ """Attention module that performs self-attention on the input tensor.
1270
1248
 
1271
1249
  Args:
1272
1250
  dim (int): The input tensor dimension.
@@ -1283,9 +1261,8 @@ class Attention(nn.Module):
1283
1261
  pe (Conv): Convolutional layer for positional encoding.
1284
1262
  """
1285
1263
 
1286
- def __init__(self, dim, num_heads=8, attn_ratio=0.5):
1287
- """
1288
- Initialize multi-head attention module.
1264
+ def __init__(self, dim: int, num_heads: int = 8, attn_ratio: float = 0.5):
1265
+ """Initialize multi-head attention module.
1289
1266
 
1290
1267
  Args:
1291
1268
  dim (int): Input dimension.
@@ -1303,9 +1280,8 @@ class Attention(nn.Module):
1303
1280
  self.proj = Conv(dim, dim, 1, act=False)
1304
1281
  self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)
1305
1282
 
1306
- def forward(self, x):
1307
- """
1308
- Forward pass of the Attention module.
1283
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1284
+ """Forward pass of the Attention module.
1309
1285
 
1310
1286
  Args:
1311
1287
  x (torch.Tensor): The input tensor.
@@ -1328,8 +1304,7 @@ class Attention(nn.Module):
1328
1304
 
1329
1305
 
1330
1306
  class PSABlock(nn.Module):
1331
- """
1332
- PSABlock class implementing a Position-Sensitive Attention block for neural networks.
1307
+ """PSABlock class implementing a Position-Sensitive Attention block for neural networks.
1333
1308
 
1334
1309
  This class encapsulates the functionality for applying multi-head attention and feed-forward neural network layers
1335
1310
  with optional shortcut connections.
@@ -1349,9 +1324,8 @@ class PSABlock(nn.Module):
1349
1324
  >>> output_tensor = psablock(input_tensor)
1350
1325
  """
1351
1326
 
1352
- def __init__(self, c, attn_ratio=0.5, num_heads=4, shortcut=True) -> None:
1353
- """
1354
- Initialize the PSABlock.
1327
+ def __init__(self, c: int, attn_ratio: float = 0.5, num_heads: int = 4, shortcut: bool = True) -> None:
1328
+ """Initialize the PSABlock.
1355
1329
 
1356
1330
  Args:
1357
1331
  c (int): Input and output channels.
@@ -1365,9 +1339,8 @@ class PSABlock(nn.Module):
1365
1339
  self.ffn = nn.Sequential(Conv(c, c * 2, 1), Conv(c * 2, c, 1, act=False))
1366
1340
  self.add = shortcut
1367
1341
 
1368
- def forward(self, x):
1369
- """
1370
- Execute a forward pass through PSABlock.
1342
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1343
+ """Execute a forward pass through PSABlock.
1371
1344
 
1372
1345
  Args:
1373
1346
  x (torch.Tensor): Input tensor.
@@ -1381,8 +1354,7 @@ class PSABlock(nn.Module):
1381
1354
 
1382
1355
 
1383
1356
  class PSA(nn.Module):
1384
- """
1385
- PSA class for implementing Position-Sensitive Attention in neural networks.
1357
+ """PSA class for implementing Position-Sensitive Attention in neural networks.
1386
1358
 
1387
1359
  This class encapsulates the functionality for applying position-sensitive attention and feed-forward networks to
1388
1360
  input tensors, enhancing feature extraction and processing capabilities.
@@ -1404,9 +1376,8 @@ class PSA(nn.Module):
1404
1376
  >>> output_tensor = psa.forward(input_tensor)
1405
1377
  """
1406
1378
 
1407
- def __init__(self, c1, c2, e=0.5):
1408
- """
1409
- Initialize PSA module.
1379
+ def __init__(self, c1: int, c2: int, e: float = 0.5):
1380
+ """Initialize PSA module.
1410
1381
 
1411
1382
  Args:
1412
1383
  c1 (int): Input channels.
@@ -1422,9 +1393,8 @@ class PSA(nn.Module):
1422
1393
  self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
1423
1394
  self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
1424
1395
 
1425
- def forward(self, x):
1426
- """
1427
- Execute forward pass in PSA module.
1396
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1397
+ """Execute forward pass in PSA module.
1428
1398
 
1429
1399
  Args:
1430
1400
  x (torch.Tensor): Input tensor.
@@ -1439,8 +1409,7 @@ class PSA(nn.Module):
1439
1409
 
1440
1410
 
1441
1411
  class C2PSA(nn.Module):
1442
- """
1443
- C2PSA module with attention mechanism for enhanced feature extraction and processing.
1412
+ """C2PSA module with attention mechanism for enhanced feature extraction and processing.
1444
1413
 
1445
1414
  This module implements a convolutional block with attention mechanisms to enhance feature extraction and processing
1446
1415
  capabilities. It includes a series of PSABlock modules for self-attention and feed-forward operations.
@@ -1454,18 +1423,17 @@ class C2PSA(nn.Module):
1454
1423
  Methods:
1455
1424
  forward: Performs a forward pass through the C2PSA module, applying attention and feed-forward operations.
1456
1425
 
1457
- Notes:
1458
- This module essentially is the same as PSA module, but refactored to allow stacking more PSABlock modules.
1459
-
1460
1426
  Examples:
1461
1427
  >>> c2psa = C2PSA(c1=256, c2=256, n=3, e=0.5)
1462
1428
  >>> input_tensor = torch.randn(1, 256, 64, 64)
1463
1429
  >>> output_tensor = c2psa(input_tensor)
1430
+
1431
+ Notes:
1432
+ This module essentially is the same as PSA module, but refactored to allow stacking more PSABlock modules.
1464
1433
  """
1465
1434
 
1466
- def __init__(self, c1, c2, n=1, e=0.5):
1467
- """
1468
- Initialize C2PSA module.
1435
+ def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
1436
+ """Initialize C2PSA module.
1469
1437
 
1470
1438
  Args:
1471
1439
  c1 (int): Input channels.
@@ -1481,9 +1449,8 @@ class C2PSA(nn.Module):
1481
1449
 
1482
1450
  self.m = nn.Sequential(*(PSABlock(self.c, attn_ratio=0.5, num_heads=self.c // 64) for _ in range(n)))
1483
1451
 
1484
- def forward(self, x):
1485
- """
1486
- Process the input tensor through a series of PSA blocks.
1452
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1453
+ """Process the input tensor through a series of PSA blocks.
1487
1454
 
1488
1455
  Args:
1489
1456
  x (torch.Tensor): Input tensor.
@@ -1497,10 +1464,10 @@ class C2PSA(nn.Module):
1497
1464
 
1498
1465
 
1499
1466
  class C2fPSA(C2f):
1500
- """
1501
- C2fPSA module with enhanced feature extraction using PSA blocks.
1467
+ """C2fPSA module with enhanced feature extraction using PSA blocks.
1502
1468
 
1503
- This class extends the C2f module by incorporating PSA blocks for improved attention mechanisms and feature extraction.
1469
+ This class extends the C2f module by incorporating PSA blocks for improved attention mechanisms and feature
1470
+ extraction.
1504
1471
 
1505
1472
  Attributes:
1506
1473
  c (int): Number of hidden channels.
@@ -1521,9 +1488,8 @@ class C2fPSA(C2f):
1521
1488
  >>> print(output.shape)
1522
1489
  """
1523
1490
 
1524
- def __init__(self, c1, c2, n=1, e=0.5):
1525
- """
1526
- Initialize C2fPSA module.
1491
+ def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
1492
+ """Initialize C2fPSA module.
1527
1493
 
1528
1494
  Args:
1529
1495
  c1 (int): Input channels.
@@ -1537,8 +1503,7 @@ class C2fPSA(C2f):
1537
1503
 
1538
1504
 
1539
1505
  class SCDown(nn.Module):
1540
- """
1541
- SCDown module for downsampling with separable convolutions.
1506
+ """SCDown module for downsampling with separable convolutions.
1542
1507
 
1543
1508
  This module performs downsampling using a combination of pointwise and depthwise convolutions, which helps in
1544
1509
  efficiently reducing the spatial dimensions of the input tensor while maintaining the channel information.
@@ -1560,9 +1525,8 @@ class SCDown(nn.Module):
1560
1525
  torch.Size([1, 128, 64, 64])
1561
1526
  """
1562
1527
 
1563
- def __init__(self, c1, c2, k, s):
1564
- """
1565
- Initialize SCDown module.
1528
+ def __init__(self, c1: int, c2: int, k: int, s: int):
1529
+ """Initialize SCDown module.
1566
1530
 
1567
1531
  Args:
1568
1532
  c1 (int): Input channels.
@@ -1574,9 +1538,8 @@ class SCDown(nn.Module):
1574
1538
  self.cv1 = Conv(c1, c2, 1, 1)
1575
1539
  self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)
1576
1540
 
1577
- def forward(self, x):
1578
- """
1579
- Apply convolution and downsampling to the input tensor.
1541
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1542
+ """Apply convolution and downsampling to the input tensor.
1580
1543
 
1581
1544
  Args:
1582
1545
  x (torch.Tensor): Input tensor.
@@ -1588,25 +1551,26 @@ class SCDown(nn.Module):
1588
1551
 
1589
1552
 
1590
1553
  class TorchVision(nn.Module):
1591
- """
1592
- TorchVision module to allow loading any torchvision model.
1554
+ """TorchVision module to allow loading any torchvision model.
1593
1555
 
1594
- This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and customize the model by truncating or unwrapping layers.
1595
-
1596
- Attributes:
1597
- m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
1556
+ This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and
1557
+ customize the model by truncating or unwrapping layers.
1598
1558
 
1599
1559
  Args:
1600
1560
  model (str): Name of the torchvision model to load.
1601
1561
  weights (str, optional): Pre-trained weights to load. Default is "DEFAULT".
1602
- unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True.
1562
+ unwrap (bool, optional): Unwraps the model to a sequential containing all but the last `truncate` layers.
1603
1563
  truncate (int, optional): Number of layers to truncate from the end if `unwrap` is True. Default is 2.
1604
1564
  split (bool, optional): Returns output from intermediate child modules as list. Default is False.
1565
+
1566
+ Attributes:
1567
+ m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
1605
1568
  """
1606
1569
 
1607
- def __init__(self, model, weights="DEFAULT", unwrap=True, truncate=2, split=False):
1608
- """
1609
- Load the model and weights from torchvision.
1570
+ def __init__(
1571
+ self, model: str, weights: str = "DEFAULT", unwrap: bool = True, truncate: int = 2, split: bool = False
1572
+ ):
1573
+ """Load the model and weights from torchvision.
1610
1574
 
1611
1575
  Args:
1612
1576
  model (str): Name of the torchvision model to load.
@@ -1632,15 +1596,14 @@ class TorchVision(nn.Module):
1632
1596
  self.split = False
1633
1597
  self.m.head = self.m.heads = nn.Identity()
1634
1598
 
1635
- def forward(self, x):
1636
- """
1637
- Forward pass through the model.
1599
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1600
+ """Forward pass through the model.
1638
1601
 
1639
1602
  Args:
1640
1603
  x (torch.Tensor): Input tensor.
1641
1604
 
1642
1605
  Returns:
1643
- (torch.Tensor | List[torch.Tensor]): Output tensor or list of tensors.
1606
+ (torch.Tensor | list[torch.Tensor]): Output tensor or list of tensors.
1644
1607
  """
1645
1608
  if self.split:
1646
1609
  y = [x]
@@ -1651,8 +1614,7 @@ class TorchVision(nn.Module):
1651
1614
 
1652
1615
 
1653
1616
  class AAttn(nn.Module):
1654
- """
1655
- Area-attention module for YOLO models, providing efficient attention mechanisms.
1617
+ """Area-attention module for YOLO models, providing efficient attention mechanisms.
1656
1618
 
1657
1619
  This module implements an area-based attention mechanism that processes input features in a spatially-aware manner,
1658
1620
  making it particularly effective for object detection tasks.
@@ -1676,14 +1638,13 @@ class AAttn(nn.Module):
1676
1638
  torch.Size([1, 256, 32, 32])
1677
1639
  """
1678
1640
 
1679
- def __init__(self, dim, num_heads, area=1):
1680
- """
1681
- Initialize an Area-attention module for YOLO models.
1641
+ def __init__(self, dim: int, num_heads: int, area: int = 1):
1642
+ """Initialize an Area-attention module for YOLO models.
1682
1643
 
1683
1644
  Args:
1684
1645
  dim (int): Number of hidden channels.
1685
1646
  num_heads (int): Number of heads into which the attention mechanism is divided.
1686
- area (int): Number of areas the feature map is divided, default is 1.
1647
+ area (int): Number of areas the feature map is divided.
1687
1648
  """
1688
1649
  super().__init__()
1689
1650
  self.area = area
@@ -1696,9 +1657,8 @@ class AAttn(nn.Module):
1696
1657
  self.proj = Conv(all_head_dim, dim, 1, act=False)
1697
1658
  self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False)
1698
1659
 
1699
- def forward(self, x):
1700
- """
1701
- Process the input tensor through the area-attention.
1660
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1661
+ """Process the input tensor through the area-attention.
1702
1662
 
1703
1663
  Args:
1704
1664
  x (torch.Tensor): Input tensor.
@@ -1737,8 +1697,7 @@ class AAttn(nn.Module):
1737
1697
 
1738
1698
 
1739
1699
  class ABlock(nn.Module):
1740
- """
1741
- Area-attention block module for efficient feature extraction in YOLO models.
1700
+ """Area-attention block module for efficient feature extraction in YOLO models.
1742
1701
 
1743
1702
  This module implements an area-attention mechanism combined with a feed-forward network for processing feature maps.
1744
1703
  It uses a novel area-based attention approach that is more efficient than traditional self-attention while
@@ -1760,9 +1719,8 @@ class ABlock(nn.Module):
1760
1719
  torch.Size([1, 256, 32, 32])
1761
1720
  """
1762
1721
 
1763
- def __init__(self, dim, num_heads, mlp_ratio=1.2, area=1):
1764
- """
1765
- Initialize an Area-attention block module.
1722
+ def __init__(self, dim: int, num_heads: int, mlp_ratio: float = 1.2, area: int = 1):
1723
+ """Initialize an Area-attention block module.
1766
1724
 
1767
1725
  Args:
1768
1726
  dim (int): Number of input channels.
@@ -1778,9 +1736,8 @@ class ABlock(nn.Module):
1778
1736
 
1779
1737
  self.apply(self._init_weights)
1780
1738
 
1781
- def _init_weights(self, m):
1782
- """
1783
- Initialize weights using a truncated normal distribution.
1739
+ def _init_weights(self, m: nn.Module):
1740
+ """Initialize weights using a truncated normal distribution.
1784
1741
 
1785
1742
  Args:
1786
1743
  m (nn.Module): Module to initialize.
@@ -1790,9 +1747,8 @@ class ABlock(nn.Module):
1790
1747
  if m.bias is not None:
1791
1748
  nn.init.constant_(m.bias, 0)
1792
1749
 
1793
- def forward(self, x):
1794
- """
1795
- Forward pass through ABlock.
1750
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1751
+ """Forward pass through ABlock.
1796
1752
 
1797
1753
  Args:
1798
1754
  x (torch.Tensor): Input tensor.
@@ -1805,8 +1761,7 @@ class ABlock(nn.Module):
1805
1761
 
1806
1762
 
1807
1763
  class A2C2f(nn.Module):
1808
- """
1809
- Area-Attention C2f module for enhanced feature extraction with area-based attention mechanisms.
1764
+ """Area-Attention C2f module for enhanced feature extraction with area-based attention mechanisms.
1810
1765
 
1811
1766
  This module extends the C2f architecture by incorporating area-attention and ABlock layers for improved feature
1812
1767
  processing. It supports both area-attention and standard convolution modes.
@@ -1828,9 +1783,20 @@ class A2C2f(nn.Module):
1828
1783
  torch.Size([1, 512, 32, 32])
1829
1784
  """
1830
1785
 
1831
- def __init__(self, c1, c2, n=1, a2=True, area=1, residual=False, mlp_ratio=2.0, e=0.5, g=1, shortcut=True):
1832
- """
1833
- Initialize Area-Attention C2f module.
1786
+ def __init__(
1787
+ self,
1788
+ c1: int,
1789
+ c2: int,
1790
+ n: int = 1,
1791
+ a2: bool = True,
1792
+ area: int = 1,
1793
+ residual: bool = False,
1794
+ mlp_ratio: float = 2.0,
1795
+ e: float = 0.5,
1796
+ g: int = 1,
1797
+ shortcut: bool = True,
1798
+ ):
1799
+ """Initialize Area-Attention C2f module.
1834
1800
 
1835
1801
  Args:
1836
1802
  c1 (int): Number of input channels.
@@ -1859,9 +1825,8 @@ class A2C2f(nn.Module):
1859
1825
  for _ in range(n)
1860
1826
  )
1861
1827
 
1862
- def forward(self, x):
1863
- """
1864
- Forward pass through A2C2f layer.
1828
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1829
+ """Forward pass through A2C2f layer.
1865
1830
 
1866
1831
  Args:
1867
1832
  x (torch.Tensor): Input tensor.
@@ -1873,20 +1838,26 @@ class A2C2f(nn.Module):
1873
1838
  y.extend(m(y[-1]) for m in self.m)
1874
1839
  y = self.cv2(torch.cat(y, 1))
1875
1840
  if self.gamma is not None:
1876
- return x + self.gamma.view(-1, len(self.gamma), 1, 1) * y
1841
+ return x + self.gamma.view(-1, self.gamma.shape[0], 1, 1) * y
1877
1842
  return y
1878
1843
 
1879
1844
 
1880
1845
  class SwiGLUFFN(nn.Module):
1881
1846
  """SwiGLU Feed-Forward Network for transformer-based architectures."""
1882
1847
 
1883
- def __init__(self, gc, ec, e=4) -> None:
1884
- """Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor."""
1848
+ def __init__(self, gc: int, ec: int, e: int = 4) -> None:
1849
+ """Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor.
1850
+
1851
+ Args:
1852
+ gc (int): Guide channels.
1853
+ ec (int): Embedding channels.
1854
+ e (int): Expansion factor.
1855
+ """
1885
1856
  super().__init__()
1886
1857
  self.w12 = nn.Linear(gc, e * ec)
1887
1858
  self.w3 = nn.Linear(e * ec // 2, ec)
1888
1859
 
1889
- def forward(self, x):
1860
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1890
1861
  """Apply SwiGLU transformation to input features."""
1891
1862
  x12 = self.w12(x)
1892
1863
  x1, x2 = x12.chunk(2, dim=-1)
@@ -1897,8 +1868,12 @@ class SwiGLUFFN(nn.Module):
1897
1868
  class Residual(nn.Module):
1898
1869
  """Residual connection wrapper for neural network modules."""
1899
1870
 
1900
- def __init__(self, m) -> None:
1901
- """Initialize residual module with the wrapped module."""
1871
+ def __init__(self, m: nn.Module) -> None:
1872
+ """Initialize residual module with the wrapped module.
1873
+
1874
+ Args:
1875
+ m (nn.Module): Module to wrap with residual connection.
1876
+ """
1902
1877
  super().__init__()
1903
1878
  self.m = m
1904
1879
  nn.init.zeros_(self.m.w3.bias)
@@ -1906,7 +1881,7 @@ class Residual(nn.Module):
1906
1881
  # nn.init.constant_(self.m.w3.weight, 1e-6)
1907
1882
  nn.init.zeros_(self.m.w3.weight)
1908
1883
 
1909
- def forward(self, x):
1884
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1910
1885
  """Apply residual connection to input features."""
1911
1886
  return x + self.m(x)
1912
1887
 
@@ -1914,8 +1889,14 @@ class Residual(nn.Module):
1914
1889
  class SAVPE(nn.Module):
1915
1890
  """Spatial-Aware Visual Prompt Embedding module for feature enhancement."""
1916
1891
 
1917
- def __init__(self, ch, c3, embed):
1918
- """Initialize SAVPE module with channels, intermediate channels, and embedding dimension."""
1892
+ def __init__(self, ch: list[int], c3: int, embed: int):
1893
+ """Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
1894
+
1895
+ Args:
1896
+ ch (list[int]): List of input channel dimensions.
1897
+ c3 (int): Intermediate channels.
1898
+ embed (int): Embedding dimension.
1899
+ """
1919
1900
  super().__init__()
1920
1901
  self.cv1 = nn.ModuleList(
1921
1902
  nn.Sequential(
@@ -1935,7 +1916,7 @@ class SAVPE(nn.Module):
1935
1916
  self.cv5 = nn.Conv2d(1, self.c, 3, padding=1)
1936
1917
  self.cv6 = nn.Sequential(Conv(2 * self.c, self.c, 3), nn.Conv2d(self.c, self.c, 3, padding=1))
1937
1918
 
1938
- def forward(self, x, vp):
1919
+ def forward(self, x: list[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
1939
1920
  """Process input features and visual prompts to generate enhanced embeddings."""
1940
1921
  y = [self.cv2[i](xi) for i, xi in enumerate(x)]
1941
1922
  y = self.cv4(torch.cat(y, dim=1))
@@ -1958,9 +1939,7 @@ class SAVPE(nn.Module):
1958
1939
  vp = vp.reshape(B, Q, 1, -1)
1959
1940
 
1960
1941
  score = y * vp + torch.logical_not(vp) * torch.finfo(y.dtype).min
1961
-
1962
- score = F.softmax(score, dim=-1, dtype=torch.float).to(score.dtype)
1963
-
1942
+ score = F.softmax(score, dim=-1).to(y.dtype)
1964
1943
  aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)
1965
1944
 
1966
1945
  return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)