dgenerate-ultralytics-headless 8.3.253__py3-none-any.whl → 8.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/METADATA +31 -39
  2. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/RECORD +61 -50
  3. tests/__init__.py +2 -2
  4. tests/conftest.py +1 -1
  5. tests/test_cuda.py +8 -2
  6. tests/test_engine.py +8 -8
  7. tests/test_exports.py +13 -4
  8. tests/test_integrations.py +9 -9
  9. tests/test_python.py +14 -14
  10. tests/test_solutions.py +3 -3
  11. ultralytics/__init__.py +1 -1
  12. ultralytics/cfg/__init__.py +6 -6
  13. ultralytics/cfg/default.yaml +3 -1
  14. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  15. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  16. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  17. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  18. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  19. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  20. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  21. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  22. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  23. ultralytics/data/augment.py +7 -0
  24. ultralytics/data/dataset.py +1 -1
  25. ultralytics/engine/exporter.py +9 -4
  26. ultralytics/engine/model.py +1 -1
  27. ultralytics/engine/trainer.py +40 -15
  28. ultralytics/engine/tuner.py +15 -7
  29. ultralytics/models/fastsam/predict.py +1 -1
  30. ultralytics/models/yolo/detect/train.py +3 -2
  31. ultralytics/models/yolo/detect/val.py +6 -0
  32. ultralytics/models/yolo/model.py +1 -1
  33. ultralytics/models/yolo/obb/predict.py +1 -1
  34. ultralytics/models/yolo/obb/train.py +1 -1
  35. ultralytics/models/yolo/pose/train.py +1 -1
  36. ultralytics/models/yolo/segment/predict.py +1 -1
  37. ultralytics/models/yolo/segment/train.py +1 -1
  38. ultralytics/models/yolo/segment/val.py +3 -1
  39. ultralytics/models/yolo/yoloe/train.py +6 -1
  40. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  41. ultralytics/nn/autobackend.py +3 -3
  42. ultralytics/nn/modules/__init__.py +8 -0
  43. ultralytics/nn/modules/block.py +128 -8
  44. ultralytics/nn/modules/head.py +789 -204
  45. ultralytics/nn/tasks.py +74 -29
  46. ultralytics/nn/text_model.py +5 -2
  47. ultralytics/optim/__init__.py +5 -0
  48. ultralytics/optim/muon.py +338 -0
  49. ultralytics/utils/callbacks/platform.py +9 -7
  50. ultralytics/utils/downloads.py +3 -1
  51. ultralytics/utils/export/engine.py +19 -10
  52. ultralytics/utils/export/imx.py +22 -11
  53. ultralytics/utils/export/tensorflow.py +21 -21
  54. ultralytics/utils/loss.py +587 -203
  55. ultralytics/utils/metrics.py +1 -0
  56. ultralytics/utils/ops.py +11 -2
  57. ultralytics/utils/tal.py +98 -19
  58. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/WHEEL +0 -0
  59. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/entry_points.txt +0 -0
  60. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/licenses/LICENSE +0 -0
  61. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/top_level.txt +0 -0
@@ -208,28 +208,33 @@ class SPP(nn.Module):
208
208
  class SPPF(nn.Module):
209
209
  """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
210
210
 
211
- def __init__(self, c1: int, c2: int, k: int = 5):
211
+ def __init__(self, c1: int, c2: int, k: int = 5, n: int = 3, shortcut: bool = False):
212
212
  """Initialize the SPPF layer with given input/output channels and kernel size.
213
213
 
214
214
  Args:
215
215
  c1 (int): Input channels.
216
216
  c2 (int): Output channels.
217
217
  k (int): Kernel size.
218
+ n (int): Number of pooling iterations.
219
+ shortcut (bool): Whether to use shortcut connection.
218
220
 
219
221
  Notes:
220
222
  This module is equivalent to SPP(k=(5, 9, 13)).
221
223
  """
222
224
  super().__init__()
223
225
  c_ = c1 // 2 # hidden channels
224
- self.cv1 = Conv(c1, c_, 1, 1)
225
- self.cv2 = Conv(c_ * 4, c2, 1, 1)
226
+ self.cv1 = Conv(c1, c_, 1, 1, act=False)
227
+ self.cv2 = Conv(c_ * (n + 1), c2, 1, 1)
226
228
  self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
229
+ self.n = n
230
+ self.add = shortcut and c1 == c2
227
231
 
228
232
  def forward(self, x: torch.Tensor) -> torch.Tensor:
229
233
  """Apply sequential pooling operations to input and return concatenated feature maps."""
230
234
  y = [self.cv1(x)]
231
- y.extend(self.m(y[-1]) for _ in range(3))
232
- return self.cv2(torch.cat(y, 1))
235
+ y.extend(self.m(y[-1]) for _ in range(getattr(self, "n", 3)))
236
+ y = self.cv2(torch.cat(y, 1))
237
+ return y + x if getattr(self, "add", False) else y
233
238
 
234
239
 
235
240
  class C1(nn.Module):
@@ -1065,7 +1070,15 @@ class C3k2(C2f):
1065
1070
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
1066
1071
 
1067
1072
  def __init__(
1068
- self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
1073
+ self,
1074
+ c1: int,
1075
+ c2: int,
1076
+ n: int = 1,
1077
+ c3k: bool = False,
1078
+ e: float = 0.5,
1079
+ attn: bool = False,
1080
+ g: int = 1,
1081
+ shortcut: bool = True,
1069
1082
  ):
1070
1083
  """Initialize C3k2 module.
1071
1084
 
@@ -1075,12 +1088,21 @@ class C3k2(C2f):
1075
1088
  n (int): Number of blocks.
1076
1089
  c3k (bool): Whether to use C3k blocks.
1077
1090
  e (float): Expansion ratio.
1091
+ attn (bool): Whether to use attention blocks.
1078
1092
  g (int): Groups for convolutions.
1079
1093
  shortcut (bool): Whether to use shortcut connections.
1080
1094
  """
1081
1095
  super().__init__(c1, c2, n, shortcut, g, e)
1082
1096
  self.m = nn.ModuleList(
1083
- C3k(self.c, self.c, 2, shortcut, g) if c3k else Bottleneck(self.c, self.c, shortcut, g) for _ in range(n)
1097
+ nn.Sequential(
1098
+ Bottleneck(self.c, self.c, shortcut, g),
1099
+ PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)),
1100
+ )
1101
+ if attn
1102
+ else C3k(self.c, self.c, 2, shortcut, g)
1103
+ if c3k
1104
+ else Bottleneck(self.c, self.c, shortcut, g)
1105
+ for _ in range(n)
1084
1106
  )
1085
1107
 
1086
1108
 
@@ -1148,6 +1170,8 @@ class RepVGGDW(torch.nn.Module):
1148
1170
 
1149
1171
  This method fuses the convolutional layers and updates the weights and biases accordingly.
1150
1172
  """
1173
+ if not hasattr(self, "conv1"):
1174
+ return # already fused
1151
1175
  conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
1152
1176
  conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
1153
1177
 
@@ -1391,7 +1415,7 @@ class PSA(nn.Module):
1391
1415
  self.cv1 = Conv(c1, 2 * self.c, 1, 1)
1392
1416
  self.cv2 = Conv(2 * self.c, c1, 1)
1393
1417
 
1394
- self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
1418
+ self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1))
1395
1419
  self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
1396
1420
 
1397
1421
  def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -1945,3 +1969,99 @@ class SAVPE(nn.Module):
1945
1969
  aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)
1946
1970
 
1947
1971
  return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)
1972
+
1973
+
1974
+ class Proto26(Proto):
1975
+ """Ultralytics YOLO26 models mask Proto module for segmentation models."""
1976
+
1977
+ def __init__(self, ch: tuple = (), c_: int = 256, c2: int = 32, nc: int = 80):
1978
+ """Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
1979
+
1980
+ Args:
1981
+ ch (tuple): Tuple of channel sizes from backbone feature maps.
1982
+ c_ (int): Intermediate channels.
1983
+ c2 (int): Output channels (number of protos).
1984
+ nc (int): Number of classes for semantic segmentation.
1985
+ """
1986
+ super().__init__(c_, c_, c2)
1987
+ self.feat_refine = nn.ModuleList(Conv(x, ch[0], k=1) for x in ch[1:])
1988
+ self.feat_fuse = Conv(ch[0], c_, k=3)
1989
+ self.semseg = nn.Sequential(Conv(ch[0], c_, k=3), Conv(c_, c_, k=3), nn.Conv2d(c_, nc, 1))
1990
+
1991
+ def forward(self, x: torch.Tensor, return_semseg: bool = True) -> torch.Tensor:
1992
+ """Perform a forward pass through layers using an upsampled input image."""
1993
+ feat = x[0]
1994
+ for i, f in enumerate(self.feat_refine):
1995
+ up_feat = f(x[i + 1])
1996
+ up_feat = F.interpolate(up_feat, size=feat.shape[2:], mode="nearest")
1997
+ feat = feat + up_feat
1998
+ p = super().forward(self.feat_fuse(feat))
1999
+ if self.training and return_semseg:
2000
+ semseg = self.semseg(feat)
2001
+ return (p, semseg)
2002
+ return p
2003
+
2004
+ def fuse(self):
2005
+ """Fuse the model for inference by removing the semantic segmentation head."""
2006
+ self.semseg = None
2007
+
2008
+
2009
+ class RealNVP(nn.Module):
2010
+ """RealNVP: a flow-based generative model.
2011
+
2012
+ References:
2013
+ https://arxiv.org/abs/1605.08803
2014
+ https://github.com/open-mmlab/mmpose/blob/main/mmpose/models/utils/realnvp.py
2015
+ """
2016
+
2017
+ @staticmethod
2018
+ def nets():
2019
+ """Get the scale model in a single invertable mapping."""
2020
+ return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2), nn.Tanh())
2021
+
2022
+ @staticmethod
2023
+ def nett():
2024
+ """Get the translation model in a single invertable mapping."""
2025
+ return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2))
2026
+
2027
+ @property
2028
+ def prior(self):
2029
+ """The prior distribution."""
2030
+ return torch.distributions.MultivariateNormal(self.loc, self.cov)
2031
+
2032
+ def __init__(self):
2033
+ super().__init__()
2034
+
2035
+ self.register_buffer("loc", torch.zeros(2))
2036
+ self.register_buffer("cov", torch.eye(2))
2037
+ self.register_buffer("mask", torch.tensor([[0, 1], [1, 0]] * 3, dtype=torch.float32))
2038
+
2039
+ self.s = torch.nn.ModuleList([self.nets() for _ in range(len(self.mask))])
2040
+ self.t = torch.nn.ModuleList([self.nett() for _ in range(len(self.mask))])
2041
+ self.init_weights()
2042
+
2043
+ def init_weights(self):
2044
+ """Initialization model weights."""
2045
+ for m in self.modules():
2046
+ if isinstance(m, nn.Linear):
2047
+ nn.init.xavier_uniform_(m.weight, gain=0.01)
2048
+
2049
+ def backward_p(self, x):
2050
+ """Apply mapping form the data space to the latent space and calculate the log determinant of the Jacobian
2051
+ matrix.
2052
+ """
2053
+ log_det_jacob, z = x.new_zeros(x.shape[0]), x
2054
+ for i in reversed(range(len(self.t))):
2055
+ z_ = self.mask[i] * z
2056
+ s = self.s[i](z_) * (1 - self.mask[i])
2057
+ t = self.t[i](z_) * (1 - self.mask[i])
2058
+ z = (1 - self.mask[i]) * (z - t) * torch.exp(-s) + z_
2059
+ log_det_jacob -= s.sum(dim=1)
2060
+ return z, log_det_jacob
2061
+
2062
+ def log_prob(self, x):
2063
+ """Calculate the log probability of given sample in data space."""
2064
+ if x.dtype == torch.float32 and self.s[0][0].weight.dtype != torch.float32:
2065
+ self.float()
2066
+ z, log_det = self.backward_p(x)
2067
+ return self.prior.log_prob(z) + log_det