dgenerate-ultralytics-headless 8.3.248__py3-none-any.whl → 8.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/METADATA +52 -61
  2. {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/RECORD +97 -84
  3. {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +2 -2
  5. tests/conftest.py +1 -1
  6. tests/test_cuda.py +8 -2
  7. tests/test_engine.py +8 -8
  8. tests/test_exports.py +11 -4
  9. tests/test_integrations.py +9 -9
  10. tests/test_python.py +41 -16
  11. tests/test_solutions.py +3 -3
  12. ultralytics/__init__.py +1 -1
  13. ultralytics/cfg/__init__.py +31 -31
  14. ultralytics/cfg/datasets/TT100K.yaml +346 -0
  15. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  16. ultralytics/cfg/default.yaml +3 -1
  17. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  18. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  19. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  20. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  21. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  22. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  23. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  24. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  25. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  26. ultralytics/data/annotator.py +2 -2
  27. ultralytics/data/augment.py +15 -0
  28. ultralytics/data/converter.py +76 -45
  29. ultralytics/data/dataset.py +1 -1
  30. ultralytics/data/utils.py +2 -2
  31. ultralytics/engine/exporter.py +34 -28
  32. ultralytics/engine/model.py +38 -37
  33. ultralytics/engine/predictor.py +17 -17
  34. ultralytics/engine/results.py +22 -15
  35. ultralytics/engine/trainer.py +83 -48
  36. ultralytics/engine/tuner.py +20 -11
  37. ultralytics/engine/validator.py +16 -16
  38. ultralytics/models/fastsam/predict.py +1 -1
  39. ultralytics/models/yolo/classify/predict.py +1 -1
  40. ultralytics/models/yolo/classify/train.py +1 -1
  41. ultralytics/models/yolo/classify/val.py +1 -1
  42. ultralytics/models/yolo/detect/predict.py +2 -2
  43. ultralytics/models/yolo/detect/train.py +6 -3
  44. ultralytics/models/yolo/detect/val.py +7 -1
  45. ultralytics/models/yolo/model.py +8 -8
  46. ultralytics/models/yolo/obb/predict.py +2 -2
  47. ultralytics/models/yolo/obb/train.py +3 -3
  48. ultralytics/models/yolo/obb/val.py +1 -1
  49. ultralytics/models/yolo/pose/predict.py +1 -1
  50. ultralytics/models/yolo/pose/train.py +3 -1
  51. ultralytics/models/yolo/pose/val.py +1 -1
  52. ultralytics/models/yolo/segment/predict.py +3 -3
  53. ultralytics/models/yolo/segment/train.py +4 -4
  54. ultralytics/models/yolo/segment/val.py +2 -2
  55. ultralytics/models/yolo/yoloe/train.py +6 -1
  56. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  57. ultralytics/nn/autobackend.py +14 -8
  58. ultralytics/nn/modules/__init__.py +8 -0
  59. ultralytics/nn/modules/block.py +128 -8
  60. ultralytics/nn/modules/head.py +788 -203
  61. ultralytics/nn/tasks.py +86 -41
  62. ultralytics/nn/text_model.py +5 -2
  63. ultralytics/optim/__init__.py +5 -0
  64. ultralytics/optim/muon.py +338 -0
  65. ultralytics/solutions/ai_gym.py +3 -3
  66. ultralytics/solutions/config.py +1 -1
  67. ultralytics/solutions/heatmap.py +1 -1
  68. ultralytics/solutions/instance_segmentation.py +2 -2
  69. ultralytics/solutions/object_counter.py +1 -1
  70. ultralytics/solutions/parking_management.py +1 -1
  71. ultralytics/solutions/solutions.py +2 -2
  72. ultralytics/trackers/byte_tracker.py +7 -7
  73. ultralytics/trackers/track.py +1 -1
  74. ultralytics/utils/__init__.py +8 -8
  75. ultralytics/utils/benchmarks.py +26 -26
  76. ultralytics/utils/callbacks/platform.py +173 -64
  77. ultralytics/utils/callbacks/tensorboard.py +2 -0
  78. ultralytics/utils/callbacks/wb.py +6 -1
  79. ultralytics/utils/checks.py +28 -9
  80. ultralytics/utils/dist.py +1 -0
  81. ultralytics/utils/downloads.py +5 -3
  82. ultralytics/utils/export/engine.py +19 -10
  83. ultralytics/utils/export/imx.py +38 -20
  84. ultralytics/utils/export/tensorflow.py +21 -21
  85. ultralytics/utils/files.py +2 -2
  86. ultralytics/utils/loss.py +597 -203
  87. ultralytics/utils/metrics.py +2 -1
  88. ultralytics/utils/ops.py +11 -2
  89. ultralytics/utils/patches.py +42 -0
  90. ultralytics/utils/plotting.py +3 -0
  91. ultralytics/utils/tal.py +100 -20
  92. ultralytics/utils/torch_utils.py +1 -1
  93. ultralytics/utils/tqdm.py +4 -1
  94. ultralytics/utils/tuner.py +2 -5
  95. {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/entry_points.txt +0 -0
  96. {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/licenses/LICENSE +0 -0
  97. {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/top_level.txt +0 -0
ultralytics/cfg/models/26/yolo26-pose.yaml ADDED
@@ -0,0 +1,53 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLO26-pose keypoints/pose estimation model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/pose
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+ scales: # model compound scaling constants, i.e. 'model=yolo26n-pose.yaml' will call yolo26-pose.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 363 layers, 3,747,554 parameters, 3,747,554 gradients, 10.7 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 363 layers, 11,870,498 parameters, 11,870,498 gradients, 29.6 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 383 layers, 24,344,482 parameters, 24,344,482 gradients, 85.9 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 495 layers, 28,747,938 parameters, 28,747,938 gradients, 104.3 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 495 layers, 62,914,350 parameters, 62,914,350 gradients, 226.3 GFLOPs
+
+ # YOLO26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLO26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, Pose26, [nc, kpt_shape]] # Detect(P3, P4, P5)
ultralytics/cfg/models/26/yolo26-seg.yaml ADDED
@@ -0,0 +1,52 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLO26-seg instance segmentation model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/segment
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ scales: # model compound scaling constants, i.e. 'model=yolo26n-seg.yaml' will call yolo26-seg.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 309 layers, 3,126,280 parameters, 3,126,280 gradients, 10.5 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 309 layers, 11,505,800 parameters, 11,505,800 gradients, 37.4 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 329 layers, 27,112,072 parameters, 27,112,072 gradients, 132.5 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 441 layers, 31,515,528 parameters, 31,515,528 gradients, 150.9 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 441 layers, 70,693,800 parameters, 70,693,800 gradients, 337.7 GFLOPs
+
+ # YOLO26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLO26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, Segment26, [nc, 32, 256]] # Segment(P3, P4, P5)
ultralytics/cfg/models/26/yolo26.yaml ADDED
@@ -0,0 +1,52 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/detect
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ scales: # model compound scaling constants, i.e. 'model=yolo26n.yaml' will call yolo26.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 260 layers, 2,572,280 parameters, 2,572,280 gradients, 6.1 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 260 layers, 10,009,784 parameters, 10,009,784 gradients, 22.8 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 280 layers, 21,896,248 parameters, 21,896,248 gradients, 75.4 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 392 layers, 26,299,704 parameters, 26,299,704 gradients, 93.8 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 392 layers, 58,993,368 parameters, 58,993,368 gradients, 209.5 GFLOPs
+
+ # YOLO26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLO26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)
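For context, the scale suffix in a model name selects one of the `scales` rows above (e.g. 'n' picks depth 0.50, width 0.25, max_channels 1024). A minimal usage sketch with the standard Ultralytics Python API; the yolo26n.pt weights are the ones referenced elsewhere in this diff, and coco8.yaml is only an illustrative dataset:

    from ultralytics import YOLO

    # Build the nano-scale YOLO26 detector directly from the new config file.
    model = YOLO("yolo26n.yaml")

    # Or start from released weights, if available locally, and fine-tune.
    model = YOLO("yolo26n.pt")
    model.train(data="coco8.yaml", epochs=3, imgsz=640)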
ultralytics/cfg/models/26/yoloe-26-seg.yaml ADDED
@@ -0,0 +1,53 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLOE-26 open-vocabulary instance segmentation model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/segment
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ text_model: mobileclip2:b
+ scales: # model compound scaling constants, i.e. 'model=yoloe-26n-seg.yaml' will call yoloe-26-seg.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 347 layers, 5,615,540 parameters, 5,615,540 gradients, 11.7 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 347 layers, 15,272,852 parameters, 15,272,852 gradients, 39.3 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 367 layers, 34,922,132 parameters, 34,922,132 gradients, 136.3 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 479 layers, 39,325,588 parameters, 39,325,588 gradients, 154.7 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 479 layers, 85,397,684 parameters, 85,397,684 gradients, 343.3 GFLOPs
+
+ # YOLOE26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLOE26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, YOLOESegment26, [nc, 32, 256, 512, True]] # YOLOESegment26(P3, P4, P5)
ultralytics/cfg/models/26/yoloe-26.yaml ADDED
@@ -0,0 +1,53 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLOE-26 open-vocabulary object detection model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/detect
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ text_model: mobileclip2:b
+ scales: # model compound scaling constants, i.e. 'model=yoloe-26n.yaml' will call yoloe-26.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 298 layers, 5,061,540 parameters, 5,061,540 gradients, 7.3 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 298 layers, 13,776,836 parameters, 13,776,836 gradients, 24.8 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 318 layers, 29,706,308 parameters, 29,706,308 gradients, 79.2 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 430 layers, 34,109,764 parameters, 34,109,764 gradients, 97.6 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 430 layers, 73,697,252 parameters, 73,697,252 gradients, 215.2 GFLOPs
+
+ # YOLOE26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLOE26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, YOLOEDetect, [nc, 512, True]] # YOLOEDetect(P3, P4, P5)
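The `text_model: mobileclip2:b` entry wires a text encoder into the open-vocabulary head. A sketch of prompting such a model with text classes, following the existing Ultralytics YOLOE API; the yoloe-26s-seg.pt weight name is hypothetical and used only for illustration:

    from ultralytics import YOLOE

    model = YOLOE("yoloe-26s-seg.pt")  # hypothetical weight name for the new yoloe-26-seg config
    names = ["person", "bus"]
    model.set_classes(names, model.get_text_pe(names))  # encode the text prompts as class embeddings
    results = model.predict("https://ultralytics.com/images/bus.jpg")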
ultralytics/data/annotator.py CHANGED
@@ -9,7 +9,7 @@ from ultralytics import SAM, YOLO
 
 def auto_annotate(
 data: str | Path,
- det_model: str = "yolo11x.pt",
+ det_model: str = "yolo26x.pt",
 sam_model: str = "sam_b.pt",
 device: str = "",
 conf: float = 0.25,
@@ -39,7 +39,7 @@ def auto_annotate(
 
 Examples:
 >>> from ultralytics.data.annotator import auto_annotate
- >>> auto_annotate(data="ultralytics/assets", det_model="yolo11n.pt", sam_model="mobile_sam.pt")
+ >>> auto_annotate(data="ultralytics/assets", det_model="yolo26n.pt", sam_model="mobile_sam.pt")
 """
 det_model = YOLO(det_model)
 sam_model = SAM(sam_model)
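The change only bumps the default detector to the YOLO26 generation; the call pattern from the docstring example stays the same, and both models can still be overridden explicitly:

    from ultralytics.data.annotator import auto_annotate

    # Same example as the docstring above: detect with YOLO26, segment the boxes with SAM.
    auto_annotate(data="ultralytics/assets", det_model="yolo26n.pt", sam_model="mobile_sam.pt")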
ultralytics/data/augment.py CHANGED
@@ -2062,11 +2062,26 @@ class Format:
 if nl:
 masks, instances, cls = self._format_segments(instances, cls, w, h)
 masks = torch.from_numpy(masks)
+ cls_tensor = torch.from_numpy(cls.squeeze(1))
+ if self.mask_overlap:
+ sem_masks = cls_tensor[masks[0].long() - 1] # (H, W) from (1, H, W) instance indices
+ else:
+ # Create sem_masks consistent with mask_overlap=True
+ sem_masks = (masks * cls_tensor[:, None, None]).max(0).values # (H, W) from (N, H, W) binary
+ overlap = masks.sum(dim=0) > 1 # (H, W)
+ if overlap.any():
+ weights = masks.sum(axis=(1, 2))
+ weighted_masks = masks * weights[:, None, None] # (N, H, W)
+ weighted_masks[masks == 0] = weights.max() + 1 # handle background
+ smallest_idx = weighted_masks.argmin(dim=0) # (H, W)
+ sem_masks[overlap] = cls_tensor[smallest_idx[overlap]]
 else:
 masks = torch.zeros(
 1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
 )
+ sem_masks = torch.zeros(img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio)
 labels["masks"] = masks
+ labels["sem_masks"] = sem_masks.float()
 labels["img"] = self._format_img(img)
 labels["cls"] = torch.from_numpy(cls) if nl else torch.zeros(nl, 1)
 labels["bboxes"] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
ultralytics/data/converter.py CHANGED
@@ -15,7 +15,7 @@ import numpy as np
 from PIL import Image
 
 from ultralytics.utils import ASSETS_URL, DATASETS_DIR, LOGGER, NUM_THREADS, TQDM, YAML
- from ultralytics.utils.checks import check_file, check_requirements
+ from ultralytics.utils.checks import check_file
 from ultralytics.utils.downloads import download, zip_directory
 from ultralytics.utils.files import increment_path
 
@@ -747,14 +747,15 @@ def convert_to_multispectral(path: str | Path, n_channels: int = 10, replace: bo
 
 
 async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Path | None = None) -> Path:
- """Convert NDJSON dataset format to Ultralytics YOLO11 dataset structure.
+ """Convert NDJSON dataset format to Ultralytics YOLO dataset structure.
 
- This function converts datasets stored in NDJSON (Newline Delimited JSON) format to the standard YOLO format with
- separate directories for images and labels. It supports parallel processing for efficient conversion of large
- datasets and can download images from URLs if they don't exist locally.
+ This function converts datasets stored in NDJSON (Newline Delimited JSON) format to the standard YOLO format. For
+ detection/segmentation/pose/obb tasks, it creates separate directories for images and labels. For classification
+ tasks, it creates the ImageNet-style {split}/{class_name}/ folder structure. It supports parallel processing for
+ efficient conversion of large datasets and can download images from URLs.
 
 The NDJSON format consists of:
- - First line: Dataset metadata with class names and configuration
+ - First line: Dataset metadata with class names, task type, and configuration
 - Subsequent lines: Individual image records with annotations and optional URLs
 
 Args:
@@ -763,7 +764,7 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
 None, uses the parent directory of the NDJSON file. Defaults to None.
 
 Returns:
- (Path): Path to the generated data.yaml file that can be used for YOLO training.
+ (Path): Path to the generated data.yaml file (detection) or dataset directory (classification).
 
 Examples:
 Convert a local NDJSON file:
@@ -775,9 +776,11 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
 
 Use with YOLO training
 >>> from ultralytics import YOLO
- >>> model = YOLO("yolo11n.pt")
+ >>> model = YOLO("yolo26n.pt")
 >>> model.train(data="https://github.com/ultralytics/assets/releases/download/v0.0.0/coco8-ndjson.ndjson")
 """
+ from ultralytics.utils.checks import check_requirements
+
 check_requirements("aiohttp")
 import aiohttp
 
@@ -790,50 +793,68 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
 dataset_dir = output_path / ndjson_path.stem
 splits = {record["split"] for record in image_records}
 
- # Create directories and prepare YAML structure
- dataset_dir.mkdir(parents=True, exist_ok=True)
- data_yaml = dict(dataset_record)
- data_yaml["names"] = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
- data_yaml.pop("class_names")
+ # Check if this is a classification dataset
+ is_classification = dataset_record.get("task") == "classify"
+ class_names = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
 
- for split in sorted(splits):
- (dataset_dir / "images" / split).mkdir(parents=True, exist_ok=True)
- (dataset_dir / "labels" / split).mkdir(parents=True, exist_ok=True)
- data_yaml[split] = f"images/{split}"
+ # Create base directories
+ dataset_dir.mkdir(parents=True, exist_ok=True)
+ data_yaml = None
+
+ if not is_classification:
+ # Detection/segmentation/pose/obb: prepare YAML and create base structure
+ data_yaml = dict(dataset_record)
+ data_yaml["names"] = class_names
+ data_yaml.pop("class_names", None)
+ data_yaml.pop("type", None) # Remove NDJSON-specific fields
+ for split in sorted(splits):
+ (dataset_dir / "images" / split).mkdir(parents=True, exist_ok=True)
+ (dataset_dir / "labels" / split).mkdir(parents=True, exist_ok=True)
+ data_yaml[split] = f"images/{split}"
 
 async def process_record(session, semaphore, record):
 """Process single image record with async session."""
 async with semaphore:
 split, original_name = record["split"], record["file"]
- label_path = dataset_dir / "labels" / split / f"{Path(original_name).stem}.txt"
- image_path = dataset_dir / "images" / split / original_name
-
 annotations = record.get("annotations", {})
- lines_to_write = []
- for key in annotations.keys():
- lines_to_write = [" ".join(map(str, item)) for item in annotations[key]]
- break
- if "classification" in annotations:
- lines_to_write = [str(cls) for cls in annotations["classification"]]
-
- label_path.write_text("\n".join(lines_to_write) + "\n" if lines_to_write else "")
 
+ if is_classification:
+ # Classification: place image in {split}/{class_name}/ folder
+ class_ids = annotations.get("classification", [])
+ class_id = class_ids[0] if class_ids else 0
+ class_name = class_names.get(class_id, str(class_id))
+ image_path = dataset_dir / split / class_name / original_name
+ else:
+ # Detection: write label file and place image in images/{split}/
+ image_path = dataset_dir / "images" / split / original_name
+ label_path = dataset_dir / "labels" / split / f"{Path(original_name).stem}.txt"
+ lines_to_write = []
+ for key in annotations.keys():
+ lines_to_write = [" ".join(map(str, item)) for item in annotations[key]]
+ break
+ label_path.write_text("\n".join(lines_to_write) + "\n" if lines_to_write else "")
+
+ # Download image if URL provided and file doesn't exist
 if http_url := record.get("url"):
 if not image_path.exists():
- try:
- async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
- response.raise_for_status()
- with open(image_path, "wb") as f:
- async for chunk in response.content.iter_chunked(8192):
- f.write(chunk)
- return True
- except Exception as e:
- LOGGER.warning(f"Failed to download {http_url}: {e}")
- return False
+ image_path.parent.mkdir(parents=True, exist_ok=True)
+ # Retry with exponential backoff (3 attempts: 0s, 2s, 4s delays)
+ for attempt in range(3):
+ try:
+ async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
+ response.raise_for_status()
+ image_path.write_bytes(await response.read())
+ return True
+ except Exception as e:
+ if attempt < 2: # Don't sleep after last attempt
+ await asyncio.sleep(2**attempt) # 1s, 2s backoff
+ else:
+ LOGGER.warning(f"Failed to download {http_url} after 3 attempts: {e}")
+ return False
 return True
 
- # Process all images with async downloads
- semaphore = asyncio.Semaphore(64)
+ # Process all images with async downloads (limit connections for small datasets)
+ semaphore = asyncio.Semaphore(min(128, len(image_records)))
 async with aiohttp.ClientSession() as session:
 pbar = TQDM(
 total=len(image_records),
@@ -845,11 +866,21 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
 pbar.update(1)
 return result
 
- await asyncio.gather(*[tracked_process(record) for record in image_records])
+ results = await asyncio.gather(*[tracked_process(record) for record in image_records])
 pbar.close()
 
- # Write data.yaml
- yaml_path = dataset_dir / "data.yaml"
- YAML.save(yaml_path, data_yaml)
+ # Validate images were downloaded successfully
+ success_count = sum(1 for r in results if r)
+ if success_count == 0:
+ raise RuntimeError(f"Failed to download any images from {ndjson_path}. Check network connection and URLs.")
+ if success_count < len(image_records):
+ LOGGER.warning(f"Downloaded {success_count}/{len(image_records)} images from {ndjson_path}")
 
- return yaml_path
+ if is_classification:
+ # Classification: return dataset directory (check_cls_dataset expects a directory path)
+ return dataset_dir
+ else:
+ # Detection: write data.yaml and return its path
+ yaml_path = dataset_dir / "data.yaml"
+ YAML.save(yaml_path, data_yaml)
+ return yaml_path
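A hedged usage sketch of the converter's updated behaviour (the local file name is a placeholder): the coroutine now returns a data.yaml path for detection-style datasets and the dataset directory for classification datasets.

    import asyncio

    from ultralytics.data.converter import convert_ndjson_to_yolo

    # "my_dataset.ndjson" is hypothetical; the return value depends on the dataset's task field.
    out = asyncio.run(convert_ndjson_to_yolo("my_dataset.ndjson"))
    print(out)  # .../my_dataset/data.yaml for detect/segment/pose/obb, .../my_dataset for classify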
ultralytics/data/dataset.py CHANGED
@@ -294,7 +294,7 @@ class YOLODataset(BaseDataset):
 values = list(zip(*[list(b.values()) for b in batch]))
 for i, k in enumerate(keys):
 value = values[i]
- if k in {"img", "text_feats"}:
+ if k in {"img", "text_feats", "sem_masks"}:
 value = torch.stack(value, 0)
 elif k == "visuals":
 value = torch.nn.utils.rnn.pad_sequence(value, batch_first=True)
ultralytics/data/utils.py CHANGED
@@ -37,8 +37,8 @@ from ultralytics.utils.downloads import download, safe_download, unzip_file
 from ultralytics.utils.ops import segments2boxes
 
 HELP_URL = "See https://docs.ultralytics.com/datasets for dataset formatting guidance."
- IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm", "heic"} # image suffixes
- VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # video suffixes
+ IMG_FORMATS = {"avif", "bmp", "dng", "heic", "jp2", "jpeg", "jpeg2000", "jpg", "mpo", "png", "tif", "tiff", "webp"}
+ VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # videos
 FORMATS_HELP_MSG = f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
 
 
ultralytics/engine/exporter.py CHANGED
@@ -4,38 +4,38 @@ Export a YOLO PyTorch model to other formats. TensorFlow exports authored by htt
 
 Format | `format=argument` | Model
 --- | --- | ---
- PyTorch | - | yolo11n.pt
- TorchScript | `torchscript` | yolo11n.torchscript
- ONNX | `onnx` | yolo11n.onnx
- OpenVINO | `openvino` | yolo11n_openvino_model/
- TensorRT | `engine` | yolo11n.engine
- CoreML | `coreml` | yolo11n.mlpackage
- TensorFlow SavedModel | `saved_model` | yolo11n_saved_model/
- TensorFlow GraphDef | `pb` | yolo11n.pb
- TensorFlow Lite | `tflite` | yolo11n.tflite
- TensorFlow Edge TPU | `edgetpu` | yolo11n_edgetpu.tflite
- TensorFlow.js | `tfjs` | yolo11n_web_model/
- PaddlePaddle | `paddle` | yolo11n_paddle_model/
- MNN | `mnn` | yolo11n.mnn
- NCNN | `ncnn` | yolo11n_ncnn_model/
- IMX | `imx` | yolo11n_imx_model/
- RKNN | `rknn` | yolo11n_rknn_model/
- ExecuTorch | `executorch` | yolo11n_executorch_model/
- Axelera | `axelera` | yolo11n_axelera_model/
+ PyTorch | - | yolo26n.pt
+ TorchScript | `torchscript` | yolo26n.torchscript
+ ONNX | `onnx` | yolo26n.onnx
+ OpenVINO | `openvino` | yolo26n_openvino_model/
+ TensorRT | `engine` | yolo26n.engine
+ CoreML | `coreml` | yolo26n.mlpackage
+ TensorFlow SavedModel | `saved_model` | yolo26n_saved_model/
+ TensorFlow GraphDef | `pb` | yolo26n.pb
+ TensorFlow Lite | `tflite` | yolo26n.tflite
+ TensorFlow Edge TPU | `edgetpu` | yolo26n_edgetpu.tflite
+ TensorFlow.js | `tfjs` | yolo26n_web_model/
+ PaddlePaddle | `paddle` | yolo26n_paddle_model/
+ MNN | `mnn` | yolo26n.mnn
+ NCNN | `ncnn` | yolo26n_ncnn_model/
+ IMX | `imx` | yolo26n_imx_model/
+ RKNN | `rknn` | yolo26n_rknn_model/
+ ExecuTorch | `executorch` | yolo26n_executorch_model/
+ Axelera | `axelera` | yolo26n_axelera_model/
 
 Requirements:
 $ pip install "ultralytics[export]"
 
 Python:
 from ultralytics import YOLO
- model = YOLO('yolo11n.pt')
+ model = YOLO('yolo26n.pt')
 results = model.export(format='onnx')
 
 CLI:
- $ yolo mode=export model=yolo11n.pt format=onnx
+ $ yolo mode=export model=yolo26n.pt format=onnx
 
 Inference:
- $ yolo predict model=yolo11n.pt # PyTorch
+ $ yolo predict model=yolo26n.pt # PyTorch
 yolo11n.torchscript # TorchScript
 yolo11n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
 yolo11n_openvino_model # OpenVINO
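The docstring's Python example in compact form (a sketch assuming the yolo26n.pt weights resolve locally); an exported file can be loaded back through the same YOLO constructor for inference:

    from ultralytics import YOLO

    model = YOLO("yolo26n.pt")               # PyTorch weights
    onnx_file = model.export(format="onnx")  # returns the path of the exported model
    YOLO(onnx_file).predict("https://ultralytics.com/images/bus.jpg")  # run the ONNX export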
@@ -463,6 +463,9 @@ class Exporter:
 )
 if tfjs and (ARM64 and LINUX):
 raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
+ if ncnn and hasattr(model.model[-1], "one2one_cv2"):
+ del model.model[-1].one2one_cv2 # Disable end2end branch for NCNN export as it does not support topk
+ LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
 # Recommend OpenVINO if export and Intel CPU
 if SETTINGS.get("openvino_msg"):
 if is_intel():
@@ -503,8 +506,11 @@
 m.dynamic = self.args.dynamic
 m.export = True
 m.format = self.args.format
- m.max_det = self.args.max_det
+ # Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
+ anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
+ m.max_det = min(self.args.max_det, anchors)
 m.xyxy = self.args.nms and not coreml
+ m.shape = None # reset cached shape for new export input size
 if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models
 m.fuse(model.pe.to(self.device))
 elif isinstance(m, C2f) and not is_tf_format:
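A quick check of the clamp arithmetic above, assuming the P3/8-P5/32 strides used by the configs earlier in this diff:

    # max_det is clamped to the total anchor count of the export resolution.
    for imgsz, max_det in ((640, 300), (64, 300)):
        anchors = sum(int(imgsz / s) * int(imgsz / s) for s in (8, 16, 32))
        print(imgsz, anchors, min(max_det, anchors))
    # 640 -> 8400 anchors, max_det stays 300; 64 -> 84 anchors, max_det is clamped to 84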
@@ -550,6 +556,8 @@
 self.metadata["kpt_shape"] = model.model[-1].kpt_shape
 if hasattr(model, "kpt_names"):
 self.metadata["kpt_names"] = model.kpt_names
+ if getattr(model.model[-1], "end2end", False):
+ self.metadata["end2end"] = True
 
 LOGGER.info(
 f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
@@ -606,12 +614,11 @@
 f"work. Use export 'imgsz={max(self.imgsz)}' if val is required."
 )
 imgsz = self.imgsz[0] if square else str(self.imgsz)[1:-1].replace(" ", "")
- predict_data = f"data={data}" if model.task == "segment" and pb else ""
 q = "int8" if self.args.int8 else "half" if self.args.half else "" # quantization
 LOGGER.info(
 f"\nExport complete ({time.time() - t:.1f}s)"
 f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
- f"\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}"
+ f"\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q}"
 f"\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}"
 f"\nVisualize: https://netron.app"
 )
@@ -786,7 +793,6 @@
 f".*{head_module_name}/.*/Sub*",
 f".*{head_module_name}/.*/Mul*",
 f".*{head_module_name}/.*/Div*",
- f".*{head_module_name}\\.dfl.*",
 ],
 types=["Sigmoid"],
 )
@@ -812,11 +818,11 @@
 assert not IS_JETSON, "Jetson Paddle exports not supported yet"
 check_requirements(
 (
- "paddlepaddle-gpu"
+ "paddlepaddle-gpu>=3.0.0,!=3.3.0" # exclude 3.3.0 https://github.com/PaddlePaddle/Paddle/issues/77340
 if torch.cuda.is_available()
 else "paddlepaddle==3.0.0" # pin 3.0.0 for ARM64
 if ARM64
- else "paddlepaddle>=3.0.0",
+ else "paddlepaddle>=3.0.0,!=3.3.0", # exclude 3.3.0 https://github.com/PaddlePaddle/Paddle/issues/77340
 "x2paddle",
 )
 )
@@ -923,7 +929,7 @@
 model = IOSDetectModel(self.model, self.im, mlprogram=not mlmodel) if self.args.nms else self.model
 else:
 if self.args.nms:
- LOGGER.warning(f"{prefix} 'nms=True' is only available for Detect models like 'yolo11n.pt'.")
+ LOGGER.warning(f"{prefix} 'nms=True' is only available for Detect models like 'yolo26n.pt'.")
 # TODO CoreML Segment and Pose model pipelining
 model = self.model
 ts = torch.jit.trace(model.eval(), self.im, strict=False) # TorchScript model