dgenerate-ultralytics-headless 8.3.253__py3-none-any.whl → 8.4.3__py3-none-any.whl

This diff shows the changes between publicly available package versions as published to their respective public registries. It is provided for informational purposes only.
Files changed (85)
  1. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/METADATA +41 -49
  2. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/RECORD +85 -74
  3. tests/__init__.py +2 -2
  4. tests/conftest.py +1 -1
  5. tests/test_cuda.py +8 -2
  6. tests/test_engine.py +8 -8
  7. tests/test_exports.py +11 -4
  8. tests/test_integrations.py +9 -9
  9. tests/test_python.py +14 -14
  10. tests/test_solutions.py +3 -3
  11. ultralytics/__init__.py +1 -1
  12. ultralytics/cfg/__init__.py +25 -27
  13. ultralytics/cfg/default.yaml +3 -1
  14. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  15. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  16. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  17. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  18. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  19. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  20. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  21. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  22. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  23. ultralytics/data/annotator.py +2 -2
  24. ultralytics/data/augment.py +7 -0
  25. ultralytics/data/converter.py +57 -38
  26. ultralytics/data/dataset.py +1 -1
  27. ultralytics/engine/exporter.py +31 -26
  28. ultralytics/engine/model.py +34 -34
  29. ultralytics/engine/predictor.py +17 -17
  30. ultralytics/engine/results.py +14 -12
  31. ultralytics/engine/trainer.py +59 -29
  32. ultralytics/engine/tuner.py +19 -11
  33. ultralytics/engine/validator.py +16 -16
  34. ultralytics/models/fastsam/predict.py +1 -1
  35. ultralytics/models/yolo/classify/predict.py +1 -1
  36. ultralytics/models/yolo/classify/train.py +1 -1
  37. ultralytics/models/yolo/classify/val.py +1 -1
  38. ultralytics/models/yolo/detect/predict.py +2 -2
  39. ultralytics/models/yolo/detect/train.py +4 -3
  40. ultralytics/models/yolo/detect/val.py +7 -1
  41. ultralytics/models/yolo/model.py +8 -8
  42. ultralytics/models/yolo/obb/predict.py +2 -2
  43. ultralytics/models/yolo/obb/train.py +3 -3
  44. ultralytics/models/yolo/obb/val.py +1 -1
  45. ultralytics/models/yolo/pose/predict.py +1 -1
  46. ultralytics/models/yolo/pose/train.py +3 -1
  47. ultralytics/models/yolo/pose/val.py +1 -1
  48. ultralytics/models/yolo/segment/predict.py +3 -3
  49. ultralytics/models/yolo/segment/train.py +4 -4
  50. ultralytics/models/yolo/segment/val.py +4 -2
  51. ultralytics/models/yolo/yoloe/train.py +6 -1
  52. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  53. ultralytics/nn/autobackend.py +5 -5
  54. ultralytics/nn/modules/__init__.py +8 -0
  55. ultralytics/nn/modules/block.py +128 -8
  56. ultralytics/nn/modules/head.py +788 -203
  57. ultralytics/nn/tasks.py +86 -41
  58. ultralytics/nn/text_model.py +5 -2
  59. ultralytics/optim/__init__.py +5 -0
  60. ultralytics/optim/muon.py +338 -0
  61. ultralytics/solutions/ai_gym.py +3 -3
  62. ultralytics/solutions/config.py +1 -1
  63. ultralytics/solutions/heatmap.py +1 -1
  64. ultralytics/solutions/instance_segmentation.py +2 -2
  65. ultralytics/solutions/parking_management.py +1 -1
  66. ultralytics/solutions/solutions.py +2 -2
  67. ultralytics/trackers/track.py +1 -1
  68. ultralytics/utils/__init__.py +8 -8
  69. ultralytics/utils/benchmarks.py +23 -23
  70. ultralytics/utils/callbacks/platform.py +11 -7
  71. ultralytics/utils/checks.py +6 -6
  72. ultralytics/utils/downloads.py +5 -3
  73. ultralytics/utils/export/engine.py +19 -10
  74. ultralytics/utils/export/imx.py +19 -13
  75. ultralytics/utils/export/tensorflow.py +21 -21
  76. ultralytics/utils/files.py +2 -2
  77. ultralytics/utils/loss.py +587 -203
  78. ultralytics/utils/metrics.py +1 -0
  79. ultralytics/utils/ops.py +11 -2
  80. ultralytics/utils/tal.py +98 -19
  81. ultralytics/utils/tuner.py +2 -2
  82. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/WHEEL +0 -0
  83. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/entry_points.txt +0 -0
  84. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/licenses/LICENSE +0 -0
  85. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/top_level.txt +0 -0
ultralytics/cfg/models/26/yolo26-p2.yaml (new)
@@ -0,0 +1,60 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26 object detection model with P2/4 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+scales: # model compound scaling constants, i.e. 'model=yolo26n-p2.yaml' will call yolo26-p2.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 329 layers, 2,662,400 parameters, 2,662,400 gradients, 9.5 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 329 layers, 9,765,856 parameters, 9,765,856 gradients, 27.8 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 349 layers, 21,144,288 parameters, 21,144,288 gradients, 91.4 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 489 layers, 25,815,520 parameters, 25,815,520 gradients, 115.3 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 489 layers, 57,935,232 parameters, 57,935,232 gradients, 256.9 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 2], 1, Concat, [1]] # cat backbone P2
+  - [-1, 2, C3k2, [128, True]] # 19 (P2/4-xsmall)
+
+  - [-1, 1, Conv, [128, 3, 2]]
+  - [[-1, 16], 1, Concat, [1]] # cat head P3
+  - [-1, 2, C3k2, [256, True]] # 22 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 25 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 28 (P5/32-large)
+
+  - [[19, 22, 25, 28], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
ultralytics/cfg/models/26/yolo26-p6.yaml (new)
@@ -0,0 +1,62 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26 object detection model with P3/8 - P6/64 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+scales: # model compound scaling constants, i.e. 'model=yolo26n-p6.yaml' will call yolo26-p6.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 349 layers, 4,063,872 parameters, 4,063,872 gradients, 6.0 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 349 layers, 15,876,448 parameters, 15,876,448 gradients, 22.3 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 369 layers, 32,400,096 parameters, 32,400,096 gradients, 77.3 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 523 layers, 39,365,600 parameters, 39,365,600 gradients, 97.0 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 523 layers, 88,330,368 parameters, 88,330,368 gradients, 216.6 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [768, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 11
+  - [-1, 2, C2PSA, [1024]] # 12
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
+  - [-1, 2, C3k2, [768, True]] # 15
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 18
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 21 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 18], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 24 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 15], 1, Concat, [1]] # cat head P5
+  - [-1, 2, C3k2, [768, True]] # 27 (P5/32-large)
+
+  - [-1, 1, Conv, [768, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P6
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 30 (P6/64-large)
+
+  - [[21, 24, 27, 30], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
ultralytics/cfg/models/26/yolo26-pose.yaml (new)
@@ -0,0 +1,53 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26-pose keypoints/pose estimation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/pose
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+scales: # model compound scaling constants, i.e. 'model=yolo26n-pose.yaml' will call yolo26-pose.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 363 layers, 3,747,554 parameters, 3,747,554 gradients, 10.7 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 363 layers, 11,870,498 parameters, 11,870,498 gradients, 29.6 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 383 layers, 24,344,482 parameters, 24,344,482 gradients, 85.9 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 495 layers, 28,747,938 parameters, 28,747,938 gradients, 104.3 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 495 layers, 62,914,350 parameters, 62,914,350 gradients, 226.3 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, Pose26, [nc, kpt_shape]] # Detect(P3, P4, P5)
ultralytics/cfg/models/26/yolo26-seg.yaml (new)
@@ -0,0 +1,52 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26-seg instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+scales: # model compound scaling constants, i.e. 'model=yolo26n-seg.yaml' will call yolo26-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 309 layers, 3,126,280 parameters, 3,126,280 gradients, 10.5 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 309 layers, 11,505,800 parameters, 11,505,800 gradients, 37.4 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 329 layers, 27,112,072 parameters, 27,112,072 gradients, 132.5 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 441 layers, 31,515,528 parameters, 31,515,528 gradients, 150.9 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 441 layers, 70,693,800 parameters, 70,693,800 gradients, 337.7 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, Segment26, [nc, 32, 256]] # Segment(P3, P4, P5)
ultralytics/cfg/models/26/yolo26.yaml (new)
@@ -0,0 +1,52 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+scales: # model compound scaling constants, i.e. 'model=yolo26n.yaml' will call yolo26.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 260 layers, 2,572,280 parameters, 2,572,280 gradients, 6.1 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 260 layers, 10,009,784 parameters, 10,009,784 gradients, 22.8 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 280 layers, 21,896,248 parameters, 21,896,248 gradients, 75.4 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 392 layers, 26,299,704 parameters, 26,299,704 gradients, 93.8 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 392 layers, 58,993,368 parameters, 58,993,368 gradients, 209.5 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)
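
The scales comments above indicate these configs resolve through the standard Ultralytics model loader (e.g. 'model=yolo26n.yaml' selects scale 'n' of yolo26.yaml). A minimal sketch, assuming the familiar YOLO entry point carries over to the 8.4.x release:

from ultralytics import YOLO

model = YOLO("yolo26n.yaml")  # builds scale 'n' of yolo26.yaml, per the scales comment
model.info()  # the 'n' summary above reports 260 layers, 2,572,280 parameters, 6.1 GFLOPs

# The task variants added alongside follow the same naming pattern:
seg = YOLO("yolo26n-seg.yaml")    # Segment26 head
pose = YOLO("yolo26n-pose.yaml")  # Pose26 head
p2 = YOLO("yolo26n-p2.yaml")      # adds a high-resolution P2/4 output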
ultralytics/cfg/models/26/yoloe-26-seg.yaml (new)
@@ -0,0 +1,53 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOE-26 open-vocabulary instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+text_model: mobileclip2:b
+scales: # model compound scaling constants, i.e. 'model=yoloe-26n-seg.yaml' will call yoloe-26-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 347 layers, 5,615,540 parameters, 5,615,540 gradients, 11.7 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 347 layers, 15,272,852 parameters, 15,272,852 gradients, 39.3 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 367 layers, 34,922,132 parameters, 34,922,132 gradients, 136.3 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 479 layers, 39,325,588 parameters, 39,325,588 gradients, 154.7 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 479 layers, 85,397,684 parameters, 85,397,684 gradients, 343.3 GFLOPs
+
+# YOLOE26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLOE26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, YOLOESegment26, [nc, 32, 256, 512, True]] # YOLOESegment26(P3, P4, P5)
ultralytics/cfg/models/26/yoloe-26.yaml (new)
@@ -0,0 +1,53 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOE-26 open-vocabulary object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+text_model: mobileclip2:b
+scales: # model compound scaling constants, i.e. 'model=yoloe-26n.yaml' will call yoloe-26.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 298 layers, 5,061,540 parameters, 5,061,540 gradients, 7.3 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 298 layers, 13,776,836 parameters, 13,776,836 gradients, 24.8 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 318 layers, 29,706,308 parameters, 29,706,308 gradients, 79.2 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 430 layers, 34,109,764 parameters, 34,109,764 gradients, 97.6 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 430 layers, 73,697,252 parameters, 73,697,252 gradients, 215.2 GFLOPs
+
+# YOLOE26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLOE26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, YOLOEDetect, [nc, 512, True]] # YOLOEDetect(P3, P4, P5)
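
The YOLOE-26 configs pair the YOLO26 backbone with open-vocabulary heads (YOLOEDetect / YOLOESegment26) and a mobileclip2:b text encoder. A usage sketch, under the assumption that the existing YOLOE text-prompt API (set_classes / get_text_pe) applies to these new configs as well:

from ultralytics import YOLOE

model = YOLOE("yoloe-26n-seg.yaml")  # scale 'n' of yoloe-26-seg.yaml, per the scales comment
names = ["person", "bus"]
model.set_classes(names, model.get_text_pe(names))  # text embeddings via the mobileclip2:b text_model
results = model.predict("https://ultralytics.com/images/bus.jpg")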
ultralytics/data/annotator.py
@@ -9,7 +9,7 @@ from ultralytics import SAM, YOLO
 
 def auto_annotate(
     data: str | Path,
-    det_model: str = "yolo11x.pt",
+    det_model: str = "yolo26x.pt",
     sam_model: str = "sam_b.pt",
     device: str = "",
     conf: float = 0.25,
@@ -39,7 +39,7 @@ def auto_annotate(
 
     Examples:
         >>> from ultralytics.data.annotator import auto_annotate
-        >>> auto_annotate(data="ultralytics/assets", det_model="yolo11n.pt", sam_model="mobile_sam.pt")
+        >>> auto_annotate(data="ultralytics/assets", det_model="yolo26n.pt", sam_model="mobile_sam.pt")
     """
     det_model = YOLO(det_model)
     sam_model = SAM(sam_model)
ultralytics/data/augment.py
@@ -2062,11 +2062,18 @@ class Format:
             if nl:
                 masks, instances, cls = self._format_segments(instances, cls, w, h)
                 masks = torch.from_numpy(masks)
+                cls_tensor = torch.from_numpy(cls.squeeze(1))
+                if self.mask_overlap:
+                    sem_masks = cls_tensor[masks[0].long() - 1]  # (H, W) from (1, H, W) instance indices
+                else:
+                    sem_masks = (masks * cls_tensor[:, None, None]).max(0).values  # (H, W) from (N, H, W) binary
             else:
                 masks = torch.zeros(
                     1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
                 )
+                sem_masks = torch.zeros(img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio)
             labels["masks"] = masks
+            labels["sem_masks"] = sem_masks.float()
         labels["img"] = self._format_img(img)
         labels["cls"] = torch.from_numpy(cls) if nl else torch.zeros(nl, 1)
         labels["bboxes"] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
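
The new sem_masks entry collapses the instance masks into a per-pixel class map in both mask layouts. A toy walk-through of the two branches (shapes assumed from the diff; note the overlap branch sends background pixels, value 0, through index -1 to the last instance's class):

import torch

cls_tensor = torch.tensor([2, 5])  # (N,) class ids for N=2 instances, i.e. cls.squeeze(1)

# mask_overlap=True: masks is (1, H, W) with pixel values = instance indices 1..N (0 = background)
overlap = torch.tensor([[[0, 1], [2, 2]]])
sem = cls_tensor[overlap[0].long() - 1]
# sem == [[5, 2], [5, 5]]: instance 1 -> class 2, instance 2 -> class 5, background wraps to index -1

# mask_overlap=False: masks is (N, H, W) binary; take the per-pixel max of class-weighted masks
binary = torch.tensor([[[1, 0], [0, 0]], [[0, 0], [1, 1]]])
sem2 = (binary * cls_tensor[:, None, None]).max(0).values
# sem2 == [[2, 0], [5, 5]]: background stays 0 in this branch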
ultralytics/data/converter.py
@@ -15,7 +15,7 @@ import numpy as np
 from PIL import Image
 
 from ultralytics.utils import ASSETS_URL, DATASETS_DIR, LOGGER, NUM_THREADS, TQDM, YAML
-from ultralytics.utils.checks import check_file, check_requirements
+from ultralytics.utils.checks import check_file
 from ultralytics.utils.downloads import download, zip_directory
 from ultralytics.utils.files import increment_path
 
@@ -747,14 +747,15 @@ def convert_to_multispectral(path: str | Path, n_channels: int = 10, replace: bo
 
 
 async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Path | None = None) -> Path:
-    """Convert NDJSON dataset format to Ultralytics YOLO11 dataset structure.
+    """Convert NDJSON dataset format to Ultralytics YOLO dataset structure.
 
-    This function converts datasets stored in NDJSON (Newline Delimited JSON) format to the standard YOLO format with
-    separate directories for images and labels. It supports parallel processing for efficient conversion of large
-    datasets and can download images from URLs if they don't exist locally.
+    This function converts datasets stored in NDJSON (Newline Delimited JSON) format to the standard YOLO format. For
+    detection/segmentation/pose/obb tasks, it creates separate directories for images and labels. For classification
+    tasks, it creates the ImageNet-style {split}/{class_name}/ folder structure. It supports parallel processing for
+    efficient conversion of large datasets and can download images from URLs.
 
     The NDJSON format consists of:
-    - First line: Dataset metadata with class names and configuration
+    - First line: Dataset metadata with class names, task type, and configuration
     - Subsequent lines: Individual image records with annotations and optional URLs
 
     Args:
@@ -763,7 +764,7 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
             None, uses the parent directory of the NDJSON file. Defaults to None.
 
     Returns:
-        (Path): Path to the generated data.yaml file that can be used for YOLO training.
+        (Path): Path to the generated data.yaml file (detection) or dataset directory (classification).
 
     Examples:
         Convert a local NDJSON file:
@@ -775,9 +776,11 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
 
         Use with YOLO training
         >>> from ultralytics import YOLO
-        >>> model = YOLO("yolo11n.pt")
+        >>> model = YOLO("yolo26n.pt")
         >>> model.train(data="https://github.com/ultralytics/assets/releases/download/v0.0.0/coco8-ndjson.ndjson")
     """
+    from ultralytics.utils.checks import check_requirements
+
     check_requirements("aiohttp")
     import aiohttp
 
@@ -790,50 +793,63 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
     dataset_dir = output_path / ndjson_path.stem
     splits = {record["split"] for record in image_records}
 
-    # Create directories and prepare YAML structure
-    dataset_dir.mkdir(parents=True, exist_ok=True)
-    data_yaml = dict(dataset_record)
-    data_yaml["names"] = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
-    data_yaml.pop("class_names")
+    # Check if this is a classification dataset
+    is_classification = dataset_record.get("task") == "classify"
+    class_names = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
 
-    for split in sorted(splits):
-        (dataset_dir / "images" / split).mkdir(parents=True, exist_ok=True)
-        (dataset_dir / "labels" / split).mkdir(parents=True, exist_ok=True)
-        data_yaml[split] = f"images/{split}"
+    # Create base directories
+    dataset_dir.mkdir(parents=True, exist_ok=True)
+    data_yaml = None
+
+    if not is_classification:
+        # Detection/segmentation/pose/obb: prepare YAML and create base structure
+        data_yaml = dict(dataset_record)
+        data_yaml["names"] = class_names
+        data_yaml.pop("class_names", None)
+        data_yaml.pop("type", None)  # Remove NDJSON-specific fields
+        for split in sorted(splits):
+            (dataset_dir / "images" / split).mkdir(parents=True, exist_ok=True)
+            (dataset_dir / "labels" / split).mkdir(parents=True, exist_ok=True)
+            data_yaml[split] = f"images/{split}"
 
     async def process_record(session, semaphore, record):
         """Process single image record with async session."""
         async with semaphore:
             split, original_name = record["split"], record["file"]
-            label_path = dataset_dir / "labels" / split / f"{Path(original_name).stem}.txt"
-            image_path = dataset_dir / "images" / split / original_name
-
             annotations = record.get("annotations", {})
-            lines_to_write = []
-            for key in annotations.keys():
-                lines_to_write = [" ".join(map(str, item)) for item in annotations[key]]
-                break
-            if "classification" in annotations:
-                lines_to_write = [str(cls) for cls in annotations["classification"]]
-
-            label_path.write_text("\n".join(lines_to_write) + "\n" if lines_to_write else "")
 
+            if is_classification:
+                # Classification: place image in {split}/{class_name}/ folder
+                class_ids = annotations.get("classification", [])
+                class_id = class_ids[0] if class_ids else 0
+                class_name = class_names.get(class_id, str(class_id))
+                image_path = dataset_dir / split / class_name / original_name
+            else:
+                # Detection: write label file and place image in images/{split}/
+                image_path = dataset_dir / "images" / split / original_name
+                label_path = dataset_dir / "labels" / split / f"{Path(original_name).stem}.txt"
+                lines_to_write = []
+                for key in annotations.keys():
+                    lines_to_write = [" ".join(map(str, item)) for item in annotations[key]]
+                    break
+                label_path.write_text("\n".join(lines_to_write) + "\n" if lines_to_write else "")
+
+            # Download image if URL provided and file doesn't exist
             if http_url := record.get("url"):
                 if not image_path.exists():
+                    image_path.parent.mkdir(parents=True, exist_ok=True)
                     try:
                         async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                             response.raise_for_status()
-                            with open(image_path, "wb") as f:
-                                async for chunk in response.content.iter_chunked(8192):
-                                    f.write(chunk)
+                            image_path.write_bytes(await response.read())
                         return True
                     except Exception as e:
                         LOGGER.warning(f"Failed to download {http_url}: {e}")
                         return False
             return True
 
-    # Process all images with async downloads
-    semaphore = asyncio.Semaphore(64)
+    # Process all images with async downloads (limit connections for small datasets)
+    semaphore = asyncio.Semaphore(min(128, len(image_records)))
    async with aiohttp.ClientSession() as session:
         pbar = TQDM(
             total=len(image_records),
@@ -848,8 +864,11 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
         await asyncio.gather(*[tracked_process(record) for record in image_records])
     pbar.close()
 
-    # Write data.yaml
-    yaml_path = dataset_dir / "data.yaml"
-    YAML.save(yaml_path, data_yaml)
-
-    return yaml_path
+    if is_classification:
+        # Classification: return dataset directory (check_cls_dataset expects a directory path)
+        return dataset_dir
+    else:
+        # Detection: write data.yaml and return its path
+        yaml_path = dataset_dir / "data.yaml"
+        YAML.save(yaml_path, data_yaml)
+        return yaml_path
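
Since convert_ndjson_to_yolo is a coroutine and its return value now depends on the task recorded in the NDJSON metadata, a hedged usage sketch (the local filename here is illustrative):

import asyncio

from ultralytics.data.converter import convert_ndjson_to_yolo

out = asyncio.run(convert_ndjson_to_yolo("coco8-ndjson.ndjson"))
# detect/segment/pose/obb: 'out' is the path of the generated data.yaml
# classify ("task": "classify" in the metadata line): 'out' is the dataset
# directory containing {split}/{class_name}/ image folders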
ultralytics/data/dataset.py
@@ -294,7 +294,7 @@ class YOLODataset(BaseDataset):
         values = list(zip(*[list(b.values()) for b in batch]))
         for i, k in enumerate(keys):
             value = values[i]
-            if k in {"img", "text_feats"}:
+            if k in {"img", "text_feats", "sem_masks"}:
                 value = torch.stack(value, 0)
             elif k == "visuals":
                 value = torch.nn.utils.rnn.pad_sequence(value, batch_first=True)
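
The collate change slots sem_masks into the torch.stack branch: unlike per-instance labels, every image contributes one fixed-size semantic map, so plain stacking suffices. A minimal illustration with assumed shapes:

import torch

sem_masks = [torch.zeros(160, 160), torch.zeros(160, 160)]  # one (H/ratio, W/ratio) map per image
batched = torch.stack(sem_masks, 0)  # (2, 160, 160), batched the same way as "img" and "text_feats"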