dgenerate-ultralytics-headless 8.3.167-py3-none-any.whl → 8.3.169-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {dgenerate_ultralytics_headless-8.3.167.dist-info → dgenerate_ultralytics_headless-8.3.169.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.167.dist-info → dgenerate_ultralytics_headless-8.3.169.dist-info}/RECORD +25 -25
  3. tests/test_cli.py +1 -1
  4. tests/test_python.py +4 -3
  5. ultralytics/__init__.py +1 -1
  6. ultralytics/cfg/default.yaml +1 -1
  7. ultralytics/engine/exporter.py +0 -1
  8. ultralytics/engine/model.py +3 -2
  9. ultralytics/models/rtdetr/predict.py +1 -0
  10. ultralytics/models/rtdetr/val.py +22 -38
  11. ultralytics/models/yolo/classify/val.py +1 -1
  12. ultralytics/models/yolo/detect/val.py +28 -20
  13. ultralytics/models/yolo/obb/val.py +16 -31
  14. ultralytics/models/yolo/pose/val.py +11 -46
  15. ultralytics/models/yolo/segment/val.py +12 -40
  16. ultralytics/solutions/region_counter.py +2 -1
  17. ultralytics/solutions/similarity_search.py +2 -1
  18. ultralytics/solutions/solutions.py +30 -63
  19. ultralytics/solutions/streamlit_inference.py +57 -14
  20. ultralytics/utils/metrics.py +103 -17
  21. ultralytics/utils/plotting.py +2 -2
  22. {dgenerate_ultralytics_headless-8.3.167.dist-info → dgenerate_ultralytics_headless-8.3.169.dist-info}/WHEEL +0 -0
  23. {dgenerate_ultralytics_headless-8.3.167.dist-info → dgenerate_ultralytics_headless-8.3.169.dist-info}/entry_points.txt +0 -0
  24. {dgenerate_ultralytics_headless-8.3.167.dist-info → dgenerate_ultralytics_headless-8.3.169.dist-info}/licenses/LICENSE +0 -0
  25. {dgenerate_ultralytics_headless-8.3.167.dist-info → dgenerate_ultralytics_headless-8.3.169.dist-info}/top_level.txt +0 -0
{dgenerate_ultralytics_headless-8.3.167.dist-info → dgenerate_ultralytics_headless-8.3.169.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dgenerate-ultralytics-headless
- Version: 8.3.167
+ Version: 8.3.169
  Summary: Automatically built Ultralytics package with python-opencv-headless dependency instead of python-opencv
  Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
  Maintainer-email: Ultralytics <hello@ultralytics.com>
{dgenerate_ultralytics_headless-8.3.167.dist-info → dgenerate_ultralytics_headless-8.3.169.dist-info}/RECORD CHANGED
@@ -1,18 +1,18 @@
- dgenerate_ultralytics_headless-8.3.167.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
+ dgenerate_ultralytics_headless-8.3.169.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
  tests/__init__.py,sha256=b4KP5_q-2IO8Br8YHOSLYnn7IwZS81l_vfEF2YPa2lM,894
  tests/conftest.py,sha256=LXtQJcFNWPGuzauTGkiXgsvVC3llJKfg22WcmhRzuQc,2593
- tests/test_cli.py,sha256=Kpfxq_RlbKK1Z8xNScDUbre6GB7neZhXZAYGI1tiDS8,5660
+ tests/test_cli.py,sha256=EMf5gTAopOnIz8VvzaM-Qb044o7D0flnUHYQ-2ffOM4,5670
  tests/test_cuda.py,sha256=-nQsfF3lGfqLm6cIeu_BCiXqLj7HzpL7R1GzPEc6z2I,8128
  tests/test_engine.py,sha256=Jpt2KVrltrEgh2-3Ykouz-2Z_2fza0eymL5ectRXadM,4922
  tests/test_exports.py,sha256=HmMKOTCia9ZDC0VYc_EPmvBTM5LM5eeI1NF_pKjLpd8,9677
  tests/test_integrations.py,sha256=kl_AKmE_Qs1GB0_91iVwbzNxofm_hFTt0zzU6JF-pg4,6323
- tests/test_python.py,sha256=JJu-69IfuUf1dLK7Ko9elyPONiQ1yu7yhapMVIAt_KI,27907
+ tests/test_python.py,sha256=-qvdeg-hEcKU5mWSDEU24iFZ-i8FAwQRznSXpkp6WQ4,27928
  tests/test_solutions.py,sha256=tuf6n_fsI8KvSdJrnc-cqP2qYdiYqCWuVrx0z9dOz3Q,13213
- ultralytics/__init__.py,sha256=25BnED8OrDgyWwAHSNTDasTO5KJyBbtsiHMkJU2cmZk,730
+ ultralytics/__init__.py,sha256=4cDmvA4EGkWesc5wuiEUkFyDQsQLpWUYq2_7JUrJc38,730
  ultralytics/assets/bus.jpg,sha256=wCAZxJecGR63Od3ZRERe9Aja1Weayrb9Ug751DS_vGM,137419
  ultralytics/assets/zidane.jpg,sha256=Ftc4aeMmen1O0A3o6GCDO9FlfBslLpTAw0gnetx7bts,50427
  ultralytics/cfg/__init__.py,sha256=VIpPHImhjb0XLJquGZrG_LBGZchtOtBSXR7HYTYV2GU,39602
- ultralytics/cfg/default.yaml,sha256=oFG6llJO-Py5H-cR9qs-7FieJamroDLwpbrkhmfROOM,8307
+ ultralytics/cfg/default.yaml,sha256=1SspGAK_K_DT7DBfEScJh4jsJUTOxahehZYj92xmj7o,8347
  ultralytics/cfg/datasets/Argoverse.yaml,sha256=4SGaJio9JFUkrscHJTPnH_QSbYm48Wbk8EFwl39zntc,3262
  ultralytics/cfg/datasets/DOTAv1.5.yaml,sha256=VZ_KKFX0H2YvlFVJ8JHcLWYBZ2xiQ6Z-ROSTiKWpS7c,1211
  ultralytics/cfg/datasets/DOTAv1.yaml,sha256=JrDuYcQ0JU9lJlCA-dCkMNko_jaj6MAVGHjsfjeZ_u0,1181
@@ -120,8 +120,8 @@ ultralytics/data/scripts/get_coco.sh,sha256=UuJpJeo3qQpTHVINeOpmP0NYmg8PhEFE3A8J
  ultralytics/data/scripts/get_coco128.sh,sha256=qmRQl_hOKrsdHrTrnyQuFIH01oDz3lfaz138OgGfLt8,650
  ultralytics/data/scripts/get_imagenet.sh,sha256=hr42H16bM47iT27rgS7MpEo-GeOZAYUQXgr0B2cwn48,1705
  ultralytics/engine/__init__.py,sha256=lm6MckFYCPTbqIoX7w0s_daxdjNeBeKW6DXppv1-QUM,70
- ultralytics/engine/exporter.py,sha256=m6HAaoDRDaUR4P0zue3o7bUKjnPa4QlMCjcbJtS4iCI,74926
- ultralytics/engine/model.py,sha256=FmLwiKuItVNgoyXhAvesUnD3UeHBzCVzGHDrqB8J4ms,53453
+ ultralytics/engine/exporter.py,sha256=mKAUcyX3C8lDFhkEu3T3kzkbODFEbH1_Wn1W2hMjw4Y,74878
+ ultralytics/engine/model.py,sha256=877u2n0ISz2COOYtEMUqQe0E-HHB4Atb2DuH1XCE98k,53530
  ultralytics/engine/predictor.py,sha256=xxl1kdAzKrN8Y_5MQ5f92uFPeeRq1mYOl6hNlzpPjy8,22520
  ultralytics/engine/results.py,sha256=QcHcbPVlLBiy_APwABr-T5K65HR8Bl1rRzxawjjP76E,71873
  ultralytics/engine/trainer.py,sha256=28FeqASvQRxCaK96SXDM-BfPJjqy5KNiWhf8v6GXTug,39785
@@ -144,9 +144,9 @@ ultralytics/models/nas/predict.py,sha256=J4UT7nwi_h63lJ3a_gYac-Ws8wFYingZINxMqSo
  ultralytics/models/nas/val.py,sha256=QUTE3zuhJLVqmDGd2n7iSSk7X6jKZCRxufFkBbyxYYo,1548
  ultralytics/models/rtdetr/__init__.py,sha256=_jEHmOjI_QP_nT3XJXLgYHQ6bXG4EL8Gnvn1y_eev1g,225
  ultralytics/models/rtdetr/model.py,sha256=e2u6kQEYawRXGGO6HbFDE1uyHfsIqvKk4IpVjjYN41k,2182
- ultralytics/models/rtdetr/predict.py,sha256=_jk9ZkIW0gNLUHYyRCz_n9UgGnMTtTkFZ3Pzmkbyjgw,4197
+ ultralytics/models/rtdetr/predict.py,sha256=Jqorq8OkGgXCCRS8DmeuGQj3XJxEhz97m22p7VxzXTw,4279
  ultralytics/models/rtdetr/train.py,sha256=6FA3nDEcH1diFQ8Ky0xENp9cOOYATHxU6f42z9npMvs,3766
- ultralytics/models/rtdetr/val.py,sha256=MGzHWMfVDx9KPgaK09nvuHfXRQ6FagpzEyNO1R_8Xp8,9495
+ ultralytics/models/rtdetr/val.py,sha256=QT7JNKFJmD8dqUVSUBb78t9wGtE7KEw5l92CKJU50TM,8849
  ultralytics/models/sam/__init__.py,sha256=iR7B06rAEni21eptg8n4rLOP0Z_qV9y9PL-L93n4_7s,266
  ultralytics/models/sam/amg.py,sha256=IpcuIfC5KBRiF4sdrsPl1ecWEJy75axo1yG23r5BFsw,11783
  ultralytics/models/sam/build.py,sha256=J6n-_QOYLa63jldEZmhRe9D3Is_AJE8xyZLUjzfRyTY,12629
@@ -169,23 +169,23 @@ ultralytics/models/yolo/model.py,sha256=e66CIsSLHbEeGlkEQ1r6WwVDKAoR2nc0-UoGA94z
  ultralytics/models/yolo/classify/__init__.py,sha256=9--HVaNOfI1K7rn_rRqclL8FUAnpfeBrRqEQIaQw2xM,383
  ultralytics/models/yolo/classify/predict.py,sha256=FqAC2YXe25bRwedMZhF3Lw0waoY-a60xMKELhxApP9I,4149
  ultralytics/models/yolo/classify/train.py,sha256=V-hevc6X7xemnpyru84OfTRA77eNnkVSMEz16_OUvo4,10244
- ultralytics/models/yolo/classify/val.py,sha256=YakPxBVZCd85Kp4wFKx8KH6JJFiU7nkFS3r9_ZSwFRM,10036
+ ultralytics/models/yolo/classify/val.py,sha256=iQZRS6D3-YQjygBhFpC8VCJMI05L3uUPe4ukwbVtSdI,10021
  ultralytics/models/yolo/detect/__init__.py,sha256=GIRsLYR-kT4JJx7lh4ZZAFGBZj0aebokuU0A7JbjDVA,257
  ultralytics/models/yolo/detect/predict.py,sha256=ySUsdIf8dw00bzWhcxN1jZwLWKPRT2M7-N7TNL3o4zo,5387
  ultralytics/models/yolo/detect/train.py,sha256=HlaCoHJ6Y2TpCXXWabMRZApAYqBvjuM_YQJUV5JYCvw,9907
- ultralytics/models/yolo/detect/val.py,sha256=TrLclevqfD9NnpqPSIEvB5KakCsozyBegaD4lhd3noE,20485
+ ultralytics/models/yolo/detect/val.py,sha256=HOK1681EqGSfAxoqh9CKw1gqFAfGbegEn1xbkxAPosI,20572
  ultralytics/models/yolo/obb/__init__.py,sha256=tQmpG8wVHsajWkZdmD6cjGohJ4ki64iSXQT8JY_dydo,221
  ultralytics/models/yolo/obb/predict.py,sha256=4r1eSld6TNJlk9JG56e-DX6oPL8uBBqiuztyBpxWlHE,2888
  ultralytics/models/yolo/obb/train.py,sha256=bnYFAMur7Uvbw5Dc09-S2ge7B05iGX-t37Ksgc0ef6g,3921
- ultralytics/models/yolo/obb/val.py,sha256=nT82lKXewUw3bgX45Ms045rzcYn2A1j8g3Dxig2c-FU,14844
+ ultralytics/models/yolo/obb/val.py,sha256=9CVx9Gj0bB6p6rQtxlBNYeCRBwz6abUmLe_b2cnozO8,13806
  ultralytics/models/yolo/pose/__init__.py,sha256=63xmuHZLNzV8I76HhVXAq4f2W0KTk8Oi9eL-Y204LyQ,227
  ultralytics/models/yolo/pose/predict.py,sha256=M0C7ZfVXx4QXgv-szjnaXYEPas76ZLGAgDNNh1GG0vI,3743
  ultralytics/models/yolo/pose/train.py,sha256=GyvNnDPJ3UFq_90HN8_FJ0dbwRkw3JJTVpkMFH0vC0o,5457
- ultralytics/models/yolo/pose/val.py,sha256=abAll3lWT6IRwoHOFNsgAZyNQtTtPBXHq0Wszpu9p5E,13994
+ ultralytics/models/yolo/pose/val.py,sha256=Sa4YAYpOhdt_mpNGWX2tvjwkDvt1RjiNjqdZ5p532hw,12327
  ultralytics/models/yolo/segment/__init__.py,sha256=3IThhZ1wlkY9FvmWm9cE-5-ZyE6F1FgzAtQ6jOOFzzw,275
  ultralytics/models/yolo/segment/predict.py,sha256=qlprQCZn4_bpjpI08U0MU9Q9_1gpHrw_7MXwtXE1l1Y,5377
  ultralytics/models/yolo/segment/train.py,sha256=XrPkXUiNu1Jvhn8iDew_RaLLjZA3un65rK-QH9mtNIw,3802
- ultralytics/models/yolo/segment/val.py,sha256=AnvY0O7HhD5xZ2BE2artLTAVW4SNmHbVopBJsYRcmk8,12328
+ ultralytics/models/yolo/segment/val.py,sha256=yVFJpYZCjGJ8fBgp4XEDO5ivAhkcctGqfkHI8uB-RwM,11209
  ultralytics/models/yolo/world/__init__.py,sha256=nlh8I6t8hMGz_vZg8QSlsUW1R-2eKvn9CGUoPPQEGhA,131
  ultralytics/models/yolo/world/train.py,sha256=wBKnSC-TvrKWM1Taxqwo13XcwGHwwAXzNYV1tmqcOpc,7845
  ultralytics/models/yolo/world/train_world.py,sha256=lk9z_INGPSTP_W7Rjh3qrWSmjHaxOJtGngonh1cj2SM,9551
@@ -217,12 +217,12 @@ ultralytics/solutions/object_counter.py,sha256=zD-EYIxu_y7qCFEkv6aqV60oMCZ4q6b_k
  ultralytics/solutions/object_cropper.py,sha256=x3gN-ihtwkJntp6EMcVWnIvVTOu1iRkP5RrX-1kwJHg,3522
  ultralytics/solutions/parking_management.py,sha256=IfPUn15aelxz6YZNo9WYkVEl5IOVSw8VD0OrpKtExPE,13613
  ultralytics/solutions/queue_management.py,sha256=gTkILx4dVcsKRZXSCXtelkEjCRiDS5iznb3FnddC61c,4390
- ultralytics/solutions/region_counter.py,sha256=nmtCoq1sFIU2Hx4gKImYNF7Yf5YpADHwujxxQGDvf1s,5916
+ ultralytics/solutions/region_counter.py,sha256=Ncd6_qIXmSQXUxCwQkgYc2-nI7KifQYhxPi3pOelZak,5950
  ultralytics/solutions/security_alarm.py,sha256=czEaMcy04q-iBkKqT_14d8H20CFB6zcKH_31nBGQnyw,6345
- ultralytics/solutions/similarity_search.py,sha256=H9MPf8F5AvVfmb9hnng0FrIOTbLU_I-CkVHGpC81CE0,9496
- ultralytics/solutions/solutions.py,sha256=KtoSUSxM4s-Ti5EAzT21pItuv70qlIOH6ymJP95Gl-E,37318
+ ultralytics/solutions/similarity_search.py,sha256=c18TK0qW5AvanXU28nAX4o_WtB1SDAJStUtyLDuEBHQ,9505
+ ultralytics/solutions/solutions.py,sha256=KuQ5M9oocygExRjKAIN0HjHNFYebENUSyw-i7ykDsO8,35903
  ultralytics/solutions/speed_estimation.py,sha256=chg_tBuKFw3EnFiv_obNDaUXLAo-FypxC7gsDeB_VUI,5878
- ultralytics/solutions/streamlit_inference.py,sha256=SqL-YxU3RCxCKscH2AYUTkmJknilV9jCCco6ufqsFk4,10501
+ ultralytics/solutions/streamlit_inference.py,sha256=JAVOCc_eNtszUHKU-rZ-iUQtA6m6d3QqCgtPfwrlcsE,12773
  ultralytics/solutions/trackzone.py,sha256=kIS94rNfL3yVPAtSbnW8F-aLMxXowQtsfKNB-jLezz8,3941
  ultralytics/solutions/vision_eye.py,sha256=J_nsXhWkhfWz8THNJU4Yag4wbPv78ymby6SlNKeSuk4,3005
  ultralytics/solutions/templates/similarity-search.html,sha256=nyyurpWlkvYlDeNh-74TlV4ctCpTksvkVy2Yc4ImQ1U,4261
@@ -247,10 +247,10 @@ ultralytics/utils/export.py,sha256=LK-wlTlyb_zIKtSvOmfmvR70RcUU9Ct9UBDt5wn9_rY,9
  ultralytics/utils/files.py,sha256=ZCbLGleiF0f-PqYfaxMFAWop88w7U1hpreHXl8b2ko0,8238
  ultralytics/utils/instance.py,sha256=dC83rHvQXciAED3rOiScFs3BOX9OI06Ey1mj9sjUKvs,19070
  ultralytics/utils/loss.py,sha256=fbOWc3Iu0QOJiWbi-mXWA9-1otTYlehtmUsI7os7ydM,39799
- ultralytics/utils/metrics.py,sha256=AbaYgGPEFY-IVv1_Izb0dXulSs1NEZ2-TVkO1GcP8iI,62179
+ ultralytics/utils/metrics.py,sha256=NX22CnIPqs7i_UAcf2D0-KQNNOoRu39OjLtjcbnWTN8,66296
  ultralytics/utils/ops.py,sha256=8d60fbpntrexK3gPoLUS6mWAYGrtrQaQCOYyRJsCjuI,34521
  ultralytics/utils/patches.py,sha256=tBAsNo_RyoFLL9OAzVuJmuoDLUJIPuTMByBYyblGG1A,6517
- ultralytics/utils/plotting.py,sha256=LO-iR-k1UewV5vt4xXDUIirdmNEZdpfiQvLyIWqINPg,47171
+ ultralytics/utils/plotting.py,sha256=IEugKlTITLxArZjbSr7i_cTaHHAqNwVVk08Ak7I_ZdM,47169
  ultralytics/utils/tal.py,sha256=aXawOnhn8ni65tJWIW-PYqWr_TRvltbHBjrTo7o6lDQ,20924
  ultralytics/utils/torch_utils.py,sha256=D76Pvmw5OKh-vd4aJkOMO0dSLbM5WzGr7Hmds54hPEk,39233
  ultralytics/utils/triton.py,sha256=M7qe4RztiADBJQEWQKaIQsp94ERFJ_8_DUHDR6TXEOM,5410
@@ -266,8 +266,8 @@ ultralytics/utils/callbacks/neptune.py,sha256=j8pecmlcsM8FGzLKWoBw5xUsi5t8E5HuxY
  ultralytics/utils/callbacks/raytune.py,sha256=S6Bq16oQDQ8BQgnZzA0zJHGN_BBr8iAM_WtGoLiEcwg,1283
  ultralytics/utils/callbacks/tensorboard.py,sha256=MDPBW7aDes-66OE6YqKXXvqA_EocjzEMHWGM-8z9vUQ,5281
  ultralytics/utils/callbacks/wb.py,sha256=Tm_-aRr2CN32MJkY9tylpMBJkb007-MSRNSQ7rDJ5QU,7521
- dgenerate_ultralytics_headless-8.3.167.dist-info/METADATA,sha256=FiMbNwoSDCNIwxl57mizfGjDnLmE0lLszdXRcIZ8ktc,38672
- dgenerate_ultralytics_headless-8.3.167.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dgenerate_ultralytics_headless-8.3.167.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
- dgenerate_ultralytics_headless-8.3.167.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
- dgenerate_ultralytics_headless-8.3.167.dist-info/RECORD,,
+ dgenerate_ultralytics_headless-8.3.169.dist-info/METADATA,sha256=fB3xamJwWddK7ILU-aXztVwpG2n7b8JEw4gvWyTUnls,38672
+ dgenerate_ultralytics_headless-8.3.169.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dgenerate_ultralytics_headless-8.3.169.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
+ dgenerate_ultralytics_headless-8.3.169.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
+ dgenerate_ultralytics_headless-8.3.169.dist-info/RECORD,,
tests/test_cli.py CHANGED
@@ -39,7 +39,7 @@ def test_val(task: str, model: str, data: str) -> None:
  @pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
  def test_predict(task: str, model: str, data: str) -> None:
      """Test YOLO prediction on provided sample assets for specified task and model."""
-     run(f"yolo {task} predict model={model} source={ASSETS} imgsz=32 save save_crop save_txt")
+     run(f"yolo {task} predict model={model} source={ASSETS} imgsz=32 save save_crop save_txt visualize")


  @pytest.mark.parametrize("model", MODELS)
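Note (not part of the diff): the updated CLI test simply adds the existing predict-time `visualize` flag, which saves intermediate feature maps. A rough Python equivalent of that command, with "yolo11n.pt" assumed as the weights file:

from ultralytics import ASSETS, YOLO

model = YOLO("yolo11n.pt")  # assumed weights; the test parametrizes over several task models
model.predict(ASSETS, imgsz=32, save=True, save_crop=True, save_txt=True, visualize=True)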
tests/test_python.py CHANGED
@@ -201,11 +201,12 @@ def test_track_stream(model):
      model.track(video_url, imgsz=160, tracker=custom_yaml)


- @pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
- def test_val(task: str, model: str, data: str) -> None:
+ @pytest.mark.parametrize("task,weight,data", TASK_MODEL_DATA)
+ def test_val(task: str, weight: str, data: str) -> None:
      """Test the validation mode of the YOLO model."""
+     model = YOLO(weight)
      for plots in {True, False}:  # Test both cases i.e. plots=True and plots=False
-         metrics = YOLO(model).val(data=data, imgsz=32, plots=plots)
+         metrics = model.val(data=data, imgsz=32, plots=plots)
          metrics.to_df()
          metrics.to_csv()
          metrics.to_xml()
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- __version__ = "8.3.167"
+ __version__ = "8.3.169"

  import os

ultralytics/cfg/default.yaml CHANGED
@@ -58,7 +58,7 @@ plots: True # (bool) save plots and images during train/val
  source: # (str, optional) source directory for images or videos
  vid_stride: 1 # (int) video frame-rate stride
  stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
- visualize: False # (bool) visualize model features
+ visualize: False # (bool) visualize model features (predict) or visualize TP, FP, FN (val)
  augment: False # (bool) apply image augmentation to prediction sources
  agnostic_nms: False # (bool) class-agnostic NMS
  classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
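Note (not part of the diff): per the updated comment, `visualize` now also applies during validation, where it triggers the TP/FP/FN match plots added further down in detect/val.py and metrics.py. A minimal sketch, assuming "yolo11n.pt" weights and the "coco8.yaml" dataset:

from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # assumed weights
# plots=True is also needed, since save_matches = plots and visualize (see detect/val.py below)
metrics = model.val(data="coco8.yaml", imgsz=640, plots=True, visualize=True)
print(metrics.save_dir)  # per-image GT/TP/FP/FN grids land in <save_dir>/visualizations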
ultralytics/engine/exporter.py CHANGED
@@ -1014,7 +1014,6 @@ class Exporter:
              enable_batchmatmul_unfold=True,  # fix lower no. of detected objects on GPU delegate
              output_signaturedefs=True,  # fix error with Attention block group convolution
              disable_group_convolution=self.args.format in {"tfjs", "edgetpu"},  # fix error with group convolution
-             optimization_for_gpu_delegate=True,
          )
          YAML.save(f / "metadata.yaml", self.metadata)  # add metadata.yaml

ultralytics/engine/model.py CHANGED
@@ -907,8 +907,9 @@ class Model(torch.nn.Module):
          if hasattr(self.model, "names"):
              return check_class_names(self.model.names)
          if not self.predictor:  # export formats will not have predictor defined until predict() is called
-             self.predictor = self._smart_load("predictor")(overrides=self.overrides, _callbacks=self.callbacks)
-             self.predictor.setup_model(model=self.model, verbose=False)
+             predictor = self._smart_load("predictor")(overrides=self.overrides, _callbacks=self.callbacks)
+             predictor.setup_model(model=self.model, verbose=False)  # do not mess with self.predictor.model args
+             return predictor.model.names
          return self.predictor.model.names

      @property
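Note (not part of the diff): the change keeps `Model.names` from caching a half-configured predictor that was created only to read class names. A hedged sketch of the affected access pattern, with "yolo11n.onnx" as an assumed exported artifact:

from ultralytics import YOLO

model = YOLO("yolo11n.onnx", task="detect")  # assumed export artifact
print(model.names)  # resolved via a throwaway predictor; self.predictor stays unset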
ultralytics/models/rtdetr/predict.py CHANGED
@@ -67,6 +67,7 @@ class RTDETRPredictor(BasePredictor):
              if self.args.classes is not None:
                  idx = (cls == torch.tensor(self.args.classes, device=cls.device)).any(1) & idx
              pred = torch.cat([bbox, max_score, cls], dim=-1)[idx]  # filter
+             pred = pred[pred[:, 4].argsort(descending=True)][: self.args.max_det]
              oh, ow = orig_img.shape[:2]
              pred[..., [0, 2]] *= ow  # scale x coordinates to original width
              pred[..., [1, 3]] *= oh  # scale y coordinates to original height
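Note (not part of the diff): the added line sorts RT-DETR detections by confidence and truncates them to `max_det`. A hedged usage sketch, with "rtdetr-l.pt" and the sample image assumed:

from ultralytics import ASSETS, RTDETR

model = RTDETR("rtdetr-l.pt")  # assumed weights
results = model.predict(ASSETS / "bus.jpg", max_det=100)  # at most 100 highest-confidence boxes per image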
ultralytics/models/rtdetr/val.py CHANGED
@@ -1,5 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+ from pathlib import Path
  from typing import Any, Dict, List, Tuple, Union

  import torch
@@ -186,45 +187,28 @@ class RTDETRValidator(DetectionValidator):

          return [{"bboxes": x[:, :4], "conf": x[:, 4], "cls": x[:, 5]} for x in outputs]

-     def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
+     def pred_to_json(self, predn: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> None:
          """
-         Prepare a batch for validation by applying necessary transformations.
+         Serialize YOLO predictions to COCO json format.

          Args:
-             si (int): Batch index.
-             batch (Dict[str, Any]): Batch data containing images and annotations.
-
-         Returns:
-             (Dict[str, Any]): Prepared batch with transformed annotations containing cls, bboxes,
-                 ori_shape, imgsz, and ratio_pad.
-         """
-         idx = batch["batch_idx"] == si
-         cls = batch["cls"][idx].squeeze(-1)
-         bbox = batch["bboxes"][idx]
-         ori_shape = batch["ori_shape"][si]
-         imgsz = batch["img"].shape[2:]
-         ratio_pad = batch["ratio_pad"][si]
-         if len(cls):
-             bbox = ops.xywh2xyxy(bbox)  # target boxes
-             bbox[..., [0, 2]] *= ori_shape[1]  # native-space pred
-             bbox[..., [1, 3]] *= ori_shape[0]  # native-space pred
-         return {"cls": cls, "bboxes": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
-
-     def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
+             predn (Dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys
+                 with bounding box coordinates, confidence scores, and class predictions.
+             pbatch (Dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
          """
-         Prepare predictions by scaling bounding boxes to original image dimensions.
-
-         Args:
-             pred (Dict[str, torch.Tensor]): Raw predictions containing 'cls', 'bboxes', and 'conf'.
-             pbatch (Dict[str, torch.Tensor]): Prepared batch information containing 'ori_shape' and other metadata.
-
-         Returns:
-             (Dict[str, torch.Tensor]): Predictions scaled to original image dimensions.
-         """
-         cls = pred["cls"]
-         if self.args.single_cls:
-             cls *= 0
-         bboxes = pred["bboxes"].clone()
-         bboxes[..., [0, 2]] *= pbatch["ori_shape"][1] / self.args.imgsz  # native-space pred
-         bboxes[..., [1, 3]] *= pbatch["ori_shape"][0] / self.args.imgsz  # native-space pred
-         return {"bboxes": bboxes, "conf": pred["conf"], "cls": cls}
+         stem = Path(pbatch["im_file"]).stem
+         image_id = int(stem) if stem.isnumeric() else stem
+         box = predn["bboxes"].clone()
+         box[..., [0, 2]] *= pbatch["ori_shape"][1] / self.args.imgsz  # native-space pred
+         box[..., [1, 3]] *= pbatch["ori_shape"][0] / self.args.imgsz  # native-space pred
+         box = ops.xyxy2xywh(box)  # xywh
+         box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+         for b, s, c in zip(box.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
+             self.jdict.append(
+                 {
+                     "image_id": image_id,
+                     "category_id": self.class_map[int(c)],
+                     "bbox": [round(x, 3) for x in b],
+                     "score": round(s, 5),
+                 }
+             )
ultralytics/models/yolo/classify/val.py CHANGED
@@ -83,7 +83,7 @@ class ClassificationValidator(BaseValidator):
          self.nc = len(model.names)
          self.pred = []
          self.targets = []
-         self.confusion_matrix = ConfusionMatrix(names=list(model.names.values()))
+         self.confusion_matrix = ConfusionMatrix(names=model.names)

      def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
          """Preprocess input batch by moving data to device and converting to appropriate dtype."""
ultralytics/models/yolo/detect/val.py CHANGED
@@ -97,8 +97,8 @@ class DetectionValidator(BaseValidator):
          self.end2end = getattr(model, "end2end", False)
          self.seen = 0
          self.jdict = []
-         self.metrics.names = self.names
-         self.confusion_matrix = ConfusionMatrix(names=list(model.names.values()))
+         self.metrics.names = model.names
+         self.confusion_matrix = ConfusionMatrix(names=model.names, save_matches=self.args.plots and self.args.visualize)

      def get_desc(self) -> str:
          """Return a formatted string summarizing class metrics of YOLO model."""
@@ -147,28 +147,28 @@ class DetectionValidator(BaseValidator):
          ratio_pad = batch["ratio_pad"][si]
          if len(cls):
              bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
-             ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad)  # native-space labels
-         return {"cls": cls, "bboxes": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
+         return {
+             "cls": cls,
+             "bboxes": bbox,
+             "ori_shape": ori_shape,
+             "imgsz": imgsz,
+             "ratio_pad": ratio_pad,
+             "im_file": batch["im_file"][si],
+         }

-     def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
+     def _prepare_pred(self, pred: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
          """
          Prepare predictions for evaluation against ground truth.

          Args:
              pred (Dict[str, torch.Tensor]): Post-processed predictions from the model.
-             pbatch (Dict[str, Any]): Prepared batch information.

          Returns:
              (Dict[str, torch.Tensor]): Prepared predictions in native space.
          """
-         cls = pred["cls"]
          if self.args.single_cls:
-             cls *= 0
-         # predn = pred.clone()
-         bboxes = ops.scale_boxes(
-             pbatch["imgsz"], pred["bboxes"].clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
-         )  # native-space pred
-         return {"bboxes": bboxes, "conf": pred["conf"], "cls": cls}
+             pred["cls"] *= 0
+         return pred

      def update_metrics(self, preds: List[Dict[str, torch.Tensor]], batch: Dict[str, Any]) -> None:
          """
@@ -181,7 +181,7 @@ class DetectionValidator(BaseValidator):
          for si, pred in enumerate(preds):
              self.seen += 1
              pbatch = self._prepare_batch(si, batch)
-             predn = self._prepare_pred(pred, pbatch)
+             predn = self._prepare_pred(pred)

              cls = pbatch["cls"].cpu().numpy()
              no_pred = len(predn["cls"]) == 0
@@ -197,19 +197,21 @@ class DetectionValidator(BaseValidator):
              # Evaluate
              if self.args.plots:
                  self.confusion_matrix.process_batch(predn, pbatch, conf=self.args.conf)
+                 if self.args.visualize:
+                     self.confusion_matrix.plot_matches(batch["img"][si], pbatch["im_file"], self.save_dir)

              if no_pred:
                  continue

              # Save
              if self.args.save_json:
-                 self.pred_to_json(predn, batch["im_file"][si])
+                 self.pred_to_json(predn, pbatch)
              if self.args.save_txt:
                  self.save_one_txt(
                      predn,
                      self.args.save_conf,
                      pbatch["ori_shape"],
-                     self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt",
+                     self.save_dir / "labels" / f"{Path(pbatch['im_file']).stem}.txt",
                  )

      def finalize_metrics(self) -> None:
@@ -360,18 +362,24 @@ class DetectionValidator(BaseValidator):
              boxes=torch.cat([predn["bboxes"], predn["conf"].unsqueeze(-1), predn["cls"].unsqueeze(-1)], dim=1),
          ).save_txt(file, save_conf=save_conf)

-     def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: str) -> None:
+     def pred_to_json(self, predn: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> None:
          """
          Serialize YOLO predictions to COCO json format.

          Args:
              predn (Dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys
                  with bounding box coordinates, confidence scores, and class predictions.
-             filename (str): Image filename.
+             pbatch (Dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
          """
-         stem = Path(filename).stem
+         stem = Path(pbatch["im_file"]).stem
          image_id = int(stem) if stem.isnumeric() else stem
-         box = ops.xyxy2xywh(predn["bboxes"])  # xywh
+         box = ops.scale_boxes(
+             pbatch["imgsz"],
+             predn["bboxes"].clone(),
+             pbatch["ori_shape"],
+             ratio_pad=pbatch["ratio_pad"],
+         )
+         box = ops.xyxy2xywh(box)  # xywh
          box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
          for b, s, c in zip(box.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
              self.jdict.append(
ultralytics/models/yolo/obb/val.py CHANGED
@@ -1,7 +1,7 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

  from pathlib import Path
- from typing import Any, Dict, List, Tuple, Union
+ from typing import Any, Dict, List, Tuple

  import numpy as np
  import torch
@@ -67,6 +67,7 @@ class OBBValidator(DetectionValidator):
          super().init_metrics(model)
          val = self.data.get(self.args.split, "")  # validation path
          self.is_dota = isinstance(val, str) and "DOTA" in val  # check if dataset is DOTA format
+         self.confusion_matrix.task = "obb"  # set confusion matrix task to 'obb'

      def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, torch.Tensor]) -> Dict[str, np.ndarray]:
          """
@@ -132,33 +133,14 @@ class OBBValidator(DetectionValidator):
          ratio_pad = batch["ratio_pad"][si]
          if len(cls):
              bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]])  # target boxes
-             ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True)  # native-space labels
-         return {"cls": cls, "bboxes": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
-
-     def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
-         """
-         Prepare predictions by scaling bounding boxes to original image dimensions.
-
-         This method takes prediction tensors containing bounding box coordinates and scales them from the model's
-         input dimensions to the original image dimensions using the provided batch information.
-
-         Args:
-             pred (Dict[str, torch.Tensor]): Prediction dictionary containing bounding box coordinates and other information.
-             pbatch (Dict[str, Any]): Dictionary containing batch information with keys:
-                 - imgsz (tuple): Model input image size.
-                 - ori_shape (tuple): Original image shape.
-                 - ratio_pad (tuple): Ratio and padding information for scaling.
-
-         Returns:
-             (Dict[str, torch.Tensor]): Scaled prediction dictionary with bounding boxes in original image dimensions.
-         """
-         cls = pred["cls"]
-         if self.args.single_cls:
-             cls *= 0
-         bboxes = ops.scale_boxes(
-             pbatch["imgsz"], pred["bboxes"].clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"], xywh=True
-         )  # native-space pred
-         return {"bboxes": bboxes, "conf": pred["conf"], "cls": cls}
+         return {
+             "cls": cls,
+             "bboxes": bbox,
+             "ori_shape": ori_shape,
+             "imgsz": imgsz,
+             "ratio_pad": ratio_pad,
+             "im_file": batch["im_file"][si],
+         }

      def plot_predictions(self, batch: Dict[str, Any], preds: List[torch.Tensor], ni: int) -> None:
          """
@@ -180,23 +162,26 @@ class OBBValidator(DetectionValidator):
              p["bboxes"][:, :4] = ops.xywh2xyxy(p["bboxes"][:, :4])  # convert to xyxy format for plotting
          super().plot_predictions(batch, preds, ni)  # plot bboxes

-     def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: Union[str, Path]) -> None:
+     def pred_to_json(self, predn: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> None:
          """
          Convert YOLO predictions to COCO JSON format with rotated bounding box information.

          Args:
              predn (Dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes', 'conf', and 'cls' keys
                  with bounding box coordinates, confidence scores, and class predictions.
-             filename (str | Path): Path to the image file for which predictions are being processed.
+             pbatch (Dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.

          Notes:
              This method processes rotated bounding box predictions and converts them to both rbox format
              (x, y, w, h, angle) and polygon format (x1, y1, x2, y2, x3, y3, x4, y4) before adding them
              to the JSON dictionary.
          """
-         stem = Path(filename).stem
+         stem = Path(pbatch["im_file"]).stem
          image_id = int(stem) if stem.isnumeric() else stem
          rbox = predn["bboxes"]
+         rbox = ops.scale_boxes(
+             pbatch["imgsz"], predn["bboxes"].clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"], xywh=True
+         )  # native-space pred
          poly = ops.xywhr2xyxyxyxy(rbox).view(-1, 8)
          for r, b, s, c in zip(rbox.tolist(), poly.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
              self.jdict.append(
ultralytics/models/yolo/pose/val.py CHANGED
@@ -167,34 +167,9 @@ class PoseValidator(DetectionValidator):
              kpts = kpts.clone()
              kpts[..., 0] *= w
              kpts[..., 1] *= h
-             kpts = ops.scale_coords(pbatch["imgsz"], kpts, pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"])
              pbatch["keypoints"] = kpts
          return pbatch

-     def _prepare_pred(self, pred: Dict[str, Any], pbatch: Dict[str, Any]) -> Dict[str, Any]:
-         """
-         Prepare and scale keypoints in predictions for pose processing.
-
-         This method extends the parent class's _prepare_pred method to handle keypoint scaling. It first calls
-         the parent method to get the basic prediction boxes, then extracts and scales the keypoint coordinates
-         to match the original image dimensions.
-
-         Args:
-             pred (Dict[str, torch.Tensor]): Post-processed predictions from the model.
-             pbatch (Dict[str, Any]): Processed batch dictionary containing image information including:
-                 - imgsz: Image size used for inference
-                 - ori_shape: Original image shape
-                 - ratio_pad: Ratio and padding information for coordinate scaling
-
-         Returns:
-             (Dict[str, Any]): Processed prediction dictionary with keypoints scaled to original image dimensions.
-         """
-         predn = super()._prepare_pred(pred, pbatch)
-         predn["keypoints"] = ops.scale_coords(
-             pbatch["imgsz"], pred.get("keypoints").clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
-         )
-         return predn
-
      def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, Any]) -> Dict[str, np.ndarray]:
          """
          Return correct prediction matrix by computing Intersection over Union (IoU) between detections and ground truth.
@@ -249,7 +224,7 @@ class PoseValidator(DetectionValidator):
              keypoints=predn["keypoints"],
          ).save_txt(file, save_conf=save_conf)

-     def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: str) -> None:
+     def pred_to_json(self, predn: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> None:
          """
          Convert YOLO predictions to COCO JSON format.

@@ -259,32 +234,22 @@ class PoseValidator(DetectionValidator):
          Args:
              predn (Dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes', 'conf', 'cls',
                  and 'keypoints' tensors.
-             filename (str): Path to the image file for which predictions are being processed.
+             pbatch (Dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.

          Notes:
              The method extracts the image ID from the filename stem (either as an integer if numeric, or as a string),
              converts bounding boxes from xyxy to xywh format, and adjusts coordinates from center to top-left corner
              before saving to the JSON dictionary.
          """
-         stem = Path(filename).stem
-         image_id = int(stem) if stem.isnumeric() else stem
-         box = ops.xyxy2xywh(predn["bboxes"])  # xywh
-         box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-         for b, s, c, k in zip(
-             box.tolist(),
-             predn["conf"].tolist(),
-             predn["cls"].tolist(),
-             predn["keypoints"].flatten(1, 2).tolist(),
-         ):
-             self.jdict.append(
-                 {
-                     "image_id": image_id,
-                     "category_id": self.class_map[int(c)],
-                     "bbox": [round(x, 3) for x in b],
-                     "keypoints": k,
-                     "score": round(s, 5),
-                 }
-             )
+         super().pred_to_json(predn, pbatch)
+         kpts = ops.scale_coords(
+             pbatch["imgsz"],
+             predn["keypoints"].clone(),
+             pbatch["ori_shape"],
+             ratio_pad=pbatch["ratio_pad"],
+         )
+         for i, k in enumerate(kpts.flatten(1, 2).tolist()):
+             self.jdict[-len(kpts) + i]["keypoints"] = k  # keypoints

      def eval_json(self, stats: Dict[str, Any]) -> Dict[str, Any]:
          """Evaluate object detection model using COCO JSON format."""
ultralytics/models/yolo/segment/val.py CHANGED
@@ -135,29 +135,6 @@ class SegmentationValidator(DetectionValidator):
              prepared_batch["masks"] = batch["masks"][midx]
          return prepared_batch

-     def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
-         """
-         Prepare predictions for evaluation by processing bounding boxes and masks.
-
-         Args:
-             pred (Dict[str, torch.Tensor]): Post-processed predictions from the model.
-             pbatch (Dict[str, Any]): Prepared batch information.
-
-         Returns:
-             Dict[str, torch.Tensor]: Processed bounding box predictions.
-         """
-         predn = super()._prepare_pred(pred, pbatch)
-         predn["masks"] = pred["masks"]
-         if self.args.save_json and len(predn["masks"]):
-             coco_masks = torch.as_tensor(pred["masks"], dtype=torch.uint8)
-             coco_masks = ops.scale_image(
-                 coco_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
-                 pbatch["ori_shape"],
-                 ratio_pad=pbatch["ratio_pad"],
-             )
-             predn["coco_masks"] = coco_masks
-         return predn
-
      def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, Any]) -> Dict[str, np.ndarray]:
          """
          Compute correct prediction matrix for a batch based on bounding boxes and optional masks.
@@ -233,13 +210,13 @@ class SegmentationValidator(DetectionValidator):
              masks=torch.as_tensor(predn["masks"], dtype=torch.uint8),
          ).save_txt(file, save_conf=save_conf)

-     def pred_to_json(self, predn: torch.Tensor, filename: str) -> None:
+     def pred_to_json(self, predn: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> None:
          """
          Save one JSON result for COCO evaluation.

          Args:
              predn (Dict[str, torch.Tensor]): Predictions containing bboxes, masks, confidence scores, and classes.
-             filename (str): Image filename.
+             pbatch (Dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.

          Examples:
              >>> result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
@@ -252,23 +229,18 @@ class SegmentationValidator(DetectionValidator):
                  rle["counts"] = rle["counts"].decode("utf-8")
              return rle

-         stem = Path(filename).stem
-         image_id = int(stem) if stem.isnumeric() else stem
-         box = ops.xyxy2xywh(predn["bboxes"])  # xywh
-         box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-         pred_masks = np.transpose(predn["coco_masks"], (2, 0, 1))
+         coco_masks = torch.as_tensor(predn["masks"], dtype=torch.uint8)
+         coco_masks = ops.scale_image(
+             coco_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
+             pbatch["ori_shape"],
+             ratio_pad=pbatch["ratio_pad"],
+         )
+         pred_masks = np.transpose(coco_masks, (2, 0, 1))
          with ThreadPool(NUM_THREADS) as pool:
              rles = pool.map(single_encode, pred_masks)
-         for i, (b, s, c) in enumerate(zip(box.tolist(), predn["conf"].tolist(), predn["cls"].tolist())):
-             self.jdict.append(
-                 {
-                     "image_id": image_id,
-                     "category_id": self.class_map[int(c)],
-                     "bbox": [round(x, 3) for x in b],
-                     "score": round(s, 5),
-                     "segmentation": rles[i],
-                 }
-             )
+         super().pred_to_json(predn, pbatch)
+         for i, r in enumerate(rles):
+             self.jdict[-len(rles) + i]["segmentation"] = r  # segmentation

      def eval_json(self, stats: Dict[str, Any]) -> Dict[str, Any]:
          """Return COCO-style instance segmentation evaluation metrics."""
ultralytics/solutions/region_counter.py CHANGED
@@ -118,12 +118,13 @@ class RegionCounter(BaseSolution):
              x1, y1, x2, y2 = map(int, region["polygon"].bounds)
              pts = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
              annotator.draw_region(pts, region["region_color"], self.line_width * 2)
-             annotator.text_label(
+             annotator.adaptive_label(
                  [x1, y1, x2, y2],
                  label=str(region["counts"]),
                  color=region["region_color"],
                  txt_color=region["text_color"],
                  margin=self.line_width * 4,
+                 shape="rect",
              )
              region["counts"] = 0  # Reset for next frame
          plot_im = annotator.result()
ultralytics/solutions/similarity_search.py CHANGED
@@ -8,7 +8,6 @@ import numpy as np
  from PIL import Image

  from ultralytics.data.utils import IMG_FORMATS
- from ultralytics.nn.text_model import build_text_model
  from ultralytics.utils import LOGGER
  from ultralytics.utils.checks import check_requirements
  from ultralytics.utils.torch_utils import select_device
@@ -48,6 +47,8 @@ class VisualAISearch:

      def __init__(self, **kwargs: Any) -> None:
          """Initialize the VisualAISearch class with FAISS index and CLIP model."""
+         from ultralytics.nn.text_model import build_text_model
+
          check_requirements("faiss-cpu")

          self.faiss = __import__("faiss")
ultralytics/solutions/solutions.py CHANGED
@@ -287,8 +287,7 @@ class SolutionAnnotator(Annotator):
          display_objects_labels: Annotate bounding boxes with object class labels.
          sweep_annotator: Visualize a vertical sweep line and optional label.
          visioneye: Map and connect object centroids to a visual "eye" point.
-         circle_label: Draw a circular label within a bounding box.
-         text_label: Draw a rectangular label within a bounding box.
+         adaptive_label: Draw a circular or rectangle background shape label in center of a bounding box.

      Examples:
          >>> annotator = SolutionAnnotator(image)
@@ -695,90 +694,58 @@ class SolutionAnnotator(Annotator):
          cv2.circle(self.im, center_bbox, self.tf * 2, color, -1)
          cv2.line(self.im, center_point, center_bbox, color, self.tf)

-     def circle_label(
+     def adaptive_label(
          self,
          box: Tuple[float, float, float, float],
          label: str = "",
          color: Tuple[int, int, int] = (128, 128, 128),
          txt_color: Tuple[int, int, int] = (255, 255, 255),
-         margin: int = 2,
+         shape: str = "rect",
+         margin: int = 5,
      ):
          """
-         Draw a label with a background circle centered within a given bounding box.
+         Draw a label with a background rectangle or circle centered within a given bounding box.

          Args:
              box (Tuple[float, float, float, float]): The bounding box coordinates (x1, y1, x2, y2).
              label (str): The text label to be displayed.
-             color (Tuple[int, int, int]): The background color of the circle (B, G, R).
+             color (Tuple[int, int, int]): The background color of the rectangle (B, G, R).
              txt_color (Tuple[int, int, int]): The color of the text (R, G, B).
-             margin (int): The margin between the text and the circle border.
+             shape (str): The shape of the label i.e "circle" or "rect"
+             margin (int): The margin between the text and the rectangle border.
          """
-         if len(label) > 3:
+         if shape == "circle" and len(label) > 3:
              LOGGER.warning(f"Length of label is {len(label)}, only first 3 letters will be used for circle annotation.")
              label = label[:3]

-         # Calculate the center of the box
-         x_center, y_center = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
-         # Get the text size
-         text_size = cv2.getTextSize(str(label), cv2.FONT_HERSHEY_SIMPLEX, self.sf - 0.15, self.tf)[0]
-         # Calculate the required radius to fit the text with the margin
-         required_radius = int(((text_size[0] ** 2 + text_size[1] ** 2) ** 0.5) / 2) + margin
-         # Draw the circle with the required radius
-         cv2.circle(self.im, (x_center, y_center), required_radius, color, -1)
-         # Calculate the position for the text
-         text_x = x_center - text_size[0] // 2
-         text_y = y_center + text_size[1] // 2
-         # Draw the text
-         cv2.putText(
-             self.im,
-             str(label),
-             (text_x, text_y),
-             cv2.FONT_HERSHEY_SIMPLEX,
-             self.sf - 0.15,
-             self.get_txt_color(color, txt_color),
-             self.tf,
-             lineType=cv2.LINE_AA,
-         )
+         x_center, y_center = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)  # Calculate center of the bbox
+         text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf - 0.15, self.tf)[0]  # Get size of the text
+         text_x, text_y = x_center - text_size[0] // 2, y_center + text_size[1] // 2  # Calculate top-left corner of text

-     def text_label(
-         self,
-         box: Tuple[float, float, float, float],
-         label: str = "",
-         color: Tuple[int, int, int] = (128, 128, 128),
-         txt_color: Tuple[int, int, int] = (255, 255, 255),
-         margin: int = 5,
-     ):
-         """
-         Draw a label with a background rectangle centered within a given bounding box.
+         if shape == "circle":
+             cv2.circle(
+                 self.im,
+                 (x_center, y_center),
+                 int(((text_size[0] ** 2 + text_size[1] ** 2) ** 0.5) / 2) + margin,  # Calculate the radius
+                 color,
+                 -1,
+             )
+         else:
+             cv2.rectangle(
+                 self.im,
+                 (text_x - margin, text_y - text_size[1] - margin),  # Calculate coordinates of the rectangle
+                 (text_x + text_size[0] + margin, text_y + margin),  # Calculate coordinates of the rectangle
+                 color,
+                 -1,
+             )

-         Args:
-             box (Tuple[float, float, float, float]): The bounding box coordinates (x1, y1, x2, y2).
-             label (str): The text label to be displayed.
-             color (Tuple[int, int, int]): The background color of the rectangle (B, G, R).
-             txt_color (Tuple[int, int, int]): The color of the text (R, G, B).
-             margin (int): The margin between the text and the rectangle border.
-         """
-         # Calculate the center of the bounding box
-         x_center, y_center = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
-         # Get the size of the text
-         text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf - 0.1, self.tf)[0]
-         # Calculate the top-left corner of the text (to center it)
-         text_x = x_center - text_size[0] // 2
-         text_y = y_center + text_size[1] // 2
-         # Calculate the coordinates of the background rectangle
-         rect_x1 = text_x - margin
-         rect_y1 = text_y - text_size[1] - margin
-         rect_x2 = text_x + text_size[0] + margin
-         rect_y2 = text_y + margin
-         # Draw the background rectangle
-         cv2.rectangle(self.im, (rect_x1, rect_y1), (rect_x2, rect_y2), color, -1)

          # Draw the text on top of the rectangle
          cv2.putText(
              self.im,
              label,
-             (text_x, text_y),
+             (text_x, text_y),  # Calculate top-left corner of the text
              cv2.FONT_HERSHEY_SIMPLEX,
-             self.sf - 0.1,
+             self.sf - 0.15,
              self.get_txt_color(color, txt_color),
              self.tf,
              lineType=cv2.LINE_AA,
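Note (not part of the diff): `adaptive_label()` merges the old `circle_label()`/`text_label()` into a single call selected by `shape`. A minimal sketch on a blank image, with made-up boxes and colors:

import cv2
import numpy as np

from ultralytics.solutions.solutions import SolutionAnnotator

annotator = SolutionAnnotator(np.zeros((480, 640, 3), dtype=np.uint8))
annotator.adaptive_label([100, 100, 300, 250], label="17", color=(104, 31, 17), shape="rect")
annotator.adaptive_label([350, 100, 550, 250], label="3", color=(0, 120, 255), shape="circle", margin=4)
cv2.imwrite("adaptive_label_demo.jpg", annotator.result())  # write the annotated image to disk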
ultralytics/solutions/streamlit_inference.py CHANGED
@@ -1,6 +1,7 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

  import io
+ import os
  from typing import Any, List

  import cv2
@@ -64,6 +65,7 @@ class Inference:

          self.st = st  # Reference to the Streamlit module
          self.source = None  # Video source selection (webcam or video file)
+         self.img_file_names = []  # List of image file names
          self.enable_trk = False  # Flag to toggle object tracking
          self.conf = 0.25  # Confidence threshold for detection
          self.iou = 0.45  # Intersection-over-Union (IoU) threshold for non-maximum suppression
@@ -85,13 +87,13 @@ class Inference:
          menu_style_cfg = """<style>MainMenu {visibility: hidden;}</style>"""  # Hide main menu style

          # Main title of streamlit application
-         main_title_cfg = """<div><h1 style="color:#FF64DA; text-align:center; font-size:40px; margin-top:-50px;
+         main_title_cfg = """<div><h1 style="color:#111F68; text-align:center; font-size:40px; margin-top:-50px;
          font-family: 'Archivo', sans-serif; margin-bottom:20px;">Ultralytics YOLO Streamlit Application</h1></div>"""

          # Subtitle of streamlit application
-         sub_title_cfg = """<div><h4 style="color:#042AFF; text-align:center; font-family: 'Archivo', sans-serif;
-         margin-top:-15px; margin-bottom:50px;">Experience real-time object detection on your webcam with the power
-         of Ultralytics YOLO! 🚀</h4></div>"""
+         sub_title_cfg = """<div><h5 style="color:#042AFF; text-align:center; font-family: 'Archivo', sans-serif;
+         margin-top:-15px; margin-bottom:50px;">Experience real-time object detection on your webcam, videos, and images
+         with the power of Ultralytics YOLO! 🚀</h5></div>"""

          # Set html page configuration and append custom HTML
          self.st.set_page_config(page_title="Ultralytics Streamlit App", layout="wide")
@@ -107,24 +109,28 @@ class Inference:

          self.st.sidebar.title("User Configuration")  # Add elements to vertical setting menu
          self.source = self.st.sidebar.selectbox(
-             "Video",
-             ("webcam", "video"),
+             "Source",
+             ("webcam", "video", "image"),
          )  # Add source selection dropdown
-         self.enable_trk = self.st.sidebar.radio("Enable Tracking", ("Yes", "No")) == "Yes"  # Enable object tracking
+         if self.source in ["webcam", "video"]:
+             self.enable_trk = self.st.sidebar.radio("Enable Tracking", ("Yes", "No")) == "Yes"  # Enable object tracking
          self.conf = float(
              self.st.sidebar.slider("Confidence Threshold", 0.0, 1.0, self.conf, 0.01)
          )  # Slider for confidence
          self.iou = float(self.st.sidebar.slider("IoU Threshold", 0.0, 1.0, self.iou, 0.01))  # Slider for NMS threshold

-         col1, col2 = self.st.columns(2)  # Create two columns for displaying frames
-         self.org_frame = col1.empty()  # Container for original frame
-         self.ann_frame = col2.empty()  # Container for annotated frame
+         if self.source != "image":  # Only create columns for video/webcam
+             col1, col2 = self.st.columns(2)  # Create two columns for displaying frames
+             self.org_frame = col1.empty()  # Container for original frame
+             self.ann_frame = col2.empty()  # Container for annotated frame

      def source_upload(self) -> None:
          """Handle video file uploads through the Streamlit interface."""
+         from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS  # scope import
+
          self.vid_file_name = ""
          if self.source == "video":
-             vid_file = self.st.sidebar.file_uploader("Upload Video File", type=["mp4", "mov", "avi", "mkv"])
+             vid_file = self.st.sidebar.file_uploader("Upload Video File", type=VID_FORMATS)
              if vid_file is not None:
                  g = io.BytesIO(vid_file.read())  # BytesIO Object
                  with open("ultralytics.mp4", "wb") as out:  # Open temporary file as bytes
@@ -132,6 +138,15 @@ class Inference:
              self.vid_file_name = "ultralytics.mp4"
          elif self.source == "webcam":
              self.vid_file_name = 0  # Use webcam index 0
+         elif self.source == "image":
+             import tempfile  # scope import
+
+             imgfiles = self.st.sidebar.file_uploader("Upload Image Files", type=IMG_FORMATS, accept_multiple_files=True)
+             if imgfiles:
+                 for imgfile in imgfiles:  # Save each uploaded image to a temporary file
+                     with tempfile.NamedTemporaryFile(delete=False, suffix=f".{imgfile.name.split('.')[-1]}") as tf:
+                         tf.write(imgfile.read())
+                     self.img_file_names.append({"path": tf.name, "name": imgfile.name})

      def configure(self) -> None:
          """Configure the model and load selected classes for inference."""
@@ -161,6 +176,27 @@ class Inference:
          if not isinstance(self.selected_ind, list):  # Ensure selected_options is a list
              self.selected_ind = list(self.selected_ind)

+     def image_inference(self) -> None:
+         """Perform inference on uploaded images."""
+         for idx, img_info in enumerate(self.img_file_names):
+             img_path = img_info["path"]
+             image = cv2.imread(img_path)  # Load and display the original image
+             if image is not None:
+                 self.st.markdown(f"#### Processed: {img_info['name']}")
+                 col1, col2 = self.st.columns(2)
+                 with col1:
+                     self.st.image(image, channels="BGR", caption="Original Image")
+                 results = self.model(image, conf=self.conf, iou=self.iou, classes=self.selected_ind)
+                 annotated_image = results[0].plot()
+                 with col2:
+                     self.st.image(annotated_image, channels="BGR", caption="Predicted Image")
+                 try:  # Clean up temporary file
+                     os.unlink(img_path)
+                 except FileNotFoundError:
+                     pass  # File doesn't exist, ignore
+             else:
+                 self.st.error("Could not load the uploaded image.")
+
      def inference(self) -> None:
          """Perform real-time object detection inference on video or webcam feed."""
          self.web_ui()  # Initialize the web interface
@@ -169,7 +205,14 @@ class Inference:
          self.configure()  # Configure the app

          if self.st.sidebar.button("Start"):
-             stop_button = self.st.button("Stop")  # Button to stop the inference
+             if self.source == "image":
+                 if self.img_file_names:
+                     self.image_inference()
+                 else:
+                     self.st.info("Please upload an image file to perform inference.")
+                 return
+
+             stop_button = self.st.sidebar.button("Stop")  # Button to stop the inference
              cap = cv2.VideoCapture(self.vid_file_name)  # Capture the video
              if not cap.isOpened():
                  self.st.error("Could not open webcam or video source.")
@@ -195,8 +238,8 @@ class Inference:
                      cap.release()  # Release the capture
                      self.st.stop()  # Stop streamlit app

-                 self.org_frame.image(frame, channels="BGR")  # Display original frame
-                 self.ann_frame.image(annotated_frame, channels="BGR")  # Display processed frame
+                 self.org_frame.image(frame, channels="BGR", caption="Original Frame")  # Display original frame
+                 self.ann_frame.image(annotated_frame, channels="BGR", caption="Predicted Frame")  # Display processed

              cap.release()  # Release the capture
              cv2.destroyAllWindows()  # Destroy all OpenCV windows
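Note (not part of the diff): a hedged sketch of driving the updated app, which now offers webcam, video, and image sources in the sidebar; "yolo11n.pt" is an assumed model name, and the script is expected to be launched with `streamlit run`:

from ultralytics import solutions

inf = solutions.Inference(model="yolo11n.pt")  # assumed weights
inf.inference()  # builds the sidebar (Source: webcam / video / image) and runs the selected mode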
ultralytics/utils/metrics.py CHANGED
@@ -3,6 +3,7 @@

  import math
  import warnings
+ from collections import defaultdict
  from pathlib import Path
  from typing import Any, Dict, List, Tuple, Union

@@ -318,35 +319,68 @@ class ConfusionMatrix(DataExportMixin):
          matrix (np.ndarray): The confusion matrix, with dimensions depending on the task.
          nc (int): The number of category.
          names (List[str]): The names of the classes, used as labels on the plot.
+         matches (dict): Contains the indices of ground truths and predictions categorized into TP, FP and FN.
      """

-     def __init__(self, names: List[str] = [], task: str = "detect"):
+     def __init__(self, names: Dict[int, str] = [], task: str = "detect", save_matches: bool = False):
          """
          Initialize a ConfusionMatrix instance.

          Args:
-             names (List[str], optional): Names of classes, used as labels on the plot.
+             names (Dict[int, str], optional): Names of classes, used as labels on the plot.
              task (str, optional): Type of task, either 'detect' or 'classify'.
+             save_matches (bool, optional): Save the indices of GTs, TPs, FPs, FNs for visualization.
          """
          self.task = task
          self.nc = len(names)  # number of classes
          self.matrix = np.zeros((self.nc, self.nc)) if self.task == "classify" else np.zeros((self.nc + 1, self.nc + 1))
          self.names = names  # name of classes
+         self.matches = {} if save_matches else None

-     def process_cls_preds(self, preds, targets):
+     def _append_matches(self, mtype: str, batch: Dict[str, Any], idx: int) -> None:
+         """
+         Append the matches to TP, FP, FN or GT list for the last batch.
+
+         This method updates the matches dictionary by appending specific batch data
+         to the appropriate match type (True Positive, False Positive, or False Negative).
+
+         Args:
+             mtype (str): Match type identifier ('TP', 'FP', 'FN' or 'GT').
+             batch (Dict[str, Any]): Batch data containing detection results with keys
+                 like 'bboxes', 'cls', 'conf', 'keypoints', 'masks'.
+             idx (int): Index of the specific detection to append from the batch.
+
+         Note:
+             For masks, handles both overlap and non-overlap cases. When masks.max() > 1.0,
+             it indicates overlap_mask=True with shape (1, H, W), otherwise uses direct indexing.
+         """
+         if self.matches is None:
+             return
+         for k, v in batch.items():
+             if k in {"bboxes", "cls", "conf", "keypoints"}:
+                 self.matches[mtype][k] += v[[idx]]
+             elif k == "masks":
+                 # NOTE: masks.max() > 1.0 means overlap_mask=True with (1, H, W) shape
+                 self.matches[mtype][k] += [v[0] == idx + 1] if v.max() > 1.0 else [v[idx]]
+
+     def process_cls_preds(self, preds: List[torch.Tensor], targets: List[torch.Tensor]) -> None:
          """
          Update confusion matrix for classification task.

          Args:
-             preds (Array[N, min(nc,5)]): Predicted class labels.
-             targets (Array[N, 1]): Ground truth class labels.
+             preds (List[N, min(nc,5)]): Predicted class labels.
+             targets (List[N, 1]): Ground truth class labels.
          """
          preds, targets = torch.cat(preds)[:, 0], torch.cat(targets)
          for p, t in zip(preds.cpu().numpy(), targets.cpu().numpy()):
              self.matrix[p][t] += 1

      def process_batch(
-         self, detections: Dict[str, torch.Tensor], batch: Dict[str, Any], conf: float = 0.25, iou_thres: float = 0.45
+         self,
+         detections: Dict[str, torch.Tensor],
+         batch: Dict[str, Any],
+         conf: float = 0.25,
+         iou_thres: float = 0.45,
      ) -> None:
          """
          Update confusion matrix for object detection task.
@@ -361,23 +395,29 @@ class ConfusionMatrix(DataExportMixin):
              iou_thres (float, optional): IoU threshold for matching detections to ground truth.
          """
          gt_cls, gt_bboxes = batch["cls"], batch["bboxes"]
+         if self.matches is not None:  # only if visualization is enabled
+             self.matches = {k: defaultdict(list) for k in {"TP", "FP", "FN", "GT"}}
+             for i in range(len(gt_cls)):
+                 self._append_matches("GT", batch, i)  # store GT
          is_obb = gt_bboxes.shape[1] == 5  # check if boxes contains angle for OBB
          conf = 0.25 if conf in {None, 0.01 if is_obb else 0.001} else conf  # apply 0.25 if default val conf is passed
          no_pred = len(detections["cls"]) == 0
          if gt_cls.shape[0] == 0:  # Check if labels is empty
              if not no_pred:
-                 detections = {k: detections[k][detections["conf"] > conf] for k in {"cls", "bboxes"}}
+                 detections = {k: detections[k][detections["conf"] > conf] for k in detections.keys()}
                  detection_classes = detections["cls"].int().tolist()
-                 for dc in detection_classes:
-                     self.matrix[dc, self.nc] += 1  # false positives
+                 for i, dc in enumerate(detection_classes):
+                     self.matrix[dc, self.nc] += 1  # FP
+                     self._append_matches("FP", detections, i)
              return
          if no_pred:
              gt_classes = gt_cls.int().tolist()
-             for gc in gt_classes:
-                 self.matrix[self.nc, gc] += 1  # background FN
+             for i, gc in enumerate(gt_classes):
+                 self.matrix[self.nc, gc] += 1  # FN
+                 self._append_matches("FN", batch, i)
              return

-         detections = {k: detections[k][detections["conf"] > conf] for k in {"cls", "bboxes"}}
+         detections = {k: detections[k][detections["conf"] > conf] for k in detections.keys()}
          gt_classes = gt_cls.int().tolist()
          detection_classes = detections["cls"].int().tolist()
          bboxes = detections["bboxes"]
@@ -399,13 +439,21 @@ class ConfusionMatrix(DataExportMixin):
          for i, gc in enumerate(gt_classes):
              j = m0 == i
              if n and sum(j) == 1:
-                 self.matrix[detection_classes[m1[j].item()], gc] += 1  # correct
+                 dc = detection_classes[m1[j].item()]
+                 self.matrix[dc, gc] += 1  # TP if class is correct else both an FP and an FN
+                 if dc == gc:
+                     self._append_matches("TP", detections, m1[j].item())
+                 else:
+                     self._append_matches("FP", detections, m1[j].item())
+                     self._append_matches("FN", batch, i)
              else:
-                 self.matrix[self.nc, gc] += 1  # true background
+                 self.matrix[self.nc, gc] += 1  # FN
+                 self._append_matches("FN", batch, i)

          for i, dc in enumerate(detection_classes):
              if not any(m1 == i):
-                 self.matrix[dc, self.nc] += 1  # predicted background
+                 self.matrix[dc, self.nc] += 1  # FP
+                 self._append_matches("FP", detections, i)

      def matrix(self):
          """Return the confusion matrix."""
@@ -424,6 +472,44 @@ class ConfusionMatrix(DataExportMixin):
          # fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
          return (tp, fp) if self.task == "classify" else (tp[:-1], fp[:-1])  # remove background class if task=detect

+     def plot_matches(self, img: torch.Tensor, im_file: str, save_dir: Path) -> None:
+         """
+         Plot grid of GT, TP, FP, FN for each image.
+
+         Args:
+             img (torch.Tensor): Image to plot onto.
+             im_file (str): Image filename to save visualizations.
+             save_dir (Path): Location to save the visualizations to.
+         """
+         if not self.matches:
+             return
+         from .ops import xyxy2xywh
+         from .plotting import plot_images
+
+         # Create batch of 4 (GT, TP, FP, FN)
+         labels = defaultdict(list)
+         for i, mtype in enumerate(["GT", "FP", "TP", "FN"]):
+             mbatch = self.matches[mtype]
+             if "conf" not in mbatch:
+                 mbatch["conf"] = torch.tensor([1.0] * len(mbatch["bboxes"]), device=img.device)
+             mbatch["batch_idx"] = torch.ones(len(mbatch["bboxes"]), device=img.device) * i
+             for k in mbatch.keys():
+                 labels[k] += mbatch[k]
+
+         labels = {k: torch.stack(v, 0) if len(v) else v for k, v in labels.items()}
+         if not self.task == "obb" and len(labels["bboxes"]):
+             labels["bboxes"] = xyxy2xywh(labels["bboxes"])
+         (save_dir / "visualizations").mkdir(parents=True, exist_ok=True)
+         plot_images(
+             labels,
+             img.repeat(4, 1, 1, 1),
+             paths=["Ground Truth", "False Positives", "True Positives", "False Negatives"],
+             fname=save_dir / "visualizations" / Path(im_file).name,
+             names=self.names,
+             max_subplots=4,
+             conf_thres=0.001,
+         )
+
      @TryExcept(msg="ConfusionMatrix plot failure")
      @plt_settings()
      def plot(self, normalize: bool = True, save_dir: str = "", on_plot=None):
@@ -441,7 +527,7 @@ class ConfusionMatrix(DataExportMixin):
          array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

          fig, ax = plt.subplots(1, 1, figsize=(12, 9))
-         names, n = self.names, self.nc
+         names, n = list(self.names.values()), self.nc
          if self.nc >= 100:  # downsample for large class count
              k = max(2, self.nc // 60)  # step size for downsampling, always > 1
              keep_idx = slice(None, None, k)  # create slice instead of array
@@ -522,7 +608,7 @@ class ConfusionMatrix(DataExportMixin):
          """
          import re

-         names = self.names if self.task == "classify" else self.names + ["background"]
+         names = list(self.names.values()) if self.task == "classify" else list(self.names.values()) + ["background"]
          clean_names, seen = [], set()
          for name in names:
              clean_name = re.sub(r"[^a-zA-Z0-9_]", "_", name)
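Note (not part of the diff): a minimal sketch of the new `save_matches`/`matches` flow based on the signatures above; the class names and boxes are made up, and `names` is now a dict matching `model.names`:

import torch

from ultralytics.utils.metrics import ConfusionMatrix

names = {0: "person", 1: "car"}  # dict of class index -> name
cm = ConfusionMatrix(names=names, task="detect", save_matches=True)
preds = {"bboxes": torch.tensor([[10.0, 10.0, 50.0, 50.0]]), "conf": torch.tensor([0.9]), "cls": torch.tensor([0.0])}
gts = {"bboxes": torch.tensor([[12.0, 11.0, 48.0, 52.0]]), "cls": torch.tensor([0.0])}
cm.process_batch(preds, gts, conf=0.25, iou_thres=0.45)
# cm.matches now holds GT/TP/FP/FN entries that plot_matches(img, im_file, save_dir)
# renders as a four-panel grid under <save_dir>/visualizations.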
ultralytics/utils/plotting.py CHANGED
@@ -810,9 +810,9 @@ def plot_images(

          # Plot masks
          if len(masks):
-             if idx.shape[0] == masks.shape[0]:  # overlap_masks=False
+             if idx.shape[0] == masks.shape[0]:  # overlap_mask=False
                  image_masks = masks[idx]
-             else:  # overlap_masks=True
+             else:  # overlap_mask=True
                  image_masks = masks[[i]]  # (1, 640, 640)
              nl = idx.sum()
              index = np.arange(nl).reshape((nl, 1, 1)) + 1