PyPI - dataeval - Versions diffs - 0.86.8__py3-none-any.whl → 0.87.0__py3-none-any.whl - Mend

dataeval 0.86.8__py3-none-any.whl → 0.87.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

dataeval/__init__.py +1 -1
dataeval/_version.py +2 -2
dataeval/config.py +4 -19
dataeval/data/_metadata.py +56 -27
dataeval/data/_split.py +1 -1
dataeval/data/selections/_classbalance.py +4 -3
dataeval/data/selections/_classfilter.py +5 -5
dataeval/data/selections/_indices.py +2 -2
dataeval/data/selections/_prioritize.py +249 -29
dataeval/data/selections/_reverse.py +1 -1
dataeval/data/selections/_shuffle.py +2 -2
dataeval/detectors/ood/__init__.py +2 -1
dataeval/detectors/ood/base.py +38 -1
dataeval/detectors/ood/knn.py +95 -0
dataeval/metrics/bias/_balance.py +28 -21
dataeval/metrics/bias/_diversity.py +4 -4
dataeval/metrics/bias/_parity.py +2 -2
dataeval/metrics/stats/_hashstats.py +19 -2
dataeval/outputs/_workflows.py +20 -7
dataeval/typing.py +14 -2
dataeval/utils/__init__.py +2 -2
dataeval/utils/_bin.py +7 -6
dataeval/utils/data/__init__.py +2 -0
dataeval/utils/data/_dataset.py +13 -6
dataeval/utils/data/_validate.py +169 -0
dataeval/workflows/sufficiency.py +53 -10
{dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/METADATA +5 -17
{dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/RECORD +30 -39
dataeval/utils/datasets/__init__.py +0 -19
dataeval/utils/datasets/_antiuav.py +0 -189
dataeval/utils/datasets/_base.py +0 -262
dataeval/utils/datasets/_cifar10.py +0 -201
dataeval/utils/datasets/_fileio.py +0 -142
dataeval/utils/datasets/_milco.py +0 -197
dataeval/utils/datasets/_mixin.py +0 -54
dataeval/utils/datasets/_mnist.py +0 -202
dataeval/utils/datasets/_ships.py +0 -144
dataeval/utils/datasets/_types.py +0 -48
dataeval/utils/datasets/_voc.py +0 -583
{dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/WHEEL +0 -0
/dataeval-0.86.8.dist-info/licenses/LICENSE.txt → /dataeval-0.87.0.dist-info/licenses/LICENSE +0 -0

{dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataeval
-Version: 0.86.8
+Version: 0.87.0
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Project-URL: Homepage, https://dataeval.ai/
 Project-URL: Repository, https://github.com/aria-ml/dataeval/
@@ -8,7 +8,7 @@ Project-URL: Documentation, https://dataeval.readthedocs.io/
 Author-email: Andrew Weng <andrew.weng@ariacoustics.com>, Bill Peria <bill.peria@ariacoustics.com>, Jon Botts <jonathan.botts@ariacoustics.com>, Jonathan Christian <jonathan.christian@ariacoustics.com>, Justin McMillan <justin.mcmillan@ariacoustics.com>, Ryan Wood <ryan.wood@ariacoustics.com>, Scott Swan <scott.swan@ariacoustics.com>, Shaun Jullens <shaun.jullens@ariacoustics.com>
 Maintainer-email: ARiA <dataeval@ariacoustics.com>
 License-Expression: MIT
-License-File: LICENSE.txt
+License-File: LICENSE
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
@@ -20,15 +20,12 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering
 Requires-Python: <3.13,>=3.9
-Requires-Dist: defusedxml>=0.7.1
 Requires-Dist: fast-hdbscan==0.2.0
 Requires-Dist: lightgbm>=4
 Requires-Dist: numba>=0.59.1
 Requires-Dist: numpy>=1.24.2
 Requires-Dist: pandas>=2.0
-Requires-Dist: pillow>=10.3.0
 Requires-Dist: polars>=1.0.0
-Requires-Dist: requests>=2.32.3
 Requires-Dist: scikit-learn>=1.5.0
 Requires-Dist: scipy>=1.10
 Requires-Dist: torch>=2.2.0
@@ -123,14 +120,8 @@ micromamba create -f environment\environment.yaml -c pytorch
 ### **Installing from GitHub**
-To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
-download larger, binary source files.
-```bash
-sudo apt-get install git-lfs
-```
-Pull the source down and change to the DataEval project directory.
+To install DataEval from source locally on Ubuntu, pull the source down and
+change to the DataEval project directory.
 ```bash
 git clone https://github.com/aria-ml/dataeval.git
@@ -167,10 +158,7 @@ source .venv/bin/activate
 ## Contact Us
-If you have any questions, feel free to reach out to the people below:
-- **POC**: Scott Swan @scott.swan
-- **DPOC**: Andrew Weng @aweng
+If you have any questions, feel free to reach out to [us](mailto:dataeval@ariacoustics.com)!
 ## Acknowledgement

{dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/RECORD RENAMED Viewed

@@ -1,23 +1,23 @@
-dataeval/__init__.py,sha256=dEDltdHOnbk4-XAbQwJLOZtCbRLZsDMnptWRwbF2r54,1773
+dataeval/__init__.py,sha256=aFzX3SLx8wgc763RY772P41ZLqeHcUHRKW9XAN0KfHQ,1793
 dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
-dataeval/_version.py,sha256=IPUOExUy8nF4kYGtCPV5bg6_IYDRLVOKnFJcNllcO1M,513
-dataeval/config.py,sha256=g3Np0Q3J5Rzij6Gsz7tJh7eOxgwNPf6NsFYmAR8Atfs,4219
+dataeval/_version.py,sha256=17MAD7hlEBqgdl5YlmaM4PJXKdgvw_hAzlX52HDAwlU,513
+dataeval/config.py,sha256=lL73s_xa9pBxHHCnBKi59D_tl4vS7ig1rfWbIYkM_ac,3839
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/typing.py,sha256=W8rqFFkAqE5a5ar3MmB-O5gcMJqvoDKXC8Y0ggBqAKo,7216
+dataeval/typing.py,sha256=si4ZosMrHG-eYKSLCErAEI7Oo1giFRvWkaNK7EhRr1w,7513
 dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
 dataeval/data/_embeddings.py,sha256=PFjpdV9bfusCB4taTIYSzx1hP8nJb_KCkZTN8kMw-Hs,12885
 dataeval/data/_images.py,sha256=Rc_59CuU4zfN7Xm7an1XUx8ZghQg6a56VJWMZD9edRw,2654
-dataeval/data/_metadata.py,sha256=3aixstlgcAZXC0qNjwDlxjscC3IX1xjPt_FK0liRqoo,14423
+dataeval/data/_metadata.py,sha256=jr6W0aC_fKMYPwRjSHkXl02QTZ63QgqOnbvVOCoLLsg,15250
 dataeval/data/_selection.py,sha256=r06xeiyK8nTWPLyItkoPQRWZI1i6LATSue_cuEbCdc4,4463
-dataeval/data/_split.py,sha256=nQABR05vxil2Qx7-uX4Fm0_DWpibskBGDJOYj_b1u3I,16737
+dataeval/data/_split.py,sha256=0WOKwOxMBfzimo_VQUU0dbc4zQleA4OQFO4ho9W57hE,16732
 dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
-dataeval/data/selections/_classbalance.py,sha256=7v8ApoL3X8eCZ6fGDNTehE_bZ1loaP3TlhsJLaICVWg,1458
-dataeval/data/selections/_classfilter.py,sha256=bXfoYnWnAfUGsAQSlLufJeF2PfgRKekFHfBx8hv1r3w,4351
-dataeval/data/selections/_indices.py,sha256=RFsR9z10aM3N0gJSfKrukFpi-LkiQGXoOwXhmOQ5cpg,630
+dataeval/data/selections/_classbalance.py,sha256=AqExg-QnYBcfBNzS1Ygsz3Cgb2cqcgGXE0-cseD8_vA,1580
+dataeval/data/selections/_classfilter.py,sha256=oYTsqxwOV_mos4_BoNqhHOAKUWFMAXczjrJfEkusLIY,4422
+dataeval/data/selections/_indices.py,sha256=5TqKyMJmFRoNfJT5T9yIMx-p5VeJmSmCl2Qxzwi0pPE,628
 dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
-dataeval/data/selections/_prioritize.py,sha256=4dGUvgR7m6NGzzPU0N_bw0Xhujo8b72Wo8L4PGHbvBo,11233
-dataeval/data/selections/_reverse.py,sha256=b67kNC43A5KpQOic5gifjo9HpJ7FMh4LFCrfovPiJ-M,368
-dataeval/data/selections/_shuffle.py,sha256=TSCIZBgLAn09iMI_WIw0aqwSU4NZLAhHG7t8H_CuDUY,1195
+dataeval/data/selections/_prioritize.py,sha256=ss_GZ5MB2ohdNuB55C69TYNwV3PUSmk715gDJI6qfYA,20140
+dataeval/data/selections/_reverse.py,sha256=FqYlpPg-0Vz75kbEhGFrJlzIGELSmDZxPlBMY18a57I,365
+dataeval/data/selections/_shuffle.py,sha256=nZG1kxc7TfiznaPnDYqWTWnFBf2gWb8koCmEWnf8TWE,1242
 dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
 dataeval/detectors/drift/__init__.py,sha256=Jqv98oOVeC2tvHlNGxQ8RJ6De2q4SyS5lTpaYlb4ocM,756
 dataeval/detectors/drift/_base.py,sha256=6aNF1LzG3w1sNUrmSBbsvuN5IkQnoRikRacqobYge84,7592
@@ -36,9 +36,10 @@ dataeval/detectors/drift/_nml/_thresholds.py,sha256=WGdkLei9w_EvvsRHQzWdDyFVoZHI
 dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
 dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
 dataeval/detectors/linters/outliers.py,sha256=GaM9n8yPgBPzVOL_bxJCj0eCwobEEP4JHKHD9liRdlw,10130
-dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
+dataeval/detectors/ood/__init__.py,sha256=qDoDdQetJY1xZB43dNzcOIO_8NiEuEU0z1QNU4QkEXs,341
 dataeval/detectors/ood/ae.py,sha256=cJ7nq4iwTvW8uihHCUhGfTlKsAlthJ2tOhgSsB27cOY,2941
-dataeval/detectors/ood/base.py,sha256=hx-TPJnUTZ7KcBkm8SbN1RGhtJyQN0XLajDyNqiZrJo,3042
+dataeval/detectors/ood/base.py,sha256=fsjQ7wHRNJNPLGFw_6jvygkFFbv2G1ydwp8Zh1ncVlA,4374
+dataeval/detectors/ood/knn.py,sha256=Fu77geQFHPYNOn81VIXUJ3yC3t5Ylv0ZgvwMeA2JX6I,3782
 dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
 dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
 dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
@@ -46,11 +47,11 @@ dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,898
 dataeval/metadata/_utils.py,sha256=BcGoYVfA4AkAWpInY5txOc3QBpsGf6cnnUAsHOQTJAE,1210
 dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
 dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
-dataeval/metrics/bias/_balance.py,sha256=fREtoMLUZPOf_ivqNKwij6oPiKMTk02ECO5rWURf3KY,5541
+dataeval/metrics/bias/_balance.py,sha256=Yf0WNw9DxluFPNP-_wA1BcRRs-PRwocnHp0HScXS6t4,5719
 dataeval/metrics/bias/_completeness.py,sha256=2cvOXe7fhtxZGH_4QBuiCafIeamxFBarMiUBuEP7QGI,4596
 dataeval/metrics/bias/_coverage.py,sha256=v2x2hbOf2za9jFcSVSJUAoJ2BJfzzlCzt0mFIGtBL0A,3639
-dataeval/metrics/bias/_diversity.py,sha256=25udDKmel9IjeVT5nM4dOa1apda66QdRxBc922yuUvI,5830
-dataeval/metrics/bias/_parity.py,sha256=MKpqL4aoqEHkRl0vtGvVq9V3KBOtDFTtAo5I2GfIG4A,11443
+dataeval/metrics/bias/_diversity.py,sha256=Z7UQzKp9bsmB-hC3_sY6HIJUJRkLHb5cVEoU79cNDzc,5800
+dataeval/metrics/bias/_parity.py,sha256=ZIKc5OK6wQ4moleBJzGDfOPvyNzj03-KoHAGBZnO4pk,11433
 dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
 dataeval/metrics/estimators/_ber.py,sha256=7noeRyOJJYqrJ_jt90nRHtR2t2u5MIvTCmWt0_rd4EU,5370
 dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
@@ -60,7 +61,7 @@ dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_
 dataeval/metrics/stats/_base.py,sha256=R-hxoEPLreZcxYxBfyjbKfdoGMMTPiqJ5g2zSO-1UYM,12541
 dataeval/metrics/stats/_boxratiostats.py,sha256=ROZrlqgbowkGfCR5PJ5TL7Og40iMOdUqJnsCtaz_Xek,6450
 dataeval/metrics/stats/_dimensionstats.py,sha256=s2Juca8GG501nZd2SWL_YtXWkTfxUrUIAl53PO3_VeA,2876
-dataeval/metrics/stats/_hashstats.py,sha256=qa1CYRgOebkxqkALfffaPM-kJ074ZbyfpWbfOfuObSs,4758
+dataeval/metrics/stats/_hashstats.py,sha256=8C4EgzmBd3HMNsSATTriLVcvaWfoSasTLYizONqUDf4,5388
 dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
 dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
 dataeval/metrics/stats/_pixelstats.py,sha256=N9e7RXuzSHtlJtWU7l5IcTTIXe2kOmWiuj6lnJpZWq0,3312
@@ -75,31 +76,21 @@ dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI
 dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
 dataeval/outputs/_stats.py,sha256=_ItGjs9YaMHqjivkR1YBcSErD5ICfa_-iV9nq0l8bTM,17451
 dataeval/outputs/_utils.py,sha256=NfhYaGT2PZlhIs8ICKUsPWHZXjhWYDkEJqBDdqMeaOM,929
-dataeval/outputs/_workflows.py,sha256=K786mOgegxVi81diUA-qpbwGEkwa8YA7Fk4ttgjJeaY,10831
-dataeval/utils/__init__.py,sha256=hRvyUK7b3d6JBEV5u47rFcOHEcmDYqAvZQw_T5pDAWw,264
+dataeval/outputs/_workflows.py,sha256=_0U9VzCvqLIOlxqpngPhmPcUZMk57bF9qnnrkLUMoGY,11450
+dataeval/utils/__init__.py,sha256=sjelzMPaTImF6isiRcp8UGDE3tppEpWS5GoR8WKPZ1k,242
 dataeval/utils/_array.py,sha256=bIDbnv15_hNzFn2Uc4WV1qRyFzubQj2nNYsFUDIdwT0,6335
-dataeval/utils/_bin.py,sha256=w3eJ2Szw5eapqQ0cGv731rhNgLFGW0cCz2pXo9I6CuY,7296
+dataeval/utils/_bin.py,sha256=KpAnhzLBgh6PxMlM9dPPvuic0S1KNKwlcM1Vg-d4dGI,7364
 dataeval/utils/_clusterer.py,sha256=rUvEdyMwp95lffmt6xKMEwsjRXNoBS0n5mAS_HNOnck,5656
 dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8061
 dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
 dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
 dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
 dataeval/utils/_plot.py,sha256=1rnMkBRvTFLoTAHqXwF7c7GJ5_5iqlgarZKAzmYciLk,7225
-dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
-dataeval/utils/data/_dataset.py,sha256=tC_vqgWnmojAoAANo5BUVfEUYXl7GzOBSeYjR9olbDk,9506
+dataeval/utils/data/__init__.py,sha256=AD7o2rllEdq4BVvlxljYKRXrXNer39XdGNuaRMbvH4Y,414
+dataeval/utils/data/_dataset.py,sha256=901qUUcLg_HPg07N5uNabAZ00MGFCdOr7o6VbIEk2_I,9870
+dataeval/utils/data/_validate.py,sha256=sea8B7DLbbxTqTjAQ5Vhs5XNRZWE5wBBqDgcKNVQBRA,6923
 dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
 dataeval/utils/data/metadata.py,sha256=L1c2bCiMj0aR0QCoKkjwBujIftJDEMgW_3ZbgeS8WHo,14703
-dataeval/utils/datasets/__init__.py,sha256=pAXqHX76yAoBI8XB3m6zGuW-u3s3PCoIXG5GDzxH7Zs,572
-dataeval/utils/datasets/_antiuav.py,sha256=kA_ia1fYNcJiz9SpCvh-Z8iSc7iJrdogjBI3soyaa7A,8304
-dataeval/utils/datasets/_base.py,sha256=pyfpJda3ku469M3TFRsJn9S2oAiQODOGTlLcdcoEW9U,9031
-dataeval/utils/datasets/_cifar10.py,sha256=hZc_A30yKYBbv2kvVdEkZ9egyEe6XBUnmksoIAoJ-5Y,8265
-dataeval/utils/datasets/_fileio.py,sha256=LEoFVNdryRdi7mKpWw-9D8lA6XMa-Jaszd85bv93POo,5454
-dataeval/utils/datasets/_milco.py,sha256=iXf4C1I3Eg_3gHKUe4XPi21yFMBO51zxTIqAkGf9bYg,7869
-dataeval/utils/datasets/_mixin.py,sha256=S8iii-SoYUsFFYNXjw2thlZkpBvRLnZ4XI8wTqOKXgU,1729
-dataeval/utils/datasets/_mnist.py,sha256=uz46sE1Go3TgGjG6x2cXckSVQ0mSg2mhgk8BUvLWjb0,8149
-dataeval/utils/datasets/_ships.py,sha256=6U04HAoM3jgLl1qv-NnxjZeSsBipcqWJBMhBMn5iIUY,5115
-dataeval/utils/datasets/_types.py,sha256=iSKyHXRlGuomXs0FHK6md8lXLQrQQ4fxgVOwr4o81bo,1089
-dataeval/utils/datasets/_voc.py,sha256=pafY112O80isYkrdy7Quie9SBm_TmYhREuyl8SxtsR0,24586
 dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
 dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
 dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
@@ -107,8 +98,8 @@ dataeval/utils/torch/_internal.py,sha256=9rzlMeM8i3p-ctulh9WDQATMXtlp-Jk2pBX7NGC
 dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
 dataeval/utils/torch/trainer.py,sha256=DRyPScGdE4o5Xo3BmD9p2PGOApzi1E-QfsBRNZ5IXW8,5544
 dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
-dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
-dataeval-0.86.8.dist-info/METADATA,sha256=rCf58-uzgjsTNZkY3LOBMSi5fhQ2cdAtnrrDI_eYR_I,5925
-dataeval-0.86.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-dataeval-0.86.8.dist-info/licenses/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
-dataeval-0.86.8.dist-info/RECORD,,
+dataeval/workflows/sufficiency.py,sha256=UAPjowFrmM6IJJaOk9GkH3nfQTyDy2_zOY55o2g3G1M,10072
+dataeval-0.87.0.dist-info/METADATA,sha256=xhp28LbYD7FWbfhFfDgVzS_pi-E2TFgl-X33seCD2cE,5674
+dataeval-0.87.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+dataeval-0.87.0.dist-info/licenses/LICENSE,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+dataeval-0.87.0.dist-info/RECORD,,

dataeval/utils/datasets/__init__.py DELETED Viewed

@@ -1,19 +0,0 @@
-"""Provides access to common Computer Vision datasets."""
-from dataeval.utils.datasets._antiuav import AntiUAVDetection
-from dataeval.utils.datasets._cifar10 import CIFAR10
-from dataeval.utils.datasets._milco import MILCO
-from dataeval.utils.datasets._mnist import MNIST
-from dataeval.utils.datasets._ships import Ships
-from dataeval.utils.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
-__all__ = [
-    "MNIST",
-    "Ships",
-    "CIFAR10",
-    "AntiUAVDetection",
-    "MILCO",
-    "VOCDetection",
-    "VOCDetectionTorch",
-    "VOCSegmentation",
-]

dataeval/utils/datasets/_antiuav.py DELETED Viewed

@@ -1,189 +0,0 @@
-from __future__ import annotations
-__all__ = []
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal, Sequence
-from defusedxml.ElementTree import parse
-from numpy.typing import NDArray
-from dataeval.utils.datasets._base import BaseODDataset, DataLocation
-from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
-if TYPE_CHECKING:
-    from dataeval.typing import Transform
-class AntiUAVDetection(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
-    """
-    A UAV detection dataset focused on detecting UAVs in natural images against large variation in backgrounds.
-    The dataset comes from the paper
-    `Vision-based Anti-UAV Detection and Tracking <https://ieeexplore.ieee.org/document/9785379>`_
-    by Jie Zhao et. al. (2022).
-    The dataset is approximately 1.3 GB and can be found `here <https://github.com/wangdongdut/DUT-Anti-UAV>`_.
-    Images are collected against a variety of different backgrounds with a variety in the number and type of UAV.
-    Ground truth labels are provided for the train, validation and test set.
-    There are 35 different types of drones along with a variety in lighting conditions and weather conditions.
-    There are 10,000 images: 5200 images in the training set, 2200 images in the validation set,
-    and 2600 images in the test set.
-    The dataset only has a single UAV class with the focus being on identifying object location in the image.
-    Ground-truth bounding boxes are provided in (x0, y0, x1, y1) format.
-    The images come in a variety of sizes from 3744 x 5616 to 160 x 240.
-    Parameters
-    ----------
-    root : str or pathlib.Path
-        Root directory where the data should be downloaded to or
-        the ``antiuavdetection`` folder of the already downloaded data.
-    image_set: "train", "val", "test", or "base", default "train"
-        If "base", then the full dataset is selected (train, val and test).
-    transforms : Transform, Sequence[Transform] or None, default None
-        Transform(s) to apply to the data.
-    download : bool, default False
-        If True, downloads the dataset from the internet and puts it in root directory.
-        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
-    verbose : bool, default False
-        If True, outputs print statements.
-    Attributes
-    ----------
-    path : pathlib.Path
-        Location of the folder containing the data.
-    image_set : "train", "val", "test", or "base"
-        The selected image set from the dataset.
-    index2label : dict[int, str]
-        Dictionary which translates from class integers to the associated class strings.
-    label2index : dict[str, int]
-        Dictionary which translates from class strings to the associated class integers.
-    metadata : DatasetMetadata
-        Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
-    transforms : Sequence[Transform]
-        The transforms to be applied to the data.
-    size : int
-        The size of the dataset.
-    Note
-    ----
-    Data License: `Apache 2.0 <https://www.apache.org/licenses/LICENSE-2.0.txt>`_
-    """
-    # Need to run the sha256 on the files and then store that
-    _resources = [
-        DataLocation(
-            url="https://drive.usercontent.google.com/download?id=1RVsSGPUKTdmoyoPTBTWwroyulLek1eTj&export=download&authuser=0&confirm=t&uuid=6bca4f94-a242-4bc2-9663-fb03cd94ef2c&at=APcmpox0--NroQ_3bqeTFaJxP7Pw%3A1746552902927",
-            filename="train.zip",
-            md5=False,
-            checksum="14f927290556df60e23cedfa80dffc10dc21e4a3b6843e150cfc49644376eece",
-        ),
-        DataLocation(
-            url="https://drive.usercontent.google.com/download?id=1333uEQfGuqTKslRkkeLSCxylh6AQ0X6n&export=download&authuser=0&confirm=t&uuid=c2ad2f01-aca8-4a85-96bb-b8ef6e40feea&at=APcmpozY-8bhk3nZSFaYbE8rq1Fi%3A1746551543297",
-            filename="val.zip",
-            md5=False,
-            checksum="238be0ceb3e7c5be6711ee3247e49df2750d52f91f54f5366c68bebac112ebf8",
-        ),
-        DataLocation(
-            url="https://drive.usercontent.google.com/download?id=1L1zeW1EMDLlXHClSDcCjl3rs_A6sVai0&export=download&authuser=0&confirm=t&uuid=5a1d7650-d8cd-4461-8354-7daf7292f06c&at=APcmpozLQC1CuP-n5_UX2JnP53Zo%3A1746551676177",
-            filename="test.zip",
-            md5=False,
-            checksum="a671989a01cff98c684aeb084e59b86f4152c50499d86152eb970a9fc7fb1cbe",
-        ),
-    ]
-    index2label: dict[int, str] = {
-        0: "unknown",
-        1: "UAV",
-    }
-    def __init__(
-        self,
-        root: str | Path,
-        image_set: Literal["train", "val", "test", "base"] = "train",
-        transforms: Transform[NDArray[Any]] | Sequence[Transform[NDArray[Any]]] | None = None,
-        download: bool = False,
-        verbose: bool = False,
-    ) -> None:
-        super().__init__(
-            root,
-            image_set,
-            transforms,
-            download,
-            verbose,
-        )
-    def _load_data(self) -> tuple[list[str], list[str], dict[str, list[Any]]]:
-        filepaths: list[str] = []
-        targets: list[str] = []
-        datum_metadata: dict[str, list[Any]] = {}
-        # If base, load all resources
-        if self.image_set == "base":
-            metadata_list: list[dict[str, Any]] = []
-            for resource in self._resources:
-                self._resource = resource
-                resource_filepaths, resource_targets, resource_metadata = super()._load_data()
-                filepaths.extend(resource_filepaths)
-                targets.extend(resource_targets)
-                metadata_list.append(resource_metadata)
-            # Combine metadata
-            for data_dict in metadata_list:
-                for key, val in data_dict.items():
-                    str_key = str(key)  # Ensure key is string
-                    if str_key not in datum_metadata:
-                        datum_metadata[str_key] = []
-                    datum_metadata[str_key].extend(val)
-        else:
-            # Grab only the desired data
-            for resource in self._resources:
-                if self.image_set in resource.filename:
-                    self._resource = resource
-                    resource_filepaths, resource_targets, resource_metadata = super()._load_data()
-                    filepaths.extend(resource_filepaths)
-                    targets.extend(resource_targets)
-                    datum_metadata.update(resource_metadata)
-        return filepaths, targets, datum_metadata
-    def _load_data_inner(self) -> tuple[list[str], list[str], dict[str, Any]]:
-        resource_name = self._resource.filename[:-4]
-        base_dir = self.path / resource_name
-        data_folder = sorted((base_dir / "img").glob("*.jpg"))
-        if not data_folder:
-            raise FileNotFoundError
-        file_data = {"image_id": [f"{resource_name}_{entry.name}" for entry in data_folder]}
-        data = [str(entry) for entry in data_folder]
-        annotations = sorted(str(entry) for entry in (base_dir / "xml").glob("*.xml"))
-        return data, annotations, file_data
-    def _read_annotations(self, annotation: str) -> tuple[list[list[float]], list[int], dict[str, Any]]:
-        """Function for extracting the info for the label and boxes"""
-        boxes: list[list[float]] = []
-        labels = []
-        root = parse(annotation).getroot()
-        if root is None:
-            raise ValueError(f"Unable to parse {annotation}")
-        additional_meta: dict[str, Any] = {
-            "image_width": int(root.findtext("size/width", default="-1")),
-            "image_height": int(root.findtext("size/height", default="-1")),
-            "image_depth": int(root.findtext("size/depth", default="-1")),
-        }
-        for obj in root.findall("object"):
-            labels.append(1 if obj.findtext("name", default="") == "UAV" else 0)
-            boxes.append(
-                [
-                    float(obj.findtext("bndbox/xmin", default="0")),
-                    float(obj.findtext("bndbox/ymin", default="0")),
-                    float(obj.findtext("bndbox/xmax", default="0")),
-                    float(obj.findtext("bndbox/ymax", default="0")),
-                ]
-            )
-        return boxes, labels, additional_meta

dataeval/utils/datasets/_base.py DELETED Viewed

@@ -1,262 +0,0 @@
-from __future__ import annotations
-__all__ = []
-from abc import abstractmethod
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, Iterator, Literal, NamedTuple, Sequence, TypeVar
-import numpy as np
-from dataeval.utils.datasets._fileio import _ensure_exists
-from dataeval.utils.datasets._mixin import BaseDatasetMixin
-from dataeval.utils.datasets._types import (
-    AnnotatedDataset,
-    DatasetMetadata,
-    ImageClassificationDataset,
-    ObjectDetectionDataset,
-    ObjectDetectionTarget,
-    SegmentationDataset,
-    SegmentationTarget,
-)
-if TYPE_CHECKING:
-    from dataeval.typing import Array, Transform
-    _TArray = TypeVar("_TArray", bound=Array)
-else:
-    _TArray = TypeVar("_TArray")
-_TTarget = TypeVar("_TTarget")
-_TRawTarget = TypeVar("_TRawTarget", list[int], list[str])
-class DataLocation(NamedTuple):
-    url: str
-    filename: str
-    md5: bool
-    checksum: str
-class BaseDataset(AnnotatedDataset[tuple[_TArray, _TTarget, dict[str, Any]]], Generic[_TArray, _TTarget, _TRawTarget]):
-    """
-    Base class for internet downloaded datasets.
-    """
-    # Each subclass should override the attributes below.
-    # Each resource tuple must contain:
-    #    'url': str, the URL to download from
-    #    'filename': str, the name of the file once downloaded
-    #    'md5': boolean, True if it's the checksum value is md5
-    #    'checksum': str, the associated checksum for the downloaded file
-    _resources: list[DataLocation]
-    _resource_index: int = 0
-    index2label: dict[int, str]
-    def __init__(
-        self,
-        root: str | Path,
-        image_set: Literal["train", "val", "test", "operational", "base"] = "train",
-        transforms: Transform[_TArray] | Sequence[Transform[_TArray]] | None = None,
-        download: bool = False,
-        verbose: bool = False,
-    ) -> None:
-        self._root: Path = root.absolute() if isinstance(root, Path) else Path(root).absolute()
-        transforms = transforms if transforms is not None else []
-        self.transforms: Sequence[Transform[_TArray]] = transforms if isinstance(transforms, Sequence) else [transforms]
-        self.image_set = image_set
-        self._verbose = verbose
-        # Internal Attributes
-        self._download = download
-        self._filepaths: list[str]
-        self._targets: _TRawTarget
-        self._datum_metadata: dict[str, list[Any]]
-        self._resource: DataLocation = self._resources[self._resource_index]
-        self._label2index = {v: k for k, v in self.index2label.items()}
-        self.metadata: DatasetMetadata = DatasetMetadata(
-            id=self._unique_id(),
-            index2label=self.index2label,
-            split=self.image_set,
-        )
-        # Load the data
-        self.path: Path = self._get_dataset_dir()
-        self._filepaths, self._targets, self._datum_metadata = self._load_data()
-        self.size: int = len(self._filepaths)
-    def __str__(self) -> str:
-        nt = "\n    "
-        title = f"{self.__class__.__name__} Dataset"
-        sep = "-" * len(title)
-        attrs = [f"{k.capitalize()}: {v}" for k, v in self.__dict__.items() if not k.startswith("_")]
-        return f"{title}\n{sep}{nt}{nt.join(attrs)}"
-    @property
-    def label2index(self) -> dict[str, int]:
-        return self._label2index
-    def __iter__(self) -> Iterator[tuple[_TArray, _TTarget, dict[str, Any]]]:
-        for i in range(len(self)):
-            yield self[i]
-    def _get_dataset_dir(self) -> Path:
-        # Create a designated folder for this dataset (named after the class)
-        if self._root.stem.lower() == self.__class__.__name__.lower():
-            dataset_dir: Path = self._root
-        else:
-            dataset_dir: Path = self._root / self.__class__.__name__.lower()
-        if not dataset_dir.exists():
-            dataset_dir.mkdir(parents=True, exist_ok=True)
-        return dataset_dir
-    def _unique_id(self) -> str:
-        return f"{self.__class__.__name__}_{self.image_set}"
-    def _load_data(self) -> tuple[list[str], _TRawTarget, dict[str, Any]]:
-        """
-        Function to determine if data can be accessed or if it needs to be downloaded and/or extracted.
-        """
-        if self._verbose:
-            print(f"Determining if {self._resource.filename} needs to be downloaded.")
-        try:
-            result = self._load_data_inner()
-            if self._verbose:
-                print("No download needed, loaded data successfully.")
-        except FileNotFoundError:
-            _ensure_exists(*self._resource, self.path, self._root, self._download, self._verbose)
-            result = self._load_data_inner()
-        return result
-    @abstractmethod
-    def _load_data_inner(self) -> tuple[list[str], _TRawTarget, dict[str, Any]]: ...
-    def _transform(self, image: _TArray) -> _TArray:
-        """Function to transform the image prior to returning based on parameters passed in."""
-        for transform in self.transforms:
-            image = transform(image)
-        return image
-    def __len__(self) -> int:
-        return self.size
-class BaseICDataset(
-    BaseDataset[_TArray, _TArray, list[int]],
-    BaseDatasetMixin[_TArray],
-    ImageClassificationDataset[_TArray],
-):
-    """
-    Base class for image classification datasets.
-    """
-    def __getitem__(self, index: int) -> tuple[_TArray, _TArray, dict[str, Any]]:
-        """
-        Args
-        ----
-        index : int
-            Value of the desired data point
-        Returns
-        -------
-        tuple[TArray, TArray, dict[str, Any]]
-            Image, target, datum_metadata - where target is one-hot encoding of class.
-        """
-        # Get the associated label and score
-        label = self._targets[index]
-        score = self._one_hot_encode(label)
-        # Get the image
-        img = self._read_file(self._filepaths[index])
-        img = self._transform(img)
-        img_metadata = {key: val[index] for key, val in self._datum_metadata.items()}
-        return img, score, img_metadata
-class BaseODDataset(
-    BaseDataset[_TArray, ObjectDetectionTarget[_TArray], list[str]],
-    BaseDatasetMixin[_TArray],
-    ObjectDetectionDataset[_TArray],
-):
-    """
-    Base class for object detection datasets.
-    """
-    _bboxes_per_size: bool = False
-    def __getitem__(self, index: int) -> tuple[_TArray, ObjectDetectionTarget[_TArray], dict[str, Any]]:
-        """
-        Args
-        ----
-        index : int
-            Value of the desired data point
-        Returns
-        -------
-        tuple[TArray, ObjectDetectionTarget[TArray], dict[str, Any]]
-            Image, target, datum_metadata - target.boxes returns boxes in x0, y0, x1, y1 format
-        """
-        # Grab the bounding boxes and labels from the annotations
-        boxes, labels, additional_metadata = self._read_annotations(self._targets[index])
-        # Get the image
-        img = self._read_file(self._filepaths[index])
-        img_size = img.shape
-        img = self._transform(img)
-        # Adjust labels if necessary
-        if self._bboxes_per_size and boxes:
-            boxes = boxes * np.array([[img_size[1], img_size[2], img_size[1], img_size[2]]])
-        # Create the Object Detection Target
-        target = ObjectDetectionTarget(self._as_array(boxes), self._as_array(labels), self._one_hot_encode(labels))
-        img_metadata = {key: val[index] for key, val in self._datum_metadata.items()}
-        img_metadata = img_metadata | additional_metadata
-        return img, target, img_metadata
-    @abstractmethod
-    def _read_annotations(self, annotation: str) -> tuple[list[list[float]], list[int], dict[str, Any]]: ...
-class BaseSegDataset(
-    BaseDataset[_TArray, SegmentationTarget[_TArray], list[str]],
-    BaseDatasetMixin[_TArray],
-    SegmentationDataset[_TArray],
-):
-    """
-    Base class for segmentation datasets.
-    """
-    _masks: Sequence[str]
-    def __getitem__(self, index: int) -> tuple[_TArray, SegmentationTarget[_TArray], dict[str, Any]]:
-        """
-        Args
-        ----
-        index : int
-            Value of the desired data point
-        Returns
-        -------
-        tuple[TArray, SegmentationTarget[TArray], dict[str, Any]]
-            Image, target, datum_metadata - target.mask returns the ground truth mask
-        """
-        # Grab the labels from the annotations
-        _, labels, additional_metadata = self._read_annotations(self._targets[index])
-        # Grab the ground truth masks
-        mask = self._read_file(self._masks[index])
-        # Get the image
-        img = self._read_file(self._filepaths[index])
-        img = self._transform(img)
-        target = SegmentationTarget(mask, self._as_array(labels), self._one_hot_encode(labels))
-        img_metadata = {key: val[index] for key, val in self._datum_metadata.items()}
-        img_metadata = img_metadata | additional_metadata
-        return img, target, img_metadata
-    @abstractmethod
-    def _read_annotations(self, annotation: str) -> tuple[list[list[float]], list[int], dict[str, Any]]: ...

dataeval 0.86.8__py3-none-any.whl → 0.87.0__py3-none-any.whl

dataeval 0.86.8__py3-none-any.whl → 0.87.0__py3-none-any.whl