dataeval 0.86.8__py3-none-any.whl → 0.87.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. dataeval/__init__.py +1 -1
  2. dataeval/_version.py +2 -2
  3. dataeval/config.py +4 -19
  4. dataeval/data/_metadata.py +56 -27
  5. dataeval/data/_split.py +1 -1
  6. dataeval/data/selections/_classbalance.py +4 -3
  7. dataeval/data/selections/_classfilter.py +5 -5
  8. dataeval/data/selections/_indices.py +2 -2
  9. dataeval/data/selections/_prioritize.py +249 -29
  10. dataeval/data/selections/_reverse.py +1 -1
  11. dataeval/data/selections/_shuffle.py +2 -2
  12. dataeval/detectors/ood/__init__.py +2 -1
  13. dataeval/detectors/ood/base.py +38 -1
  14. dataeval/detectors/ood/knn.py +95 -0
  15. dataeval/metrics/bias/_balance.py +28 -21
  16. dataeval/metrics/bias/_diversity.py +4 -4
  17. dataeval/metrics/bias/_parity.py +2 -2
  18. dataeval/metrics/stats/_hashstats.py +19 -2
  19. dataeval/outputs/_workflows.py +20 -7
  20. dataeval/typing.py +14 -2
  21. dataeval/utils/__init__.py +2 -2
  22. dataeval/utils/_bin.py +7 -6
  23. dataeval/utils/data/__init__.py +2 -0
  24. dataeval/utils/data/_dataset.py +13 -6
  25. dataeval/utils/data/_validate.py +169 -0
  26. dataeval/workflows/sufficiency.py +53 -10
  27. {dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/METADATA +5 -17
  28. {dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/RECORD +30 -39
  29. dataeval/utils/datasets/__init__.py +0 -19
  30. dataeval/utils/datasets/_antiuav.py +0 -189
  31. dataeval/utils/datasets/_base.py +0 -262
  32. dataeval/utils/datasets/_cifar10.py +0 -201
  33. dataeval/utils/datasets/_fileio.py +0 -142
  34. dataeval/utils/datasets/_milco.py +0 -197
  35. dataeval/utils/datasets/_mixin.py +0 -54
  36. dataeval/utils/datasets/_mnist.py +0 -202
  37. dataeval/utils/datasets/_ships.py +0 -144
  38. dataeval/utils/datasets/_types.py +0 -48
  39. dataeval/utils/datasets/_voc.py +0 -583
  40. {dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/WHEEL +0 -0
  41. /dataeval-0.86.8.dist-info/licenses/LICENSE.txt → /dataeval-0.87.0.dist-info/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataeval
3
- Version: 0.86.8
3
+ Version: 0.87.0
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Project-URL: Homepage, https://dataeval.ai/
6
6
  Project-URL: Repository, https://github.com/aria-ml/dataeval/
@@ -8,7 +8,7 @@ Project-URL: Documentation, https://dataeval.readthedocs.io/
8
8
  Author-email: Andrew Weng <andrew.weng@ariacoustics.com>, Bill Peria <bill.peria@ariacoustics.com>, Jon Botts <jonathan.botts@ariacoustics.com>, Jonathan Christian <jonathan.christian@ariacoustics.com>, Justin McMillan <justin.mcmillan@ariacoustics.com>, Ryan Wood <ryan.wood@ariacoustics.com>, Scott Swan <scott.swan@ariacoustics.com>, Shaun Jullens <shaun.jullens@ariacoustics.com>
9
9
  Maintainer-email: ARiA <dataeval@ariacoustics.com>
10
10
  License-Expression: MIT
11
- License-File: LICENSE.txt
11
+ License-File: LICENSE
12
12
  Classifier: Development Status :: 4 - Beta
13
13
  Classifier: Intended Audience :: Science/Research
14
14
  Classifier: License :: OSI Approved :: MIT License
@@ -20,15 +20,12 @@ Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Topic :: Scientific/Engineering
22
22
  Requires-Python: <3.13,>=3.9
23
- Requires-Dist: defusedxml>=0.7.1
24
23
  Requires-Dist: fast-hdbscan==0.2.0
25
24
  Requires-Dist: lightgbm>=4
26
25
  Requires-Dist: numba>=0.59.1
27
26
  Requires-Dist: numpy>=1.24.2
28
27
  Requires-Dist: pandas>=2.0
29
- Requires-Dist: pillow>=10.3.0
30
28
  Requires-Dist: polars>=1.0.0
31
- Requires-Dist: requests>=2.32.3
32
29
  Requires-Dist: scikit-learn>=1.5.0
33
30
  Requires-Dist: scipy>=1.10
34
31
  Requires-Dist: torch>=2.2.0
@@ -123,14 +120,8 @@ micromamba create -f environment\environment.yaml -c pytorch
123
120
 
124
121
  ### **Installing from GitHub**
125
122
 
126
- To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
127
- download larger, binary source files.
128
-
129
- ```bash
130
- sudo apt-get install git-lfs
131
- ```
132
-
133
- Pull the source down and change to the DataEval project directory.
123
+ To install DataEval from source locally on Ubuntu, pull the source down and
124
+ change to the DataEval project directory.
134
125
 
135
126
  ```bash
136
127
  git clone https://github.com/aria-ml/dataeval.git
@@ -167,10 +158,7 @@ source .venv/bin/activate
167
158
 
168
159
  ## Contact Us
169
160
 
170
- If you have any questions, feel free to reach out to the people below:
171
-
172
- - **POC**: Scott Swan @scott.swan
173
- - **DPOC**: Andrew Weng @aweng
161
+ If you have any questions, feel free to reach out to [us](mailto:dataeval@ariacoustics.com)!
174
162
 
175
163
  ## Acknowledgement
176
164
 
@@ -1,23 +1,23 @@
1
- dataeval/__init__.py,sha256=dEDltdHOnbk4-XAbQwJLOZtCbRLZsDMnptWRwbF2r54,1773
1
+ dataeval/__init__.py,sha256=aFzX3SLx8wgc763RY772P41ZLqeHcUHRKW9XAN0KfHQ,1793
2
2
  dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
3
- dataeval/_version.py,sha256=IPUOExUy8nF4kYGtCPV5bg6_IYDRLVOKnFJcNllcO1M,513
4
- dataeval/config.py,sha256=g3Np0Q3J5Rzij6Gsz7tJh7eOxgwNPf6NsFYmAR8Atfs,4219
3
+ dataeval/_version.py,sha256=17MAD7hlEBqgdl5YlmaM4PJXKdgvw_hAzlX52HDAwlU,513
4
+ dataeval/config.py,sha256=lL73s_xa9pBxHHCnBKi59D_tl4vS7ig1rfWbIYkM_ac,3839
5
5
  dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- dataeval/typing.py,sha256=W8rqFFkAqE5a5ar3MmB-O5gcMJqvoDKXC8Y0ggBqAKo,7216
6
+ dataeval/typing.py,sha256=si4ZosMrHG-eYKSLCErAEI7Oo1giFRvWkaNK7EhRr1w,7513
7
7
  dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
8
8
  dataeval/data/_embeddings.py,sha256=PFjpdV9bfusCB4taTIYSzx1hP8nJb_KCkZTN8kMw-Hs,12885
9
9
  dataeval/data/_images.py,sha256=Rc_59CuU4zfN7Xm7an1XUx8ZghQg6a56VJWMZD9edRw,2654
10
- dataeval/data/_metadata.py,sha256=3aixstlgcAZXC0qNjwDlxjscC3IX1xjPt_FK0liRqoo,14423
10
+ dataeval/data/_metadata.py,sha256=jr6W0aC_fKMYPwRjSHkXl02QTZ63QgqOnbvVOCoLLsg,15250
11
11
  dataeval/data/_selection.py,sha256=r06xeiyK8nTWPLyItkoPQRWZI1i6LATSue_cuEbCdc4,4463
12
- dataeval/data/_split.py,sha256=nQABR05vxil2Qx7-uX4Fm0_DWpibskBGDJOYj_b1u3I,16737
12
+ dataeval/data/_split.py,sha256=0WOKwOxMBfzimo_VQUU0dbc4zQleA4OQFO4ho9W57hE,16732
13
13
  dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
14
- dataeval/data/selections/_classbalance.py,sha256=7v8ApoL3X8eCZ6fGDNTehE_bZ1loaP3TlhsJLaICVWg,1458
15
- dataeval/data/selections/_classfilter.py,sha256=bXfoYnWnAfUGsAQSlLufJeF2PfgRKekFHfBx8hv1r3w,4351
16
- dataeval/data/selections/_indices.py,sha256=RFsR9z10aM3N0gJSfKrukFpi-LkiQGXoOwXhmOQ5cpg,630
14
+ dataeval/data/selections/_classbalance.py,sha256=AqExg-QnYBcfBNzS1Ygsz3Cgb2cqcgGXE0-cseD8_vA,1580
15
+ dataeval/data/selections/_classfilter.py,sha256=oYTsqxwOV_mos4_BoNqhHOAKUWFMAXczjrJfEkusLIY,4422
16
+ dataeval/data/selections/_indices.py,sha256=5TqKyMJmFRoNfJT5T9yIMx-p5VeJmSmCl2Qxzwi0pPE,628
17
17
  dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
18
- dataeval/data/selections/_prioritize.py,sha256=4dGUvgR7m6NGzzPU0N_bw0Xhujo8b72Wo8L4PGHbvBo,11233
19
- dataeval/data/selections/_reverse.py,sha256=b67kNC43A5KpQOic5gifjo9HpJ7FMh4LFCrfovPiJ-M,368
20
- dataeval/data/selections/_shuffle.py,sha256=TSCIZBgLAn09iMI_WIw0aqwSU4NZLAhHG7t8H_CuDUY,1195
18
+ dataeval/data/selections/_prioritize.py,sha256=ss_GZ5MB2ohdNuB55C69TYNwV3PUSmk715gDJI6qfYA,20140
19
+ dataeval/data/selections/_reverse.py,sha256=FqYlpPg-0Vz75kbEhGFrJlzIGELSmDZxPlBMY18a57I,365
20
+ dataeval/data/selections/_shuffle.py,sha256=nZG1kxc7TfiznaPnDYqWTWnFBf2gWb8koCmEWnf8TWE,1242
21
21
  dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
22
22
  dataeval/detectors/drift/__init__.py,sha256=Jqv98oOVeC2tvHlNGxQ8RJ6De2q4SyS5lTpaYlb4ocM,756
23
23
  dataeval/detectors/drift/_base.py,sha256=6aNF1LzG3w1sNUrmSBbsvuN5IkQnoRikRacqobYge84,7592
@@ -36,9 +36,10 @@ dataeval/detectors/drift/_nml/_thresholds.py,sha256=WGdkLei9w_EvvsRHQzWdDyFVoZHI
36
36
  dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
37
37
  dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
38
38
  dataeval/detectors/linters/outliers.py,sha256=GaM9n8yPgBPzVOL_bxJCj0eCwobEEP4JHKHD9liRdlw,10130
39
- dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
39
+ dataeval/detectors/ood/__init__.py,sha256=qDoDdQetJY1xZB43dNzcOIO_8NiEuEU0z1QNU4QkEXs,341
40
40
  dataeval/detectors/ood/ae.py,sha256=cJ7nq4iwTvW8uihHCUhGfTlKsAlthJ2tOhgSsB27cOY,2941
41
- dataeval/detectors/ood/base.py,sha256=hx-TPJnUTZ7KcBkm8SbN1RGhtJyQN0XLajDyNqiZrJo,3042
41
+ dataeval/detectors/ood/base.py,sha256=fsjQ7wHRNJNPLGFw_6jvygkFFbv2G1ydwp8Zh1ncVlA,4374
42
+ dataeval/detectors/ood/knn.py,sha256=Fu77geQFHPYNOn81VIXUJ3yC3t5Ylv0ZgvwMeA2JX6I,3782
42
43
  dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
43
44
  dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
44
45
  dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
@@ -46,11 +47,11 @@ dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,898
46
47
  dataeval/metadata/_utils.py,sha256=BcGoYVfA4AkAWpInY5txOc3QBpsGf6cnnUAsHOQTJAE,1210
47
48
  dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
48
49
  dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
49
- dataeval/metrics/bias/_balance.py,sha256=fREtoMLUZPOf_ivqNKwij6oPiKMTk02ECO5rWURf3KY,5541
50
+ dataeval/metrics/bias/_balance.py,sha256=Yf0WNw9DxluFPNP-_wA1BcRRs-PRwocnHp0HScXS6t4,5719
50
51
  dataeval/metrics/bias/_completeness.py,sha256=2cvOXe7fhtxZGH_4QBuiCafIeamxFBarMiUBuEP7QGI,4596
51
52
  dataeval/metrics/bias/_coverage.py,sha256=v2x2hbOf2za9jFcSVSJUAoJ2BJfzzlCzt0mFIGtBL0A,3639
52
- dataeval/metrics/bias/_diversity.py,sha256=25udDKmel9IjeVT5nM4dOa1apda66QdRxBc922yuUvI,5830
53
- dataeval/metrics/bias/_parity.py,sha256=MKpqL4aoqEHkRl0vtGvVq9V3KBOtDFTtAo5I2GfIG4A,11443
53
+ dataeval/metrics/bias/_diversity.py,sha256=Z7UQzKp9bsmB-hC3_sY6HIJUJRkLHb5cVEoU79cNDzc,5800
54
+ dataeval/metrics/bias/_parity.py,sha256=ZIKc5OK6wQ4moleBJzGDfOPvyNzj03-KoHAGBZnO4pk,11433
54
55
  dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
55
56
  dataeval/metrics/estimators/_ber.py,sha256=7noeRyOJJYqrJ_jt90nRHtR2t2u5MIvTCmWt0_rd4EU,5370
56
57
  dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
@@ -60,7 +61,7 @@ dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_
60
61
  dataeval/metrics/stats/_base.py,sha256=R-hxoEPLreZcxYxBfyjbKfdoGMMTPiqJ5g2zSO-1UYM,12541
61
62
  dataeval/metrics/stats/_boxratiostats.py,sha256=ROZrlqgbowkGfCR5PJ5TL7Og40iMOdUqJnsCtaz_Xek,6450
62
63
  dataeval/metrics/stats/_dimensionstats.py,sha256=s2Juca8GG501nZd2SWL_YtXWkTfxUrUIAl53PO3_VeA,2876
63
- dataeval/metrics/stats/_hashstats.py,sha256=qa1CYRgOebkxqkALfffaPM-kJ074ZbyfpWbfOfuObSs,4758
64
+ dataeval/metrics/stats/_hashstats.py,sha256=8C4EgzmBd3HMNsSATTriLVcvaWfoSasTLYizONqUDf4,5388
64
65
  dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
65
66
  dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
66
67
  dataeval/metrics/stats/_pixelstats.py,sha256=N9e7RXuzSHtlJtWU7l5IcTTIXe2kOmWiuj6lnJpZWq0,3312
@@ -75,31 +76,21 @@ dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI
75
76
  dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
76
77
  dataeval/outputs/_stats.py,sha256=_ItGjs9YaMHqjivkR1YBcSErD5ICfa_-iV9nq0l8bTM,17451
77
78
  dataeval/outputs/_utils.py,sha256=NfhYaGT2PZlhIs8ICKUsPWHZXjhWYDkEJqBDdqMeaOM,929
78
- dataeval/outputs/_workflows.py,sha256=K786mOgegxVi81diUA-qpbwGEkwa8YA7Fk4ttgjJeaY,10831
79
- dataeval/utils/__init__.py,sha256=hRvyUK7b3d6JBEV5u47rFcOHEcmDYqAvZQw_T5pDAWw,264
79
+ dataeval/outputs/_workflows.py,sha256=_0U9VzCvqLIOlxqpngPhmPcUZMk57bF9qnnrkLUMoGY,11450
80
+ dataeval/utils/__init__.py,sha256=sjelzMPaTImF6isiRcp8UGDE3tppEpWS5GoR8WKPZ1k,242
80
81
  dataeval/utils/_array.py,sha256=bIDbnv15_hNzFn2Uc4WV1qRyFzubQj2nNYsFUDIdwT0,6335
81
- dataeval/utils/_bin.py,sha256=w3eJ2Szw5eapqQ0cGv731rhNgLFGW0cCz2pXo9I6CuY,7296
82
+ dataeval/utils/_bin.py,sha256=KpAnhzLBgh6PxMlM9dPPvuic0S1KNKwlcM1Vg-d4dGI,7364
82
83
  dataeval/utils/_clusterer.py,sha256=rUvEdyMwp95lffmt6xKMEwsjRXNoBS0n5mAS_HNOnck,5656
83
84
  dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8061
84
85
  dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
85
86
  dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
86
87
  dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
87
88
  dataeval/utils/_plot.py,sha256=1rnMkBRvTFLoTAHqXwF7c7GJ5_5iqlgarZKAzmYciLk,7225
88
- dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
89
- dataeval/utils/data/_dataset.py,sha256=tC_vqgWnmojAoAANo5BUVfEUYXl7GzOBSeYjR9olbDk,9506
89
+ dataeval/utils/data/__init__.py,sha256=AD7o2rllEdq4BVvlxljYKRXrXNer39XdGNuaRMbvH4Y,414
90
+ dataeval/utils/data/_dataset.py,sha256=901qUUcLg_HPg07N5uNabAZ00MGFCdOr7o6VbIEk2_I,9870
91
+ dataeval/utils/data/_validate.py,sha256=sea8B7DLbbxTqTjAQ5Vhs5XNRZWE5wBBqDgcKNVQBRA,6923
90
92
  dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
91
93
  dataeval/utils/data/metadata.py,sha256=L1c2bCiMj0aR0QCoKkjwBujIftJDEMgW_3ZbgeS8WHo,14703
92
- dataeval/utils/datasets/__init__.py,sha256=pAXqHX76yAoBI8XB3m6zGuW-u3s3PCoIXG5GDzxH7Zs,572
93
- dataeval/utils/datasets/_antiuav.py,sha256=kA_ia1fYNcJiz9SpCvh-Z8iSc7iJrdogjBI3soyaa7A,8304
94
- dataeval/utils/datasets/_base.py,sha256=pyfpJda3ku469M3TFRsJn9S2oAiQODOGTlLcdcoEW9U,9031
95
- dataeval/utils/datasets/_cifar10.py,sha256=hZc_A30yKYBbv2kvVdEkZ9egyEe6XBUnmksoIAoJ-5Y,8265
96
- dataeval/utils/datasets/_fileio.py,sha256=LEoFVNdryRdi7mKpWw-9D8lA6XMa-Jaszd85bv93POo,5454
97
- dataeval/utils/datasets/_milco.py,sha256=iXf4C1I3Eg_3gHKUe4XPi21yFMBO51zxTIqAkGf9bYg,7869
98
- dataeval/utils/datasets/_mixin.py,sha256=S8iii-SoYUsFFYNXjw2thlZkpBvRLnZ4XI8wTqOKXgU,1729
99
- dataeval/utils/datasets/_mnist.py,sha256=uz46sE1Go3TgGjG6x2cXckSVQ0mSg2mhgk8BUvLWjb0,8149
100
- dataeval/utils/datasets/_ships.py,sha256=6U04HAoM3jgLl1qv-NnxjZeSsBipcqWJBMhBMn5iIUY,5115
101
- dataeval/utils/datasets/_types.py,sha256=iSKyHXRlGuomXs0FHK6md8lXLQrQQ4fxgVOwr4o81bo,1089
102
- dataeval/utils/datasets/_voc.py,sha256=pafY112O80isYkrdy7Quie9SBm_TmYhREuyl8SxtsR0,24586
103
94
  dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
104
95
  dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
105
96
  dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
@@ -107,8 +98,8 @@ dataeval/utils/torch/_internal.py,sha256=9rzlMeM8i3p-ctulh9WDQATMXtlp-Jk2pBX7NGC
107
98
  dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
108
99
  dataeval/utils/torch/trainer.py,sha256=DRyPScGdE4o5Xo3BmD9p2PGOApzi1E-QfsBRNZ5IXW8,5544
109
100
  dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
110
- dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
111
- dataeval-0.86.8.dist-info/METADATA,sha256=rCf58-uzgjsTNZkY3LOBMSi5fhQ2cdAtnrrDI_eYR_I,5925
112
- dataeval-0.86.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
113
- dataeval-0.86.8.dist-info/licenses/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
114
- dataeval-0.86.8.dist-info/RECORD,,
101
+ dataeval/workflows/sufficiency.py,sha256=UAPjowFrmM6IJJaOk9GkH3nfQTyDy2_zOY55o2g3G1M,10072
102
+ dataeval-0.87.0.dist-info/METADATA,sha256=xhp28LbYD7FWbfhFfDgVzS_pi-E2TFgl-X33seCD2cE,5674
103
+ dataeval-0.87.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
104
+ dataeval-0.87.0.dist-info/licenses/LICENSE,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
105
+ dataeval-0.87.0.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- """Provides access to common Computer Vision datasets."""
2
-
3
- from dataeval.utils.datasets._antiuav import AntiUAVDetection
4
- from dataeval.utils.datasets._cifar10 import CIFAR10
5
- from dataeval.utils.datasets._milco import MILCO
6
- from dataeval.utils.datasets._mnist import MNIST
7
- from dataeval.utils.datasets._ships import Ships
8
- from dataeval.utils.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
9
-
10
- __all__ = [
11
- "MNIST",
12
- "Ships",
13
- "CIFAR10",
14
- "AntiUAVDetection",
15
- "MILCO",
16
- "VOCDetection",
17
- "VOCDetectionTorch",
18
- "VOCSegmentation",
19
- ]
@@ -1,189 +0,0 @@
1
- from __future__ import annotations
2
-
3
- __all__ = []
4
-
5
- from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Literal, Sequence
7
-
8
- from defusedxml.ElementTree import parse
9
- from numpy.typing import NDArray
10
-
11
- from dataeval.utils.datasets._base import BaseODDataset, DataLocation
12
- from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
13
-
14
- if TYPE_CHECKING:
15
- from dataeval.typing import Transform
16
-
17
-
18
- class AntiUAVDetection(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
19
- """
20
- A UAV detection dataset focused on detecting UAVs in natural images against large variation in backgrounds.
21
-
22
- The dataset comes from the paper
23
- `Vision-based Anti-UAV Detection and Tracking <https://ieeexplore.ieee.org/document/9785379>`_
24
- by Jie Zhao et. al. (2022).
25
-
26
- The dataset is approximately 1.3 GB and can be found `here <https://github.com/wangdongdut/DUT-Anti-UAV>`_.
27
- Images are collected against a variety of different backgrounds with a variety in the number and type of UAV.
28
- Ground truth labels are provided for the train, validation and test set.
29
- There are 35 different types of drones along with a variety in lighting conditions and weather conditions.
30
-
31
- There are 10,000 images: 5200 images in the training set, 2200 images in the validation set,
32
- and 2600 images in the test set.
33
- The dataset only has a single UAV class with the focus being on identifying object location in the image.
34
- Ground-truth bounding boxes are provided in (x0, y0, x1, y1) format.
35
- The images come in a variety of sizes from 3744 x 5616 to 160 x 240.
36
-
37
- Parameters
38
- ----------
39
- root : str or pathlib.Path
40
- Root directory where the data should be downloaded to or
41
- the ``antiuavdetection`` folder of the already downloaded data.
42
- image_set: "train", "val", "test", or "base", default "train"
43
- If "base", then the full dataset is selected (train, val and test).
44
- transforms : Transform, Sequence[Transform] or None, default None
45
- Transform(s) to apply to the data.
46
- download : bool, default False
47
- If True, downloads the dataset from the internet and puts it in root directory.
48
- Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
49
- verbose : bool, default False
50
- If True, outputs print statements.
51
-
52
- Attributes
53
- ----------
54
- path : pathlib.Path
55
- Location of the folder containing the data.
56
- image_set : "train", "val", "test", or "base"
57
- The selected image set from the dataset.
58
- index2label : dict[int, str]
59
- Dictionary which translates from class integers to the associated class strings.
60
- label2index : dict[str, int]
61
- Dictionary which translates from class strings to the associated class integers.
62
- metadata : DatasetMetadata
63
- Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
64
- transforms : Sequence[Transform]
65
- The transforms to be applied to the data.
66
- size : int
67
- The size of the dataset.
68
-
69
- Note
70
- ----
71
- Data License: `Apache 2.0 <https://www.apache.org/licenses/LICENSE-2.0.txt>`_
72
- """
73
-
74
- # Need to run the sha256 on the files and then store that
75
- _resources = [
76
- DataLocation(
77
- url="https://drive.usercontent.google.com/download?id=1RVsSGPUKTdmoyoPTBTWwroyulLek1eTj&export=download&authuser=0&confirm=t&uuid=6bca4f94-a242-4bc2-9663-fb03cd94ef2c&at=APcmpox0--NroQ_3bqeTFaJxP7Pw%3A1746552902927",
78
- filename="train.zip",
79
- md5=False,
80
- checksum="14f927290556df60e23cedfa80dffc10dc21e4a3b6843e150cfc49644376eece",
81
- ),
82
- DataLocation(
83
- url="https://drive.usercontent.google.com/download?id=1333uEQfGuqTKslRkkeLSCxylh6AQ0X6n&export=download&authuser=0&confirm=t&uuid=c2ad2f01-aca8-4a85-96bb-b8ef6e40feea&at=APcmpozY-8bhk3nZSFaYbE8rq1Fi%3A1746551543297",
84
- filename="val.zip",
85
- md5=False,
86
- checksum="238be0ceb3e7c5be6711ee3247e49df2750d52f91f54f5366c68bebac112ebf8",
87
- ),
88
- DataLocation(
89
- url="https://drive.usercontent.google.com/download?id=1L1zeW1EMDLlXHClSDcCjl3rs_A6sVai0&export=download&authuser=0&confirm=t&uuid=5a1d7650-d8cd-4461-8354-7daf7292f06c&at=APcmpozLQC1CuP-n5_UX2JnP53Zo%3A1746551676177",
90
- filename="test.zip",
91
- md5=False,
92
- checksum="a671989a01cff98c684aeb084e59b86f4152c50499d86152eb970a9fc7fb1cbe",
93
- ),
94
- ]
95
-
96
- index2label: dict[int, str] = {
97
- 0: "unknown",
98
- 1: "UAV",
99
- }
100
-
101
- def __init__(
102
- self,
103
- root: str | Path,
104
- image_set: Literal["train", "val", "test", "base"] = "train",
105
- transforms: Transform[NDArray[Any]] | Sequence[Transform[NDArray[Any]]] | None = None,
106
- download: bool = False,
107
- verbose: bool = False,
108
- ) -> None:
109
- super().__init__(
110
- root,
111
- image_set,
112
- transforms,
113
- download,
114
- verbose,
115
- )
116
-
117
- def _load_data(self) -> tuple[list[str], list[str], dict[str, list[Any]]]:
118
- filepaths: list[str] = []
119
- targets: list[str] = []
120
- datum_metadata: dict[str, list[Any]] = {}
121
-
122
- # If base, load all resources
123
- if self.image_set == "base":
124
- metadata_list: list[dict[str, Any]] = []
125
-
126
- for resource in self._resources:
127
- self._resource = resource
128
- resource_filepaths, resource_targets, resource_metadata = super()._load_data()
129
- filepaths.extend(resource_filepaths)
130
- targets.extend(resource_targets)
131
- metadata_list.append(resource_metadata)
132
-
133
- # Combine metadata
134
- for data_dict in metadata_list:
135
- for key, val in data_dict.items():
136
- str_key = str(key) # Ensure key is string
137
- if str_key not in datum_metadata:
138
- datum_metadata[str_key] = []
139
- datum_metadata[str_key].extend(val)
140
-
141
- else:
142
- # Grab only the desired data
143
- for resource in self._resources:
144
- if self.image_set in resource.filename:
145
- self._resource = resource
146
- resource_filepaths, resource_targets, resource_metadata = super()._load_data()
147
- filepaths.extend(resource_filepaths)
148
- targets.extend(resource_targets)
149
- datum_metadata.update(resource_metadata)
150
-
151
- return filepaths, targets, datum_metadata
152
-
153
- def _load_data_inner(self) -> tuple[list[str], list[str], dict[str, Any]]:
154
- resource_name = self._resource.filename[:-4]
155
- base_dir = self.path / resource_name
156
- data_folder = sorted((base_dir / "img").glob("*.jpg"))
157
- if not data_folder:
158
- raise FileNotFoundError
159
-
160
- file_data = {"image_id": [f"{resource_name}_{entry.name}" for entry in data_folder]}
161
- data = [str(entry) for entry in data_folder]
162
- annotations = sorted(str(entry) for entry in (base_dir / "xml").glob("*.xml"))
163
-
164
- return data, annotations, file_data
165
-
166
- def _read_annotations(self, annotation: str) -> tuple[list[list[float]], list[int], dict[str, Any]]:
167
- """Function for extracting the info for the label and boxes"""
168
- boxes: list[list[float]] = []
169
- labels = []
170
- root = parse(annotation).getroot()
171
- if root is None:
172
- raise ValueError(f"Unable to parse {annotation}")
173
- additional_meta: dict[str, Any] = {
174
- "image_width": int(root.findtext("size/width", default="-1")),
175
- "image_height": int(root.findtext("size/height", default="-1")),
176
- "image_depth": int(root.findtext("size/depth", default="-1")),
177
- }
178
- for obj in root.findall("object"):
179
- labels.append(1 if obj.findtext("name", default="") == "UAV" else 0)
180
- boxes.append(
181
- [
182
- float(obj.findtext("bndbox/xmin", default="0")),
183
- float(obj.findtext("bndbox/ymin", default="0")),
184
- float(obj.findtext("bndbox/xmax", default="0")),
185
- float(obj.findtext("bndbox/ymax", default="0")),
186
- ]
187
- )
188
-
189
- return boxes, labels, additional_meta
@@ -1,262 +0,0 @@
1
- from __future__ import annotations
2
-
3
- __all__ = []
4
-
5
- from abc import abstractmethod
6
- from pathlib import Path
7
- from typing import TYPE_CHECKING, Any, Generic, Iterator, Literal, NamedTuple, Sequence, TypeVar
8
-
9
- import numpy as np
10
-
11
- from dataeval.utils.datasets._fileio import _ensure_exists
12
- from dataeval.utils.datasets._mixin import BaseDatasetMixin
13
- from dataeval.utils.datasets._types import (
14
- AnnotatedDataset,
15
- DatasetMetadata,
16
- ImageClassificationDataset,
17
- ObjectDetectionDataset,
18
- ObjectDetectionTarget,
19
- SegmentationDataset,
20
- SegmentationTarget,
21
- )
22
-
23
- if TYPE_CHECKING:
24
- from dataeval.typing import Array, Transform
25
-
26
- _TArray = TypeVar("_TArray", bound=Array)
27
- else:
28
- _TArray = TypeVar("_TArray")
29
-
30
- _TTarget = TypeVar("_TTarget")
31
- _TRawTarget = TypeVar("_TRawTarget", list[int], list[str])
32
-
33
-
34
- class DataLocation(NamedTuple):
35
- url: str
36
- filename: str
37
- md5: bool
38
- checksum: str
39
-
40
-
41
- class BaseDataset(AnnotatedDataset[tuple[_TArray, _TTarget, dict[str, Any]]], Generic[_TArray, _TTarget, _TRawTarget]):
42
- """
43
- Base class for internet downloaded datasets.
44
- """
45
-
46
- # Each subclass should override the attributes below.
47
- # Each resource tuple must contain:
48
- # 'url': str, the URL to download from
49
- # 'filename': str, the name of the file once downloaded
50
- # 'md5': boolean, True if it's the checksum value is md5
51
- # 'checksum': str, the associated checksum for the downloaded file
52
- _resources: list[DataLocation]
53
- _resource_index: int = 0
54
- index2label: dict[int, str]
55
-
56
- def __init__(
57
- self,
58
- root: str | Path,
59
- image_set: Literal["train", "val", "test", "operational", "base"] = "train",
60
- transforms: Transform[_TArray] | Sequence[Transform[_TArray]] | None = None,
61
- download: bool = False,
62
- verbose: bool = False,
63
- ) -> None:
64
- self._root: Path = root.absolute() if isinstance(root, Path) else Path(root).absolute()
65
- transforms = transforms if transforms is not None else []
66
- self.transforms: Sequence[Transform[_TArray]] = transforms if isinstance(transforms, Sequence) else [transforms]
67
- self.image_set = image_set
68
- self._verbose = verbose
69
-
70
- # Internal Attributes
71
- self._download = download
72
- self._filepaths: list[str]
73
- self._targets: _TRawTarget
74
- self._datum_metadata: dict[str, list[Any]]
75
- self._resource: DataLocation = self._resources[self._resource_index]
76
- self._label2index = {v: k for k, v in self.index2label.items()}
77
-
78
- self.metadata: DatasetMetadata = DatasetMetadata(
79
- id=self._unique_id(),
80
- index2label=self.index2label,
81
- split=self.image_set,
82
- )
83
-
84
- # Load the data
85
- self.path: Path = self._get_dataset_dir()
86
- self._filepaths, self._targets, self._datum_metadata = self._load_data()
87
- self.size: int = len(self._filepaths)
88
-
89
- def __str__(self) -> str:
90
- nt = "\n "
91
- title = f"{self.__class__.__name__} Dataset"
92
- sep = "-" * len(title)
93
- attrs = [f"{k.capitalize()}: {v}" for k, v in self.__dict__.items() if not k.startswith("_")]
94
- return f"{title}\n{sep}{nt}{nt.join(attrs)}"
95
-
96
- @property
97
- def label2index(self) -> dict[str, int]:
98
- return self._label2index
99
-
100
- def __iter__(self) -> Iterator[tuple[_TArray, _TTarget, dict[str, Any]]]:
101
- for i in range(len(self)):
102
- yield self[i]
103
-
104
- def _get_dataset_dir(self) -> Path:
105
- # Create a designated folder for this dataset (named after the class)
106
- if self._root.stem.lower() == self.__class__.__name__.lower():
107
- dataset_dir: Path = self._root
108
- else:
109
- dataset_dir: Path = self._root / self.__class__.__name__.lower()
110
- if not dataset_dir.exists():
111
- dataset_dir.mkdir(parents=True, exist_ok=True)
112
- return dataset_dir
113
-
114
- def _unique_id(self) -> str:
115
- return f"{self.__class__.__name__}_{self.image_set}"
116
-
117
- def _load_data(self) -> tuple[list[str], _TRawTarget, dict[str, Any]]:
118
- """
119
- Function to determine if data can be accessed or if it needs to be downloaded and/or extracted.
120
- """
121
- if self._verbose:
122
- print(f"Determining if {self._resource.filename} needs to be downloaded.")
123
-
124
- try:
125
- result = self._load_data_inner()
126
- if self._verbose:
127
- print("No download needed, loaded data successfully.")
128
- except FileNotFoundError:
129
- _ensure_exists(*self._resource, self.path, self._root, self._download, self._verbose)
130
- result = self._load_data_inner()
131
- return result
132
-
133
- @abstractmethod
134
- def _load_data_inner(self) -> tuple[list[str], _TRawTarget, dict[str, Any]]: ...
135
-
136
- def _transform(self, image: _TArray) -> _TArray:
137
- """Function to transform the image prior to returning based on parameters passed in."""
138
- for transform in self.transforms:
139
- image = transform(image)
140
- return image
141
-
142
- def __len__(self) -> int:
143
- return self.size
144
-
145
-
146
- class BaseICDataset(
147
- BaseDataset[_TArray, _TArray, list[int]],
148
- BaseDatasetMixin[_TArray],
149
- ImageClassificationDataset[_TArray],
150
- ):
151
- """
152
- Base class for image classification datasets.
153
- """
154
-
155
- def __getitem__(self, index: int) -> tuple[_TArray, _TArray, dict[str, Any]]:
156
- """
157
- Args
158
- ----
159
- index : int
160
- Value of the desired data point
161
-
162
- Returns
163
- -------
164
- tuple[TArray, TArray, dict[str, Any]]
165
- Image, target, datum_metadata - where target is one-hot encoding of class.
166
- """
167
- # Get the associated label and score
168
- label = self._targets[index]
169
- score = self._one_hot_encode(label)
170
- # Get the image
171
- img = self._read_file(self._filepaths[index])
172
- img = self._transform(img)
173
-
174
- img_metadata = {key: val[index] for key, val in self._datum_metadata.items()}
175
-
176
- return img, score, img_metadata
177
-
178
-
179
- class BaseODDataset(
180
- BaseDataset[_TArray, ObjectDetectionTarget[_TArray], list[str]],
181
- BaseDatasetMixin[_TArray],
182
- ObjectDetectionDataset[_TArray],
183
- ):
184
- """
185
- Base class for object detection datasets.
186
- """
187
-
188
- _bboxes_per_size: bool = False
189
-
190
- def __getitem__(self, index: int) -> tuple[_TArray, ObjectDetectionTarget[_TArray], dict[str, Any]]:
191
- """
192
- Args
193
- ----
194
- index : int
195
- Value of the desired data point
196
-
197
- Returns
198
- -------
199
- tuple[TArray, ObjectDetectionTarget[TArray], dict[str, Any]]
200
- Image, target, datum_metadata - target.boxes returns boxes in x0, y0, x1, y1 format
201
- """
202
- # Grab the bounding boxes and labels from the annotations
203
- boxes, labels, additional_metadata = self._read_annotations(self._targets[index])
204
- # Get the image
205
- img = self._read_file(self._filepaths[index])
206
- img_size = img.shape
207
- img = self._transform(img)
208
- # Adjust labels if necessary
209
- if self._bboxes_per_size and boxes:
210
- boxes = boxes * np.array([[img_size[1], img_size[2], img_size[1], img_size[2]]])
211
- # Create the Object Detection Target
212
- target = ObjectDetectionTarget(self._as_array(boxes), self._as_array(labels), self._one_hot_encode(labels))
213
-
214
- img_metadata = {key: val[index] for key, val in self._datum_metadata.items()}
215
- img_metadata = img_metadata | additional_metadata
216
-
217
- return img, target, img_metadata
218
-
219
- @abstractmethod
220
- def _read_annotations(self, annotation: str) -> tuple[list[list[float]], list[int], dict[str, Any]]: ...
221
-
222
-
223
- class BaseSegDataset(
224
- BaseDataset[_TArray, SegmentationTarget[_TArray], list[str]],
225
- BaseDatasetMixin[_TArray],
226
- SegmentationDataset[_TArray],
227
- ):
228
- """
229
- Base class for segmentation datasets.
230
- """
231
-
232
- _masks: Sequence[str]
233
-
234
- def __getitem__(self, index: int) -> tuple[_TArray, SegmentationTarget[_TArray], dict[str, Any]]:
235
- """
236
- Args
237
- ----
238
- index : int
239
- Value of the desired data point
240
-
241
- Returns
242
- -------
243
- tuple[TArray, SegmentationTarget[TArray], dict[str, Any]]
244
- Image, target, datum_metadata - target.mask returns the ground truth mask
245
- """
246
- # Grab the labels from the annotations
247
- _, labels, additional_metadata = self._read_annotations(self._targets[index])
248
- # Grab the ground truth masks
249
- mask = self._read_file(self._masks[index])
250
- # Get the image
251
- img = self._read_file(self._filepaths[index])
252
- img = self._transform(img)
253
-
254
- target = SegmentationTarget(mask, self._as_array(labels), self._one_hot_encode(labels))
255
-
256
- img_metadata = {key: val[index] for key, val in self._datum_metadata.items()}
257
- img_metadata = img_metadata | additional_metadata
258
-
259
- return img, target, img_metadata
260
-
261
- @abstractmethod
262
- def _read_annotations(self, annotation: str) -> tuple[list[list[float]], list[int], dict[str, Any]]: ...